mirror of
https://github.com/bkaradzic/bx.git
synced 2026-02-17 12:42:34 +01:00
Initial commit.
This commit is contained in:
22
LICENSE
Normal file
22
LICENSE
Normal file
@@ -0,0 +1,22 @@
|
||||
Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
37
README.md
Normal file
37
README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
bx
|
||||
==
|
||||
|
||||
Base library.
|
||||
|
||||
Contact
|
||||
-------
|
||||
|
||||
Twitter @bkaradzic
|
||||
|
||||
Web http://www.stuckingeometry.com
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
99
include/bx/blockalloc.h
Normal file
99
include/bx/blockalloc.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_BLOCKALLOC_H__
|
||||
#define __BX_BLOCKALLOC_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
class BlockAlloc
|
||||
{
|
||||
public:
|
||||
static const uint16_t invalidIndex = 0xffff;
|
||||
static const uint32_t minElementSize = 2;
|
||||
|
||||
BlockAlloc()
|
||||
: m_data(NULL)
|
||||
, m_num(0)
|
||||
, m_size(0)
|
||||
, m_numFree(0)
|
||||
, m_freeIndex(invalidIndex)
|
||||
{
|
||||
}
|
||||
|
||||
BlockAlloc(void* _data, uint16_t _num, uint16_t _size)
|
||||
: m_data(_data)
|
||||
, m_num(_num)
|
||||
, m_size(_size)
|
||||
, m_numFree(_num)
|
||||
, m_freeIndex(0)
|
||||
{
|
||||
char* data = (char*)_data;
|
||||
uint16_t* index = (uint16_t*)_data;
|
||||
for (uint16_t ii = 0; ii < m_num-1; ++ii)
|
||||
{
|
||||
*index = ii+1;
|
||||
data += m_size;
|
||||
index = (uint16_t*)data;
|
||||
}
|
||||
*index = invalidIndex;
|
||||
}
|
||||
|
||||
~BlockAlloc()
|
||||
{
|
||||
}
|
||||
|
||||
void* alloc()
|
||||
{
|
||||
if (invalidIndex == m_freeIndex)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void* obj = ( (char*)m_data) + m_freeIndex*m_size;
|
||||
m_freeIndex = *( (uint16_t*)obj);
|
||||
--m_numFree;
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
void free(void* _obj)
|
||||
{
|
||||
uint16_t index = getIndex(_obj);
|
||||
BX_CHECK(index >= 0 && index < m_num, "index %d, m_num %d", index, m_num);
|
||||
|
||||
*( (uint16_t*)_obj) = m_freeIndex;
|
||||
m_freeIndex = index;
|
||||
++m_numFree;
|
||||
}
|
||||
|
||||
uint16_t getIndex(void* _obj) const
|
||||
{
|
||||
return (uint16_t)( ( (char*)_obj - (char*)m_data ) / m_size);
|
||||
}
|
||||
|
||||
uint16_t getNumFree() const
|
||||
{
|
||||
return m_numFree;
|
||||
}
|
||||
|
||||
void* getFromIndex(uint16_t _index)
|
||||
{
|
||||
return (char*)m_data + _index*m_size;
|
||||
}
|
||||
|
||||
private:
|
||||
void* m_data;
|
||||
uint16_t m_num;
|
||||
uint16_t m_size;
|
||||
uint16_t m_numFree;
|
||||
uint16_t m_freeIndex;
|
||||
};
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_BLOCKALLOC_H__
|
||||
23
include/bx/bx.h
Normal file
23
include/bx/bx.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_H__
|
||||
#define __BX_H__
|
||||
|
||||
#include "platform.h"
|
||||
#include "macros.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
}// namespace bx
|
||||
|
||||
#ifndef BX_NAMESPACE
|
||||
# define BX_NAMESPACE 0
|
||||
#elif BX_NAMESPACE
|
||||
using namespace bx;
|
||||
#endif // BX_NAMESPACE
|
||||
|
||||
#endif // __BX_H__
|
||||
|
||||
151
include/bx/commandline.h
Normal file
151
include/bx/commandline.h
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_COMMANDLINE_H__
|
||||
#define __BX_COMMANDLINE_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
class CommandLine
|
||||
{
|
||||
public:
|
||||
CommandLine()
|
||||
: m_argc(__argc)
|
||||
, m_argv(__argv)
|
||||
{
|
||||
}
|
||||
|
||||
CommandLine(int _argc, char const* const* _argv)
|
||||
: m_argc(_argc)
|
||||
, m_argv(_argv)
|
||||
{
|
||||
}
|
||||
|
||||
const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1)
|
||||
{
|
||||
const char* result = _findOption(_short, _long, _numParams);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool hasArg(const char _short, const char* _long = NULL)
|
||||
{
|
||||
const char* arg = findOption(_short, _long, 0);
|
||||
return NULL != arg;
|
||||
}
|
||||
|
||||
bool hasArg(const char* _long)
|
||||
{
|
||||
const char* arg = findOption('\0', _long, 0);
|
||||
return NULL != arg;
|
||||
}
|
||||
|
||||
bool hasArg(const char*& _value, const char _short, const char* _long = NULL)
|
||||
{
|
||||
const char* arg = findOption(_short, _long, 1);
|
||||
_value = arg;
|
||||
return NULL != arg;
|
||||
}
|
||||
|
||||
bool hasArg(int& _value, const char _short, const char* _long = NULL)
|
||||
{
|
||||
const char* arg = findOption(_short, _long, 1);
|
||||
if (NULL != arg)
|
||||
{
|
||||
_value = atoi(arg);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL)
|
||||
{
|
||||
const char* arg = findOption(_short, _long, 1);
|
||||
if (NULL != arg)
|
||||
{
|
||||
_value = atoi(arg);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool hasArg(bool& _value, const char _short, const char* _long = NULL)
|
||||
{
|
||||
const char* arg = findOption(_short, _long, 1);
|
||||
if (NULL != arg)
|
||||
{
|
||||
if ('0' == *arg || _stricmp(arg, "false") )
|
||||
{
|
||||
_value = false;
|
||||
}
|
||||
else if ('0' != *arg || _stricmp(arg, "true") )
|
||||
{
|
||||
_value = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
const char* _findOption(const char _short, const char* _long, int _numParams)
|
||||
{
|
||||
for (int ii = 0; ii < m_argc; ++ii)
|
||||
{
|
||||
const char* arg = m_argv[ii];
|
||||
if ('-' == *arg)
|
||||
{
|
||||
++arg;
|
||||
if (_short == *arg)
|
||||
{
|
||||
if (1 == strlen(arg) )
|
||||
{
|
||||
if (0 == _numParams)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
else if (ii+_numParams < m_argc
|
||||
&& '-' != *m_argv[ii+1] )
|
||||
{
|
||||
return m_argv[ii+1];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else if (NULL != _long
|
||||
&& '-' == *arg
|
||||
&& 0 == _stricmp(arg+1, _long) )
|
||||
{
|
||||
if (0 == _numParams)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
else if (ii+_numParams < m_argc
|
||||
&& '-' != *m_argv[ii+1] )
|
||||
{
|
||||
return m_argv[ii+1];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int m_argc;
|
||||
char const* const* m_argv;
|
||||
};
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif /// __BX_COMMANDLINE_H__
|
||||
19
include/bx/countof.h
Normal file
19
include/bx/countof.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_COUNTOF_H__
|
||||
#define __BX_COUNTOF_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
// http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/
|
||||
template<typename T, size_t N> char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N];
|
||||
#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) )
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_COUNTOF_H__
|
||||
110
include/bx/cpu.h
Normal file
110
include/bx/cpu.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_CPU_H__
|
||||
#define __BX_CPU_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
#if BX_COMPILER_MSVC
|
||||
# if BX_PLATFORM_XBOX360
|
||||
# include <ppcintrinsics.h>
|
||||
# include <xtl.h>
|
||||
# else
|
||||
# include <math.h> // math.h is included because VS bitches:
|
||||
// warning C4985: 'ceil': attributes not present on previous declaration.
|
||||
// must be included before intrin.h.
|
||||
# include <intrin.h>
|
||||
# include <windows.h>
|
||||
# endif // !BX_PLATFORM_XBOX360
|
||||
extern "C" void _ReadBarrier();
|
||||
extern "C" void _WriteBarrier();
|
||||
extern "C" void _ReadWriteBarrier();
|
||||
# pragma intrinsic(_ReadBarrier)
|
||||
# pragma intrinsic(_WriteBarrier)
|
||||
# pragma intrinsic(_ReadWriteBarrier)
|
||||
# pragma intrinsic(_InterlockedIncrement)
|
||||
# pragma intrinsic(_InterlockedDecrement)
|
||||
#endif // BX_COMPILER_MSVC
|
||||
|
||||
namespace bx
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
# define BX_CACHE_LINE_ALIGN_MARKER() __declspec(align(BX_CACHE_LINE_SIZE) ) struct {}
|
||||
#else
|
||||
# define BX_CACHE_LINE_ALIGN_MARKER() struct {} __attribute__( (__aligned__(BX_CACHE_LINE_SIZE) ) )
|
||||
#endif // BX_COMPILER_
|
||||
|
||||
#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER()
|
||||
|
||||
inline void readBarrier()
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
_ReadBarrier();
|
||||
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
asm volatile("":::"memory");
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
inline void writeBarrier()
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
_WriteBarrier();
|
||||
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
asm volatile("":::"memory");
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
inline void readWriteBarrier()
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
_ReadWriteBarrier();
|
||||
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
asm volatile("":::"memory");
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
inline void memoryBarrier()
|
||||
{
|
||||
#if BX_PLATFORM_XBOX360
|
||||
__lwsync();
|
||||
#elif BX_COMPILER_MSVC
|
||||
_mm_mfence();
|
||||
#else
|
||||
__sync_synchronize();
|
||||
// asm volatile("mfence":::"memory");
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
inline int32_t atomicIncr(volatile void* _var)
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
return _InterlockedIncrement( (volatile LONG*)(_var) );
|
||||
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
return __sync_fetch_and_add( (volatile int32_t*)_var, 1);
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
inline int32_t atomicDecr(volatile void* _var)
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
return _InterlockedDecrement( (volatile LONG*)(_var) );
|
||||
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
return __sync_fetch_and_sub( (volatile int32_t*)_var, 1);
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
inline void* atomicExchangePtr(void** _target, void* _ptr)
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
return InterlockedExchangePointer(_target, _ptr);
|
||||
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
return __sync_lock_test_and_set(_target, _ptr);
|
||||
#endif // BX_COMPILER
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_CPU_H__
|
||||
31
include/bx/debug.h
Normal file
31
include/bx/debug.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_DEBUG_H__
|
||||
#define __BX_DEBUG_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
inline void debugBreak()
|
||||
{
|
||||
#if BX_COMPILER_MSVC
|
||||
__debugbreak();
|
||||
#elif BX_CPU_ARM
|
||||
asm("bkpt 0");
|
||||
#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG)
|
||||
// NaCl doesn't like int 3:
|
||||
// NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules.
|
||||
__asm__ ("int $3");
|
||||
#else // cross platform implementation
|
||||
int* int3 = (int*)3L;
|
||||
*int3 = 3;
|
||||
#endif // BX
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_DEBUG_H__
|
||||
71
include/bx/endian.h
Normal file
71
include/bx/endian.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_ENDIAN_H__
|
||||
#define __BX_ENDIAN_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
inline uint16_t endianSwap(uint16_t _in)
|
||||
{
|
||||
return (_in>>8) | (_in<<8);
|
||||
}
|
||||
|
||||
inline uint32_t endianSwap(uint32_t _in)
|
||||
{
|
||||
return (_in>>24) | (_in<<24)
|
||||
| ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8)
|
||||
;
|
||||
}
|
||||
|
||||
inline uint64_t endianSwap(uint64_t _in)
|
||||
{
|
||||
return (_in>>56) | (_in<<56)
|
||||
| ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40)
|
||||
| ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24)
|
||||
| ( (_in&UINT64_C(0x000000ff00000000) )>>8) | ( (_in&UINT64_C(0x00000000ff000000) )<<8)
|
||||
;
|
||||
}
|
||||
|
||||
inline int16_t endianSwap(int16_t _in)
|
||||
{
|
||||
return (int16_t)endianSwap( (uint16_t)_in);
|
||||
}
|
||||
|
||||
inline int32_t endianSwap(int32_t _in)
|
||||
{
|
||||
return (int32_t)endianSwap( (uint32_t)_in);
|
||||
}
|
||||
|
||||
inline int64_t endianSwap(int64_t _in)
|
||||
{
|
||||
return (int64_t)endianSwap( (uint64_t)_in);
|
||||
}
|
||||
|
||||
template <typename Ty>
|
||||
inline Ty littleEndian(Ty& _in)
|
||||
{
|
||||
#if BX_CPU_ENDIAN_BIG
|
||||
endianSwap(_in);
|
||||
#else
|
||||
return _in;
|
||||
#endif // BX_CPU_ENDIAN_BIG
|
||||
}
|
||||
|
||||
template <typename Ty>
|
||||
inline Ty bigEndian(Ty& _in)
|
||||
{
|
||||
#if BX_CPU_ENDIAN_LITTLE
|
||||
return endianSwap(_in);
|
||||
#else
|
||||
return _in;
|
||||
#endif // BX_CPU_ENDIAN_LITTLE
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_ENDIAN_H__
|
||||
227
include/bx/float4_neon.h
Normal file
227
include/bx/float4_neon.h
Normal file
@@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4_NEON_H__
|
||||
#define __BX_FLOAT4_NEON_H__
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
namespace bx
|
||||
{
|
||||
|
||||
// Reference:
|
||||
// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
|
||||
// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/
|
||||
// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
|
||||
// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/
|
||||
// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/
|
||||
// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/
|
||||
|
||||
typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) );
|
||||
|
||||
#define ELEMx 0
|
||||
#define ELEMy 1
|
||||
#define ELEMz 2
|
||||
#define ELEMw 3
|
||||
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
|
||||
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
|
||||
{ \
|
||||
float4_t result; \
|
||||
result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
|
||||
result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
|
||||
result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
|
||||
result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
|
||||
return result; \
|
||||
}
|
||||
|
||||
#include "float4_swizzle.inl"
|
||||
|
||||
#undef IMPLEMENT_SWIZZLE
|
||||
#undef ELEMw
|
||||
#undef ELEMz
|
||||
#undef ELEMy
|
||||
#undef ELEMx
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_movelh_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_movelh_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_movehl_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_movehl_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_unpacklo_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_unpacklo_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_unpackhi_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _a; //_mm_unpackhi_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_x(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[0];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_y(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[1];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_z(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[2];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_w(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[3];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
|
||||
{
|
||||
const float32_t val[4] = {_x, _y, _z, _w};
|
||||
return __builtin_neon_vld1v4sf(val);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
|
||||
{
|
||||
const uint32_t val[4] = {_x, _y, _z, _w};
|
||||
return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
|
||||
{
|
||||
return __builtin_neon_vdup_nv4sf(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
|
||||
{
|
||||
return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_zero()
|
||||
{
|
||||
return vdupq_n_f32(0.0f);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
|
||||
{
|
||||
return vaddq_f32(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
|
||||
{
|
||||
return vsubq_f32(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
|
||||
{
|
||||
return vmulq_f32(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
|
||||
{
|
||||
return vrecpeq_f32(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
|
||||
{
|
||||
return vrsqrteq_f32(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
|
||||
{
|
||||
return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0);
|
||||
}
|
||||
|
||||
//BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
|
||||
//{
|
||||
// return _mm_andnot_ps(_b, _a);
|
||||
//}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
|
||||
{
|
||||
return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
|
||||
{
|
||||
const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
|
||||
const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
|
||||
const uint32x4_t add = vaddq_u32(tmp0, tmp1);
|
||||
const float4_t result = vreinterpretq_f32_u32(add);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
|
||||
{
|
||||
const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
|
||||
const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
|
||||
const uint32x4_t sub = vsubq_u32(tmp0, tmp1);
|
||||
const float4_t result = vreinterpretq_f32_u32(sub);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
|
||||
{
|
||||
const uint32x4_t tmp = vreinterpretq_u32_f32(_a);
|
||||
const uint32x4_t shift = vshlq_n_u32(tmp, _count);
|
||||
const float4_t result = vreinterpretq_f32_u32(shift);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
|
||||
{
|
||||
const uint32x4_t tmp = vreinterpretq_i32_f32(_a);
|
||||
const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0);
|
||||
const float4_t result = vreinterpretq_f32_u32(shift);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
|
||||
{
|
||||
const int32x4_t a = vreinterpretq_s32_f32(_a);
|
||||
const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1);
|
||||
const float4_t result = vreinterpretq_f32_s32(shift);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#define float4_div_nr float4_div_nr_ni
|
||||
#define float4_div float4_div_nr_ni
|
||||
#include "float4_ni.h"
|
||||
|
||||
#endif // __BX_FLOAT4_NEON_H__
|
||||
407
include/bx/float4_ni.h
Normal file
407
include/bx/float4_ni.h
Normal file
@@ -0,0 +1,407 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4_NI_H__
|
||||
#define __BX_FLOAT4_NI_H__
|
||||
|
||||
namespace bx
|
||||
{
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t xAyB = float4_shuf_xAyB(_a, _b);
|
||||
const float4_t zCwD = float4_shuf_zCwD(_a, _b);
|
||||
const float4_t result = float4_shuf_xyAB(xAyB, zCwD);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t xAyB = float4_shuf_xAyB(_a, _b);
|
||||
const float4_t zCwD = float4_shuf_zCwD(_a, _b);
|
||||
const float4_t result = float4_shuf_zwCD(xAyB, zCwD);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c)
|
||||
{
|
||||
const float4_t mul = float4_mul(_a, _b);
|
||||
const float4_t result = float4_add(mul, _c);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c)
|
||||
{
|
||||
const float4_t mul = float4_mul(_a, _b);
|
||||
const float4_t result = float4_sub(_c, mul);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t oneish = float4_isplat(0x3f800001);
|
||||
const float4_t est = float4_rcp_est(_b);
|
||||
const float4_t iter0 = float4_mul(_a, est);
|
||||
const float4_t tmp1 = float4_nmsub(_b, est, oneish);
|
||||
const float4_t result = float4_madd(tmp1, iter0, iter0);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a)
|
||||
{
|
||||
const float4_t one = float4_splat(1.0f);
|
||||
const float4_t result = float4_div(one, _a);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a)
|
||||
{
|
||||
const float4_t zwxy = float4_swiz_zwxy(_a);
|
||||
const float4_t tmp0 = float4_or(_a, zwxy);
|
||||
const float4_t tmp1 = float4_swiz_yyyy(_a);
|
||||
const float4_t tmp2 = float4_or(tmp0, tmp1);
|
||||
const float4_t mf000 = float4_ild(-1, 0, 0, 0);
|
||||
const float4_t result = float4_and(tmp2, mf000);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_orc_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t aorb = float4_or(_a, _b);
|
||||
const float4_t mffff = float4_isplat(-1);
|
||||
const float4_t result = float4_xor(aorb, mffff);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a)
|
||||
{
|
||||
const float4_t zero = float4_zero();
|
||||
const float4_t result = float4_sub(zero, _a);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t sel_a = float4_and(_a, _mask);
|
||||
const float4_t sel_b = float4_andc(_b, _mask);
|
||||
const float4_t result = float4_or(sel_a, sel_b);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t mask = float4_sra(_test, 31);
|
||||
const float4_t result = float4_selb(mask, _a, _b);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a)
|
||||
{
|
||||
const float4_t mffff = float4_isplat(-1);
|
||||
const float4_t result = float4_xor(_a, mffff);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a)
|
||||
{
|
||||
const float4_t a_neg = float4_neg(_a);
|
||||
const float4_t result = float4_max(a_neg, _a);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max)
|
||||
{
|
||||
const float4_t tmp = float4_min(_a, _max);
|
||||
const float4_t result = float4_max(tmp, _min);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s)
|
||||
{
|
||||
const float4_t ba = float4_sub(_b, _a);
|
||||
const float4_t result = float4_madd(_s, ba, _a);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a)
|
||||
{
|
||||
const float4_t half = float4_splat(0.5f);
|
||||
const float4_t one = float4_splat(1.0f);
|
||||
const float4_t zero = float4_zero();
|
||||
const float4_t tmp0 = float4_rsqrt_est(_a);
|
||||
const float4_t tmp1 = float4_madd(tmp0, _a, zero);
|
||||
const float4_t tmp2 = float4_madd(tmp1, half, zero);
|
||||
const float4_t tmp3 = float4_nmsub(tmp0, tmp1, one);
|
||||
const float4_t result = float4_madd(tmp3, tmp2, tmp1);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)
|
||||
{
|
||||
const float4_t one = float4_splat(1.0f);
|
||||
const float4_t sqrt = float4_sqrt(_a);
|
||||
const float4_t result = float4_div(one, sqrt);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a)
|
||||
{
|
||||
const float4_t rsqrt = float4_rsqrt_est(_a);
|
||||
const float4_t iter0 = float4_mul(_a, rsqrt);
|
||||
const float4_t iter1 = float4_mul(iter0, rsqrt);
|
||||
const float4_t half = float4_splat(0.5f);
|
||||
const float4_t half_rsqrt = float4_mul(half, rsqrt);
|
||||
const float4_t three = float4_splat(3.0f);
|
||||
const float4_t three_sub_iter1 = float4_sub(three, iter1);
|
||||
const float4_t result = float4_mul(half_rsqrt, three_sub_iter1);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a)
|
||||
{
|
||||
const float4_t half = float4_splat(0.5f);
|
||||
const float4_t ah = float4_mul(half, _a);
|
||||
const float4_t ashift = float4_sra(_a, 1);
|
||||
const float4_t magic = float4_isplat(0x5f3759df);
|
||||
const float4_t msuba = float4_isub(magic, ashift);
|
||||
const float4_t msubasq = float4_mul(msuba, msuba);
|
||||
const float4_t tmp0 = float4_splat(1.5f);
|
||||
const float4_t tmp1 = float4_mul(ah, msubasq);
|
||||
const float4_t tmp2 = float4_sub(tmp0, tmp1);
|
||||
const float4_t result = float4_mul(msuba, tmp2);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace float4_logexp_detail
|
||||
{
|
||||
BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b)
|
||||
{
|
||||
return float4_splat(_b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c)
|
||||
{
|
||||
const float4_t bbbb = float4_splat(_b);
|
||||
const float4_t poly0 = float4_poly0(_a, _c);
|
||||
const float4_t result = float4_madd(poly0, _a, bbbb);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d)
|
||||
{
|
||||
const float4_t bbbb = float4_splat(_b);
|
||||
const float4_t poly = float4_poly1(_a, _c, _d);
|
||||
const float4_t result = float4_madd(poly, _a, bbbb);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e)
|
||||
{
|
||||
const float4_t bbbb = float4_splat(_b);
|
||||
const float4_t poly = float4_poly2(_a, _c, _d, _e);
|
||||
const float4_t result = float4_madd(poly, _a, bbbb);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f)
|
||||
{
|
||||
const float4_t bbbb = float4_splat(_b);
|
||||
const float4_t poly = float4_poly3(_a, _c, _d, _e, _f);
|
||||
const float4_t result = float4_madd(poly, _a, bbbb);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g)
|
||||
{
|
||||
const float4_t bbbb = float4_splat(_b);
|
||||
const float4_t poly = float4_poly4(_a, _c, _d, _e, _f, _g);
|
||||
const float4_t result = float4_madd(poly, _a, bbbb);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a)
|
||||
{
|
||||
#if 1
|
||||
const float4_t result = float4_poly5(_a
|
||||
, 3.11578814719469302614f, -3.32419399085241980044f
|
||||
, 2.59883907202499966007f, -1.23152682416275988241f
|
||||
, 0.318212422185251071475f, -0.0344359067839062357313f
|
||||
);
|
||||
#elif 0
|
||||
const float4_t result = float4_poly4(_a
|
||||
, 2.8882704548164776201f, -2.52074962577807006663f
|
||||
, 1.48116647521213171641f, -0.465725644288844778798f
|
||||
, 0.0596515482674574969533f
|
||||
);
|
||||
#elif 0
|
||||
const float4_t result = float4_poly3(_a
|
||||
, 2.61761038894603480148f, -1.75647175389045657003f
|
||||
, 0.688243882994381274313f, -0.107254423828329604454f
|
||||
);
|
||||
#else
|
||||
const float4_t result = float4_poly2(_a
|
||||
, 2.28330284476918490682f, -1.04913055217340124191f
|
||||
, 0.204446009836232697516f
|
||||
);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a)
|
||||
{
|
||||
#if 1
|
||||
const float4_t result = float4_poly5(_a
|
||||
, 9.9999994e-1f, 6.9315308e-1f
|
||||
, 2.4015361e-1f, 5.5826318e-2f
|
||||
, 8.9893397e-3f, 1.8775767e-3f
|
||||
);
|
||||
#elif 0
|
||||
const float4_t result = float4_poly4(_a
|
||||
, 1.0000026f, 6.9300383e-1f
|
||||
, 2.4144275e-1f, 5.2011464e-2f
|
||||
, 1.3534167e-2f
|
||||
);
|
||||
#elif 0
|
||||
const float4_t result = float4_poly3(_a
|
||||
, 9.9992520e-1f, 6.9583356e-1f
|
||||
, 2.2606716e-1f, 7.8024521e-2f
|
||||
);
|
||||
#else
|
||||
const float4_t result = float4_poly2(_a
|
||||
, 1.0017247f, 6.5763628e-1f
|
||||
, 3.3718944e-1f
|
||||
);
|
||||
#endif // 0
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace float4_internal
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a)
|
||||
{
|
||||
const float4_t expmask = float4_isplat(0x7f800000);
|
||||
const float4_t mantmask = float4_isplat(0x007fffff);
|
||||
const float4_t one = float4_splat(1.0f);
|
||||
|
||||
const float4_t c127 = float4_isplat(127);
|
||||
const float4_t aexp = float4_and(_a, expmask);
|
||||
const float4_t aexpsr = float4_srl(aexp, 23);
|
||||
const float4_t tmp0 = float4_isub(aexpsr, c127);
|
||||
const float4_t exp = float4_itof(tmp0);
|
||||
|
||||
const float4_t amask = float4_and(_a, mantmask);
|
||||
const float4_t mant = float4_or(amask, one);
|
||||
|
||||
const float4_t poly = float4_logexp_detail::float4_logpoly(mant);
|
||||
|
||||
const float4_t mandiff = float4_sub(mant, one);
|
||||
const float4_t result = float4_madd(poly, mandiff, exp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a)
|
||||
{
|
||||
const float4_t min = float4_splat( 129.0f);
|
||||
const float4_t max = float4_splat(-126.99999f);
|
||||
const float4_t tmp0 = float4_min(_a, min);
|
||||
const float4_t aaaa = float4_max(tmp0, max);
|
||||
|
||||
const float4_t half = float4_splat(0.5f);
|
||||
const float4_t tmp2 = float4_sub(aaaa, half);
|
||||
const float4_t ipart = float4_ftoi(tmp2);
|
||||
const float4_t iround = float4_itof(ipart);
|
||||
const float4_t fpart = float4_sub(aaaa, iround);
|
||||
|
||||
const float4_t c127 = float4_isplat(127);
|
||||
const float4_t tmp5 = float4_iadd(ipart, c127);
|
||||
const float4_t expipart = float4_sll(tmp5, 23);
|
||||
|
||||
const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart);
|
||||
|
||||
const float4_t result = float4_mul(expipart, expfpart);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t alog2 = float4_log2(_a);
|
||||
const float4_t alog2b = float4_mul(alog2, _b);
|
||||
const float4_t result = float4_exp2(alog2b);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t xyzw = float4_mul(_a, _b);
|
||||
const float4_t xxxx = float4_swiz_xxxx(xyzw);
|
||||
const float4_t yyyy = float4_swiz_yyyy(xyzw);
|
||||
const float4_t zzzz = float4_swiz_zzzz(xyzw);
|
||||
const float4_t tmp1 = float4_add(xxxx, yyyy);
|
||||
const float4_t result = float4_add(zzzz, tmp1);
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t a_yzxw = float4_swiz_yzxw(_a);
|
||||
const float4_t a_zxyw = float4_swiz_zxyw(_a);
|
||||
const float4_t b_zxyw = float4_swiz_zxyw(_b);
|
||||
const float4_t b_yzxw = float4_swiz_yzxw(_b);
|
||||
const float4_t tmp = float4_mul(a_yzxw, b_zxyw);
|
||||
const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a)
|
||||
{
|
||||
const float4_t dot3 = float4_dot3(_a, _a);
|
||||
const float4_t invSqrt = float4_rsqrt(dot3);
|
||||
const float4_t result = float4_mul(_a, invSqrt);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_dot_ni(float4_t _a, float4_t _b)
|
||||
{
|
||||
const float4_t xyzw = float4_mul(_a, _b);
|
||||
const float4_t yzwx = float4_swiz_yzwx(xyzw);
|
||||
const float4_t tmp0 = float4_add(xyzw, yzwx);
|
||||
const float4_t zwxy = float4_swiz_zwxy(tmp0);
|
||||
const float4_t result = float4_add(tmp0, zwxy);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_FLOAT4_NI_H__
|
||||
522
include/bx/float4_ref.h
Normal file
522
include/bx/float4_ref.h
Normal file
@@ -0,0 +1,522 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4_REF_H__
|
||||
#define __BX_FLOAT4_REF_H__
|
||||
|
||||
#include <math.h> // sqrtf
|
||||
|
||||
namespace bx
|
||||
{
|
||||
typedef union float4_t
|
||||
{
|
||||
int32_t ixyzw[4];
|
||||
uint32_t uxyzw[4];
|
||||
float fxyzw[4];
|
||||
|
||||
} float4_t;
|
||||
|
||||
#define ELEMx 0
|
||||
#define ELEMy 1
|
||||
#define ELEMz 2
|
||||
#define ELEMw 3
|
||||
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
|
||||
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
|
||||
{ \
|
||||
float4_t result; \
|
||||
result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
|
||||
result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
|
||||
result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
|
||||
result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
|
||||
return result; \
|
||||
}
|
||||
|
||||
#include "float4_swizzle.inl"
|
||||
|
||||
#undef IMPLEMENT_SWIZZLE
|
||||
#undef ELEMw
|
||||
#undef ELEMz
|
||||
#undef ELEMy
|
||||
#undef ELEMx
|
||||
|
||||
#define IMPLEMENT_TEST(_xyzw, _mask) \
|
||||
BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
|
||||
{ \
|
||||
uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
|
||||
| ( (_test.uxyzw[2]>>31)<<2) \
|
||||
| ( (_test.uxyzw[1]>>31)<<1) \
|
||||
| (_test.uxyzw[0]>>31) \
|
||||
; \
|
||||
return 0 != (tmp&(_mask) ); \
|
||||
} \
|
||||
\
|
||||
BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
|
||||
{ \
|
||||
uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
|
||||
| ( (_test.uxyzw[2]>>31)<<2) \
|
||||
| ( (_test.uxyzw[1]>>31)<<1) \
|
||||
| (_test.uxyzw[0]>>31) \
|
||||
; \
|
||||
return (_mask) == (tmp&(_mask) ); \
|
||||
}
|
||||
|
||||
IMPLEMENT_TEST(x , 0x1);
|
||||
IMPLEMENT_TEST(y , 0x2);
|
||||
IMPLEMENT_TEST(xy , 0x3);
|
||||
IMPLEMENT_TEST(z , 0x4);
|
||||
IMPLEMENT_TEST(xz , 0x5);
|
||||
IMPLEMENT_TEST(yz , 0x6);
|
||||
IMPLEMENT_TEST(xyz , 0x7);
|
||||
IMPLEMENT_TEST(w , 0x8);
|
||||
IMPLEMENT_TEST(xw , 0x9);
|
||||
IMPLEMENT_TEST(yw , 0xa);
|
||||
IMPLEMENT_TEST(xyw , 0xb);
|
||||
IMPLEMENT_TEST(zw , 0xc);
|
||||
IMPLEMENT_TEST(xzw , 0xd);
|
||||
IMPLEMENT_TEST(yzw , 0xe);
|
||||
IMPLEMENT_TEST(xyzw , 0xf);
|
||||
|
||||
#undef IMPLEMENT_TEST
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0];
|
||||
result.uxyzw[1] = _a.uxyzw[1];
|
||||
result.uxyzw[2] = _b.uxyzw[0];
|
||||
result.uxyzw[3] = _b.uxyzw[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _b.uxyzw[0];
|
||||
result.uxyzw[1] = _b.uxyzw[1];
|
||||
result.uxyzw[2] = _a.uxyzw[0];
|
||||
result.uxyzw[3] = _a.uxyzw[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _b.uxyzw[2];
|
||||
result.uxyzw[1] = _b.uxyzw[3];
|
||||
result.uxyzw[2] = _a.uxyzw[2];
|
||||
result.uxyzw[3] = _a.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[2];
|
||||
result.uxyzw[1] = _a.uxyzw[3];
|
||||
result.uxyzw[2] = _b.uxyzw[2];
|
||||
result.uxyzw[3] = _b.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0];
|
||||
result.uxyzw[1] = _b.uxyzw[0];
|
||||
result.uxyzw[2] = _a.uxyzw[1];
|
||||
result.uxyzw[3] = _b.uxyzw[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[1];
|
||||
result.uxyzw[1] = _b.uxyzw[1];
|
||||
result.uxyzw[2] = _a.uxyzw[0];
|
||||
result.uxyzw[3] = _b.uxyzw[0];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[2];
|
||||
result.uxyzw[1] = _b.uxyzw[2];
|
||||
result.uxyzw[2] = _a.uxyzw[3];
|
||||
result.uxyzw[3] = _b.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _b.uxyzw[2];
|
||||
result.uxyzw[1] = _a.uxyzw[2];
|
||||
result.uxyzw[2] = _b.uxyzw[3];
|
||||
result.uxyzw[3] = _a.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_x(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[0];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_y(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[1];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_z(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[2];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_w(float4_t _a)
|
||||
{
|
||||
return _a.fxyzw[3];
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
|
||||
{
|
||||
return *reinterpret_cast<const float4_t*>(_ptr);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
|
||||
{
|
||||
*reinterpret_cast<float4_t*>(_ptr) = _a;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _x;
|
||||
result.fxyzw[1] = _y;
|
||||
result.fxyzw[2] = _z;
|
||||
result.fxyzw[3] = _w;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _x;
|
||||
result.uxyzw[1] = _y;
|
||||
result.uxyzw[2] = _z;
|
||||
result.uxyzw[3] = _w;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
|
||||
{
|
||||
float val = *reinterpret_cast<const float*>(_ptr);
|
||||
return float4_ld(val, val, val, val);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
|
||||
{
|
||||
return float4_ld(_a, _a, _a, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
|
||||
{
|
||||
return float4_ild(_a, _a, _a, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_zero()
|
||||
{
|
||||
return float4_ild(0, 0, 0, 0);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = (float)result.ixyzw[0];
|
||||
result.fxyzw[1] = (float)result.ixyzw[1];
|
||||
result.fxyzw[2] = (float)result.ixyzw[2];
|
||||
result.fxyzw[3] = (float)result.ixyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = (int)result.fxyzw[0];
|
||||
result.ixyzw[1] = (int)result.fxyzw[1];
|
||||
result.ixyzw[2] = (int)result.fxyzw[2];
|
||||
result.ixyzw[3] = (int)result.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
|
||||
{
|
||||
const float4_t tmp = float4_ftoi(_a);
|
||||
const float4_t result = float4_itof(tmp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _a.fxyzw[0] + _b.fxyzw[0];
|
||||
result.fxyzw[1] = _a.fxyzw[1] + _b.fxyzw[1];
|
||||
result.fxyzw[2] = _a.fxyzw[2] + _b.fxyzw[2];
|
||||
result.fxyzw[3] = _a.fxyzw[3] + _b.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _a.fxyzw[0] - _b.fxyzw[0];
|
||||
result.fxyzw[1] = _a.fxyzw[1] - _b.fxyzw[1];
|
||||
result.fxyzw[2] = _a.fxyzw[2] - _b.fxyzw[2];
|
||||
result.fxyzw[3] = _a.fxyzw[3] - _b.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0];
|
||||
result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1];
|
||||
result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2];
|
||||
result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0];
|
||||
result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1];
|
||||
result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2];
|
||||
result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = 1.0f / _a.fxyzw[0];
|
||||
result.fxyzw[1] = 1.0f / _a.fxyzw[1];
|
||||
result.fxyzw[2] = 1.0f / _a.fxyzw[2];
|
||||
result.fxyzw[3] = 1.0f / _a.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = sqrtf(_a.fxyzw[0]);
|
||||
result.fxyzw[1] = sqrtf(_a.fxyzw[1]);
|
||||
result.fxyzw[2] = sqrtf(_a.fxyzw[2]);
|
||||
result.fxyzw[3] = sqrtf(_a.fxyzw[3]);
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = 1.0f / sqrtf(_a.fxyzw[0]);
|
||||
result.fxyzw[1] = 1.0f / sqrtf(_a.fxyzw[1]);
|
||||
result.fxyzw[2] = 1.0f / sqrtf(_a.fxyzw[2]);
|
||||
result.fxyzw[3] = 1.0f / sqrtf(_a.fxyzw[3]);
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.fxyzw[0] == _b.fxyzw[0] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[1] = _a.fxyzw[1] == _b.fxyzw[1] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[2] = _a.fxyzw[2] == _b.fxyzw[2] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[3] = _a.fxyzw[3] == _b.fxyzw[3] ? 0xffffffff : 0x0;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? 0xffffffff : 0x0;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.fxyzw[0] <= _b.fxyzw[0] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[1] = _a.fxyzw[1] <= _b.fxyzw[1] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[2] = _a.fxyzw[2] <= _b.fxyzw[2] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[3] = _a.fxyzw[3] <= _b.fxyzw[3] ? 0xffffffff : 0x0;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 0xffffffff : 0x0;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0;
|
||||
result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 0xffffffff : 0x0;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];
|
||||
result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];
|
||||
result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];
|
||||
result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];
|
||||
result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];
|
||||
result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];
|
||||
result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0];
|
||||
result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1];
|
||||
result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2];
|
||||
result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0];
|
||||
result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1];
|
||||
result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2];
|
||||
result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0];
|
||||
result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1];
|
||||
result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2];
|
||||
result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0];
|
||||
result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1];
|
||||
result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2];
|
||||
result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0] << _count;
|
||||
result.uxyzw[1] = _a.uxyzw[1] << _count;
|
||||
result.uxyzw[2] = _a.uxyzw[2] << _count;
|
||||
result.uxyzw[3] = _a.uxyzw[3] << _count;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
|
||||
{
|
||||
float4_t result;
|
||||
result.uxyzw[0] = _a.uxyzw[0] >> _count;
|
||||
result.uxyzw[1] = _a.uxyzw[1] >> _count;
|
||||
result.uxyzw[2] = _a.uxyzw[2] >> _count;
|
||||
result.uxyzw[3] = _a.uxyzw[3] >> _count;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.ixyzw[0] >> _count;
|
||||
result.ixyzw[1] = _a.ixyzw[1] >> _count;
|
||||
result.ixyzw[2] = _a.ixyzw[2] >> _count;
|
||||
result.ixyzw[3] = _a.ixyzw[3] >> _count;
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0];
|
||||
result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1];
|
||||
result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2];
|
||||
result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
|
||||
{
|
||||
float4_t result;
|
||||
result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0];
|
||||
result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1];
|
||||
result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2];
|
||||
result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3];
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#define float4_shuf_xAzC float4_shuf_xAzC_ni
|
||||
#define float4_shuf_yBwD float4_shuf_yBwD_ni
|
||||
#define float4_rcp float4_rcp_ni
|
||||
#define float4_orx float4_orx_ni
|
||||
#define float4_orc float4_orc_ni
|
||||
#define float4_neg float4_neg_ni
|
||||
#define float4_madd float4_madd_ni
|
||||
#define float4_nmsub float4_nmsub_ni
|
||||
#define float4_div_nr float4_div_nr_ni
|
||||
#define float4_selb float4_selb_ni
|
||||
#define float4_sels float4_sels_ni
|
||||
#define float4_not float4_not_ni
|
||||
#define float4_abs float4_abs_ni
|
||||
#define float4_clamp float4_clamp_ni
|
||||
#define float4_lerp float4_lerp_ni
|
||||
#define float4_rsqrt float4_rsqrt_ni
|
||||
#define float4_rsqrt_nr float4_rsqrt_nr_ni
|
||||
#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
|
||||
#define float4_sqrt_nr float4_sqrt_nr_ni
|
||||
#define float4_log2 float4_log2_ni
|
||||
#define float4_exp2 float4_exp2_ni
|
||||
#define float4_pow float4_pow_ni
|
||||
#define float4_cross3 float4_cross3_ni
|
||||
#define float4_normalize3 float4_normalize3_ni
|
||||
#define float4_dot3 float4_dot3_ni
|
||||
#define float4_dot float4_dot_ni
|
||||
#include "float4_ni.h"
|
||||
|
||||
#endif // __BX_FLOAT4_REF_H__
|
||||
400
include/bx/float4_sse.h
Normal file
400
include/bx/float4_sse.h
Normal file
@@ -0,0 +1,400 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4_SSE_H__
|
||||
#define __BX_FLOAT4_SSE_H__
|
||||
|
||||
#if !defined(__SSE2__)
|
||||
# error "float4_t requires at least SSE2"
|
||||
#endif // !defined(__SSE2__)
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <emmintrin.h> // __m128i
|
||||
#if defined(__SSE4_1__)
|
||||
# include <smmintrin.h>
|
||||
#endif // defined(__SSE4_1__)
|
||||
#include <xmmintrin.h> // __m128
|
||||
|
||||
namespace bx
|
||||
{
|
||||
|
||||
typedef __m128 float4_t;
|
||||
|
||||
#define ELEMx 0
|
||||
#define ELEMy 1
|
||||
#define ELEMz 2
|
||||
#define ELEMw 3
|
||||
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
|
||||
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
|
||||
{ \
|
||||
return _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \
|
||||
}
|
||||
|
||||
#include "float4_swizzle.inl"
|
||||
|
||||
#undef IMPLEMENT_SWIZZLE
|
||||
#undef ELEMw
|
||||
#undef ELEMz
|
||||
#undef ELEMy
|
||||
#undef ELEMx
|
||||
|
||||
#define IMPLEMENT_TEST(_xyzw, _mask) \
|
||||
BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
|
||||
{ \
|
||||
return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \
|
||||
} \
|
||||
\
|
||||
BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
|
||||
{ \
|
||||
return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \
|
||||
}
|
||||
|
||||
IMPLEMENT_TEST(x , 0x1);
|
||||
IMPLEMENT_TEST(y , 0x2);
|
||||
IMPLEMENT_TEST(xy , 0x3);
|
||||
IMPLEMENT_TEST(z , 0x4);
|
||||
IMPLEMENT_TEST(xz , 0x5);
|
||||
IMPLEMENT_TEST(yz , 0x6);
|
||||
IMPLEMENT_TEST(xyz , 0x7);
|
||||
IMPLEMENT_TEST(w , 0x8);
|
||||
IMPLEMENT_TEST(xw , 0x9);
|
||||
IMPLEMENT_TEST(yw , 0xa);
|
||||
IMPLEMENT_TEST(xyw , 0xb);
|
||||
IMPLEMENT_TEST(zw , 0xc);
|
||||
IMPLEMENT_TEST(xzw , 0xd);
|
||||
IMPLEMENT_TEST(yzw , 0xe);
|
||||
IMPLEMENT_TEST(xyzw , 0xf);
|
||||
|
||||
#undef IMPLEMENT_TEST
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_movelh_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_movelh_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_movehl_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_movehl_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_unpacklo_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_unpacklo_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_unpackhi_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_unpackhi_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_x(float4_t _a)
|
||||
{
|
||||
return _mm_cvtss_f32(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_y(float4_t _a)
|
||||
{
|
||||
const float4_t yyyy = float4_swiz_yyyy(_a);
|
||||
const float result = _mm_cvtss_f32(yyyy);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_z(float4_t _a)
|
||||
{
|
||||
const float4_t zzzz = float4_swiz_zzzz(_a);
|
||||
const float result = _mm_cvtss_f32(zzzz);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float float4_w(float4_t _a)
|
||||
{
|
||||
const float4_t wwww = float4_swiz_wwww(_a);
|
||||
const float result = _mm_cvtss_f32(wwww);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
|
||||
{
|
||||
return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
|
||||
{
|
||||
_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
|
||||
{
|
||||
return _mm_set_ps(_w, _z, _y, _x);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
|
||||
{
|
||||
const __m128i set = _mm_set_epi32(_w, _z, _y, _x);
|
||||
const float4_t result = _mm_castsi128_ps(set);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
|
||||
{
|
||||
const float4_t x___ = _mm_load_ss(reinterpret_cast<const float*>(_ptr) );
|
||||
const float4_t result = float4_swiz_xxxx(x___);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
|
||||
{
|
||||
return _mm_set1_ps(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
|
||||
{
|
||||
const __m128i splat = _mm_set1_epi32(_a);
|
||||
const float4_t result = _mm_castsi128_ps(splat);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_zero()
|
||||
{
|
||||
return _mm_setzero_ps();
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
|
||||
{
|
||||
const __m128i itof = _mm_castps_si128(_a);
|
||||
const float4_t result = _mm_cvtepi32_ps(itof);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
|
||||
{
|
||||
const __m128i ftoi = _mm_cvtps_epi32(_a);
|
||||
const float4_t result = _mm_castsi128_ps(ftoi);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
|
||||
{
|
||||
#if defined(__SSE4_1__)
|
||||
return _mm_round_ps(_a, _MM_FROUND_NINT);
|
||||
#else
|
||||
const __m128i round = _mm_cvtps_epi32(_a);
|
||||
const float4_t result = _mm_cvtepi32_ps(round);
|
||||
|
||||
return result;
|
||||
#endif // defined(__SSE4_1__)
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_add_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_sub_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_mul_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_div_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
|
||||
{
|
||||
return _mm_rcp_ps(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
|
||||
{
|
||||
return _mm_sqrt_ps(_a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
|
||||
{
|
||||
return _mm_rsqrt_ps(_a);
|
||||
}
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_dp_ps(_a, _b, 0x77);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_dp_ps(_a, _b, 0xFF);
|
||||
}
|
||||
#endif // defined(__SSE4__)
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_cmpeq_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_cmplt_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_cmple_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_cmpgt_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_cmpge_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_min_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_max_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_and_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_andnot_ps(_b, _a);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_or_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
|
||||
{
|
||||
return _mm_xor_ps(_a, _b);
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
|
||||
{
|
||||
const __m128i a = _mm_castps_si128(_a);
|
||||
const __m128i shift = _mm_slli_epi32(a, _count);
|
||||
const float4_t result = _mm_castsi128_ps(shift);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
|
||||
{
|
||||
const __m128i a = _mm_castps_si128(_a);
|
||||
const __m128i shift = _mm_srli_epi32(a, _count);
|
||||
const float4_t result = _mm_castsi128_ps(shift);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
|
||||
{
|
||||
const __m128i a = _mm_castps_si128(_a);
|
||||
const __m128i shift = _mm_srai_epi32(a, _count);
|
||||
const float4_t result = _mm_castsi128_ps(shift);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
|
||||
{
|
||||
const __m128i a = _mm_castps_si128(_a);
|
||||
const __m128i b = _mm_castps_si128(_b);
|
||||
const __m128i add = _mm_add_epi32(a, b);
|
||||
const float4_t result = _mm_castsi128_ps(add);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
|
||||
{
|
||||
const __m128i a = _mm_castps_si128(_a);
|
||||
const __m128i b = _mm_castps_si128(_b);
|
||||
const __m128i sub = _mm_sub_epi32(a, b);
|
||||
const float4_t result = _mm_castsi128_ps(sub);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#define float4_shuf_xAzC float4_shuf_xAzC_ni
|
||||
#define float4_shuf_yBwD float4_shuf_yBwD_ni
|
||||
#define float4_rcp float4_rcp_ni
|
||||
#define float4_orx float4_orx_ni
|
||||
#define float4_orc float4_orc_ni
|
||||
#define float4_neg float4_neg_ni
|
||||
#define float4_madd float4_madd_ni
|
||||
#define float4_nmsub float4_nmsub_ni
|
||||
#define float4_div_nr float4_div_nr_ni
|
||||
#define float4_selb float4_selb_ni
|
||||
#define float4_sels float4_sels_ni
|
||||
#define float4_not float4_not_ni
|
||||
#define float4_abs float4_abs_ni
|
||||
#define float4_clamp float4_clamp_ni
|
||||
#define float4_lerp float4_lerp_ni
|
||||
#define float4_rsqrt float4_rsqrt_ni
|
||||
#define float4_rsqrt_nr float4_rsqrt_nr_ni
|
||||
#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
|
||||
#define float4_sqrt_nr float4_sqrt_nr_ni
|
||||
#define float4_log2 float4_log2_ni
|
||||
#define float4_exp2 float4_exp2_ni
|
||||
#define float4_pow float4_pow_ni
|
||||
#define float4_cross3 float4_cross3_ni
|
||||
#define float4_normalize3 float4_normalize3_ni
|
||||
#if !defined(__SSE4_1__)
|
||||
#define float4_dot3 float4_dot3_ni
|
||||
#define float4_dot float4_dot_ni
|
||||
#endif // defined(__SSE4_1__)
|
||||
#include "float4_ni.h"
|
||||
|
||||
#endif // __FLOAT4_SSE_H__
|
||||
266
include/bx/float4_swizzle.inl
Normal file
266
include/bx/float4_swizzle.inl
Normal file
@@ -0,0 +1,266 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4_T_H__
|
||||
# error "xmacro file, must be included from float4_*.h"
|
||||
#endif // __BX_FLOAT4_T_H__
|
||||
|
||||
// included from float4_t.h
|
||||
IMPLEMENT_SWIZZLE(x, x, x, x)
|
||||
IMPLEMENT_SWIZZLE(x, x, x, y)
|
||||
IMPLEMENT_SWIZZLE(x, x, x, z)
|
||||
IMPLEMENT_SWIZZLE(x, x, x, w)
|
||||
IMPLEMENT_SWIZZLE(x, x, y, x)
|
||||
IMPLEMENT_SWIZZLE(x, x, y, y)
|
||||
IMPLEMENT_SWIZZLE(x, x, y, z)
|
||||
IMPLEMENT_SWIZZLE(x, x, y, w)
|
||||
IMPLEMENT_SWIZZLE(x, x, z, x)
|
||||
IMPLEMENT_SWIZZLE(x, x, z, y)
|
||||
IMPLEMENT_SWIZZLE(x, x, z, z)
|
||||
IMPLEMENT_SWIZZLE(x, x, z, w)
|
||||
IMPLEMENT_SWIZZLE(x, x, w, x)
|
||||
IMPLEMENT_SWIZZLE(x, x, w, y)
|
||||
IMPLEMENT_SWIZZLE(x, x, w, z)
|
||||
IMPLEMENT_SWIZZLE(x, x, w, w)
|
||||
IMPLEMENT_SWIZZLE(x, y, x, x)
|
||||
IMPLEMENT_SWIZZLE(x, y, x, y)
|
||||
IMPLEMENT_SWIZZLE(x, y, x, z)
|
||||
IMPLEMENT_SWIZZLE(x, y, x, w)
|
||||
IMPLEMENT_SWIZZLE(x, y, y, x)
|
||||
IMPLEMENT_SWIZZLE(x, y, y, y)
|
||||
IMPLEMENT_SWIZZLE(x, y, y, z)
|
||||
IMPLEMENT_SWIZZLE(x, y, y, w)
|
||||
IMPLEMENT_SWIZZLE(x, y, z, x)
|
||||
IMPLEMENT_SWIZZLE(x, y, z, y)
|
||||
IMPLEMENT_SWIZZLE(x, y, z, z)
|
||||
// IMPLEMENT_SWIZZLE(x, y, z, w)
|
||||
IMPLEMENT_SWIZZLE(x, y, w, x)
|
||||
IMPLEMENT_SWIZZLE(x, y, w, y)
|
||||
IMPLEMENT_SWIZZLE(x, y, w, z)
|
||||
IMPLEMENT_SWIZZLE(x, y, w, w)
|
||||
IMPLEMENT_SWIZZLE(x, z, x, x)
|
||||
IMPLEMENT_SWIZZLE(x, z, x, y)
|
||||
IMPLEMENT_SWIZZLE(x, z, x, z)
|
||||
IMPLEMENT_SWIZZLE(x, z, x, w)
|
||||
IMPLEMENT_SWIZZLE(x, z, y, x)
|
||||
IMPLEMENT_SWIZZLE(x, z, y, y)
|
||||
IMPLEMENT_SWIZZLE(x, z, y, z)
|
||||
IMPLEMENT_SWIZZLE(x, z, y, w)
|
||||
IMPLEMENT_SWIZZLE(x, z, z, x)
|
||||
IMPLEMENT_SWIZZLE(x, z, z, y)
|
||||
IMPLEMENT_SWIZZLE(x, z, z, z)
|
||||
IMPLEMENT_SWIZZLE(x, z, z, w)
|
||||
IMPLEMENT_SWIZZLE(x, z, w, x)
|
||||
IMPLEMENT_SWIZZLE(x, z, w, y)
|
||||
IMPLEMENT_SWIZZLE(x, z, w, z)
|
||||
IMPLEMENT_SWIZZLE(x, z, w, w)
|
||||
IMPLEMENT_SWIZZLE(x, w, x, x)
|
||||
IMPLEMENT_SWIZZLE(x, w, x, y)
|
||||
IMPLEMENT_SWIZZLE(x, w, x, z)
|
||||
IMPLEMENT_SWIZZLE(x, w, x, w)
|
||||
IMPLEMENT_SWIZZLE(x, w, y, x)
|
||||
IMPLEMENT_SWIZZLE(x, w, y, y)
|
||||
IMPLEMENT_SWIZZLE(x, w, y, z)
|
||||
IMPLEMENT_SWIZZLE(x, w, y, w)
|
||||
IMPLEMENT_SWIZZLE(x, w, z, x)
|
||||
IMPLEMENT_SWIZZLE(x, w, z, y)
|
||||
IMPLEMENT_SWIZZLE(x, w, z, z)
|
||||
IMPLEMENT_SWIZZLE(x, w, z, w)
|
||||
IMPLEMENT_SWIZZLE(x, w, w, x)
|
||||
IMPLEMENT_SWIZZLE(x, w, w, y)
|
||||
IMPLEMENT_SWIZZLE(x, w, w, z)
|
||||
IMPLEMENT_SWIZZLE(x, w, w, w)
|
||||
IMPLEMENT_SWIZZLE(y, x, x, x)
|
||||
IMPLEMENT_SWIZZLE(y, x, x, y)
|
||||
IMPLEMENT_SWIZZLE(y, x, x, z)
|
||||
IMPLEMENT_SWIZZLE(y, x, x, w)
|
||||
IMPLEMENT_SWIZZLE(y, x, y, x)
|
||||
IMPLEMENT_SWIZZLE(y, x, y, y)
|
||||
IMPLEMENT_SWIZZLE(y, x, y, z)
|
||||
IMPLEMENT_SWIZZLE(y, x, y, w)
|
||||
IMPLEMENT_SWIZZLE(y, x, z, x)
|
||||
IMPLEMENT_SWIZZLE(y, x, z, y)
|
||||
IMPLEMENT_SWIZZLE(y, x, z, z)
|
||||
IMPLEMENT_SWIZZLE(y, x, z, w)
|
||||
IMPLEMENT_SWIZZLE(y, x, w, x)
|
||||
IMPLEMENT_SWIZZLE(y, x, w, y)
|
||||
IMPLEMENT_SWIZZLE(y, x, w, z)
|
||||
IMPLEMENT_SWIZZLE(y, x, w, w)
|
||||
IMPLEMENT_SWIZZLE(y, y, x, x)
|
||||
IMPLEMENT_SWIZZLE(y, y, x, y)
|
||||
IMPLEMENT_SWIZZLE(y, y, x, z)
|
||||
IMPLEMENT_SWIZZLE(y, y, x, w)
|
||||
IMPLEMENT_SWIZZLE(y, y, y, x)
|
||||
IMPLEMENT_SWIZZLE(y, y, y, y)
|
||||
IMPLEMENT_SWIZZLE(y, y, y, z)
|
||||
IMPLEMENT_SWIZZLE(y, y, y, w)
|
||||
IMPLEMENT_SWIZZLE(y, y, z, x)
|
||||
IMPLEMENT_SWIZZLE(y, y, z, y)
|
||||
IMPLEMENT_SWIZZLE(y, y, z, z)
|
||||
IMPLEMENT_SWIZZLE(y, y, z, w)
|
||||
IMPLEMENT_SWIZZLE(y, y, w, x)
|
||||
IMPLEMENT_SWIZZLE(y, y, w, y)
|
||||
IMPLEMENT_SWIZZLE(y, y, w, z)
|
||||
IMPLEMENT_SWIZZLE(y, y, w, w)
|
||||
IMPLEMENT_SWIZZLE(y, z, x, x)
|
||||
IMPLEMENT_SWIZZLE(y, z, x, y)
|
||||
IMPLEMENT_SWIZZLE(y, z, x, z)
|
||||
IMPLEMENT_SWIZZLE(y, z, x, w)
|
||||
IMPLEMENT_SWIZZLE(y, z, y, x)
|
||||
IMPLEMENT_SWIZZLE(y, z, y, y)
|
||||
IMPLEMENT_SWIZZLE(y, z, y, z)
|
||||
IMPLEMENT_SWIZZLE(y, z, y, w)
|
||||
IMPLEMENT_SWIZZLE(y, z, z, x)
|
||||
IMPLEMENT_SWIZZLE(y, z, z, y)
|
||||
IMPLEMENT_SWIZZLE(y, z, z, z)
|
||||
IMPLEMENT_SWIZZLE(y, z, z, w)
|
||||
IMPLEMENT_SWIZZLE(y, z, w, x)
|
||||
IMPLEMENT_SWIZZLE(y, z, w, y)
|
||||
IMPLEMENT_SWIZZLE(y, z, w, z)
|
||||
IMPLEMENT_SWIZZLE(y, z, w, w)
|
||||
IMPLEMENT_SWIZZLE(y, w, x, x)
|
||||
IMPLEMENT_SWIZZLE(y, w, x, y)
|
||||
IMPLEMENT_SWIZZLE(y, w, x, z)
|
||||
IMPLEMENT_SWIZZLE(y, w, x, w)
|
||||
IMPLEMENT_SWIZZLE(y, w, y, x)
|
||||
IMPLEMENT_SWIZZLE(y, w, y, y)
|
||||
IMPLEMENT_SWIZZLE(y, w, y, z)
|
||||
IMPLEMENT_SWIZZLE(y, w, y, w)
|
||||
IMPLEMENT_SWIZZLE(y, w, z, x)
|
||||
IMPLEMENT_SWIZZLE(y, w, z, y)
|
||||
IMPLEMENT_SWIZZLE(y, w, z, z)
|
||||
IMPLEMENT_SWIZZLE(y, w, z, w)
|
||||
IMPLEMENT_SWIZZLE(y, w, w, x)
|
||||
IMPLEMENT_SWIZZLE(y, w, w, y)
|
||||
IMPLEMENT_SWIZZLE(y, w, w, z)
|
||||
IMPLEMENT_SWIZZLE(y, w, w, w)
|
||||
IMPLEMENT_SWIZZLE(z, x, x, x)
|
||||
IMPLEMENT_SWIZZLE(z, x, x, y)
|
||||
IMPLEMENT_SWIZZLE(z, x, x, z)
|
||||
IMPLEMENT_SWIZZLE(z, x, x, w)
|
||||
IMPLEMENT_SWIZZLE(z, x, y, x)
|
||||
IMPLEMENT_SWIZZLE(z, x, y, y)
|
||||
IMPLEMENT_SWIZZLE(z, x, y, z)
|
||||
IMPLEMENT_SWIZZLE(z, x, y, w)
|
||||
IMPLEMENT_SWIZZLE(z, x, z, x)
|
||||
IMPLEMENT_SWIZZLE(z, x, z, y)
|
||||
IMPLEMENT_SWIZZLE(z, x, z, z)
|
||||
IMPLEMENT_SWIZZLE(z, x, z, w)
|
||||
IMPLEMENT_SWIZZLE(z, x, w, x)
|
||||
IMPLEMENT_SWIZZLE(z, x, w, y)
|
||||
IMPLEMENT_SWIZZLE(z, x, w, z)
|
||||
IMPLEMENT_SWIZZLE(z, x, w, w)
|
||||
IMPLEMENT_SWIZZLE(z, y, x, x)
|
||||
IMPLEMENT_SWIZZLE(z, y, x, y)
|
||||
IMPLEMENT_SWIZZLE(z, y, x, z)
|
||||
IMPLEMENT_SWIZZLE(z, y, x, w)
|
||||
IMPLEMENT_SWIZZLE(z, y, y, x)
|
||||
IMPLEMENT_SWIZZLE(z, y, y, y)
|
||||
IMPLEMENT_SWIZZLE(z, y, y, z)
|
||||
IMPLEMENT_SWIZZLE(z, y, y, w)
|
||||
IMPLEMENT_SWIZZLE(z, y, z, x)
|
||||
IMPLEMENT_SWIZZLE(z, y, z, y)
|
||||
IMPLEMENT_SWIZZLE(z, y, z, z)
|
||||
IMPLEMENT_SWIZZLE(z, y, z, w)
|
||||
IMPLEMENT_SWIZZLE(z, y, w, x)
|
||||
IMPLEMENT_SWIZZLE(z, y, w, y)
|
||||
IMPLEMENT_SWIZZLE(z, y, w, z)
|
||||
IMPLEMENT_SWIZZLE(z, y, w, w)
|
||||
IMPLEMENT_SWIZZLE(z, z, x, x)
|
||||
IMPLEMENT_SWIZZLE(z, z, x, y)
|
||||
IMPLEMENT_SWIZZLE(z, z, x, z)
|
||||
IMPLEMENT_SWIZZLE(z, z, x, w)
|
||||
IMPLEMENT_SWIZZLE(z, z, y, x)
|
||||
IMPLEMENT_SWIZZLE(z, z, y, y)
|
||||
IMPLEMENT_SWIZZLE(z, z, y, z)
|
||||
IMPLEMENT_SWIZZLE(z, z, y, w)
|
||||
IMPLEMENT_SWIZZLE(z, z, z, x)
|
||||
IMPLEMENT_SWIZZLE(z, z, z, y)
|
||||
IMPLEMENT_SWIZZLE(z, z, z, z)
|
||||
IMPLEMENT_SWIZZLE(z, z, z, w)
|
||||
IMPLEMENT_SWIZZLE(z, z, w, x)
|
||||
IMPLEMENT_SWIZZLE(z, z, w, y)
|
||||
IMPLEMENT_SWIZZLE(z, z, w, z)
|
||||
IMPLEMENT_SWIZZLE(z, z, w, w)
|
||||
IMPLEMENT_SWIZZLE(z, w, x, x)
|
||||
IMPLEMENT_SWIZZLE(z, w, x, y)
|
||||
IMPLEMENT_SWIZZLE(z, w, x, z)
|
||||
IMPLEMENT_SWIZZLE(z, w, x, w)
|
||||
IMPLEMENT_SWIZZLE(z, w, y, x)
|
||||
IMPLEMENT_SWIZZLE(z, w, y, y)
|
||||
IMPLEMENT_SWIZZLE(z, w, y, z)
|
||||
IMPLEMENT_SWIZZLE(z, w, y, w)
|
||||
IMPLEMENT_SWIZZLE(z, w, z, x)
|
||||
IMPLEMENT_SWIZZLE(z, w, z, y)
|
||||
IMPLEMENT_SWIZZLE(z, w, z, z)
|
||||
IMPLEMENT_SWIZZLE(z, w, z, w)
|
||||
IMPLEMENT_SWIZZLE(z, w, w, x)
|
||||
IMPLEMENT_SWIZZLE(z, w, w, y)
|
||||
IMPLEMENT_SWIZZLE(z, w, w, z)
|
||||
IMPLEMENT_SWIZZLE(z, w, w, w)
|
||||
IMPLEMENT_SWIZZLE(w, x, x, x)
|
||||
IMPLEMENT_SWIZZLE(w, x, x, y)
|
||||
IMPLEMENT_SWIZZLE(w, x, x, z)
|
||||
IMPLEMENT_SWIZZLE(w, x, x, w)
|
||||
IMPLEMENT_SWIZZLE(w, x, y, x)
|
||||
IMPLEMENT_SWIZZLE(w, x, y, y)
|
||||
IMPLEMENT_SWIZZLE(w, x, y, z)
|
||||
IMPLEMENT_SWIZZLE(w, x, y, w)
|
||||
IMPLEMENT_SWIZZLE(w, x, z, x)
|
||||
IMPLEMENT_SWIZZLE(w, x, z, y)
|
||||
IMPLEMENT_SWIZZLE(w, x, z, z)
|
||||
IMPLEMENT_SWIZZLE(w, x, z, w)
|
||||
IMPLEMENT_SWIZZLE(w, x, w, x)
|
||||
IMPLEMENT_SWIZZLE(w, x, w, y)
|
||||
IMPLEMENT_SWIZZLE(w, x, w, z)
|
||||
IMPLEMENT_SWIZZLE(w, x, w, w)
|
||||
IMPLEMENT_SWIZZLE(w, y, x, x)
|
||||
IMPLEMENT_SWIZZLE(w, y, x, y)
|
||||
IMPLEMENT_SWIZZLE(w, y, x, z)
|
||||
IMPLEMENT_SWIZZLE(w, y, x, w)
|
||||
IMPLEMENT_SWIZZLE(w, y, y, x)
|
||||
IMPLEMENT_SWIZZLE(w, y, y, y)
|
||||
IMPLEMENT_SWIZZLE(w, y, y, z)
|
||||
IMPLEMENT_SWIZZLE(w, y, y, w)
|
||||
IMPLEMENT_SWIZZLE(w, y, z, x)
|
||||
IMPLEMENT_SWIZZLE(w, y, z, y)
|
||||
IMPLEMENT_SWIZZLE(w, y, z, z)
|
||||
IMPLEMENT_SWIZZLE(w, y, z, w)
|
||||
IMPLEMENT_SWIZZLE(w, y, w, x)
|
||||
IMPLEMENT_SWIZZLE(w, y, w, y)
|
||||
IMPLEMENT_SWIZZLE(w, y, w, z)
|
||||
IMPLEMENT_SWIZZLE(w, y, w, w)
|
||||
IMPLEMENT_SWIZZLE(w, z, x, x)
|
||||
IMPLEMENT_SWIZZLE(w, z, x, y)
|
||||
IMPLEMENT_SWIZZLE(w, z, x, z)
|
||||
IMPLEMENT_SWIZZLE(w, z, x, w)
|
||||
IMPLEMENT_SWIZZLE(w, z, y, x)
|
||||
IMPLEMENT_SWIZZLE(w, z, y, y)
|
||||
IMPLEMENT_SWIZZLE(w, z, y, z)
|
||||
IMPLEMENT_SWIZZLE(w, z, y, w)
|
||||
IMPLEMENT_SWIZZLE(w, z, z, x)
|
||||
IMPLEMENT_SWIZZLE(w, z, z, y)
|
||||
IMPLEMENT_SWIZZLE(w, z, z, z)
|
||||
IMPLEMENT_SWIZZLE(w, z, z, w)
|
||||
IMPLEMENT_SWIZZLE(w, z, w, x)
|
||||
IMPLEMENT_SWIZZLE(w, z, w, y)
|
||||
IMPLEMENT_SWIZZLE(w, z, w, z)
|
||||
IMPLEMENT_SWIZZLE(w, z, w, w)
|
||||
IMPLEMENT_SWIZZLE(w, w, x, x)
|
||||
IMPLEMENT_SWIZZLE(w, w, x, y)
|
||||
IMPLEMENT_SWIZZLE(w, w, x, z)
|
||||
IMPLEMENT_SWIZZLE(w, w, x, w)
|
||||
IMPLEMENT_SWIZZLE(w, w, y, x)
|
||||
IMPLEMENT_SWIZZLE(w, w, y, y)
|
||||
IMPLEMENT_SWIZZLE(w, w, y, z)
|
||||
IMPLEMENT_SWIZZLE(w, w, y, w)
|
||||
IMPLEMENT_SWIZZLE(w, w, z, x)
|
||||
IMPLEMENT_SWIZZLE(w, w, z, y)
|
||||
IMPLEMENT_SWIZZLE(w, w, z, z)
|
||||
IMPLEMENT_SWIZZLE(w, w, z, w)
|
||||
IMPLEMENT_SWIZZLE(w, w, w, x)
|
||||
IMPLEMENT_SWIZZLE(w, w, w, y)
|
||||
IMPLEMENT_SWIZZLE(w, w, w, z)
|
||||
IMPLEMENT_SWIZZLE(w, w, w, w)
|
||||
22
include/bx/float4_t.h
Normal file
22
include/bx/float4_t.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4_T_H__
|
||||
#define __BX_FLOAT4_T_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include "bx.h"
|
||||
|
||||
#define BX_FLOAT4_INLINE BX_FORCE_INLINE
|
||||
|
||||
#if 0 // defined(__SSE2__)
|
||||
# include "float4_sse.h"
|
||||
#elif 0 // __ARM_NEON__
|
||||
# include "float4_neon.h"
|
||||
#else
|
||||
# include "float4_ref.h"
|
||||
#endif //
|
||||
|
||||
#endif // __BX_FLOAT4_T_H__
|
||||
168
include/bx/float4x4_t.h
Normal file
168
include/bx/float4x4_t.h
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FLOAT4X4_H__
|
||||
#define __BX_FLOAT4x4_H__
|
||||
|
||||
#include "float4_t.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
typedef BX_ALIGN_STRUCT_16(struct)
|
||||
{
|
||||
float4_t col[4];
|
||||
|
||||
} float4x4_t;
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b)
|
||||
{
|
||||
const float4_t xxxx = float4_swiz_xxxx(_a);
|
||||
const float4_t yyyy = float4_swiz_yyyy(_a);
|
||||
const float4_t zzzz = float4_swiz_zzzz(_a);
|
||||
const float4_t col0 = float4_mul(_b.col[0], xxxx);
|
||||
const float4_t col1 = float4_mul(_b.col[1], yyyy);
|
||||
const float4_t col2 = float4_madd(_b.col[2], zzzz, col0);
|
||||
const float4_t col3 = float4_add(_b.col[3], col1);
|
||||
const float4_t result = float4_add(col2, col3);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b)
|
||||
{
|
||||
const float4_t xxxx = float4_swiz_xxxx(_a);
|
||||
const float4_t yyyy = float4_swiz_yyyy(_a);
|
||||
const float4_t zzzz = float4_swiz_zzzz(_a);
|
||||
const float4_t wwww = float4_swiz_wwww(_a);
|
||||
const float4_t col0 = float4_mul(_b.col[0], xxxx);
|
||||
const float4_t col1 = float4_mul(_b.col[1], yyyy);
|
||||
const float4_t col2 = float4_madd(_b.col[2], zzzz, col0);
|
||||
const float4_t col3 = float4_madd(_b.col[3], wwww, col1);
|
||||
const float4_t result = float4_add(col2, col3);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b)
|
||||
{
|
||||
float4x4_t result;
|
||||
result.col[0] = float4_mul(_a.col[0], _b);
|
||||
result.col[1] = float4_mul(_a.col[1], _b);
|
||||
result.col[2] = float4_mul(_a.col[2], _b);
|
||||
result.col[3] = float4_mul(_a.col[3], _b);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx)
|
||||
{
|
||||
const float4_t aibj = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // aibj
|
||||
const float4_t emfn = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // emfn
|
||||
const float4_t ckdl = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // ckdl
|
||||
const float4_t gohp = float4_shuf_zCwD(_mtx.col[1], _mtx.col[3]); // gohp
|
||||
float4x4_t result;
|
||||
result.col[0] = float4_shuf_xAyB(aibj, emfn); // aeim
|
||||
result.col[1] = float4_shuf_zCwD(aibj, emfn); // bfjn
|
||||
result.col[2] = float4_shuf_xAyB(ckdl, gohp); // cgko
|
||||
result.col[3] = float4_shuf_zCwD(ckdl, gohp); // dhlp
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a)
|
||||
{
|
||||
const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]);
|
||||
const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]);
|
||||
const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]);
|
||||
const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]);
|
||||
const float4_t t0 = float4_shuf_xyAB(tmp0, tmp1);
|
||||
const float4_t t1 = float4_shuf_xyAB(tmp3, tmp2);
|
||||
const float4_t t2 = float4_shuf_zwCD(tmp0, tmp1);
|
||||
const float4_t t3 = float4_shuf_zwCD(tmp3, tmp2);
|
||||
|
||||
const float4_t t23 = float4_mul(t2, t3);
|
||||
const float4_t t23_yxwz = float4_swiz_yxwz(t23);
|
||||
const float4_t t23_wzyx = float4_swiz_wzyx(t23);
|
||||
|
||||
float4_t cof0, cof1, cof2, cof3;
|
||||
|
||||
const float4_t zero = float4_zero();
|
||||
cof0 = float4_nmsub(t1, t23_yxwz, zero);
|
||||
cof0 = float4_madd(t1, t23_wzyx, cof0);
|
||||
|
||||
cof1 = float4_nmsub(t0, t23_yxwz, zero);
|
||||
cof1 = float4_madd(t0, t23_wzyx, cof1);
|
||||
cof1 = float4_swiz_zwxy(cof1);
|
||||
|
||||
const float4_t t12 = float4_mul(t1, t2);
|
||||
const float4_t t12_yxwz = float4_swiz_yxwz(t12);
|
||||
const float4_t t12_wzyx = float4_swiz_wzyx(t12);
|
||||
|
||||
cof0 = float4_madd(t3, t12_yxwz, cof0);
|
||||
cof0 = float4_nmsub(t3, t12_wzyx, cof0);
|
||||
|
||||
cof3 = float4_mul(t0, t12_yxwz);
|
||||
cof3 = float4_nmsub(t0, t12_wzyx, cof3);
|
||||
cof3 = float4_swiz_zwxy(cof3);
|
||||
|
||||
const float4_t t1_zwxy = float4_swiz_zwxy(t1);
|
||||
const float4_t t2_zwxy = float4_swiz_zwxy(t2);
|
||||
|
||||
const float4_t t13 = float4_mul(t1_zwxy, t3);
|
||||
const float4_t t13_yxwz = float4_swiz_yxwz(t13);
|
||||
const float4_t t13_wzyx = float4_swiz_wzyx(t13);
|
||||
|
||||
cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0);
|
||||
cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0);
|
||||
|
||||
cof2 = float4_mul(t0, t13_yxwz);
|
||||
cof2 = float4_nmsub(t0, t13_wzyx, cof2);
|
||||
cof2 = float4_swiz_zwxy(cof2);
|
||||
|
||||
const float4_t t01 = float4_mul(t0, t1);
|
||||
const float4_t t01_yxwz = float4_swiz_yxwz(t01);
|
||||
const float4_t t01_wzyx = float4_swiz_wzyx(t01);
|
||||
|
||||
cof2 = float4_nmsub(t3, t01_yxwz, cof2);
|
||||
cof2 = float4_madd(t3, t01_wzyx, cof2);
|
||||
|
||||
cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3);
|
||||
cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3);
|
||||
|
||||
const float4_t t03 = float4_mul(t0, t3);
|
||||
const float4_t t03_yxwz = float4_swiz_yxwz(t03);
|
||||
const float4_t t03_wzyx = float4_swiz_wzyx(t03);
|
||||
|
||||
cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1);
|
||||
cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1);
|
||||
|
||||
cof2 = float4_madd(t1, t03_yxwz, cof2);
|
||||
cof2 = float4_nmsub(t1, t03_wzyx, cof2);
|
||||
|
||||
const float4_t t02 = float4_mul(t0, t2_zwxy);
|
||||
const float4_t t02_yxwz = float4_swiz_yxwz(t02);
|
||||
const float4_t t02_wzyx = float4_swiz_wzyx(t02);
|
||||
|
||||
cof1 = float4_madd(t3, t02_yxwz, cof1);
|
||||
cof1 = float4_nmsub(t3, t02_wzyx, cof1);
|
||||
|
||||
cof3 = float4_nmsub(t1, t02_yxwz, cof3);
|
||||
cof3 = float4_madd(t1, t02_wzyx, cof3);
|
||||
|
||||
const float4_t det = float4_dot(t0, cof0);
|
||||
const float4_t invdet = float4_rcp(det);
|
||||
|
||||
float4x4_t result;
|
||||
result.col[0] = float4_mul(cof0, invdet);
|
||||
result.col[1] = float4_mul(cof1, invdet);
|
||||
result.col[2] = float4_mul(cof2, invdet);
|
||||
result.col[3] = float4_mul(cof3, invdet);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_FLOAT4X4_H__
|
||||
71
include/bx/foreach.h
Normal file
71
include/bx/foreach.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_FOREACH_H__
|
||||
#define __BX_FOREACH_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
namespace foreach_ns
|
||||
{
|
||||
struct ContainerBase
|
||||
{
|
||||
};
|
||||
|
||||
template <typename Ty>
|
||||
class Container : public ContainerBase
|
||||
{
|
||||
public:
|
||||
inline Container(const Ty& _container)
|
||||
: m_container(_container)
|
||||
, m_break(0)
|
||||
, m_it( _container.begin() )
|
||||
, m_itEnd( _container.end() )
|
||||
{
|
||||
}
|
||||
|
||||
inline bool condition() const
|
||||
{
|
||||
return (!m_break++ && m_it != m_itEnd);
|
||||
}
|
||||
|
||||
const Ty& m_container;
|
||||
mutable int m_break;
|
||||
mutable typename Ty::const_iterator m_it;
|
||||
mutable typename Ty::const_iterator m_itEnd;
|
||||
};
|
||||
|
||||
template <typename Ty>
|
||||
inline Ty* pointer(const Ty&)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename Ty>
|
||||
inline Container<Ty> containerNew(const Ty& _container)
|
||||
{
|
||||
return Container<Ty>(_container);
|
||||
}
|
||||
|
||||
template <typename Ty>
|
||||
inline const Container<Ty>* container(const ContainerBase* _base, const Ty*)
|
||||
{
|
||||
return static_cast<const Container<Ty>*>(_base);
|
||||
}
|
||||
} // namespace foreach_ns
|
||||
|
||||
#define foreach(_variable, _container) \
|
||||
for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \
|
||||
bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->condition(); \
|
||||
++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_it) \
|
||||
for (_variable = *container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_it; \
|
||||
bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_break; \
|
||||
--bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_break)
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_FOREACH_H__
|
||||
83
include/bx/handlealloc.h
Normal file
83
include/bx/handlealloc.h
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_HANDLE_ALLOC_H__
|
||||
#define __BX_HANDLE_ALLOC_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
class HandleAlloc
|
||||
{
|
||||
public:
|
||||
static const uint16_t invalid = 0xffff;
|
||||
|
||||
HandleAlloc(uint16_t _maxHandles)
|
||||
: m_dense(new uint16_t[_maxHandles*2])
|
||||
, m_sparse(&m_dense[_maxHandles])
|
||||
, m_numHandles(0)
|
||||
, m_maxHandles(_maxHandles)
|
||||
{
|
||||
for (uint16_t ii = 0; ii < _maxHandles; ++ii)
|
||||
{
|
||||
m_dense[ii] = ii;
|
||||
}
|
||||
}
|
||||
|
||||
~HandleAlloc()
|
||||
{
|
||||
delete [] m_dense;
|
||||
}
|
||||
|
||||
const uint16_t* getHandles() const
|
||||
{
|
||||
return m_dense;
|
||||
}
|
||||
|
||||
uint16_t getNumHandles() const
|
||||
{
|
||||
return m_numHandles;
|
||||
}
|
||||
|
||||
uint16_t getMaxHandles() const
|
||||
{
|
||||
return m_maxHandles;
|
||||
}
|
||||
|
||||
uint16_t alloc()
|
||||
{
|
||||
if (m_numHandles < m_maxHandles)
|
||||
{
|
||||
uint16_t index = m_numHandles;
|
||||
++m_numHandles;
|
||||
|
||||
uint16_t handle = m_dense[index];
|
||||
m_sparse[handle] = index;
|
||||
return handle;
|
||||
}
|
||||
|
||||
return invalid;
|
||||
}
|
||||
|
||||
void free(uint16_t _handle)
|
||||
{
|
||||
uint16_t index = m_sparse[_handle];
|
||||
--m_numHandles;
|
||||
uint16_t temp = m_dense[m_numHandles];
|
||||
m_dense[m_numHandles] = _handle;
|
||||
m_sparse[temp] = index;
|
||||
m_dense[index] = temp;
|
||||
}
|
||||
|
||||
private:
|
||||
uint16_t* m_dense;
|
||||
uint16_t* m_sparse;
|
||||
uint16_t m_numHandles;
|
||||
uint16_t m_maxHandles;
|
||||
};
|
||||
} // namespace bx
|
||||
|
||||
#endif // __HANDLE_ALLOC_H__
|
||||
90
include/bx/hash.h
Normal file
90
include/bx/hash.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_HASH_H__
|
||||
#define __BX_HASH_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
// MurmurHash2 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
#define MURMUR_M 0x5bd1e995
|
||||
#define MURMUR_R 24
|
||||
|
||||
#define mmix(_h, _k) { _k *= MURMUR_M; _k ^= _k >> MURMUR_R; _k *= MURMUR_M; _h *= MURMUR_M; _h ^= _k; }
|
||||
|
||||
class HashMurmur2A
|
||||
{
|
||||
public:
|
||||
void begin(uint32_t _seed = 0)
|
||||
{
|
||||
m_hash = _seed;
|
||||
m_tail = 0;
|
||||
m_count = 0;
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
void add(const void* _data, int _len)
|
||||
{
|
||||
const uint8_t* data = (uint8_t*)_data;
|
||||
m_size += _len;
|
||||
|
||||
mixTail(data, _len);
|
||||
|
||||
while(_len >= 4)
|
||||
{
|
||||
uint32_t kk = *(uint32_t*)data;
|
||||
|
||||
mmix(m_hash, kk);
|
||||
|
||||
data += 4;
|
||||
_len -= 4;
|
||||
}
|
||||
|
||||
mixTail(data, _len);
|
||||
}
|
||||
|
||||
uint32_t end()
|
||||
{
|
||||
mmix(m_hash, m_tail);
|
||||
mmix(m_hash, m_size);
|
||||
|
||||
m_hash ^= m_hash >> 13;
|
||||
m_hash *= MURMUR_M;
|
||||
m_hash ^= m_hash >> 15;
|
||||
|
||||
return m_hash;
|
||||
}
|
||||
|
||||
private:
|
||||
void mixTail(const uint8_t*& _data, int& _len)
|
||||
{
|
||||
while( _len && ((_len<4) || m_count) )
|
||||
{
|
||||
m_tail |= (*_data++) << (m_count * 8);
|
||||
|
||||
m_count++;
|
||||
_len--;
|
||||
|
||||
if(m_count == 4)
|
||||
{
|
||||
mmix(m_hash, m_tail);
|
||||
m_tail = 0;
|
||||
m_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t m_hash;
|
||||
uint32_t m_tail;
|
||||
uint32_t m_count;
|
||||
uint32_t m_size;
|
||||
};
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_HASH_H__
|
||||
62
include/bx/macros.h
Normal file
62
include/bx/macros.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_MACROS_H__
|
||||
#define __BX_MACROS_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
#define BX_VA_ARGS_COUNT_DETAIL(_a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8, _a9, _a10, _a11, _a12, _a13, _a14, _a15, _a16, _last, ...) _last
|
||||
#define BX_VA_ARGS_COUNT(...) BX_VA_ARGS_COUNT_DETAIL(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
#define BX_MACRO_DISPATCHER_DETAIL1(_func, _argCount) _func ## _argCount
|
||||
#define BX_MACRO_DISPATCHER_DETAIL2(_func, _argCount) BX_MACRO_DISPATCHER_DETAIL1(_func, _argCount)
|
||||
#define BX_MACRO_DISPATCHER(_func, ...) BX_MACRO_DISPATCHER_DETAIL2(_func, VA_ARGS_COUNT(__VA_ARGS__) )
|
||||
|
||||
#define BX_STRINGIZE(_x) BX_STRINGIZE_(_x)
|
||||
#define BX_STRINGIZE_(_x) #_x
|
||||
|
||||
#define BX_FILE_LINE_LITERAL "" __FILE__ "(" BX_STRINGIZE(__LINE__) "): "
|
||||
|
||||
#define BX_ALIGN_MASK(_value, _mask) ( ( (_value)+(_mask) ) & ( (~0)&(~(_mask) ) ) )
|
||||
#define BX_ALIGN_16(_value) BX_ALIGN_MASK(_value, 0xf)
|
||||
#define BX_ALIGN_256(_value) BX_ALIGN_MASK(_value, 0xff)
|
||||
|
||||
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
# define BX_ALIGN_STRUCT(_align, _struct) _struct __attribute__( (aligned(_align) ) )
|
||||
# define BX_FUNCTION __PRETTY_FUNCTION__
|
||||
# define BX_NO_INLINE __attribute__( (noinline) )
|
||||
# define BX_FORCE_INLINE __extension__ static __inline __attribute__( (__always_inline__) )
|
||||
# if BX_COMPILER_CLANG
|
||||
# define BX_THREAD /* not supported right now */
|
||||
# else
|
||||
# define BX_THREAD __thread
|
||||
# endif // BX_COMPILER_CLANG
|
||||
#elif BX_COMPILER_MSVC
|
||||
# define BX_ALIGN_STRUCT(_align, _struct) __declspec(align(_align) ) _struct
|
||||
# define BX_FUNCTION __FUNCTION__
|
||||
# define BX_NO_INLINE __declspec(noinline)
|
||||
# define BX_FORCE_INLINE __forceinline
|
||||
# define BX_THREAD __declspec(thread)
|
||||
#else
|
||||
# error "Unknown BX_COMPILER_?"
|
||||
#endif
|
||||
|
||||
#define BX_ALIGN_STRUCT_16(_struct) BX_ALIGN_STRUCT(16, _struct)
|
||||
#define BX_ALIGN_STRUCT_256(_struct) BX_ALIGN_STRUCT(256, _struct)
|
||||
|
||||
#ifndef BX_CHECK
|
||||
# define BX_CHECK(...) do {} while(0)
|
||||
#endif // BX_CHECK
|
||||
|
||||
#ifndef BX_TRACE
|
||||
# define BX_TRACE(...) do {} while(0)
|
||||
#endif // BX_TRACE
|
||||
|
||||
#ifndef BX_CONFIG_SPSCQUEUE_USE_NAIVE
|
||||
# define BX_CONFIG_SPSCQUEUE_USE_NAIVE 0
|
||||
#endif // BX_CONFIG_SPSCQUEUE_USE_NAIVE
|
||||
|
||||
#endif // __BX_MACROS_H__
|
||||
29
include/bx/maputil.h
Normal file
29
include/bx/maputil.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_MAPUTIL_H__
|
||||
#define __BX_MAPUTIL_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
template<typename MapType>
|
||||
typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value)
|
||||
{
|
||||
typename MapType::iterator it = _map.lower_bound(_key);
|
||||
if (it != _map.end()
|
||||
&& !_map.key_comp()(_key, it->first) )
|
||||
{
|
||||
it->second = _value;
|
||||
return it;
|
||||
}
|
||||
|
||||
typename MapType::value_type pair(_key, _value);
|
||||
return _map.insert(it, pair);
|
||||
}
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_MAPUTIL_H__
|
||||
171
include/bx/mutex.h
Normal file
171
include/bx/mutex.h
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_MUTEX_H__
|
||||
#define __BX_MUTEX_H__
|
||||
|
||||
#include "bx.h"
|
||||
#include "cpu.h"
|
||||
#include "sem.h"
|
||||
|
||||
#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID
|
||||
# include <pthread.h>
|
||||
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
# include <errno.h>
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
namespace bx
|
||||
{
|
||||
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
typedef CRITICAL_SECTION pthread_mutex_t;
|
||||
typedef unsigned pthread_mutexattr_t;
|
||||
|
||||
inline int pthread_mutex_lock(pthread_mutex_t* _mutex)
|
||||
{
|
||||
EnterCriticalSection(_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline int pthread_mutex_unlock(pthread_mutex_t* _mutex)
|
||||
{
|
||||
LeaveCriticalSection(_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline int pthread_mutex_trylock(pthread_mutex_t* _mutex)
|
||||
{
|
||||
return TryEnterCriticalSection(_mutex) ? 0 : EBUSY;
|
||||
}
|
||||
|
||||
inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/)
|
||||
{
|
||||
InitializeCriticalSection(_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline int pthread_mutex_destroy(pthread_mutex_t* _mutex)
|
||||
{
|
||||
DeleteCriticalSection(_mutex);
|
||||
return 0;
|
||||
}
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
class Mutex
|
||||
{
|
||||
public:
|
||||
Mutex()
|
||||
{
|
||||
pthread_mutex_init(&m_handle, NULL);
|
||||
}
|
||||
|
||||
~Mutex()
|
||||
{
|
||||
pthread_mutex_destroy(&m_handle);
|
||||
}
|
||||
|
||||
void lock()
|
||||
{
|
||||
pthread_mutex_lock(&m_handle);
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
pthread_mutex_unlock(&m_handle);
|
||||
}
|
||||
|
||||
private:
|
||||
Mutex(const Mutex& _rhs); // no copy constructor
|
||||
Mutex& operator=(const Mutex& _rhs); // no assignment operator
|
||||
|
||||
pthread_mutex_t m_handle;
|
||||
};
|
||||
|
||||
class MutexScope
|
||||
{
|
||||
public:
|
||||
MutexScope(Mutex& _mutex)
|
||||
: m_mutex(_mutex)
|
||||
{
|
||||
m_mutex.lock();
|
||||
}
|
||||
|
||||
~MutexScope()
|
||||
{
|
||||
m_mutex.unlock();
|
||||
}
|
||||
|
||||
private:
|
||||
MutexScope(); // no default constructor
|
||||
MutexScope(const MutexScope& _rhs); // no copy constructor
|
||||
MutexScope& operator=(const MutexScope& _rhs); // no assignment operator
|
||||
|
||||
Mutex& m_mutex;
|
||||
};
|
||||
|
||||
#if 1
|
||||
typedef Mutex LwMutex;
|
||||
#else
|
||||
class LwMutex
|
||||
{
|
||||
public:
|
||||
LwMutex()
|
||||
: m_count(0)
|
||||
{
|
||||
}
|
||||
|
||||
~LwMutex()
|
||||
{
|
||||
}
|
||||
|
||||
void lock()
|
||||
{
|
||||
if (atomicIncr(&m_count) > 1)
|
||||
{
|
||||
m_sem.wait();
|
||||
}
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
if (atomicDecr(&m_count) > 0)
|
||||
{
|
||||
m_sem.post();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
LwMutex(const LwMutex& _rhs); // no copy constructor
|
||||
LwMutex& operator=(const LwMutex& _rhs); // no assignment operator
|
||||
|
||||
Semaphore m_sem;
|
||||
volatile int32_t m_count;
|
||||
};
|
||||
#endif // 0
|
||||
|
||||
class LwMutexScope
|
||||
{
|
||||
public:
|
||||
LwMutexScope(LwMutex& _mutex)
|
||||
: m_mutex(_mutex)
|
||||
{
|
||||
m_mutex.lock();
|
||||
}
|
||||
|
||||
~LwMutexScope()
|
||||
{
|
||||
m_mutex.unlock();
|
||||
}
|
||||
|
||||
private:
|
||||
LwMutexScope(); // no default constructor
|
||||
LwMutexScope(const LwMutexScope& _rhs); // no copy constructor
|
||||
LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator
|
||||
|
||||
LwMutex& m_mutex;
|
||||
};
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_MUTEX_H__
|
||||
46
include/bx/os.h
Normal file
46
include/bx/os.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_OS_H__
|
||||
#define __BX_OS_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX
|
||||
# include <sched.h> // sched_yield
|
||||
# if BX_PLATFORM_NACL
|
||||
# include <sys/nacl_syscalls.h> // nanosleep
|
||||
# else
|
||||
# include <time.h> // nanosleep
|
||||
# endif // BX_PLATFORM_NACL
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
namespace bx
|
||||
{
|
||||
inline void sleep(uint32_t _ms)
|
||||
{
|
||||
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
Sleep(_ms);
|
||||
#else
|
||||
timespec req = {_ms/1000, (_ms%1000)*1000000};
|
||||
timespec rem = {0, 0};
|
||||
nanosleep(&req, &rem);
|
||||
#endif // BX_PLATFORM_
|
||||
}
|
||||
|
||||
inline void yield()
|
||||
{
|
||||
#if BX_PLATFORM_WINDOWS
|
||||
SwitchToThread();
|
||||
#elif BX_PLATFORM_XBOX360
|
||||
Sleep(0);
|
||||
#else
|
||||
sched_yield();
|
||||
#endif // BX_PLATFORM_
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_OS_H__
|
||||
86
include/bx/platform.h
Normal file
86
include/bx/platform.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_PLATFORM_H__
|
||||
#define __BX_PLATFORM_H__
|
||||
|
||||
#define BX_COMPILER_CLANG 0
|
||||
#define BX_COMPILER_GCC 0
|
||||
#define BX_COMPILER_MSVC 0
|
||||
|
||||
#define BX_PLATFORM_ANDROID 0
|
||||
#define BX_PLATFORM_LINUX 0
|
||||
#define BX_PLATFORM_NACL 0
|
||||
#define BX_PLATFORM_WINDOWS 0
|
||||
#define BX_PLATFORM_XBOX360 0
|
||||
|
||||
#define BX_CPU_ARM 0
|
||||
#define BX_CPU_PPC 0
|
||||
#define BX_CPU_X86 0
|
||||
|
||||
#define BX_CPU_ENDIAN_BIG 0
|
||||
#define BX_CPU_ENDIAN_LITTLE 0
|
||||
|
||||
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Compilers
|
||||
#if defined(_MSC_VER)
|
||||
# undef BX_COMPILER_MSVC
|
||||
# define BX_COMPILER_MSVC 1
|
||||
#elif defined(__clang__)
|
||||
// clang defines __GNUC__
|
||||
# undef BX_COMPILER_CLANG
|
||||
# define BX_COMPILER_CLANG 1
|
||||
#elif defined(__GNUC__)
|
||||
# undef BX_COMPILER_GCC
|
||||
# define BX_COMPILER_GCC 1
|
||||
#else
|
||||
# error "BX_COMPILER_* is not defined!"
|
||||
#endif //
|
||||
|
||||
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Operating_Systems
|
||||
#if defined(_XBOX_VER)
|
||||
# undef BX_PLATFORM_XBOX360
|
||||
# define BX_PLATFORM_XBOX360 1
|
||||
#elif defined(_WIN32) || defined(_WIN64)
|
||||
# undef BX_PLATFORM_WINDOWS
|
||||
# define BX_PLATFORM_WINDOWS 1
|
||||
#elif defined(__native_client__)
|
||||
// NaCl compiler defines __linux__
|
||||
# undef BX_PLATFORM_NACL
|
||||
# define BX_PLATFORM_NACL 1
|
||||
#elif defined(__ANDROID__)
|
||||
// Android compiler defines __linux__
|
||||
# undef BX_PLATFORM_ANDROID
|
||||
# define BX_PLATFORM_ANDROID 1
|
||||
#elif defined(__linux__)
|
||||
# undef BX_PLATFORM_LINUX
|
||||
# define BX_PLATFORM_LINUX 1
|
||||
#else
|
||||
# error "BX_PLATFORM_* is not defined!"
|
||||
#endif //
|
||||
|
||||
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Architectures
|
||||
#if defined(__arm__)
|
||||
# undef BX_CPU_ARM
|
||||
# define BX_CPU_ARM 1
|
||||
# define BX_CACHE_LINE_SIZE 64
|
||||
#elif defined(_M_PPC) || defined(__powerpc__) || defined(__powerpc64__)
|
||||
# undef BX_CPU_PPC
|
||||
# define BX_CPU_PPC 1
|
||||
# define BX_CACHE_LINE_SIZE 128
|
||||
#elif defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
|
||||
# undef BX_CPU_X86
|
||||
# define BX_CPU_X86 1
|
||||
# define BX_CACHE_LINE_SIZE 64
|
||||
#endif //
|
||||
|
||||
#if BX_CPU_PPC
|
||||
# undef BX_CPU_ENDIAN_BIG
|
||||
# define BX_CPU_ENDIAN_BIG 1
|
||||
#else
|
||||
# undef BX_CPU_ENDIAN_LITTLE
|
||||
# define BX_CPU_ENDIAN_LITTLE 1
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
#endif // __BX_PLATFORM_H__
|
||||
313
include/bx/ringbuffer.h
Normal file
313
include/bx/ringbuffer.h
Normal file
@@ -0,0 +1,313 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_RINGBUFFER_H__
|
||||
#define __BX_RINGBUFFER_H__
|
||||
|
||||
#include "bx.h"
|
||||
#include "cpu.h"
|
||||
#include "uint32_t.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
class RingBufferControl
|
||||
{
|
||||
public:
|
||||
RingBufferControl(uint32_t _size)
|
||||
: m_size(_size)
|
||||
, m_current(0)
|
||||
, m_write(0)
|
||||
, m_read(0)
|
||||
{
|
||||
}
|
||||
|
||||
~RingBufferControl()
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t available() const
|
||||
{
|
||||
return distance(m_read, m_current);
|
||||
}
|
||||
|
||||
uint32_t consume(uint32_t _size) // consumer only
|
||||
{
|
||||
const uint32_t maxSize = distance(m_read, m_current);
|
||||
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
|
||||
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
|
||||
const uint32_t size = uint32_sels(test, _size, maxSize);
|
||||
const uint32_t advance = uint32_add(m_read, size);
|
||||
const uint32_t read = uint32_mod(advance, m_size);
|
||||
m_read = read;
|
||||
return size;
|
||||
}
|
||||
|
||||
uint32_t reserve(uint32_t _size) // producer only
|
||||
{
|
||||
const uint32_t dist = distance(m_write, m_read)-1;
|
||||
const uint32_t maxSize = uint32_sels(dist, m_size-1, dist);
|
||||
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
|
||||
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
|
||||
const uint32_t size = uint32_sels(test, _size, maxSize);
|
||||
const uint32_t advance = uint32_add(m_write, size);
|
||||
const uint32_t write = uint32_mod(advance, m_size);
|
||||
m_write = write;
|
||||
return size;
|
||||
}
|
||||
|
||||
uint32_t commit(uint32_t _size) // producer only
|
||||
{
|
||||
const uint32_t maxSize = distance(m_current, m_write);
|
||||
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
|
||||
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
|
||||
const uint32_t size = uint32_sels(test, _size, maxSize);
|
||||
const uint32_t advance = uint32_add(m_current, size);
|
||||
const uint32_t current = uint32_mod(advance, m_size);
|
||||
m_current = current;
|
||||
return size;
|
||||
}
|
||||
|
||||
uint32_t distance(uint32_t _from, uint32_t _to) const // both
|
||||
{
|
||||
const uint32_t diff = uint32_sub(_to, _from);
|
||||
const uint32_t le = uint32_add(m_size, diff);
|
||||
const uint32_t result = uint32_sels(diff, le, diff);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const uint32_t m_size;
|
||||
uint32_t m_current;
|
||||
uint32_t m_write;
|
||||
uint32_t m_read;
|
||||
};
|
||||
|
||||
class SpScRingBufferControl
|
||||
{
|
||||
public:
|
||||
SpScRingBufferControl(uint32_t _size)
|
||||
: m_size(_size)
|
||||
, m_current(0)
|
||||
, m_write(0)
|
||||
, m_read(0)
|
||||
{
|
||||
}
|
||||
|
||||
~SpScRingBufferControl()
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t available() const
|
||||
{
|
||||
return distance(m_read, m_current);
|
||||
}
|
||||
|
||||
uint32_t consume(uint32_t _size) // consumer only
|
||||
{
|
||||
const uint32_t maxSize = distance(m_read, m_current);
|
||||
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
|
||||
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
|
||||
const uint32_t size = uint32_sels(test, _size, maxSize);
|
||||
const uint32_t advance = uint32_add(m_read, size);
|
||||
const uint32_t read = uint32_mod(advance, m_size);
|
||||
m_read = read;
|
||||
return size;
|
||||
}
|
||||
|
||||
uint32_t reserve(uint32_t _size) // producer only
|
||||
{
|
||||
const uint32_t dist = distance(m_write, m_read)-1;
|
||||
const uint32_t maxSize = uint32_sels(dist, m_size-1, dist);
|
||||
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
|
||||
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
|
||||
const uint32_t size = uint32_sels(test, _size, maxSize);
|
||||
const uint32_t advance = uint32_add(m_write, size);
|
||||
const uint32_t write = uint32_mod(advance, m_size);
|
||||
m_write = write;
|
||||
return size;
|
||||
}
|
||||
|
||||
uint32_t commit(uint32_t _size) // producer only
|
||||
{
|
||||
const uint32_t maxSize = distance(m_current, m_write);
|
||||
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
|
||||
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
|
||||
const uint32_t size = uint32_sels(test, _size, maxSize);
|
||||
const uint32_t advance = uint32_add(m_current, size);
|
||||
const uint32_t current = uint32_mod(advance, m_size);
|
||||
|
||||
// must commit all memory writes before moving m_current pointer
|
||||
// once m_current pointer moves data is used by consumer thread
|
||||
memoryBarrier();
|
||||
m_current = current;
|
||||
return size;
|
||||
}
|
||||
|
||||
uint32_t distance(uint32_t _from, uint32_t _to) const // both
|
||||
{
|
||||
const uint32_t diff = uint32_sub(_to, _from);
|
||||
const uint32_t le = uint32_add(m_size, diff);
|
||||
const uint32_t result = uint32_sels(diff, le, diff);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const uint32_t m_size;
|
||||
uint32_t m_current;
|
||||
uint32_t m_write;
|
||||
uint32_t m_read;
|
||||
};
|
||||
|
||||
template <typename Control>
|
||||
class ReadRingBufferT
|
||||
{
|
||||
public:
|
||||
ReadRingBufferT(Control& _control, const char* _buffer, uint32_t _size)
|
||||
: m_control(_control)
|
||||
, m_read(_control.m_read)
|
||||
, m_end(m_read+_size)
|
||||
, m_size(_size)
|
||||
, m_buffer(_buffer)
|
||||
{
|
||||
BX_CHECK(_control.available() >= _size, "%d >= %d", _control.available(), _size);
|
||||
}
|
||||
|
||||
~ReadRingBufferT()
|
||||
{
|
||||
}
|
||||
|
||||
void end()
|
||||
{
|
||||
m_control.consume(m_size);
|
||||
}
|
||||
|
||||
void read(char* _data, uint32_t _len)
|
||||
{
|
||||
const uint32_t end = (m_read + _len) % m_control.m_size;
|
||||
uint32_t wrap = 0;
|
||||
const char* from = &m_buffer[m_read];
|
||||
|
||||
if (end < m_read)
|
||||
{
|
||||
wrap = m_control.m_size - m_read;
|
||||
memcpy(_data, from, wrap);
|
||||
_data += wrap;
|
||||
from = (const char*)&m_buffer[0];
|
||||
}
|
||||
|
||||
memcpy(_data, from, _len-wrap);
|
||||
|
||||
m_read = end;
|
||||
}
|
||||
|
||||
void skip(uint32_t _len)
|
||||
{
|
||||
m_read += _len;
|
||||
m_read %= m_control.m_size;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Ty>
|
||||
friend class WriteRingBufferT;
|
||||
|
||||
ReadRingBufferT();
|
||||
ReadRingBufferT(const Control&);
|
||||
void operator=(const Control&);
|
||||
|
||||
Control& m_control;
|
||||
uint32_t m_read;
|
||||
uint32_t m_end;
|
||||
const uint32_t m_size;
|
||||
const char* m_buffer;
|
||||
};
|
||||
|
||||
typedef ReadRingBufferT<RingBufferControl> ReadRingBuffer;
|
||||
typedef ReadRingBufferT<SpScRingBufferControl> SpScReadRingBuffer;
|
||||
|
||||
template <typename Control>
|
||||
class WriteRingBufferT
|
||||
{
|
||||
public:
|
||||
WriteRingBufferT(Control& _control, char* _buffer, uint32_t _size)
|
||||
: m_control(_control)
|
||||
, m_size(_size)
|
||||
, m_buffer(_buffer)
|
||||
{
|
||||
uint32_t size = m_control.reserve(_size);
|
||||
BX_CHECK(size == _size, "%d == %d", size, _size);
|
||||
m_write = m_control.m_current;
|
||||
m_end = m_write+_size;
|
||||
}
|
||||
|
||||
~WriteRingBufferT()
|
||||
{
|
||||
}
|
||||
|
||||
void end()
|
||||
{
|
||||
m_control.commit(m_size);
|
||||
}
|
||||
|
||||
void write(const char* _data, uint32_t _len)
|
||||
{
|
||||
const uint32_t end = (m_write + _len) % m_control.m_size;
|
||||
uint32_t wrap = 0;
|
||||
char* to = &m_buffer[m_write];
|
||||
|
||||
if (end < m_write)
|
||||
{
|
||||
wrap = m_control.m_size - m_write;
|
||||
memcpy(to, _data, wrap);
|
||||
_data += wrap;
|
||||
to = (char*)&m_buffer[0];
|
||||
}
|
||||
|
||||
memcpy(to, _data, _len-wrap);
|
||||
|
||||
m_write = end;
|
||||
}
|
||||
|
||||
void write(ReadRingBufferT<Control>& _read, uint32_t _len)
|
||||
{
|
||||
const uint32_t end = (_read.m_read + _len) % _read.m_control.m_size;
|
||||
uint32_t wrap = 0;
|
||||
const char* from = &_read.m_buffer[_read.m_read];
|
||||
|
||||
if (end < _read.m_read)
|
||||
{
|
||||
wrap = _read.m_control.m_size - _read.m_read;
|
||||
write(from, wrap);
|
||||
from = (const char*)&_read.m_buffer[0];
|
||||
}
|
||||
|
||||
write(from, _len-wrap);
|
||||
|
||||
_read.m_read = end;
|
||||
}
|
||||
|
||||
void skip(uint32_t _len)
|
||||
{
|
||||
m_write += _len;
|
||||
m_write %= m_control.m_size;
|
||||
}
|
||||
|
||||
private:
|
||||
WriteRingBufferT();
|
||||
WriteRingBufferT(const WriteRingBufferT<Control>&);
|
||||
void operator=(const WriteRingBufferT<Control>&);
|
||||
|
||||
Control& m_control;
|
||||
uint32_t m_write;
|
||||
uint32_t m_end;
|
||||
const uint32_t m_size;
|
||||
char* m_buffer;
|
||||
};
|
||||
|
||||
typedef WriteRingBufferT<RingBufferControl> WriteRingBuffer;
|
||||
typedef WriteRingBufferT<SpScRingBufferControl> SpScWriteRingBuffer;
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_RINGBUFFER_H__
|
||||
97
include/bx/rng.h
Normal file
97
include/bx/rng.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_RNG_H__
|
||||
#define __BX_RNG_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
// George Marsaglia's MWC
|
||||
class RngMwc
|
||||
{
|
||||
public:
|
||||
RngMwc(uint32_t _z = 12345, uint32_t _w = 65435)
|
||||
: m_z(_z)
|
||||
, m_w(_w)
|
||||
{
|
||||
}
|
||||
|
||||
void reset(uint32_t _z = 12345, uint32_t _w = 65435)
|
||||
{
|
||||
m_z = _z;
|
||||
m_w = _w;
|
||||
}
|
||||
|
||||
uint32_t gen()
|
||||
{
|
||||
m_z = 36969*(m_z&65535)+(m_z>>16);
|
||||
m_w = 18000*(m_w&65535)+(m_w>>16);
|
||||
return (m_z<<16)+m_w;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_z;
|
||||
uint32_t m_w;
|
||||
};
|
||||
|
||||
// George Marsaglia's FIB
|
||||
class RngFib
|
||||
{
|
||||
public:
|
||||
RngFib()
|
||||
: m_a(9983651)
|
||||
, m_b(95746118)
|
||||
{
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
m_a = 9983651;
|
||||
m_b = 95746118;
|
||||
}
|
||||
|
||||
uint32_t gen()
|
||||
{
|
||||
m_b = m_a+m_b;
|
||||
m_a = m_b-m_a;
|
||||
return m_a;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_a;
|
||||
uint32_t m_b;
|
||||
};
|
||||
|
||||
// George Marsaglia's SHR3
|
||||
class RngShr3
|
||||
{
|
||||
public:
|
||||
RngShr3(uint32_t _jsr = 34221)
|
||||
: m_jsr(_jsr)
|
||||
{
|
||||
}
|
||||
|
||||
void reset(uint32_t _jsr = 34221)
|
||||
{
|
||||
m_jsr = _jsr;
|
||||
}
|
||||
|
||||
uint32_t gen()
|
||||
{
|
||||
m_jsr ^= m_jsr<<17;
|
||||
m_jsr ^= m_jsr>>13;
|
||||
m_jsr ^= m_jsr<<5;
|
||||
return m_jsr;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_jsr;
|
||||
};
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_RNG_H__
|
||||
107
include/bx/sem.h
Normal file
107
include/bx/sem.h
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_SEM_H__
|
||||
#define __BX_SEM_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
#define BX_SEM_CONFIG_POSIX (BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX)
|
||||
|
||||
#if BX_SEM_CONFIG_POSIX
|
||||
# include <semaphore.h>
|
||||
# include <time.h>
|
||||
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
# include <limits.h>
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
namespace bx
|
||||
{
|
||||
#if BX_SEM_CONFIG_POSIX
|
||||
class Semaphore
|
||||
{
|
||||
public:
|
||||
Semaphore()
|
||||
{
|
||||
sem_init(&m_handle, 0, 0);
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
sem_destroy(&m_handle);
|
||||
}
|
||||
|
||||
void post(uint32_t _count = 1)
|
||||
{
|
||||
for (uint32_t ii = 0; ii < _count; ++ii)
|
||||
{
|
||||
sem_post(&m_handle);
|
||||
}
|
||||
}
|
||||
|
||||
bool wait(int32_t _msecs = -1)
|
||||
{
|
||||
#if BX_PLATFORM_NACL
|
||||
BX_CHECK(-1 == _msecs, "NaCl doesn't support sem_timedwait at this moment.");
|
||||
return 0 == sem_wait(&m_handle);
|
||||
#else
|
||||
if (0 > _msecs)
|
||||
{
|
||||
return 0 == sem_wait(&m_handle);
|
||||
}
|
||||
|
||||
timespec ts;
|
||||
ts.tv_sec = _msecs/1000;
|
||||
ts.tv_nsec = (_msecs%1000)*1000;
|
||||
return 0 == sem_timedwait(&m_handle, &ts);
|
||||
#endif // BX_PLATFORM_
|
||||
}
|
||||
|
||||
private:
|
||||
Semaphore(const Semaphore& _rhs); // no copy constructor
|
||||
Semaphore& operator=(const Semaphore& _rhs); // no assignment operator
|
||||
|
||||
sem_t m_handle;
|
||||
};
|
||||
|
||||
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
|
||||
class Semaphore
|
||||
{
|
||||
public:
|
||||
Semaphore()
|
||||
{
|
||||
m_handle = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
|
||||
BX_CHECK(NULL != m_handle, "Failed to create Semaphore!");
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
CloseHandle(m_handle);
|
||||
}
|
||||
|
||||
void post(uint32_t _count = 1) const
|
||||
{
|
||||
ReleaseSemaphore(m_handle, _count, NULL);
|
||||
}
|
||||
|
||||
bool wait(int32_t _msecs = -1) const
|
||||
{
|
||||
DWORD milliseconds = (0 > _msecs) ? INFINITE : _msecs;
|
||||
return WAIT_OBJECT_0 == WaitForSingleObject(m_handle, milliseconds);
|
||||
}
|
||||
|
||||
private:
|
||||
Semaphore(const Semaphore& _rhs); // no copy constructor
|
||||
Semaphore& operator=(const Semaphore& _rhs); // no assignment operator
|
||||
|
||||
HANDLE m_handle;
|
||||
};
|
||||
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_SEM_H__
|
||||
152
include/bx/spscqueue.h
Normal file
152
include/bx/spscqueue.h
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_SPSCQUEUE_H__
|
||||
#define __BX_SPSCQUEUE_H__
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "bx.h"
|
||||
#include "cpu.h"
|
||||
#include "mutex.h"
|
||||
#include "uint32_t.h"
|
||||
|
||||
namespace bx
|
||||
{
|
||||
// http://drdobbs.com/article/print?articleId=210604448&siteSectionName=
|
||||
template <typename Ty>
|
||||
class SpScUnboundedQueueOptimized
|
||||
{
|
||||
public:
|
||||
SpScUnboundedQueueOptimized()
|
||||
: m_first(new Node(NULL) )
|
||||
, m_divider(m_first)
|
||||
, m_last(m_first)
|
||||
{
|
||||
}
|
||||
|
||||
~SpScUnboundedQueueOptimized()
|
||||
{
|
||||
while (NULL != m_first)
|
||||
{
|
||||
Node* node = m_first;
|
||||
m_first = node->m_next;
|
||||
delete node;
|
||||
}
|
||||
}
|
||||
|
||||
void push(Ty* _ptr) // producer only
|
||||
{
|
||||
m_last->m_next = new Node(_ptr);
|
||||
atomicExchangePtr((void**)&m_last, m_last->m_next);
|
||||
while (m_first != m_divider)
|
||||
{
|
||||
Node* node = m_first;
|
||||
m_first = m_first->m_next;
|
||||
delete node;
|
||||
}
|
||||
}
|
||||
|
||||
Ty* peek() // consumer only
|
||||
{
|
||||
if (m_divider != m_last)
|
||||
{
|
||||
Ty* ptr = m_divider->m_next->m_ptr;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Ty* pop() // consumer only
|
||||
{
|
||||
if (m_divider != m_last)
|
||||
{
|
||||
Ty* ptr = m_divider->m_next->m_ptr;
|
||||
atomicExchangePtr((void**)&m_divider, m_divider->m_next);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
SpScUnboundedQueueOptimized(const SpScUnboundedQueueOptimized& _rhs); // no copy constructor
|
||||
SpScUnboundedQueueOptimized& operator=(const SpScUnboundedQueueOptimized& _rhs); // no assignment operator
|
||||
|
||||
struct Node
|
||||
{
|
||||
Node(Ty* _ptr)
|
||||
: m_ptr(_ptr)
|
||||
, m_next(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
Ty* m_ptr;
|
||||
Node* m_next;
|
||||
};
|
||||
|
||||
Node* m_first;
|
||||
Node* m_divider;
|
||||
Node* m_last;
|
||||
};
|
||||
|
||||
template<typename Ty>
|
||||
class SpScUnboundedQueueNaive
|
||||
{
|
||||
public:
|
||||
SpScUnboundedQueueNaive()
|
||||
{
|
||||
}
|
||||
|
||||
~SpScUnboundedQueueNaive()
|
||||
{
|
||||
BX_CHECK(m_queue.empty(), "Queue is not empty!");
|
||||
}
|
||||
|
||||
void push(Ty* _item)
|
||||
{
|
||||
bx::LwMutexScope lock(m_mutex);
|
||||
m_queue.push_back(_item);
|
||||
}
|
||||
|
||||
Ty* peek()
|
||||
{
|
||||
bx::LwMutexScope lock(m_mutex);
|
||||
if (!m_queue.empty() )
|
||||
{
|
||||
return m_queue.front();
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Ty* pop()
|
||||
{
|
||||
bx::LwMutexScope lock(m_mutex);
|
||||
if (!m_queue.empty() )
|
||||
{
|
||||
Ty* item = m_queue.front();
|
||||
m_queue.pop_front();
|
||||
return item;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
bx::LwMutex m_mutex;
|
||||
std::list<Ty*> m_queue;
|
||||
};
|
||||
|
||||
#if BX_CONFIG_SPSCQUEUE_USE_NAIVE
|
||||
# define SpScUnboundedQueue SpScUnboundedQueueNaive
|
||||
#else
|
||||
# define SpScUnboundedQueue SpScUnboundedQueueOptimized
|
||||
#endif // BX_CONFIG_NAIVE
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_RINGBUFFER_H__
|
||||
53
include/bx/timer.h
Normal file
53
include/bx/timer.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
#ifndef __BX_TIMER_H__
|
||||
#define __BX_TIMER_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
#if BX_PLATFORM_ANDROID
|
||||
# include <time.h> // clock, clock_gettime
|
||||
#elif BX_PLATFORM_NACL | BX_PLATFORM_LINUX
|
||||
# include <sys/time.h> // gettimeofday
|
||||
#endif // BX_PLATFORM_
|
||||
|
||||
namespace bx
|
||||
{
|
||||
inline int64_t getHPCounter()
|
||||
{
|
||||
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
LARGE_INTEGER li;
|
||||
// Performance counter value may unexpectedly leap forward
|
||||
// http://support.microsoft.com/kb/274323
|
||||
QueryPerformanceCounter(&li);
|
||||
int64_t i64 = li.QuadPart;
|
||||
#elif BX_PLATFORM_ANDROID
|
||||
int64_t i64 = clock();
|
||||
#else
|
||||
struct timeval now;
|
||||
gettimeofday(&now, 0);
|
||||
int64_t i64 = now.tv_sec*1000000 + now.tv_usec;
|
||||
#endif // BNET_PLATFORM_
|
||||
static int64_t offset = i64;
|
||||
return i64 - offset;
|
||||
}
|
||||
|
||||
inline int64_t getHPFrequency()
|
||||
{
|
||||
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
|
||||
LARGE_INTEGER li;
|
||||
QueryPerformanceFrequency(&li);
|
||||
return li.QuadPart;
|
||||
#elif BX_PLATFORM_ANDROID
|
||||
return CLOCKS_PER_SEC;
|
||||
#else
|
||||
return 1000000;
|
||||
#endif // BNET_PLATFORM_
|
||||
}
|
||||
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_TIMER_H__
|
||||
454
include/bx/uint32_t.h
Normal file
454
include/bx/uint32_t.h
Normal file
@@ -0,0 +1,454 @@
|
||||
/*
|
||||
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
|
||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||
*/
|
||||
|
||||
// Copyright 2006 Mike Acton <macton@gmail.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||||
// copy of this software and associated documentation files (the "Software"),
|
||||
// to deal in the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE
|
||||
|
||||
#ifndef __BX_UINT32_T_H__
|
||||
#define __BX_UINT32_T_H__
|
||||
|
||||
#include "bx.h"
|
||||
|
||||
#if BX_COMPILER_MSVC
|
||||
# if BX_PLATFORM_WINDOWS
|
||||
# include <math.h> // math.h is included because VS bitches:
|
||||
// warning C4985: 'ceil': attributes not present on previous declaration.
|
||||
// must be included before intrin.h.
|
||||
# include <intrin.h>
|
||||
# pragma intrinsic(_BitScanForward)
|
||||
# pragma intrinsic(_BitScanReverse)
|
||||
# endif // BX_PLATFORM_WINDOWS
|
||||
#endif // BX_COMPILER_MSVC
|
||||
|
||||
namespace bx
|
||||
{
|
||||
inline uint32_t uint32_li(uint32_t _a)
|
||||
{
|
||||
return _a;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_dec(uint32_t _a)
|
||||
{
|
||||
return _a - 1;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_inc(uint32_t _a)
|
||||
{
|
||||
return _a + 1;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_not(uint32_t _a)
|
||||
{
|
||||
return ~_a;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_neg(uint32_t _a)
|
||||
{
|
||||
return -(int32_t)_a;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_ext(uint32_t _a)
|
||||
{
|
||||
return ( (int32_t)_a)>>31;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_and(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a & _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_xor(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a ^ _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return !_a != !_b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_andc(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a & ~_b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_or(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a | _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_sll(uint32_t _a, int _sa)
|
||||
{
|
||||
return _a << _sa;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_srl(uint32_t _a, int _sa)
|
||||
{
|
||||
return _a >> _sa;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_sra(uint32_t _a, int _sa)
|
||||
{
|
||||
return ( (int32_t)_a) >> _sa;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_rol(uint32_t _a, int _sa)
|
||||
{
|
||||
return ( _a << _sa) | (_a >> (32-_sa) );
|
||||
}
|
||||
|
||||
inline uint32_t uint32_ror(uint32_t _a, int _sa)
|
||||
{
|
||||
return ( _a >> _sa) | (_a << (32-_sa) );
|
||||
}
|
||||
|
||||
inline uint32_t uint32_add(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a + _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a - _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a * _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_div(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return (_a / _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_mod(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return (_a % _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return -(_a == _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return -(_a != _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return -(_a < _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return -(_a <= _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return -(_a > _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return -(_a >= _b);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_setnz(uint32_t _a)
|
||||
{
|
||||
return -!!_a;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint32_t add = uint32_add(_a, _b);
|
||||
const uint32_t lt = uint32_cmplt(add, _a);
|
||||
const uint32_t result = uint32_or(add, lt);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint32_t sub = uint32_sub(_a, _b);
|
||||
const uint32_t le = uint32_cmple(sub, _a);
|
||||
const uint32_t result = uint32_and(sub, le);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint64_t mul = (uint64_t)_a * (uint64_t)_b;
|
||||
const uint32_t hi = mul >> 32;
|
||||
const uint32_t nz = uint32_setnz(hi);
|
||||
const uint32_t result = uint32_or(uint32_t(mul), nz);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint32_t mask = uint32_ext(test);
|
||||
const uint32_t sel_a = uint32_and(_a, mask);
|
||||
const uint32_t sel_b = uint32_andc(_b, mask);
|
||||
const uint32_t result = uint32_or(sel_a, sel_b);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint32_t sel_a = uint32_and(_a, _mask);
|
||||
const uint32_t sel_b = uint32_andc(_b, _mask);
|
||||
const uint32_t result = uint32_or(sel_a, sel_b);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
inline uint32_t uint32_imin(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint32_t a_sub_b = uint32_sub(_a, _b);
|
||||
const uint32_t result = uint32_sels(a_sub_b, _a, _b);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_imax(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
const uint32_t b_sub_a = uint32_sub(_b, _a);
|
||||
const uint32_t result = uint32_sels(b_sub_a, _a, _b);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_min(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a > _b ? _b : _a;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_max(uint32_t _a, uint32_t _b)
|
||||
{
|
||||
return _a > _b ? _a : _b;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)
|
||||
{
|
||||
const uint32_t inc = uint32_inc(_val);
|
||||
const uint32_t max_diff = uint32_sub(_max, _val);
|
||||
const uint32_t neg_max_diff = uint32_neg(max_diff);
|
||||
const uint32_t max_or = uint32_or(max_diff, neg_max_diff);
|
||||
const uint32_t max_diff_nz = uint32_ext(max_or);
|
||||
const uint32_t result = uint32_selb(max_diff_nz, inc, _min);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)
|
||||
{
|
||||
const uint32_t dec = uint32_dec(_val);
|
||||
const uint32_t min_diff = uint32_sub(_min, _val);
|
||||
const uint32_t neg_min_diff = uint32_neg(min_diff);
|
||||
const uint32_t min_or = uint32_or(min_diff, neg_min_diff);
|
||||
const uint32_t min_diff_nz = uint32_ext(min_or);
|
||||
const uint32_t result = uint32_selb(min_diff_nz, dec, _max);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cntbits_ref(uint32_t _val)
|
||||
{
|
||||
const uint32_t tmp0 = uint32_srl(_val, 1);
|
||||
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
|
||||
const uint32_t tmp2 = uint32_sub(_val, tmp1);
|
||||
const uint32_t tmp3 = uint32_and(tmp2, 0xc30c30c3);
|
||||
const uint32_t tmp4 = uint32_srl(tmp2, 2);
|
||||
const uint32_t tmp5 = uint32_and(tmp4, 0xc30c30c3);
|
||||
const uint32_t tmp6 = uint32_srl(tmp2, 4);
|
||||
const uint32_t tmp7 = uint32_and(tmp6, 0xc30c30c3);
|
||||
const uint32_t tmp8 = uint32_add(tmp3, tmp5);
|
||||
const uint32_t tmp9 = uint32_add(tmp7, tmp8);
|
||||
const uint32_t tmpA = uint32_srl(tmp9, 6);
|
||||
const uint32_t tmpB = uint32_add(tmp9, tmpA);
|
||||
const uint32_t tmpC = uint32_srl(tmpB, 12);
|
||||
const uint32_t tmpD = uint32_srl(tmpB, 24);
|
||||
const uint32_t tmpE = uint32_add(tmpB, tmpC);
|
||||
const uint32_t tmpF = uint32_add(tmpD, tmpE);
|
||||
const uint32_t result = uint32_and(tmpF, 0x3f);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Count number of bits set.
|
||||
inline uint32_t uint32_cntbits(uint32_t _val)
|
||||
{
|
||||
#if BX_COMPILER_GCC
|
||||
return __builtin_popcount(_val);
|
||||
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
|
||||
return __popcnt(_val);
|
||||
#else
|
||||
return uint32_cntbits_ref(_val);
|
||||
#endif // BX_COMPILER_GCC
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cntlz_ref(uint32_t _val)
|
||||
{
|
||||
const uint32_t tmp0 = uint32_srl(_val, 1);
|
||||
const uint32_t tmp1 = uint32_or(tmp0, _val);
|
||||
const uint32_t tmp2 = uint32_srl(tmp1, 2);
|
||||
const uint32_t tmp3 = uint32_or(tmp2, tmp1);
|
||||
const uint32_t tmp4 = uint32_srl(tmp3, 4);
|
||||
const uint32_t tmp5 = uint32_or(tmp4, tmp3);
|
||||
const uint32_t tmp6 = uint32_srl(tmp5, 8);
|
||||
const uint32_t tmp7 = uint32_or(tmp6, tmp5);
|
||||
const uint32_t tmp8 = uint32_srl(tmp7, 16);
|
||||
const uint32_t tmp9 = uint32_or(tmp8, tmp7);
|
||||
const uint32_t tmpA = uint32_not(tmp9);
|
||||
const uint32_t result = uint32_cntbits(tmpA);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Count number of leading zeros.
|
||||
inline uint32_t uint32_cntlz(uint32_t _val)
|
||||
{
|
||||
#if BX_COMPILER_GCC
|
||||
return __builtin_clz(_val);
|
||||
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
|
||||
unsigned long index;
|
||||
_BitScanReverse(&index, _val);
|
||||
return 31 - index;
|
||||
#else
|
||||
return uint32_cntlz_ref(_val);
|
||||
#endif // BX_COMPILER_
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cnttz_ref(uint32_t _val)
|
||||
{
|
||||
const uint32_t tmp0 = uint32_not(_val);
|
||||
const uint32_t tmp1 = uint32_dec(_val);
|
||||
const uint32_t tmp2 = uint32_and(tmp0, tmp1);
|
||||
const uint32_t result = uint32_cntbits(tmp2);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_cnttz(uint32_t _val)
|
||||
{
|
||||
#if BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, _val);
|
||||
return index;
|
||||
#else
|
||||
return uint32_cnttz_ref(_val);
|
||||
#endif // BX_COMPILER_
|
||||
}
|
||||
|
||||
// shuffle:
|
||||
// ---- ---- ---- ---- fedc ba98 7654 3210
|
||||
// to:
|
||||
// -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
|
||||
inline uint32_t uint32_part1by1(uint32_t _a)
|
||||
{
|
||||
const uint32_t val = uint32_and(_a, 0xffff);
|
||||
|
||||
const uint32_t tmp0 = uint32_sll(val, 8);
|
||||
const uint32_t tmp1 = uint32_xor(val, tmp0);
|
||||
const uint32_t tmp2 = uint32_and(tmp1, 0x00ff00ff);
|
||||
|
||||
const uint32_t tmp3 = uint32_sll(tmp2, 4);
|
||||
const uint32_t tmp4 = uint32_xor(tmp2, tmp3);
|
||||
const uint32_t tmp5 = uint32_and(tmp4, 0x0f0f0f0f);
|
||||
|
||||
const uint32_t tmp6 = uint32_sll(tmp5, 2);
|
||||
const uint32_t tmp7 = uint32_xor(tmp5, tmp6);
|
||||
const uint32_t tmp8 = uint32_and(tmp7, 0x33333333);
|
||||
|
||||
const uint32_t tmp9 = uint32_sll(tmp8, 1);
|
||||
const uint32_t tmpA = uint32_xor(tmp8, tmp9);
|
||||
const uint32_t result = uint32_and(tmpA, 0x55555555);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// shuffle:
|
||||
// ---- ---- ---- ---- ---- --98 7654 3210
|
||||
// to:
|
||||
// ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
|
||||
inline uint32_t uint32_part1by2(uint32_t _a)
|
||||
{
|
||||
const uint32_t val = uint32_and(_a, 0x3ff);
|
||||
|
||||
const uint32_t tmp0 = uint32_sll(val, 16);
|
||||
const uint32_t tmp1 = uint32_xor(val, tmp0);
|
||||
const uint32_t tmp2 = uint32_and(tmp1, 0xff0000ff);
|
||||
|
||||
const uint32_t tmp3 = uint32_sll(tmp2, 8);
|
||||
const uint32_t tmp4 = uint32_xor(tmp2, tmp3);
|
||||
const uint32_t tmp5 = uint32_and(tmp4, 0x0300f00f);
|
||||
|
||||
const uint32_t tmp6 = uint32_sll(tmp5, 4);
|
||||
const uint32_t tmp7 = uint32_xor(tmp5, tmp6);
|
||||
const uint32_t tmp8 = uint32_and(tmp7, 0x030c30c3);
|
||||
|
||||
const uint32_t tmp9 = uint32_sll(tmp8, 2);
|
||||
const uint32_t tmpA = uint32_xor(tmp8, tmp9);
|
||||
const uint32_t result = uint32_and(tmpA, 0x09249249);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_testpow2(uint32_t _a)
|
||||
{
|
||||
const uint32_t tmp0 = uint32_not(_a);
|
||||
const uint32_t tmp1 = uint32_inc(tmp0);
|
||||
const uint32_t tmp2 = uint32_and(_a, tmp1);
|
||||
const uint32_t tmp3 = uint32_cmpeq(tmp2, _a);
|
||||
const uint32_t tmp4 = uint32_cmpneq(_a, 0);
|
||||
const uint32_t result = uint32_and(tmp3, tmp4);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t uint32_nextpow2(uint32_t _a)
|
||||
{
|
||||
const uint32_t tmp0 = uint32_dec(_a);
|
||||
const uint32_t tmp1 = uint32_srl(tmp0, 1);
|
||||
const uint32_t tmp2 = uint32_or(tmp0, tmp1);
|
||||
const uint32_t tmp3 = uint32_srl(tmp2, 2);
|
||||
const uint32_t tmp4 = uint32_or(tmp2, tmp3);
|
||||
const uint32_t tmp5 = uint32_srl(tmp4, 4);
|
||||
const uint32_t tmp6 = uint32_or(tmp4, tmp5);
|
||||
const uint32_t tmp7 = uint32_srl(tmp6, 8);
|
||||
const uint32_t tmp8 = uint32_or(tmp6, tmp7);
|
||||
const uint32_t tmp9 = uint32_srl(tmp8, 16);
|
||||
const uint32_t tmpA = uint32_or(tmp8, tmp9);
|
||||
const uint32_t result = uint32_inc(tmpA);
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace bx
|
||||
|
||||
#endif // __BX_UINT32_T_H__
|
||||
Reference in New Issue
Block a user