Initial commit.

This commit is contained in:
Branimir Karadzic
2012-04-03 20:17:55 -07:00
commit 4eb80393d1
30 changed files with 4389 additions and 0 deletions

99
include/bx/blockalloc.h Normal file
View File

@@ -0,0 +1,99 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_BLOCKALLOC_H__
#define __BX_BLOCKALLOC_H__
#include "bx.h"
namespace bx
{
class BlockAlloc
{
public:
static const uint16_t invalidIndex = 0xffff;
static const uint32_t minElementSize = 2;
BlockAlloc()
: m_data(NULL)
, m_num(0)
, m_size(0)
, m_numFree(0)
, m_freeIndex(invalidIndex)
{
}
BlockAlloc(void* _data, uint16_t _num, uint16_t _size)
: m_data(_data)
, m_num(_num)
, m_size(_size)
, m_numFree(_num)
, m_freeIndex(0)
{
char* data = (char*)_data;
uint16_t* index = (uint16_t*)_data;
for (uint16_t ii = 0; ii < m_num-1; ++ii)
{
*index = ii+1;
data += m_size;
index = (uint16_t*)data;
}
*index = invalidIndex;
}
~BlockAlloc()
{
}
void* alloc()
{
if (invalidIndex == m_freeIndex)
{
return NULL;
}
void* obj = ( (char*)m_data) + m_freeIndex*m_size;
m_freeIndex = *( (uint16_t*)obj);
--m_numFree;
return obj;
}
void free(void* _obj)
{
uint16_t index = getIndex(_obj);
BX_CHECK(index >= 0 && index < m_num, "index %d, m_num %d", index, m_num);
*( (uint16_t*)_obj) = m_freeIndex;
m_freeIndex = index;
++m_numFree;
}
uint16_t getIndex(void* _obj) const
{
return (uint16_t)( ( (char*)_obj - (char*)m_data ) / m_size);
}
uint16_t getNumFree() const
{
return m_numFree;
}
void* getFromIndex(uint16_t _index)
{
return (char*)m_data + _index*m_size;
}
private:
void* m_data;
uint16_t m_num;
uint16_t m_size;
uint16_t m_numFree;
uint16_t m_freeIndex;
};
} // namespace bx
#endif // __BX_BLOCKALLOC_H__

23
include/bx/bx.h Normal file
View File

@@ -0,0 +1,23 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_H__
#define __BX_H__
#include "platform.h"
#include "macros.h"
namespace bx
{
}// namespace bx
#ifndef BX_NAMESPACE
# define BX_NAMESPACE 0
#elif BX_NAMESPACE
using namespace bx;
#endif // BX_NAMESPACE
#endif // __BX_H__

151
include/bx/commandline.h Normal file
View File

@@ -0,0 +1,151 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_COMMANDLINE_H__
#define __BX_COMMANDLINE_H__
#include "bx.h"
namespace bx
{
class CommandLine
{
public:
CommandLine()
: m_argc(__argc)
, m_argv(__argv)
{
}
CommandLine(int _argc, char const* const* _argv)
: m_argc(_argc)
, m_argv(_argv)
{
}
const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1)
{
const char* result = _findOption(_short, _long, _numParams);
return result;
}
bool hasArg(const char _short, const char* _long = NULL)
{
const char* arg = findOption(_short, _long, 0);
return NULL != arg;
}
bool hasArg(const char* _long)
{
const char* arg = findOption('\0', _long, 0);
return NULL != arg;
}
bool hasArg(const char*& _value, const char _short, const char* _long = NULL)
{
const char* arg = findOption(_short, _long, 1);
_value = arg;
return NULL != arg;
}
bool hasArg(int& _value, const char _short, const char* _long = NULL)
{
const char* arg = findOption(_short, _long, 1);
if (NULL != arg)
{
_value = atoi(arg);
return true;
}
return false;
}
bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL)
{
const char* arg = findOption(_short, _long, 1);
if (NULL != arg)
{
_value = atoi(arg);
return true;
}
return false;
}
bool hasArg(bool& _value, const char _short, const char* _long = NULL)
{
const char* arg = findOption(_short, _long, 1);
if (NULL != arg)
{
if ('0' == *arg || _stricmp(arg, "false") )
{
_value = false;
}
else if ('0' != *arg || _stricmp(arg, "true") )
{
_value = true;
}
return true;
}
return false;
}
private:
const char* _findOption(const char _short, const char* _long, int _numParams)
{
for (int ii = 0; ii < m_argc; ++ii)
{
const char* arg = m_argv[ii];
if ('-' == *arg)
{
++arg;
if (_short == *arg)
{
if (1 == strlen(arg) )
{
if (0 == _numParams)
{
return "";
}
else if (ii+_numParams < m_argc
&& '-' != *m_argv[ii+1] )
{
return m_argv[ii+1];
}
return NULL;
}
}
else if (NULL != _long
&& '-' == *arg
&& 0 == _stricmp(arg+1, _long) )
{
if (0 == _numParams)
{
return "";
}
else if (ii+_numParams < m_argc
&& '-' != *m_argv[ii+1] )
{
return m_argv[ii+1];
}
return NULL;
}
}
}
return NULL;
}
int m_argc;
char const* const* m_argv;
};
} // namespace bx
#endif /// __BX_COMMANDLINE_H__

19
include/bx/countof.h Normal file
View File

@@ -0,0 +1,19 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_COUNTOF_H__
#define __BX_COUNTOF_H__
#include "bx.h"
namespace bx
{
// http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/
template<typename T, size_t N> char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N];
#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) )
} // namespace bx
#endif // __BX_COUNTOF_H__

110
include/bx/cpu.h Normal file
View File

@@ -0,0 +1,110 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_CPU_H__
#define __BX_CPU_H__
#include "bx.h"
#if BX_COMPILER_MSVC
# if BX_PLATFORM_XBOX360
# include <ppcintrinsics.h>
# include <xtl.h>
# else
# include <math.h> // math.h is included because VS bitches:
// warning C4985: 'ceil': attributes not present on previous declaration.
// must be included before intrin.h.
# include <intrin.h>
# include <windows.h>
# endif // !BX_PLATFORM_XBOX360
extern "C" void _ReadBarrier();
extern "C" void _WriteBarrier();
extern "C" void _ReadWriteBarrier();
# pragma intrinsic(_ReadBarrier)
# pragma intrinsic(_WriteBarrier)
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
#endif // BX_COMPILER_MSVC
namespace bx
{
#if BX_COMPILER_MSVC
# define BX_CACHE_LINE_ALIGN_MARKER() __declspec(align(BX_CACHE_LINE_SIZE) ) struct {}
#else
# define BX_CACHE_LINE_ALIGN_MARKER() struct {} __attribute__( (__aligned__(BX_CACHE_LINE_SIZE) ) )
#endif // BX_COMPILER_
#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER()
inline void readBarrier()
{
#if BX_COMPILER_MSVC
_ReadBarrier();
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
asm volatile("":::"memory");
#endif // BX_COMPILER
}
inline void writeBarrier()
{
#if BX_COMPILER_MSVC
_WriteBarrier();
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
asm volatile("":::"memory");
#endif // BX_COMPILER
}
inline void readWriteBarrier()
{
#if BX_COMPILER_MSVC
_ReadWriteBarrier();
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
asm volatile("":::"memory");
#endif // BX_COMPILER
}
inline void memoryBarrier()
{
#if BX_PLATFORM_XBOX360
__lwsync();
#elif BX_COMPILER_MSVC
_mm_mfence();
#else
__sync_synchronize();
// asm volatile("mfence":::"memory");
#endif // BX_COMPILER
}
inline int32_t atomicIncr(volatile void* _var)
{
#if BX_COMPILER_MSVC
return _InterlockedIncrement( (volatile LONG*)(_var) );
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
return __sync_fetch_and_add( (volatile int32_t*)_var, 1);
#endif // BX_COMPILER
}
inline int32_t atomicDecr(volatile void* _var)
{
#if BX_COMPILER_MSVC
return _InterlockedDecrement( (volatile LONG*)(_var) );
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
return __sync_fetch_and_sub( (volatile int32_t*)_var, 1);
#endif // BX_COMPILER
}
inline void* atomicExchangePtr(void** _target, void* _ptr)
{
#if BX_COMPILER_MSVC
return InterlockedExchangePointer(_target, _ptr);
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
return __sync_lock_test_and_set(_target, _ptr);
#endif // BX_COMPILER
}
} // namespace bx
#endif // __BX_CPU_H__

31
include/bx/debug.h Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_DEBUG_H__
#define __BX_DEBUG_H__
#include "bx.h"
namespace bx
{
inline void debugBreak()
{
#if BX_COMPILER_MSVC
__debugbreak();
#elif BX_CPU_ARM
asm("bkpt 0");
#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG)
// NaCl doesn't like int 3:
// NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules.
__asm__ ("int $3");
#else // cross platform implementation
int* int3 = (int*)3L;
*int3 = 3;
#endif // BX
}
} // namespace bx
#endif // __BX_DEBUG_H__

71
include/bx/endian.h Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_ENDIAN_H__
#define __BX_ENDIAN_H__
#include "bx.h"
namespace bx
{
inline uint16_t endianSwap(uint16_t _in)
{
return (_in>>8) | (_in<<8);
}
inline uint32_t endianSwap(uint32_t _in)
{
return (_in>>24) | (_in<<24)
| ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8)
;
}
inline uint64_t endianSwap(uint64_t _in)
{
return (_in>>56) | (_in<<56)
| ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40)
| ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24)
| ( (_in&UINT64_C(0x000000ff00000000) )>>8) | ( (_in&UINT64_C(0x00000000ff000000) )<<8)
;
}
inline int16_t endianSwap(int16_t _in)
{
return (int16_t)endianSwap( (uint16_t)_in);
}
inline int32_t endianSwap(int32_t _in)
{
return (int32_t)endianSwap( (uint32_t)_in);
}
inline int64_t endianSwap(int64_t _in)
{
return (int64_t)endianSwap( (uint64_t)_in);
}
template <typename Ty>
inline Ty littleEndian(Ty& _in)
{
#if BX_CPU_ENDIAN_BIG
endianSwap(_in);
#else
return _in;
#endif // BX_CPU_ENDIAN_BIG
}
template <typename Ty>
inline Ty bigEndian(Ty& _in)
{
#if BX_CPU_ENDIAN_LITTLE
return endianSwap(_in);
#else
return _in;
#endif // BX_CPU_ENDIAN_LITTLE
}
} // namespace bx
#endif // __BX_ENDIAN_H__

227
include/bx/float4_neon.h Normal file
View File

@@ -0,0 +1,227 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_NEON_H__
#define __BX_FLOAT4_NEON_H__
#include <arm_neon.h>
namespace bx
{
// Reference:
// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/
// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/
// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/
// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/
typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) );
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
{ \
float4_t result; \
result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
return result; \
}
#include "float4_swizzle.inl"
#undef IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
{
return _a; //_mm_movelh_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
{
return _a; //_mm_movelh_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
{
return _a; //_mm_movehl_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
{
return _a; //_mm_movehl_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
{
return _a; //_mm_unpacklo_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
{
return _a; //_mm_unpacklo_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
{
return _a; //_mm_unpackhi_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
{
return _a; //_mm_unpackhi_ps(_b, _a);
}
BX_FLOAT4_INLINE float float4_x(float4_t _a)
{
return _a.fxyzw[0];
}
BX_FLOAT4_INLINE float float4_y(float4_t _a)
{
return _a.fxyzw[1];
}
BX_FLOAT4_INLINE float float4_z(float4_t _a)
{
return _a.fxyzw[2];
}
BX_FLOAT4_INLINE float float4_w(float4_t _a)
{
return _a.fxyzw[3];
}
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
{
const float32_t val[4] = {_x, _y, _z, _w};
return __builtin_neon_vld1v4sf(val);
}
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
{
const uint32_t val[4] = {_x, _y, _z, _w};
return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val);
}
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
{
return __builtin_neon_vdup_nv4sf(_a);
}
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
{
return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a);
}
BX_FLOAT4_INLINE float4_t float4_zero()
{
return vdupq_n_f32(0.0f);
}
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
{
return vaddq_f32(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
{
return vsubq_f32(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
{
return vmulq_f32(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
{
return vrecpeq_f32(_a);
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
{
return vrsqrteq_f32(_a);
}
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
{
return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0);
}
//BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
//{
// return _mm_andnot_ps(_b, _a);
//}
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
{
return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0);
}
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
{
const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
const uint32x4_t add = vaddq_u32(tmp0, tmp1);
const float4_t result = vreinterpretq_f32_u32(add);
return result;
}
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
{
const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
const uint32x4_t sub = vsubq_u32(tmp0, tmp1);
const float4_t result = vreinterpretq_f32_u32(sub);
return result;
}
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
{
const uint32x4_t tmp = vreinterpretq_u32_f32(_a);
const uint32x4_t shift = vshlq_n_u32(tmp, _count);
const float4_t result = vreinterpretq_f32_u32(shift);
return result;
}
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
{
const uint32x4_t tmp = vreinterpretq_i32_f32(_a);
const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0);
const float4_t result = vreinterpretq_f32_u32(shift);
return result;
}
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
{
const int32x4_t a = vreinterpretq_s32_f32(_a);
const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1);
const float4_t result = vreinterpretq_f32_s32(shift);
return result;
}
} // namespace bx
#define float4_div_nr float4_div_nr_ni
#define float4_div float4_div_nr_ni
#include "float4_ni.h"
#endif // __BX_FLOAT4_NEON_H__

407
include/bx/float4_ni.h Normal file
View File

@@ -0,0 +1,407 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_NI_H__
#define __BX_FLOAT4_NI_H__
namespace bx
{
BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b)
{
const float4_t xAyB = float4_shuf_xAyB(_a, _b);
const float4_t zCwD = float4_shuf_zCwD(_a, _b);
const float4_t result = float4_shuf_xyAB(xAyB, zCwD);
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b)
{
const float4_t xAyB = float4_shuf_xAyB(_a, _b);
const float4_t zCwD = float4_shuf_zCwD(_a, _b);
const float4_t result = float4_shuf_zwCD(xAyB, zCwD);
return result;
}
BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c)
{
const float4_t mul = float4_mul(_a, _b);
const float4_t result = float4_add(mul, _c);
return result;
}
BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c)
{
const float4_t mul = float4_mul(_a, _b);
const float4_t result = float4_sub(_c, mul);
return result;
}
BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b)
{
const float4_t oneish = float4_isplat(0x3f800001);
const float4_t est = float4_rcp_est(_b);
const float4_t iter0 = float4_mul(_a, est);
const float4_t tmp1 = float4_nmsub(_b, est, oneish);
const float4_t result = float4_madd(tmp1, iter0, iter0);
return result;
}
BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a)
{
const float4_t one = float4_splat(1.0f);
const float4_t result = float4_div(one, _a);
return result;
}
BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a)
{
const float4_t zwxy = float4_swiz_zwxy(_a);
const float4_t tmp0 = float4_or(_a, zwxy);
const float4_t tmp1 = float4_swiz_yyyy(_a);
const float4_t tmp2 = float4_or(tmp0, tmp1);
const float4_t mf000 = float4_ild(-1, 0, 0, 0);
const float4_t result = float4_and(tmp2, mf000);
return result;
}
BX_FLOAT4_INLINE float4_t float4_orc_ni(float4_t _a, float4_t _b)
{
const float4_t aorb = float4_or(_a, _b);
const float4_t mffff = float4_isplat(-1);
const float4_t result = float4_xor(aorb, mffff);
return result;
}
BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a)
{
const float4_t zero = float4_zero();
const float4_t result = float4_sub(zero, _a);
return result;
}
BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b)
{
const float4_t sel_a = float4_and(_a, _mask);
const float4_t sel_b = float4_andc(_b, _mask);
const float4_t result = float4_or(sel_a, sel_b);
return result;
}
BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b)
{
const float4_t mask = float4_sra(_test, 31);
const float4_t result = float4_selb(mask, _a, _b);
return result;
}
BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a)
{
const float4_t mffff = float4_isplat(-1);
const float4_t result = float4_xor(_a, mffff);
return result;
}
BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a)
{
const float4_t a_neg = float4_neg(_a);
const float4_t result = float4_max(a_neg, _a);
return result;
}
BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max)
{
const float4_t tmp = float4_min(_a, _max);
const float4_t result = float4_max(tmp, _min);
return result;
}
BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s)
{
const float4_t ba = float4_sub(_b, _a);
const float4_t result = float4_madd(_s, ba, _a);
return result;
}
BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a)
{
const float4_t half = float4_splat(0.5f);
const float4_t one = float4_splat(1.0f);
const float4_t zero = float4_zero();
const float4_t tmp0 = float4_rsqrt_est(_a);
const float4_t tmp1 = float4_madd(tmp0, _a, zero);
const float4_t tmp2 = float4_madd(tmp1, half, zero);
const float4_t tmp3 = float4_nmsub(tmp0, tmp1, one);
const float4_t result = float4_madd(tmp3, tmp2, tmp1);
return result;
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)
{
const float4_t one = float4_splat(1.0f);
const float4_t sqrt = float4_sqrt(_a);
const float4_t result = float4_div(one, sqrt);
return result;
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a)
{
const float4_t rsqrt = float4_rsqrt_est(_a);
const float4_t iter0 = float4_mul(_a, rsqrt);
const float4_t iter1 = float4_mul(iter0, rsqrt);
const float4_t half = float4_splat(0.5f);
const float4_t half_rsqrt = float4_mul(half, rsqrt);
const float4_t three = float4_splat(3.0f);
const float4_t three_sub_iter1 = float4_sub(three, iter1);
const float4_t result = float4_mul(half_rsqrt, three_sub_iter1);
return result;
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a)
{
const float4_t half = float4_splat(0.5f);
const float4_t ah = float4_mul(half, _a);
const float4_t ashift = float4_sra(_a, 1);
const float4_t magic = float4_isplat(0x5f3759df);
const float4_t msuba = float4_isub(magic, ashift);
const float4_t msubasq = float4_mul(msuba, msuba);
const float4_t tmp0 = float4_splat(1.5f);
const float4_t tmp1 = float4_mul(ah, msubasq);
const float4_t tmp2 = float4_sub(tmp0, tmp1);
const float4_t result = float4_mul(msuba, tmp2);
return result;
}
namespace float4_logexp_detail
{
BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b)
{
return float4_splat(_b);
}
BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c)
{
const float4_t bbbb = float4_splat(_b);
const float4_t poly0 = float4_poly0(_a, _c);
const float4_t result = float4_madd(poly0, _a, bbbb);
return result;
}
BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d)
{
const float4_t bbbb = float4_splat(_b);
const float4_t poly = float4_poly1(_a, _c, _d);
const float4_t result = float4_madd(poly, _a, bbbb);
return result;
}
BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e)
{
const float4_t bbbb = float4_splat(_b);
const float4_t poly = float4_poly2(_a, _c, _d, _e);
const float4_t result = float4_madd(poly, _a, bbbb);
return result;
}
BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f)
{
const float4_t bbbb = float4_splat(_b);
const float4_t poly = float4_poly3(_a, _c, _d, _e, _f);
const float4_t result = float4_madd(poly, _a, bbbb);
return result;
}
BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g)
{
const float4_t bbbb = float4_splat(_b);
const float4_t poly = float4_poly4(_a, _c, _d, _e, _f, _g);
const float4_t result = float4_madd(poly, _a, bbbb);
return result;
}
BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a)
{
#if 1
const float4_t result = float4_poly5(_a
, 3.11578814719469302614f, -3.32419399085241980044f
, 2.59883907202499966007f, -1.23152682416275988241f
, 0.318212422185251071475f, -0.0344359067839062357313f
);
#elif 0
const float4_t result = float4_poly4(_a
, 2.8882704548164776201f, -2.52074962577807006663f
, 1.48116647521213171641f, -0.465725644288844778798f
, 0.0596515482674574969533f
);
#elif 0
const float4_t result = float4_poly3(_a
, 2.61761038894603480148f, -1.75647175389045657003f
, 0.688243882994381274313f, -0.107254423828329604454f
);
#else
const float4_t result = float4_poly2(_a
, 2.28330284476918490682f, -1.04913055217340124191f
, 0.204446009836232697516f
);
#endif
return result;
}
BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a)
{
#if 1
const float4_t result = float4_poly5(_a
, 9.9999994e-1f, 6.9315308e-1f
, 2.4015361e-1f, 5.5826318e-2f
, 8.9893397e-3f, 1.8775767e-3f
);
#elif 0
const float4_t result = float4_poly4(_a
, 1.0000026f, 6.9300383e-1f
, 2.4144275e-1f, 5.2011464e-2f
, 1.3534167e-2f
);
#elif 0
const float4_t result = float4_poly3(_a
, 9.9992520e-1f, 6.9583356e-1f
, 2.2606716e-1f, 7.8024521e-2f
);
#else
const float4_t result = float4_poly2(_a
, 1.0017247f, 6.5763628e-1f
, 3.3718944e-1f
);
#endif // 0
return result;
}
} // namespace float4_internal
BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a)
{
const float4_t expmask = float4_isplat(0x7f800000);
const float4_t mantmask = float4_isplat(0x007fffff);
const float4_t one = float4_splat(1.0f);
const float4_t c127 = float4_isplat(127);
const float4_t aexp = float4_and(_a, expmask);
const float4_t aexpsr = float4_srl(aexp, 23);
const float4_t tmp0 = float4_isub(aexpsr, c127);
const float4_t exp = float4_itof(tmp0);
const float4_t amask = float4_and(_a, mantmask);
const float4_t mant = float4_or(amask, one);
const float4_t poly = float4_logexp_detail::float4_logpoly(mant);
const float4_t mandiff = float4_sub(mant, one);
const float4_t result = float4_madd(poly, mandiff, exp);
return result;
}
BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a)
{
const float4_t min = float4_splat( 129.0f);
const float4_t max = float4_splat(-126.99999f);
const float4_t tmp0 = float4_min(_a, min);
const float4_t aaaa = float4_max(tmp0, max);
const float4_t half = float4_splat(0.5f);
const float4_t tmp2 = float4_sub(aaaa, half);
const float4_t ipart = float4_ftoi(tmp2);
const float4_t iround = float4_itof(ipart);
const float4_t fpart = float4_sub(aaaa, iround);
const float4_t c127 = float4_isplat(127);
const float4_t tmp5 = float4_iadd(ipart, c127);
const float4_t expipart = float4_sll(tmp5, 23);
const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart);
const float4_t result = float4_mul(expipart, expfpart);
return result;
}
BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b)
{
const float4_t alog2 = float4_log2(_a);
const float4_t alog2b = float4_mul(alog2, _b);
const float4_t result = float4_exp2(alog2b);
return result;
}
BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b)
{
const float4_t xyzw = float4_mul(_a, _b);
const float4_t xxxx = float4_swiz_xxxx(xyzw);
const float4_t yyyy = float4_swiz_yyyy(xyzw);
const float4_t zzzz = float4_swiz_zzzz(xyzw);
const float4_t tmp1 = float4_add(xxxx, yyyy);
const float4_t result = float4_add(zzzz, tmp1);
return result;
}
BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b)
{
const float4_t a_yzxw = float4_swiz_yzxw(_a);
const float4_t a_zxyw = float4_swiz_zxyw(_a);
const float4_t b_zxyw = float4_swiz_zxyw(_b);
const float4_t b_yzxw = float4_swiz_yzxw(_b);
const float4_t tmp = float4_mul(a_yzxw, b_zxyw);
const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp);
return result;
}
BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a)
{
const float4_t dot3 = float4_dot3(_a, _a);
const float4_t invSqrt = float4_rsqrt(dot3);
const float4_t result = float4_mul(_a, invSqrt);
return result;
}
BX_FLOAT4_INLINE float4_t float4_dot_ni(float4_t _a, float4_t _b)
{
const float4_t xyzw = float4_mul(_a, _b);
const float4_t yzwx = float4_swiz_yzwx(xyzw);
const float4_t tmp0 = float4_add(xyzw, yzwx);
const float4_t zwxy = float4_swiz_zwxy(tmp0);
const float4_t result = float4_add(tmp0, zwxy);
return result;
}
} // namespace bx
#endif // __BX_FLOAT4_NI_H__

522
include/bx/float4_ref.h Normal file
View File

@@ -0,0 +1,522 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_REF_H__
#define __BX_FLOAT4_REF_H__
#include <math.h> // sqrtf
namespace bx
{
typedef union float4_t
{
int32_t ixyzw[4];
uint32_t uxyzw[4];
float fxyzw[4];
} float4_t;
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
{ \
float4_t result; \
result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
return result; \
}
#include "float4_swizzle.inl"
#undef IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx
#define IMPLEMENT_TEST(_xyzw, _mask) \
BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
{ \
uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
| ( (_test.uxyzw[2]>>31)<<2) \
| ( (_test.uxyzw[1]>>31)<<1) \
| (_test.uxyzw[0]>>31) \
; \
return 0 != (tmp&(_mask) ); \
} \
\
BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
{ \
uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
| ( (_test.uxyzw[2]>>31)<<2) \
| ( (_test.uxyzw[1]>>31)<<1) \
| (_test.uxyzw[0]>>31) \
; \
return (_mask) == (tmp&(_mask) ); \
}
IMPLEMENT_TEST(x , 0x1);
IMPLEMENT_TEST(y , 0x2);
IMPLEMENT_TEST(xy , 0x3);
IMPLEMENT_TEST(z , 0x4);
IMPLEMENT_TEST(xz , 0x5);
IMPLEMENT_TEST(yz , 0x6);
IMPLEMENT_TEST(xyz , 0x7);
IMPLEMENT_TEST(w , 0x8);
IMPLEMENT_TEST(xw , 0x9);
IMPLEMENT_TEST(yw , 0xa);
IMPLEMENT_TEST(xyw , 0xb);
IMPLEMENT_TEST(zw , 0xc);
IMPLEMENT_TEST(xzw , 0xd);
IMPLEMENT_TEST(yzw , 0xe);
IMPLEMENT_TEST(xyzw , 0xf);
#undef IMPLEMENT_TEST
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0];
result.uxyzw[1] = _a.uxyzw[1];
result.uxyzw[2] = _b.uxyzw[0];
result.uxyzw[3] = _b.uxyzw[1];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _b.uxyzw[0];
result.uxyzw[1] = _b.uxyzw[1];
result.uxyzw[2] = _a.uxyzw[0];
result.uxyzw[3] = _a.uxyzw[1];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _b.uxyzw[2];
result.uxyzw[1] = _b.uxyzw[3];
result.uxyzw[2] = _a.uxyzw[2];
result.uxyzw[3] = _a.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[2];
result.uxyzw[1] = _a.uxyzw[3];
result.uxyzw[2] = _b.uxyzw[2];
result.uxyzw[3] = _b.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0];
result.uxyzw[1] = _b.uxyzw[0];
result.uxyzw[2] = _a.uxyzw[1];
result.uxyzw[3] = _b.uxyzw[1];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[1];
result.uxyzw[1] = _b.uxyzw[1];
result.uxyzw[2] = _a.uxyzw[0];
result.uxyzw[3] = _b.uxyzw[0];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[2];
result.uxyzw[1] = _b.uxyzw[2];
result.uxyzw[2] = _a.uxyzw[3];
result.uxyzw[3] = _b.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _b.uxyzw[2];
result.uxyzw[1] = _a.uxyzw[2];
result.uxyzw[2] = _b.uxyzw[3];
result.uxyzw[3] = _a.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float float4_x(float4_t _a)
{
return _a.fxyzw[0];
}
BX_FLOAT4_INLINE float float4_y(float4_t _a)
{
return _a.fxyzw[1];
}
BX_FLOAT4_INLINE float float4_z(float4_t _a)
{
return _a.fxyzw[2];
}
BX_FLOAT4_INLINE float float4_w(float4_t _a)
{
return _a.fxyzw[3];
}
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
{
return *reinterpret_cast<const float4_t*>(_ptr);
}
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
{
*reinterpret_cast<float4_t*>(_ptr) = _a;
}
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
{
float4_t result;
result.fxyzw[0] = _x;
result.fxyzw[1] = _y;
result.fxyzw[2] = _z;
result.fxyzw[3] = _w;
return result;
}
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
{
float4_t result;
result.uxyzw[0] = _x;
result.uxyzw[1] = _y;
result.uxyzw[2] = _z;
result.uxyzw[3] = _w;
return result;
}
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
{
float val = *reinterpret_cast<const float*>(_ptr);
return float4_ld(val, val, val, val);
}
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
{
return float4_ld(_a, _a, _a, _a);
}
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
{
return float4_ild(_a, _a, _a, _a);
}
BX_FLOAT4_INLINE float4_t float4_zero()
{
return float4_ild(0, 0, 0, 0);
}
BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
{
float4_t result;
result.fxyzw[0] = (float)result.ixyzw[0];
result.fxyzw[1] = (float)result.ixyzw[1];
result.fxyzw[2] = (float)result.ixyzw[2];
result.fxyzw[3] = (float)result.ixyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
{
float4_t result;
result.ixyzw[0] = (int)result.fxyzw[0];
result.ixyzw[1] = (int)result.fxyzw[1];
result.ixyzw[2] = (int)result.fxyzw[2];
result.ixyzw[3] = (int)result.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
{
const float4_t tmp = float4_ftoi(_a);
const float4_t result = float4_itof(tmp);
return result;
}
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
{
float4_t result;
result.fxyzw[0] = _a.fxyzw[0] + _b.fxyzw[0];
result.fxyzw[1] = _a.fxyzw[1] + _b.fxyzw[1];
result.fxyzw[2] = _a.fxyzw[2] + _b.fxyzw[2];
result.fxyzw[3] = _a.fxyzw[3] + _b.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
{
float4_t result;
result.fxyzw[0] = _a.fxyzw[0] - _b.fxyzw[0];
result.fxyzw[1] = _a.fxyzw[1] - _b.fxyzw[1];
result.fxyzw[2] = _a.fxyzw[2] - _b.fxyzw[2];
result.fxyzw[3] = _a.fxyzw[3] - _b.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
{
float4_t result;
result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0];
result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1];
result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2];
result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
{
float4_t result;
result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0];
result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1];
result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2];
result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
{
float4_t result;
result.fxyzw[0] = 1.0f / _a.fxyzw[0];
result.fxyzw[1] = 1.0f / _a.fxyzw[1];
result.fxyzw[2] = 1.0f / _a.fxyzw[2];
result.fxyzw[3] = 1.0f / _a.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
{
float4_t result;
result.fxyzw[0] = sqrtf(_a.fxyzw[0]);
result.fxyzw[1] = sqrtf(_a.fxyzw[1]);
result.fxyzw[2] = sqrtf(_a.fxyzw[2]);
result.fxyzw[3] = sqrtf(_a.fxyzw[3]);
return result;
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
{
float4_t result;
result.fxyzw[0] = 1.0f / sqrtf(_a.fxyzw[0]);
result.fxyzw[1] = 1.0f / sqrtf(_a.fxyzw[1]);
result.fxyzw[2] = 1.0f / sqrtf(_a.fxyzw[2]);
result.fxyzw[3] = 1.0f / sqrtf(_a.fxyzw[3]);
return result;
}
BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.fxyzw[0] == _b.fxyzw[0] ? 0xffffffff : 0x0;
result.ixyzw[1] = _a.fxyzw[1] == _b.fxyzw[1] ? 0xffffffff : 0x0;
result.ixyzw[2] = _a.fxyzw[2] == _b.fxyzw[2] ? 0xffffffff : 0x0;
result.ixyzw[3] = _a.fxyzw[3] == _b.fxyzw[3] ? 0xffffffff : 0x0;
return result;
}
BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? 0xffffffff : 0x0;
result.ixyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? 0xffffffff : 0x0;
result.ixyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? 0xffffffff : 0x0;
result.ixyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? 0xffffffff : 0x0;
return result;
}
BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.fxyzw[0] <= _b.fxyzw[0] ? 0xffffffff : 0x0;
result.ixyzw[1] = _a.fxyzw[1] <= _b.fxyzw[1] ? 0xffffffff : 0x0;
result.ixyzw[2] = _a.fxyzw[2] <= _b.fxyzw[2] ? 0xffffffff : 0x0;
result.ixyzw[3] = _a.fxyzw[3] <= _b.fxyzw[3] ? 0xffffffff : 0x0;
return result;
}
BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? 0xffffffff : 0x0;
result.ixyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? 0xffffffff : 0x0;
result.ixyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? 0xffffffff : 0x0;
result.ixyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 0xffffffff : 0x0;
return result;
}
BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0;
result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0;
result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0;
result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 0xffffffff : 0x0;
return result;
}
BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
{
float4_t result;
result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];
result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];
result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];
result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
{
float4_t result;
result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];
result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];
result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];
result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0];
result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1];
result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2];
result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0];
result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1];
result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2];
result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0];
result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1];
result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2];
result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0];
result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1];
result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2];
result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0] << _count;
result.uxyzw[1] = _a.uxyzw[1] << _count;
result.uxyzw[2] = _a.uxyzw[2] << _count;
result.uxyzw[3] = _a.uxyzw[3] << _count;
return result;
}
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
{
float4_t result;
result.uxyzw[0] = _a.uxyzw[0] >> _count;
result.uxyzw[1] = _a.uxyzw[1] >> _count;
result.uxyzw[2] = _a.uxyzw[2] >> _count;
result.uxyzw[3] = _a.uxyzw[3] >> _count;
return result;
}
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
{
float4_t result;
result.ixyzw[0] = _a.ixyzw[0] >> _count;
result.ixyzw[1] = _a.ixyzw[1] >> _count;
result.ixyzw[2] = _a.ixyzw[2] >> _count;
result.ixyzw[3] = _a.ixyzw[3] >> _count;
return result;
}
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0];
result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1];
result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2];
result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3];
return result;
}
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
{
float4_t result;
result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0];
result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1];
result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2];
result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3];
return result;
}
} // namespace bx
#define float4_shuf_xAzC float4_shuf_xAzC_ni
#define float4_shuf_yBwD float4_shuf_yBwD_ni
#define float4_rcp float4_rcp_ni
#define float4_orx float4_orx_ni
#define float4_orc float4_orc_ni
#define float4_neg float4_neg_ni
#define float4_madd float4_madd_ni
#define float4_nmsub float4_nmsub_ni
#define float4_div_nr float4_div_nr_ni
#define float4_selb float4_selb_ni
#define float4_sels float4_sels_ni
#define float4_not float4_not_ni
#define float4_abs float4_abs_ni
#define float4_clamp float4_clamp_ni
#define float4_lerp float4_lerp_ni
#define float4_rsqrt float4_rsqrt_ni
#define float4_rsqrt_nr float4_rsqrt_nr_ni
#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
#define float4_sqrt_nr float4_sqrt_nr_ni
#define float4_log2 float4_log2_ni
#define float4_exp2 float4_exp2_ni
#define float4_pow float4_pow_ni
#define float4_cross3 float4_cross3_ni
#define float4_normalize3 float4_normalize3_ni
#define float4_dot3 float4_dot3_ni
#define float4_dot float4_dot_ni
#include "float4_ni.h"
#endif // __BX_FLOAT4_REF_H__

400
include/bx/float4_sse.h Normal file
View File

@@ -0,0 +1,400 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_SSE_H__
#define __BX_FLOAT4_SSE_H__
#if !defined(__SSE2__)
# error "float4_t requires at least SSE2"
#endif // !defined(__SSE2__)
#include <stdint.h>
#include <emmintrin.h> // __m128i
#if defined(__SSE4_1__)
# include <smmintrin.h>
#endif // defined(__SSE4_1__)
#include <xmmintrin.h> // __m128
namespace bx
{
typedef __m128 float4_t;
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
{ \
return _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \
}
#include "float4_swizzle.inl"
#undef IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx
#define IMPLEMENT_TEST(_xyzw, _mask) \
BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
{ \
return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \
} \
\
BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
{ \
return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \
}
IMPLEMENT_TEST(x , 0x1);
IMPLEMENT_TEST(y , 0x2);
IMPLEMENT_TEST(xy , 0x3);
IMPLEMENT_TEST(z , 0x4);
IMPLEMENT_TEST(xz , 0x5);
IMPLEMENT_TEST(yz , 0x6);
IMPLEMENT_TEST(xyz , 0x7);
IMPLEMENT_TEST(w , 0x8);
IMPLEMENT_TEST(xw , 0x9);
IMPLEMENT_TEST(yw , 0xa);
IMPLEMENT_TEST(xyw , 0xb);
IMPLEMENT_TEST(zw , 0xc);
IMPLEMENT_TEST(xzw , 0xd);
IMPLEMENT_TEST(yzw , 0xe);
IMPLEMENT_TEST(xyzw , 0xf);
#undef IMPLEMENT_TEST
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
{
return _mm_movelh_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
{
return _mm_movelh_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
{
return _mm_movehl_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
{
return _mm_movehl_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
{
return _mm_unpacklo_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
{
return _mm_unpacklo_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
{
return _mm_unpackhi_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
{
return _mm_unpackhi_ps(_b, _a);
}
BX_FLOAT4_INLINE float float4_x(float4_t _a)
{
return _mm_cvtss_f32(_a);
}
BX_FLOAT4_INLINE float float4_y(float4_t _a)
{
const float4_t yyyy = float4_swiz_yyyy(_a);
const float result = _mm_cvtss_f32(yyyy);
return result;
}
BX_FLOAT4_INLINE float float4_z(float4_t _a)
{
const float4_t zzzz = float4_swiz_zzzz(_a);
const float result = _mm_cvtss_f32(zzzz);
return result;
}
BX_FLOAT4_INLINE float float4_w(float4_t _a)
{
const float4_t wwww = float4_swiz_wwww(_a);
const float result = _mm_cvtss_f32(wwww);
return result;
}
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
{
return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );
}
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
{
_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
}
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
{
return _mm_set_ps(_w, _z, _y, _x);
}
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
{
const __m128i set = _mm_set_epi32(_w, _z, _y, _x);
const float4_t result = _mm_castsi128_ps(set);
return result;
}
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
{
const float4_t x___ = _mm_load_ss(reinterpret_cast<const float*>(_ptr) );
const float4_t result = float4_swiz_xxxx(x___);
return result;
}
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
{
return _mm_set1_ps(_a);
}
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
{
const __m128i splat = _mm_set1_epi32(_a);
const float4_t result = _mm_castsi128_ps(splat);
return result;
}
BX_FLOAT4_INLINE float4_t float4_zero()
{
return _mm_setzero_ps();
}
BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
{
const __m128i itof = _mm_castps_si128(_a);
const float4_t result = _mm_cvtepi32_ps(itof);
return result;
}
BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
{
const __m128i ftoi = _mm_cvtps_epi32(_a);
const float4_t result = _mm_castsi128_ps(ftoi);
return result;
}
BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
{
#if defined(__SSE4_1__)
return _mm_round_ps(_a, _MM_FROUND_NINT);
#else
const __m128i round = _mm_cvtps_epi32(_a);
const float4_t result = _mm_cvtepi32_ps(round);
return result;
#endif // defined(__SSE4_1__)
}
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
{
return _mm_add_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
{
return _mm_sub_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
{
return _mm_mul_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
{
return _mm_div_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
{
return _mm_rcp_ps(_a);
}
BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
{
return _mm_sqrt_ps(_a);
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
{
return _mm_rsqrt_ps(_a);
}
#if defined(__SSE4_1__)
BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, float4_t _b)
{
return _mm_dp_ps(_a, _b, 0x77);
}
BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b)
{
return _mm_dp_ps(_a, _b, 0xFF);
}
#endif // defined(__SSE4__)
BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
{
return _mm_cmpeq_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
{
return _mm_cmplt_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
{
return _mm_cmple_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
{
return _mm_cmpgt_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
{
return _mm_cmpge_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
{
return _mm_min_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
{
return _mm_max_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
{
return _mm_and_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
{
return _mm_andnot_ps(_b, _a);
}
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
{
return _mm_or_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
{
return _mm_xor_ps(_a, _b);
}
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
{
const __m128i a = _mm_castps_si128(_a);
const __m128i shift = _mm_slli_epi32(a, _count);
const float4_t result = _mm_castsi128_ps(shift);
return result;
}
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
{
const __m128i a = _mm_castps_si128(_a);
const __m128i shift = _mm_srli_epi32(a, _count);
const float4_t result = _mm_castsi128_ps(shift);
return result;
}
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
{
const __m128i a = _mm_castps_si128(_a);
const __m128i shift = _mm_srai_epi32(a, _count);
const float4_t result = _mm_castsi128_ps(shift);
return result;
}
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
{
const __m128i a = _mm_castps_si128(_a);
const __m128i b = _mm_castps_si128(_b);
const __m128i add = _mm_add_epi32(a, b);
const float4_t result = _mm_castsi128_ps(add);
return result;
}
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
{
const __m128i a = _mm_castps_si128(_a);
const __m128i b = _mm_castps_si128(_b);
const __m128i sub = _mm_sub_epi32(a, b);
const float4_t result = _mm_castsi128_ps(sub);
return result;
}
} // namespace bx
#define float4_shuf_xAzC float4_shuf_xAzC_ni
#define float4_shuf_yBwD float4_shuf_yBwD_ni
#define float4_rcp float4_rcp_ni
#define float4_orx float4_orx_ni
#define float4_orc float4_orc_ni
#define float4_neg float4_neg_ni
#define float4_madd float4_madd_ni
#define float4_nmsub float4_nmsub_ni
#define float4_div_nr float4_div_nr_ni
#define float4_selb float4_selb_ni
#define float4_sels float4_sels_ni
#define float4_not float4_not_ni
#define float4_abs float4_abs_ni
#define float4_clamp float4_clamp_ni
#define float4_lerp float4_lerp_ni
#define float4_rsqrt float4_rsqrt_ni
#define float4_rsqrt_nr float4_rsqrt_nr_ni
#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
#define float4_sqrt_nr float4_sqrt_nr_ni
#define float4_log2 float4_log2_ni
#define float4_exp2 float4_exp2_ni
#define float4_pow float4_pow_ni
#define float4_cross3 float4_cross3_ni
#define float4_normalize3 float4_normalize3_ni
#if !defined(__SSE4_1__)
#define float4_dot3 float4_dot3_ni
#define float4_dot float4_dot_ni
#endif // defined(__SSE4_1__)
#include "float4_ni.h"
#endif // __FLOAT4_SSE_H__

View File

@@ -0,0 +1,266 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_T_H__
# error "xmacro file, must be included from float4_*.h"
#endif // __BX_FLOAT4_T_H__
// included from float4_t.h
IMPLEMENT_SWIZZLE(x, x, x, x)
IMPLEMENT_SWIZZLE(x, x, x, y)
IMPLEMENT_SWIZZLE(x, x, x, z)
IMPLEMENT_SWIZZLE(x, x, x, w)
IMPLEMENT_SWIZZLE(x, x, y, x)
IMPLEMENT_SWIZZLE(x, x, y, y)
IMPLEMENT_SWIZZLE(x, x, y, z)
IMPLEMENT_SWIZZLE(x, x, y, w)
IMPLEMENT_SWIZZLE(x, x, z, x)
IMPLEMENT_SWIZZLE(x, x, z, y)
IMPLEMENT_SWIZZLE(x, x, z, z)
IMPLEMENT_SWIZZLE(x, x, z, w)
IMPLEMENT_SWIZZLE(x, x, w, x)
IMPLEMENT_SWIZZLE(x, x, w, y)
IMPLEMENT_SWIZZLE(x, x, w, z)
IMPLEMENT_SWIZZLE(x, x, w, w)
IMPLEMENT_SWIZZLE(x, y, x, x)
IMPLEMENT_SWIZZLE(x, y, x, y)
IMPLEMENT_SWIZZLE(x, y, x, z)
IMPLEMENT_SWIZZLE(x, y, x, w)
IMPLEMENT_SWIZZLE(x, y, y, x)
IMPLEMENT_SWIZZLE(x, y, y, y)
IMPLEMENT_SWIZZLE(x, y, y, z)
IMPLEMENT_SWIZZLE(x, y, y, w)
IMPLEMENT_SWIZZLE(x, y, z, x)
IMPLEMENT_SWIZZLE(x, y, z, y)
IMPLEMENT_SWIZZLE(x, y, z, z)
// IMPLEMENT_SWIZZLE(x, y, z, w)
IMPLEMENT_SWIZZLE(x, y, w, x)
IMPLEMENT_SWIZZLE(x, y, w, y)
IMPLEMENT_SWIZZLE(x, y, w, z)
IMPLEMENT_SWIZZLE(x, y, w, w)
IMPLEMENT_SWIZZLE(x, z, x, x)
IMPLEMENT_SWIZZLE(x, z, x, y)
IMPLEMENT_SWIZZLE(x, z, x, z)
IMPLEMENT_SWIZZLE(x, z, x, w)
IMPLEMENT_SWIZZLE(x, z, y, x)
IMPLEMENT_SWIZZLE(x, z, y, y)
IMPLEMENT_SWIZZLE(x, z, y, z)
IMPLEMENT_SWIZZLE(x, z, y, w)
IMPLEMENT_SWIZZLE(x, z, z, x)
IMPLEMENT_SWIZZLE(x, z, z, y)
IMPLEMENT_SWIZZLE(x, z, z, z)
IMPLEMENT_SWIZZLE(x, z, z, w)
IMPLEMENT_SWIZZLE(x, z, w, x)
IMPLEMENT_SWIZZLE(x, z, w, y)
IMPLEMENT_SWIZZLE(x, z, w, z)
IMPLEMENT_SWIZZLE(x, z, w, w)
IMPLEMENT_SWIZZLE(x, w, x, x)
IMPLEMENT_SWIZZLE(x, w, x, y)
IMPLEMENT_SWIZZLE(x, w, x, z)
IMPLEMENT_SWIZZLE(x, w, x, w)
IMPLEMENT_SWIZZLE(x, w, y, x)
IMPLEMENT_SWIZZLE(x, w, y, y)
IMPLEMENT_SWIZZLE(x, w, y, z)
IMPLEMENT_SWIZZLE(x, w, y, w)
IMPLEMENT_SWIZZLE(x, w, z, x)
IMPLEMENT_SWIZZLE(x, w, z, y)
IMPLEMENT_SWIZZLE(x, w, z, z)
IMPLEMENT_SWIZZLE(x, w, z, w)
IMPLEMENT_SWIZZLE(x, w, w, x)
IMPLEMENT_SWIZZLE(x, w, w, y)
IMPLEMENT_SWIZZLE(x, w, w, z)
IMPLEMENT_SWIZZLE(x, w, w, w)
IMPLEMENT_SWIZZLE(y, x, x, x)
IMPLEMENT_SWIZZLE(y, x, x, y)
IMPLEMENT_SWIZZLE(y, x, x, z)
IMPLEMENT_SWIZZLE(y, x, x, w)
IMPLEMENT_SWIZZLE(y, x, y, x)
IMPLEMENT_SWIZZLE(y, x, y, y)
IMPLEMENT_SWIZZLE(y, x, y, z)
IMPLEMENT_SWIZZLE(y, x, y, w)
IMPLEMENT_SWIZZLE(y, x, z, x)
IMPLEMENT_SWIZZLE(y, x, z, y)
IMPLEMENT_SWIZZLE(y, x, z, z)
IMPLEMENT_SWIZZLE(y, x, z, w)
IMPLEMENT_SWIZZLE(y, x, w, x)
IMPLEMENT_SWIZZLE(y, x, w, y)
IMPLEMENT_SWIZZLE(y, x, w, z)
IMPLEMENT_SWIZZLE(y, x, w, w)
IMPLEMENT_SWIZZLE(y, y, x, x)
IMPLEMENT_SWIZZLE(y, y, x, y)
IMPLEMENT_SWIZZLE(y, y, x, z)
IMPLEMENT_SWIZZLE(y, y, x, w)
IMPLEMENT_SWIZZLE(y, y, y, x)
IMPLEMENT_SWIZZLE(y, y, y, y)
IMPLEMENT_SWIZZLE(y, y, y, z)
IMPLEMENT_SWIZZLE(y, y, y, w)
IMPLEMENT_SWIZZLE(y, y, z, x)
IMPLEMENT_SWIZZLE(y, y, z, y)
IMPLEMENT_SWIZZLE(y, y, z, z)
IMPLEMENT_SWIZZLE(y, y, z, w)
IMPLEMENT_SWIZZLE(y, y, w, x)
IMPLEMENT_SWIZZLE(y, y, w, y)
IMPLEMENT_SWIZZLE(y, y, w, z)
IMPLEMENT_SWIZZLE(y, y, w, w)
IMPLEMENT_SWIZZLE(y, z, x, x)
IMPLEMENT_SWIZZLE(y, z, x, y)
IMPLEMENT_SWIZZLE(y, z, x, z)
IMPLEMENT_SWIZZLE(y, z, x, w)
IMPLEMENT_SWIZZLE(y, z, y, x)
IMPLEMENT_SWIZZLE(y, z, y, y)
IMPLEMENT_SWIZZLE(y, z, y, z)
IMPLEMENT_SWIZZLE(y, z, y, w)
IMPLEMENT_SWIZZLE(y, z, z, x)
IMPLEMENT_SWIZZLE(y, z, z, y)
IMPLEMENT_SWIZZLE(y, z, z, z)
IMPLEMENT_SWIZZLE(y, z, z, w)
IMPLEMENT_SWIZZLE(y, z, w, x)
IMPLEMENT_SWIZZLE(y, z, w, y)
IMPLEMENT_SWIZZLE(y, z, w, z)
IMPLEMENT_SWIZZLE(y, z, w, w)
IMPLEMENT_SWIZZLE(y, w, x, x)
IMPLEMENT_SWIZZLE(y, w, x, y)
IMPLEMENT_SWIZZLE(y, w, x, z)
IMPLEMENT_SWIZZLE(y, w, x, w)
IMPLEMENT_SWIZZLE(y, w, y, x)
IMPLEMENT_SWIZZLE(y, w, y, y)
IMPLEMENT_SWIZZLE(y, w, y, z)
IMPLEMENT_SWIZZLE(y, w, y, w)
IMPLEMENT_SWIZZLE(y, w, z, x)
IMPLEMENT_SWIZZLE(y, w, z, y)
IMPLEMENT_SWIZZLE(y, w, z, z)
IMPLEMENT_SWIZZLE(y, w, z, w)
IMPLEMENT_SWIZZLE(y, w, w, x)
IMPLEMENT_SWIZZLE(y, w, w, y)
IMPLEMENT_SWIZZLE(y, w, w, z)
IMPLEMENT_SWIZZLE(y, w, w, w)
IMPLEMENT_SWIZZLE(z, x, x, x)
IMPLEMENT_SWIZZLE(z, x, x, y)
IMPLEMENT_SWIZZLE(z, x, x, z)
IMPLEMENT_SWIZZLE(z, x, x, w)
IMPLEMENT_SWIZZLE(z, x, y, x)
IMPLEMENT_SWIZZLE(z, x, y, y)
IMPLEMENT_SWIZZLE(z, x, y, z)
IMPLEMENT_SWIZZLE(z, x, y, w)
IMPLEMENT_SWIZZLE(z, x, z, x)
IMPLEMENT_SWIZZLE(z, x, z, y)
IMPLEMENT_SWIZZLE(z, x, z, z)
IMPLEMENT_SWIZZLE(z, x, z, w)
IMPLEMENT_SWIZZLE(z, x, w, x)
IMPLEMENT_SWIZZLE(z, x, w, y)
IMPLEMENT_SWIZZLE(z, x, w, z)
IMPLEMENT_SWIZZLE(z, x, w, w)
IMPLEMENT_SWIZZLE(z, y, x, x)
IMPLEMENT_SWIZZLE(z, y, x, y)
IMPLEMENT_SWIZZLE(z, y, x, z)
IMPLEMENT_SWIZZLE(z, y, x, w)
IMPLEMENT_SWIZZLE(z, y, y, x)
IMPLEMENT_SWIZZLE(z, y, y, y)
IMPLEMENT_SWIZZLE(z, y, y, z)
IMPLEMENT_SWIZZLE(z, y, y, w)
IMPLEMENT_SWIZZLE(z, y, z, x)
IMPLEMENT_SWIZZLE(z, y, z, y)
IMPLEMENT_SWIZZLE(z, y, z, z)
IMPLEMENT_SWIZZLE(z, y, z, w)
IMPLEMENT_SWIZZLE(z, y, w, x)
IMPLEMENT_SWIZZLE(z, y, w, y)
IMPLEMENT_SWIZZLE(z, y, w, z)
IMPLEMENT_SWIZZLE(z, y, w, w)
IMPLEMENT_SWIZZLE(z, z, x, x)
IMPLEMENT_SWIZZLE(z, z, x, y)
IMPLEMENT_SWIZZLE(z, z, x, z)
IMPLEMENT_SWIZZLE(z, z, x, w)
IMPLEMENT_SWIZZLE(z, z, y, x)
IMPLEMENT_SWIZZLE(z, z, y, y)
IMPLEMENT_SWIZZLE(z, z, y, z)
IMPLEMENT_SWIZZLE(z, z, y, w)
IMPLEMENT_SWIZZLE(z, z, z, x)
IMPLEMENT_SWIZZLE(z, z, z, y)
IMPLEMENT_SWIZZLE(z, z, z, z)
IMPLEMENT_SWIZZLE(z, z, z, w)
IMPLEMENT_SWIZZLE(z, z, w, x)
IMPLEMENT_SWIZZLE(z, z, w, y)
IMPLEMENT_SWIZZLE(z, z, w, z)
IMPLEMENT_SWIZZLE(z, z, w, w)
IMPLEMENT_SWIZZLE(z, w, x, x)
IMPLEMENT_SWIZZLE(z, w, x, y)
IMPLEMENT_SWIZZLE(z, w, x, z)
IMPLEMENT_SWIZZLE(z, w, x, w)
IMPLEMENT_SWIZZLE(z, w, y, x)
IMPLEMENT_SWIZZLE(z, w, y, y)
IMPLEMENT_SWIZZLE(z, w, y, z)
IMPLEMENT_SWIZZLE(z, w, y, w)
IMPLEMENT_SWIZZLE(z, w, z, x)
IMPLEMENT_SWIZZLE(z, w, z, y)
IMPLEMENT_SWIZZLE(z, w, z, z)
IMPLEMENT_SWIZZLE(z, w, z, w)
IMPLEMENT_SWIZZLE(z, w, w, x)
IMPLEMENT_SWIZZLE(z, w, w, y)
IMPLEMENT_SWIZZLE(z, w, w, z)
IMPLEMENT_SWIZZLE(z, w, w, w)
IMPLEMENT_SWIZZLE(w, x, x, x)
IMPLEMENT_SWIZZLE(w, x, x, y)
IMPLEMENT_SWIZZLE(w, x, x, z)
IMPLEMENT_SWIZZLE(w, x, x, w)
IMPLEMENT_SWIZZLE(w, x, y, x)
IMPLEMENT_SWIZZLE(w, x, y, y)
IMPLEMENT_SWIZZLE(w, x, y, z)
IMPLEMENT_SWIZZLE(w, x, y, w)
IMPLEMENT_SWIZZLE(w, x, z, x)
IMPLEMENT_SWIZZLE(w, x, z, y)
IMPLEMENT_SWIZZLE(w, x, z, z)
IMPLEMENT_SWIZZLE(w, x, z, w)
IMPLEMENT_SWIZZLE(w, x, w, x)
IMPLEMENT_SWIZZLE(w, x, w, y)
IMPLEMENT_SWIZZLE(w, x, w, z)
IMPLEMENT_SWIZZLE(w, x, w, w)
IMPLEMENT_SWIZZLE(w, y, x, x)
IMPLEMENT_SWIZZLE(w, y, x, y)
IMPLEMENT_SWIZZLE(w, y, x, z)
IMPLEMENT_SWIZZLE(w, y, x, w)
IMPLEMENT_SWIZZLE(w, y, y, x)
IMPLEMENT_SWIZZLE(w, y, y, y)
IMPLEMENT_SWIZZLE(w, y, y, z)
IMPLEMENT_SWIZZLE(w, y, y, w)
IMPLEMENT_SWIZZLE(w, y, z, x)
IMPLEMENT_SWIZZLE(w, y, z, y)
IMPLEMENT_SWIZZLE(w, y, z, z)
IMPLEMENT_SWIZZLE(w, y, z, w)
IMPLEMENT_SWIZZLE(w, y, w, x)
IMPLEMENT_SWIZZLE(w, y, w, y)
IMPLEMENT_SWIZZLE(w, y, w, z)
IMPLEMENT_SWIZZLE(w, y, w, w)
IMPLEMENT_SWIZZLE(w, z, x, x)
IMPLEMENT_SWIZZLE(w, z, x, y)
IMPLEMENT_SWIZZLE(w, z, x, z)
IMPLEMENT_SWIZZLE(w, z, x, w)
IMPLEMENT_SWIZZLE(w, z, y, x)
IMPLEMENT_SWIZZLE(w, z, y, y)
IMPLEMENT_SWIZZLE(w, z, y, z)
IMPLEMENT_SWIZZLE(w, z, y, w)
IMPLEMENT_SWIZZLE(w, z, z, x)
IMPLEMENT_SWIZZLE(w, z, z, y)
IMPLEMENT_SWIZZLE(w, z, z, z)
IMPLEMENT_SWIZZLE(w, z, z, w)
IMPLEMENT_SWIZZLE(w, z, w, x)
IMPLEMENT_SWIZZLE(w, z, w, y)
IMPLEMENT_SWIZZLE(w, z, w, z)
IMPLEMENT_SWIZZLE(w, z, w, w)
IMPLEMENT_SWIZZLE(w, w, x, x)
IMPLEMENT_SWIZZLE(w, w, x, y)
IMPLEMENT_SWIZZLE(w, w, x, z)
IMPLEMENT_SWIZZLE(w, w, x, w)
IMPLEMENT_SWIZZLE(w, w, y, x)
IMPLEMENT_SWIZZLE(w, w, y, y)
IMPLEMENT_SWIZZLE(w, w, y, z)
IMPLEMENT_SWIZZLE(w, w, y, w)
IMPLEMENT_SWIZZLE(w, w, z, x)
IMPLEMENT_SWIZZLE(w, w, z, y)
IMPLEMENT_SWIZZLE(w, w, z, z)
IMPLEMENT_SWIZZLE(w, w, z, w)
IMPLEMENT_SWIZZLE(w, w, w, x)
IMPLEMENT_SWIZZLE(w, w, w, y)
IMPLEMENT_SWIZZLE(w, w, w, z)
IMPLEMENT_SWIZZLE(w, w, w, w)

22
include/bx/float4_t.h Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_T_H__
#define __BX_FLOAT4_T_H__
#include <stdint.h>
#include "bx.h"
#define BX_FLOAT4_INLINE BX_FORCE_INLINE
#if 0 // defined(__SSE2__)
# include "float4_sse.h"
#elif 0 // __ARM_NEON__
# include "float4_neon.h"
#else
# include "float4_ref.h"
#endif //
#endif // __BX_FLOAT4_T_H__

168
include/bx/float4x4_t.h Normal file
View File

@@ -0,0 +1,168 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4X4_H__
#define __BX_FLOAT4x4_H__
#include "float4_t.h"
namespace bx
{
typedef BX_ALIGN_STRUCT_16(struct)
{
float4_t col[4];
} float4x4_t;
BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b)
{
const float4_t xxxx = float4_swiz_xxxx(_a);
const float4_t yyyy = float4_swiz_yyyy(_a);
const float4_t zzzz = float4_swiz_zzzz(_a);
const float4_t col0 = float4_mul(_b.col[0], xxxx);
const float4_t col1 = float4_mul(_b.col[1], yyyy);
const float4_t col2 = float4_madd(_b.col[2], zzzz, col0);
const float4_t col3 = float4_add(_b.col[3], col1);
const float4_t result = float4_add(col2, col3);
return result;
}
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b)
{
const float4_t xxxx = float4_swiz_xxxx(_a);
const float4_t yyyy = float4_swiz_yyyy(_a);
const float4_t zzzz = float4_swiz_zzzz(_a);
const float4_t wwww = float4_swiz_wwww(_a);
const float4_t col0 = float4_mul(_b.col[0], xxxx);
const float4_t col1 = float4_mul(_b.col[1], yyyy);
const float4_t col2 = float4_madd(_b.col[2], zzzz, col0);
const float4_t col3 = float4_madd(_b.col[3], wwww, col1);
const float4_t result = float4_add(col2, col3);
return result;
}
BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b)
{
float4x4_t result;
result.col[0] = float4_mul(_a.col[0], _b);
result.col[1] = float4_mul(_a.col[1], _b);
result.col[2] = float4_mul(_a.col[2], _b);
result.col[3] = float4_mul(_a.col[3], _b);
return result;
}
BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx)
{
const float4_t aibj = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // aibj
const float4_t emfn = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // emfn
const float4_t ckdl = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // ckdl
const float4_t gohp = float4_shuf_zCwD(_mtx.col[1], _mtx.col[3]); // gohp
float4x4_t result;
result.col[0] = float4_shuf_xAyB(aibj, emfn); // aeim
result.col[1] = float4_shuf_zCwD(aibj, emfn); // bfjn
result.col[2] = float4_shuf_xAyB(ckdl, gohp); // cgko
result.col[3] = float4_shuf_zCwD(ckdl, gohp); // dhlp
return result;
}
BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a)
{
const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]);
const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]);
const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]);
const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]);
const float4_t t0 = float4_shuf_xyAB(tmp0, tmp1);
const float4_t t1 = float4_shuf_xyAB(tmp3, tmp2);
const float4_t t2 = float4_shuf_zwCD(tmp0, tmp1);
const float4_t t3 = float4_shuf_zwCD(tmp3, tmp2);
const float4_t t23 = float4_mul(t2, t3);
const float4_t t23_yxwz = float4_swiz_yxwz(t23);
const float4_t t23_wzyx = float4_swiz_wzyx(t23);
float4_t cof0, cof1, cof2, cof3;
const float4_t zero = float4_zero();
cof0 = float4_nmsub(t1, t23_yxwz, zero);
cof0 = float4_madd(t1, t23_wzyx, cof0);
cof1 = float4_nmsub(t0, t23_yxwz, zero);
cof1 = float4_madd(t0, t23_wzyx, cof1);
cof1 = float4_swiz_zwxy(cof1);
const float4_t t12 = float4_mul(t1, t2);
const float4_t t12_yxwz = float4_swiz_yxwz(t12);
const float4_t t12_wzyx = float4_swiz_wzyx(t12);
cof0 = float4_madd(t3, t12_yxwz, cof0);
cof0 = float4_nmsub(t3, t12_wzyx, cof0);
cof3 = float4_mul(t0, t12_yxwz);
cof3 = float4_nmsub(t0, t12_wzyx, cof3);
cof3 = float4_swiz_zwxy(cof3);
const float4_t t1_zwxy = float4_swiz_zwxy(t1);
const float4_t t2_zwxy = float4_swiz_zwxy(t2);
const float4_t t13 = float4_mul(t1_zwxy, t3);
const float4_t t13_yxwz = float4_swiz_yxwz(t13);
const float4_t t13_wzyx = float4_swiz_wzyx(t13);
cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0);
cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0);
cof2 = float4_mul(t0, t13_yxwz);
cof2 = float4_nmsub(t0, t13_wzyx, cof2);
cof2 = float4_swiz_zwxy(cof2);
const float4_t t01 = float4_mul(t0, t1);
const float4_t t01_yxwz = float4_swiz_yxwz(t01);
const float4_t t01_wzyx = float4_swiz_wzyx(t01);
cof2 = float4_nmsub(t3, t01_yxwz, cof2);
cof2 = float4_madd(t3, t01_wzyx, cof2);
cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3);
cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3);
const float4_t t03 = float4_mul(t0, t3);
const float4_t t03_yxwz = float4_swiz_yxwz(t03);
const float4_t t03_wzyx = float4_swiz_wzyx(t03);
cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1);
cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1);
cof2 = float4_madd(t1, t03_yxwz, cof2);
cof2 = float4_nmsub(t1, t03_wzyx, cof2);
const float4_t t02 = float4_mul(t0, t2_zwxy);
const float4_t t02_yxwz = float4_swiz_yxwz(t02);
const float4_t t02_wzyx = float4_swiz_wzyx(t02);
cof1 = float4_madd(t3, t02_yxwz, cof1);
cof1 = float4_nmsub(t3, t02_wzyx, cof1);
cof3 = float4_nmsub(t1, t02_yxwz, cof3);
cof3 = float4_madd(t1, t02_wzyx, cof3);
const float4_t det = float4_dot(t0, cof0);
const float4_t invdet = float4_rcp(det);
float4x4_t result;
result.col[0] = float4_mul(cof0, invdet);
result.col[1] = float4_mul(cof1, invdet);
result.col[2] = float4_mul(cof2, invdet);
result.col[3] = float4_mul(cof3, invdet);
return result;
}
} // namespace bx
#endif // __BX_FLOAT4X4_H__

71
include/bx/foreach.h Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FOREACH_H__
#define __BX_FOREACH_H__
#include "bx.h"
namespace bx
{
namespace foreach_ns
{
struct ContainerBase
{
};
template <typename Ty>
class Container : public ContainerBase
{
public:
inline Container(const Ty& _container)
: m_container(_container)
, m_break(0)
, m_it( _container.begin() )
, m_itEnd( _container.end() )
{
}
inline bool condition() const
{
return (!m_break++ && m_it != m_itEnd);
}
const Ty& m_container;
mutable int m_break;
mutable typename Ty::const_iterator m_it;
mutable typename Ty::const_iterator m_itEnd;
};
template <typename Ty>
inline Ty* pointer(const Ty&)
{
return 0;
}
template <typename Ty>
inline Container<Ty> containerNew(const Ty& _container)
{
return Container<Ty>(_container);
}
template <typename Ty>
inline const Container<Ty>* container(const ContainerBase* _base, const Ty*)
{
return static_cast<const Container<Ty>*>(_base);
}
} // namespace foreach_ns
#define foreach(_variable, _container) \
for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \
bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->condition(); \
++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_it) \
for (_variable = *container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_it; \
bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_break; \
--bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_break)
} // namespace bx
#endif // __BX_FOREACH_H__

83
include/bx/handlealloc.h Normal file
View File

@@ -0,0 +1,83 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_HANDLE_ALLOC_H__
#define __BX_HANDLE_ALLOC_H__
#include "bx.h"
namespace bx
{
class HandleAlloc
{
public:
static const uint16_t invalid = 0xffff;
HandleAlloc(uint16_t _maxHandles)
: m_dense(new uint16_t[_maxHandles*2])
, m_sparse(&m_dense[_maxHandles])
, m_numHandles(0)
, m_maxHandles(_maxHandles)
{
for (uint16_t ii = 0; ii < _maxHandles; ++ii)
{
m_dense[ii] = ii;
}
}
~HandleAlloc()
{
delete [] m_dense;
}
const uint16_t* getHandles() const
{
return m_dense;
}
uint16_t getNumHandles() const
{
return m_numHandles;
}
uint16_t getMaxHandles() const
{
return m_maxHandles;
}
uint16_t alloc()
{
if (m_numHandles < m_maxHandles)
{
uint16_t index = m_numHandles;
++m_numHandles;
uint16_t handle = m_dense[index];
m_sparse[handle] = index;
return handle;
}
return invalid;
}
void free(uint16_t _handle)
{
uint16_t index = m_sparse[_handle];
--m_numHandles;
uint16_t temp = m_dense[m_numHandles];
m_dense[m_numHandles] = _handle;
m_sparse[temp] = index;
m_dense[index] = temp;
}
private:
uint16_t* m_dense;
uint16_t* m_sparse;
uint16_t m_numHandles;
uint16_t m_maxHandles;
};
} // namespace bx
#endif // __HANDLE_ALLOC_H__

90
include/bx/hash.h Normal file
View File

@@ -0,0 +1,90 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_HASH_H__
#define __BX_HASH_H__
#include "bx.h"
namespace bx
{
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#define MURMUR_M 0x5bd1e995
#define MURMUR_R 24
#define mmix(_h, _k) { _k *= MURMUR_M; _k ^= _k >> MURMUR_R; _k *= MURMUR_M; _h *= MURMUR_M; _h ^= _k; }
class HashMurmur2A
{
public:
void begin(uint32_t _seed = 0)
{
m_hash = _seed;
m_tail = 0;
m_count = 0;
m_size = 0;
}
void add(const void* _data, int _len)
{
const uint8_t* data = (uint8_t*)_data;
m_size += _len;
mixTail(data, _len);
while(_len >= 4)
{
uint32_t kk = *(uint32_t*)data;
mmix(m_hash, kk);
data += 4;
_len -= 4;
}
mixTail(data, _len);
}
uint32_t end()
{
mmix(m_hash, m_tail);
mmix(m_hash, m_size);
m_hash ^= m_hash >> 13;
m_hash *= MURMUR_M;
m_hash ^= m_hash >> 15;
return m_hash;
}
private:
void mixTail(const uint8_t*& _data, int& _len)
{
while( _len && ((_len<4) || m_count) )
{
m_tail |= (*_data++) << (m_count * 8);
m_count++;
_len--;
if(m_count == 4)
{
mmix(m_hash, m_tail);
m_tail = 0;
m_count = 0;
}
}
}
uint32_t m_hash;
uint32_t m_tail;
uint32_t m_count;
uint32_t m_size;
};
} // namespace bx
#endif // __BX_HASH_H__

62
include/bx/macros.h Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_MACROS_H__
#define __BX_MACROS_H__
#include "bx.h"
#define BX_VA_ARGS_COUNT_DETAIL(_a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8, _a9, _a10, _a11, _a12, _a13, _a14, _a15, _a16, _last, ...) _last
#define BX_VA_ARGS_COUNT(...) BX_VA_ARGS_COUNT_DETAIL(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
#define BX_MACRO_DISPATCHER_DETAIL1(_func, _argCount) _func ## _argCount
#define BX_MACRO_DISPATCHER_DETAIL2(_func, _argCount) BX_MACRO_DISPATCHER_DETAIL1(_func, _argCount)
#define BX_MACRO_DISPATCHER(_func, ...) BX_MACRO_DISPATCHER_DETAIL2(_func, VA_ARGS_COUNT(__VA_ARGS__) )
#define BX_STRINGIZE(_x) BX_STRINGIZE_(_x)
#define BX_STRINGIZE_(_x) #_x
#define BX_FILE_LINE_LITERAL "" __FILE__ "(" BX_STRINGIZE(__LINE__) "): "
#define BX_ALIGN_MASK(_value, _mask) ( ( (_value)+(_mask) ) & ( (~0)&(~(_mask) ) ) )
#define BX_ALIGN_16(_value) BX_ALIGN_MASK(_value, 0xf)
#define BX_ALIGN_256(_value) BX_ALIGN_MASK(_value, 0xff)
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
# define BX_ALIGN_STRUCT(_align, _struct) _struct __attribute__( (aligned(_align) ) )
# define BX_FUNCTION __PRETTY_FUNCTION__
# define BX_NO_INLINE __attribute__( (noinline) )
# define BX_FORCE_INLINE __extension__ static __inline __attribute__( (__always_inline__) )
# if BX_COMPILER_CLANG
# define BX_THREAD /* not supported right now */
# else
# define BX_THREAD __thread
# endif // BX_COMPILER_CLANG
#elif BX_COMPILER_MSVC
# define BX_ALIGN_STRUCT(_align, _struct) __declspec(align(_align) ) _struct
# define BX_FUNCTION __FUNCTION__
# define BX_NO_INLINE __declspec(noinline)
# define BX_FORCE_INLINE __forceinline
# define BX_THREAD __declspec(thread)
#else
# error "Unknown BX_COMPILER_?"
#endif
#define BX_ALIGN_STRUCT_16(_struct) BX_ALIGN_STRUCT(16, _struct)
#define BX_ALIGN_STRUCT_256(_struct) BX_ALIGN_STRUCT(256, _struct)
#ifndef BX_CHECK
# define BX_CHECK(...) do {} while(0)
#endif // BX_CHECK
#ifndef BX_TRACE
# define BX_TRACE(...) do {} while(0)
#endif // BX_TRACE
#ifndef BX_CONFIG_SPSCQUEUE_USE_NAIVE
# define BX_CONFIG_SPSCQUEUE_USE_NAIVE 0
#endif // BX_CONFIG_SPSCQUEUE_USE_NAIVE
#endif // __BX_MACROS_H__

29
include/bx/maputil.h Normal file
View File

@@ -0,0 +1,29 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_MAPUTIL_H__
#define __BX_MAPUTIL_H__
#include "bx.h"
namespace bx
{
template<typename MapType>
typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value)
{
typename MapType::iterator it = _map.lower_bound(_key);
if (it != _map.end()
&& !_map.key_comp()(_key, it->first) )
{
it->second = _value;
return it;
}
typename MapType::value_type pair(_key, _value);
return _map.insert(it, pair);
}
} // namespace bx
#endif // __BX_MAPUTIL_H__

171
include/bx/mutex.h Normal file
View File

@@ -0,0 +1,171 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_MUTEX_H__
#define __BX_MUTEX_H__
#include "bx.h"
#include "cpu.h"
#include "sem.h"
#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID
# include <pthread.h>
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
# include <errno.h>
#endif // BX_PLATFORM_
namespace bx
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
typedef CRITICAL_SECTION pthread_mutex_t;
typedef unsigned pthread_mutexattr_t;
inline int pthread_mutex_lock(pthread_mutex_t* _mutex)
{
EnterCriticalSection(_mutex);
return 0;
}
inline int pthread_mutex_unlock(pthread_mutex_t* _mutex)
{
LeaveCriticalSection(_mutex);
return 0;
}
inline int pthread_mutex_trylock(pthread_mutex_t* _mutex)
{
return TryEnterCriticalSection(_mutex) ? 0 : EBUSY;
}
inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/)
{
InitializeCriticalSection(_mutex);
return 0;
}
inline int pthread_mutex_destroy(pthread_mutex_t* _mutex)
{
DeleteCriticalSection(_mutex);
return 0;
}
#endif // BX_PLATFORM_
class Mutex
{
public:
Mutex()
{
pthread_mutex_init(&m_handle, NULL);
}
~Mutex()
{
pthread_mutex_destroy(&m_handle);
}
void lock()
{
pthread_mutex_lock(&m_handle);
}
void unlock()
{
pthread_mutex_unlock(&m_handle);
}
private:
Mutex(const Mutex& _rhs); // no copy constructor
Mutex& operator=(const Mutex& _rhs); // no assignment operator
pthread_mutex_t m_handle;
};
class MutexScope
{
public:
MutexScope(Mutex& _mutex)
: m_mutex(_mutex)
{
m_mutex.lock();
}
~MutexScope()
{
m_mutex.unlock();
}
private:
MutexScope(); // no default constructor
MutexScope(const MutexScope& _rhs); // no copy constructor
MutexScope& operator=(const MutexScope& _rhs); // no assignment operator
Mutex& m_mutex;
};
#if 1
typedef Mutex LwMutex;
#else
class LwMutex
{
public:
LwMutex()
: m_count(0)
{
}
~LwMutex()
{
}
void lock()
{
if (atomicIncr(&m_count) > 1)
{
m_sem.wait();
}
}
void unlock()
{
if (atomicDecr(&m_count) > 0)
{
m_sem.post();
}
}
private:
LwMutex(const LwMutex& _rhs); // no copy constructor
LwMutex& operator=(const LwMutex& _rhs); // no assignment operator
Semaphore m_sem;
volatile int32_t m_count;
};
#endif // 0
class LwMutexScope
{
public:
LwMutexScope(LwMutex& _mutex)
: m_mutex(_mutex)
{
m_mutex.lock();
}
~LwMutexScope()
{
m_mutex.unlock();
}
private:
LwMutexScope(); // no default constructor
LwMutexScope(const LwMutexScope& _rhs); // no copy constructor
LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator
LwMutex& m_mutex;
};
} // namespace bx
#endif // __BX_MUTEX_H__

46
include/bx/os.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_OS_H__
#define __BX_OS_H__
#include "bx.h"
#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX
# include <sched.h> // sched_yield
# if BX_PLATFORM_NACL
# include <sys/nacl_syscalls.h> // nanosleep
# else
# include <time.h> // nanosleep
# endif // BX_PLATFORM_NACL
#endif // BX_PLATFORM_
namespace bx
{
inline void sleep(uint32_t _ms)
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
Sleep(_ms);
#else
timespec req = {_ms/1000, (_ms%1000)*1000000};
timespec rem = {0, 0};
nanosleep(&req, &rem);
#endif // BX_PLATFORM_
}
inline void yield()
{
#if BX_PLATFORM_WINDOWS
SwitchToThread();
#elif BX_PLATFORM_XBOX360
Sleep(0);
#else
sched_yield();
#endif // BX_PLATFORM_
}
} // namespace bx
#endif // __BX_OS_H__

86
include/bx/platform.h Normal file
View File

@@ -0,0 +1,86 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_PLATFORM_H__
#define __BX_PLATFORM_H__
#define BX_COMPILER_CLANG 0
#define BX_COMPILER_GCC 0
#define BX_COMPILER_MSVC 0
#define BX_PLATFORM_ANDROID 0
#define BX_PLATFORM_LINUX 0
#define BX_PLATFORM_NACL 0
#define BX_PLATFORM_WINDOWS 0
#define BX_PLATFORM_XBOX360 0
#define BX_CPU_ARM 0
#define BX_CPU_PPC 0
#define BX_CPU_X86 0
#define BX_CPU_ENDIAN_BIG 0
#define BX_CPU_ENDIAN_LITTLE 0
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Compilers
#if defined(_MSC_VER)
# undef BX_COMPILER_MSVC
# define BX_COMPILER_MSVC 1
#elif defined(__clang__)
// clang defines __GNUC__
# undef BX_COMPILER_CLANG
# define BX_COMPILER_CLANG 1
#elif defined(__GNUC__)
# undef BX_COMPILER_GCC
# define BX_COMPILER_GCC 1
#else
# error "BX_COMPILER_* is not defined!"
#endif //
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Operating_Systems
#if defined(_XBOX_VER)
# undef BX_PLATFORM_XBOX360
# define BX_PLATFORM_XBOX360 1
#elif defined(_WIN32) || defined(_WIN64)
# undef BX_PLATFORM_WINDOWS
# define BX_PLATFORM_WINDOWS 1
#elif defined(__native_client__)
// NaCl compiler defines __linux__
# undef BX_PLATFORM_NACL
# define BX_PLATFORM_NACL 1
#elif defined(__ANDROID__)
// Android compiler defines __linux__
# undef BX_PLATFORM_ANDROID
# define BX_PLATFORM_ANDROID 1
#elif defined(__linux__)
# undef BX_PLATFORM_LINUX
# define BX_PLATFORM_LINUX 1
#else
# error "BX_PLATFORM_* is not defined!"
#endif //
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Architectures
#if defined(__arm__)
# undef BX_CPU_ARM
# define BX_CPU_ARM 1
# define BX_CACHE_LINE_SIZE 64
#elif defined(_M_PPC) || defined(__powerpc__) || defined(__powerpc64__)
# undef BX_CPU_PPC
# define BX_CPU_PPC 1
# define BX_CACHE_LINE_SIZE 128
#elif defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
# undef BX_CPU_X86
# define BX_CPU_X86 1
# define BX_CACHE_LINE_SIZE 64
#endif //
#if BX_CPU_PPC
# undef BX_CPU_ENDIAN_BIG
# define BX_CPU_ENDIAN_BIG 1
#else
# undef BX_CPU_ENDIAN_LITTLE
# define BX_CPU_ENDIAN_LITTLE 1
#endif // BX_PLATFORM_
#endif // __BX_PLATFORM_H__

313
include/bx/ringbuffer.h Normal file
View File

@@ -0,0 +1,313 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_RINGBUFFER_H__
#define __BX_RINGBUFFER_H__
#include "bx.h"
#include "cpu.h"
#include "uint32_t.h"
namespace bx
{
class RingBufferControl
{
public:
RingBufferControl(uint32_t _size)
: m_size(_size)
, m_current(0)
, m_write(0)
, m_read(0)
{
}
~RingBufferControl()
{
}
uint32_t available() const
{
return distance(m_read, m_current);
}
uint32_t consume(uint32_t _size) // consumer only
{
const uint32_t maxSize = distance(m_read, m_current);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_read, size);
const uint32_t read = uint32_mod(advance, m_size);
m_read = read;
return size;
}
uint32_t reserve(uint32_t _size) // producer only
{
const uint32_t dist = distance(m_write, m_read)-1;
const uint32_t maxSize = uint32_sels(dist, m_size-1, dist);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_write, size);
const uint32_t write = uint32_mod(advance, m_size);
m_write = write;
return size;
}
uint32_t commit(uint32_t _size) // producer only
{
const uint32_t maxSize = distance(m_current, m_write);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_current, size);
const uint32_t current = uint32_mod(advance, m_size);
m_current = current;
return size;
}
uint32_t distance(uint32_t _from, uint32_t _to) const // both
{
const uint32_t diff = uint32_sub(_to, _from);
const uint32_t le = uint32_add(m_size, diff);
const uint32_t result = uint32_sels(diff, le, diff);
return result;
}
const uint32_t m_size;
uint32_t m_current;
uint32_t m_write;
uint32_t m_read;
};
class SpScRingBufferControl
{
public:
SpScRingBufferControl(uint32_t _size)
: m_size(_size)
, m_current(0)
, m_write(0)
, m_read(0)
{
}
~SpScRingBufferControl()
{
}
uint32_t available() const
{
return distance(m_read, m_current);
}
uint32_t consume(uint32_t _size) // consumer only
{
const uint32_t maxSize = distance(m_read, m_current);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_read, size);
const uint32_t read = uint32_mod(advance, m_size);
m_read = read;
return size;
}
uint32_t reserve(uint32_t _size) // producer only
{
const uint32_t dist = distance(m_write, m_read)-1;
const uint32_t maxSize = uint32_sels(dist, m_size-1, dist);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_write, size);
const uint32_t write = uint32_mod(advance, m_size);
m_write = write;
return size;
}
uint32_t commit(uint32_t _size) // producer only
{
const uint32_t maxSize = distance(m_current, m_write);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_current, size);
const uint32_t current = uint32_mod(advance, m_size);
// must commit all memory writes before moving m_current pointer
// once m_current pointer moves data is used by consumer thread
memoryBarrier();
m_current = current;
return size;
}
uint32_t distance(uint32_t _from, uint32_t _to) const // both
{
const uint32_t diff = uint32_sub(_to, _from);
const uint32_t le = uint32_add(m_size, diff);
const uint32_t result = uint32_sels(diff, le, diff);
return result;
}
const uint32_t m_size;
uint32_t m_current;
uint32_t m_write;
uint32_t m_read;
};
template <typename Control>
class ReadRingBufferT
{
public:
ReadRingBufferT(Control& _control, const char* _buffer, uint32_t _size)
: m_control(_control)
, m_read(_control.m_read)
, m_end(m_read+_size)
, m_size(_size)
, m_buffer(_buffer)
{
BX_CHECK(_control.available() >= _size, "%d >= %d", _control.available(), _size);
}
~ReadRingBufferT()
{
}
void end()
{
m_control.consume(m_size);
}
void read(char* _data, uint32_t _len)
{
const uint32_t end = (m_read + _len) % m_control.m_size;
uint32_t wrap = 0;
const char* from = &m_buffer[m_read];
if (end < m_read)
{
wrap = m_control.m_size - m_read;
memcpy(_data, from, wrap);
_data += wrap;
from = (const char*)&m_buffer[0];
}
memcpy(_data, from, _len-wrap);
m_read = end;
}
void skip(uint32_t _len)
{
m_read += _len;
m_read %= m_control.m_size;
}
private:
template <typename Ty>
friend class WriteRingBufferT;
ReadRingBufferT();
ReadRingBufferT(const Control&);
void operator=(const Control&);
Control& m_control;
uint32_t m_read;
uint32_t m_end;
const uint32_t m_size;
const char* m_buffer;
};
typedef ReadRingBufferT<RingBufferControl> ReadRingBuffer;
typedef ReadRingBufferT<SpScRingBufferControl> SpScReadRingBuffer;
template <typename Control>
class WriteRingBufferT
{
public:
WriteRingBufferT(Control& _control, char* _buffer, uint32_t _size)
: m_control(_control)
, m_size(_size)
, m_buffer(_buffer)
{
uint32_t size = m_control.reserve(_size);
BX_CHECK(size == _size, "%d == %d", size, _size);
m_write = m_control.m_current;
m_end = m_write+_size;
}
~WriteRingBufferT()
{
}
void end()
{
m_control.commit(m_size);
}
void write(const char* _data, uint32_t _len)
{
const uint32_t end = (m_write + _len) % m_control.m_size;
uint32_t wrap = 0;
char* to = &m_buffer[m_write];
if (end < m_write)
{
wrap = m_control.m_size - m_write;
memcpy(to, _data, wrap);
_data += wrap;
to = (char*)&m_buffer[0];
}
memcpy(to, _data, _len-wrap);
m_write = end;
}
void write(ReadRingBufferT<Control>& _read, uint32_t _len)
{
const uint32_t end = (_read.m_read + _len) % _read.m_control.m_size;
uint32_t wrap = 0;
const char* from = &_read.m_buffer[_read.m_read];
if (end < _read.m_read)
{
wrap = _read.m_control.m_size - _read.m_read;
write(from, wrap);
from = (const char*)&_read.m_buffer[0];
}
write(from, _len-wrap);
_read.m_read = end;
}
void skip(uint32_t _len)
{
m_write += _len;
m_write %= m_control.m_size;
}
private:
WriteRingBufferT();
WriteRingBufferT(const WriteRingBufferT<Control>&);
void operator=(const WriteRingBufferT<Control>&);
Control& m_control;
uint32_t m_write;
uint32_t m_end;
const uint32_t m_size;
char* m_buffer;
};
typedef WriteRingBufferT<RingBufferControl> WriteRingBuffer;
typedef WriteRingBufferT<SpScRingBufferControl> SpScWriteRingBuffer;
} // namespace bx
#endif // __BX_RINGBUFFER_H__

97
include/bx/rng.h Normal file
View File

@@ -0,0 +1,97 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_RNG_H__
#define __BX_RNG_H__
#include "bx.h"
namespace bx
{
// George Marsaglia's MWC
class RngMwc
{
public:
RngMwc(uint32_t _z = 12345, uint32_t _w = 65435)
: m_z(_z)
, m_w(_w)
{
}
void reset(uint32_t _z = 12345, uint32_t _w = 65435)
{
m_z = _z;
m_w = _w;
}
uint32_t gen()
{
m_z = 36969*(m_z&65535)+(m_z>>16);
m_w = 18000*(m_w&65535)+(m_w>>16);
return (m_z<<16)+m_w;
}
private:
uint32_t m_z;
uint32_t m_w;
};
// George Marsaglia's FIB
class RngFib
{
public:
RngFib()
: m_a(9983651)
, m_b(95746118)
{
}
void reset()
{
m_a = 9983651;
m_b = 95746118;
}
uint32_t gen()
{
m_b = m_a+m_b;
m_a = m_b-m_a;
return m_a;
}
private:
uint32_t m_a;
uint32_t m_b;
};
// George Marsaglia's SHR3
class RngShr3
{
public:
RngShr3(uint32_t _jsr = 34221)
: m_jsr(_jsr)
{
}
void reset(uint32_t _jsr = 34221)
{
m_jsr = _jsr;
}
uint32_t gen()
{
m_jsr ^= m_jsr<<17;
m_jsr ^= m_jsr>>13;
m_jsr ^= m_jsr<<5;
return m_jsr;
}
private:
uint32_t m_jsr;
};
} // namespace bx
#endif // __BX_RNG_H__

107
include/bx/sem.h Normal file
View File

@@ -0,0 +1,107 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_SEM_H__
#define __BX_SEM_H__
#include "bx.h"
#define BX_SEM_CONFIG_POSIX (BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX)
#if BX_SEM_CONFIG_POSIX
# include <semaphore.h>
# include <time.h>
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
# include <limits.h>
#endif // BX_PLATFORM_
namespace bx
{
#if BX_SEM_CONFIG_POSIX
class Semaphore
{
public:
Semaphore()
{
sem_init(&m_handle, 0, 0);
}
~Semaphore()
{
sem_destroy(&m_handle);
}
void post(uint32_t _count = 1)
{
for (uint32_t ii = 0; ii < _count; ++ii)
{
sem_post(&m_handle);
}
}
bool wait(int32_t _msecs = -1)
{
#if BX_PLATFORM_NACL
BX_CHECK(-1 == _msecs, "NaCl doesn't support sem_timedwait at this moment.");
return 0 == sem_wait(&m_handle);
#else
if (0 > _msecs)
{
return 0 == sem_wait(&m_handle);
}
timespec ts;
ts.tv_sec = _msecs/1000;
ts.tv_nsec = (_msecs%1000)*1000;
return 0 == sem_timedwait(&m_handle, &ts);
#endif // BX_PLATFORM_
}
private:
Semaphore(const Semaphore& _rhs); // no copy constructor
Semaphore& operator=(const Semaphore& _rhs); // no assignment operator
sem_t m_handle;
};
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
class Semaphore
{
public:
Semaphore()
{
m_handle = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
BX_CHECK(NULL != m_handle, "Failed to create Semaphore!");
}
~Semaphore()
{
CloseHandle(m_handle);
}
void post(uint32_t _count = 1) const
{
ReleaseSemaphore(m_handle, _count, NULL);
}
bool wait(int32_t _msecs = -1) const
{
DWORD milliseconds = (0 > _msecs) ? INFINITE : _msecs;
return WAIT_OBJECT_0 == WaitForSingleObject(m_handle, milliseconds);
}
private:
Semaphore(const Semaphore& _rhs); // no copy constructor
Semaphore& operator=(const Semaphore& _rhs); // no assignment operator
HANDLE m_handle;
};
#endif // BX_PLATFORM_
} // namespace bx
#endif // __BX_SEM_H__

152
include/bx/spscqueue.h Normal file
View File

@@ -0,0 +1,152 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_SPSCQUEUE_H__
#define __BX_SPSCQUEUE_H__
#include <list>
#include "bx.h"
#include "cpu.h"
#include "mutex.h"
#include "uint32_t.h"
namespace bx
{
// http://drdobbs.com/article/print?articleId=210604448&siteSectionName=
template <typename Ty>
class SpScUnboundedQueueOptimized
{
public:
SpScUnboundedQueueOptimized()
: m_first(new Node(NULL) )
, m_divider(m_first)
, m_last(m_first)
{
}
~SpScUnboundedQueueOptimized()
{
while (NULL != m_first)
{
Node* node = m_first;
m_first = node->m_next;
delete node;
}
}
void push(Ty* _ptr) // producer only
{
m_last->m_next = new Node(_ptr);
atomicExchangePtr((void**)&m_last, m_last->m_next);
while (m_first != m_divider)
{
Node* node = m_first;
m_first = m_first->m_next;
delete node;
}
}
Ty* peek() // consumer only
{
if (m_divider != m_last)
{
Ty* ptr = m_divider->m_next->m_ptr;
return ptr;
}
return NULL;
}
Ty* pop() // consumer only
{
if (m_divider != m_last)
{
Ty* ptr = m_divider->m_next->m_ptr;
atomicExchangePtr((void**)&m_divider, m_divider->m_next);
return ptr;
}
return NULL;
}
private:
SpScUnboundedQueueOptimized(const SpScUnboundedQueueOptimized& _rhs); // no copy constructor
SpScUnboundedQueueOptimized& operator=(const SpScUnboundedQueueOptimized& _rhs); // no assignment operator
struct Node
{
Node(Ty* _ptr)
: m_ptr(_ptr)
, m_next(NULL)
{
}
Ty* m_ptr;
Node* m_next;
};
Node* m_first;
Node* m_divider;
Node* m_last;
};
template<typename Ty>
class SpScUnboundedQueueNaive
{
public:
SpScUnboundedQueueNaive()
{
}
~SpScUnboundedQueueNaive()
{
BX_CHECK(m_queue.empty(), "Queue is not empty!");
}
void push(Ty* _item)
{
bx::LwMutexScope lock(m_mutex);
m_queue.push_back(_item);
}
Ty* peek()
{
bx::LwMutexScope lock(m_mutex);
if (!m_queue.empty() )
{
return m_queue.front();
}
return NULL;
}
Ty* pop()
{
bx::LwMutexScope lock(m_mutex);
if (!m_queue.empty() )
{
Ty* item = m_queue.front();
m_queue.pop_front();
return item;
}
return NULL;
}
private:
bx::LwMutex m_mutex;
std::list<Ty*> m_queue;
};
#if BX_CONFIG_SPSCQUEUE_USE_NAIVE
# define SpScUnboundedQueue SpScUnboundedQueueNaive
#else
# define SpScUnboundedQueue SpScUnboundedQueueOptimized
#endif // BX_CONFIG_NAIVE
} // namespace bx
#endif // __BX_RINGBUFFER_H__

53
include/bx/timer.h Normal file
View File

@@ -0,0 +1,53 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_TIMER_H__
#define __BX_TIMER_H__
#include "bx.h"
#if BX_PLATFORM_ANDROID
# include <time.h> // clock, clock_gettime
#elif BX_PLATFORM_NACL | BX_PLATFORM_LINUX
# include <sys/time.h> // gettimeofday
#endif // BX_PLATFORM_
namespace bx
{
inline int64_t getHPCounter()
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
LARGE_INTEGER li;
// Performance counter value may unexpectedly leap forward
// http://support.microsoft.com/kb/274323
QueryPerformanceCounter(&li);
int64_t i64 = li.QuadPart;
#elif BX_PLATFORM_ANDROID
int64_t i64 = clock();
#else
struct timeval now;
gettimeofday(&now, 0);
int64_t i64 = now.tv_sec*1000000 + now.tv_usec;
#endif // BNET_PLATFORM_
static int64_t offset = i64;
return i64 - offset;
}
inline int64_t getHPFrequency()
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
LARGE_INTEGER li;
QueryPerformanceFrequency(&li);
return li.QuadPart;
#elif BX_PLATFORM_ANDROID
return CLOCKS_PER_SEC;
#else
return 1000000;
#endif // BNET_PLATFORM_
}
} // namespace bx
#endif // __BX_TIMER_H__

454
include/bx/uint32_t.h Normal file
View File

@@ -0,0 +1,454 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
// Copyright 2006 Mike Acton <macton@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
#ifndef __BX_UINT32_T_H__
#define __BX_UINT32_T_H__
#include "bx.h"
#if BX_COMPILER_MSVC
# if BX_PLATFORM_WINDOWS
# include <math.h> // math.h is included because VS bitches:
// warning C4985: 'ceil': attributes not present on previous declaration.
// must be included before intrin.h.
# include <intrin.h>
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# endif // BX_PLATFORM_WINDOWS
#endif // BX_COMPILER_MSVC
namespace bx
{
inline uint32_t uint32_li(uint32_t _a)
{
return _a;
}
inline uint32_t uint32_dec(uint32_t _a)
{
return _a - 1;
}
inline uint32_t uint32_inc(uint32_t _a)
{
return _a + 1;
}
inline uint32_t uint32_not(uint32_t _a)
{
return ~_a;
}
inline uint32_t uint32_neg(uint32_t _a)
{
return -(int32_t)_a;
}
inline uint32_t uint32_ext(uint32_t _a)
{
return ( (int32_t)_a)>>31;
}
inline uint32_t uint32_and(uint32_t _a, uint32_t _b)
{
return _a & _b;
}
inline uint32_t uint32_xor(uint32_t _a, uint32_t _b)
{
return _a ^ _b;
}
inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b)
{
return !_a != !_b;
}
inline uint32_t uint32_andc(uint32_t _a, uint32_t _b)
{
return _a & ~_b;
}
inline uint32_t uint32_or(uint32_t _a, uint32_t _b)
{
return _a | _b;
}
inline uint32_t uint32_sll(uint32_t _a, int _sa)
{
return _a << _sa;
}
inline uint32_t uint32_srl(uint32_t _a, int _sa)
{
return _a >> _sa;
}
inline uint32_t uint32_sra(uint32_t _a, int _sa)
{
return ( (int32_t)_a) >> _sa;
}
inline uint32_t uint32_rol(uint32_t _a, int _sa)
{
return ( _a << _sa) | (_a >> (32-_sa) );
}
inline uint32_t uint32_ror(uint32_t _a, int _sa)
{
return ( _a >> _sa) | (_a << (32-_sa) );
}
inline uint32_t uint32_add(uint32_t _a, uint32_t _b)
{
return _a + _b;
}
inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)
{
return _a - _b;
}
inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)
{
return _a * _b;
}
inline uint32_t uint32_div(uint32_t _a, uint32_t _b)
{
return (_a / _b);
}
inline uint32_t uint32_mod(uint32_t _a, uint32_t _b)
{
return (_a % _b);
}
inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)
{
return -(_a == _b);
}
inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)
{
return -(_a != _b);
}
inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)
{
return -(_a < _b);
}
inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b)
{
return -(_a <= _b);
}
inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)
{
return -(_a > _b);
}
inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)
{
return -(_a >= _b);
}
inline uint32_t uint32_setnz(uint32_t _a)
{
return -!!_a;
}
inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
{
const uint32_t add = uint32_add(_a, _b);
const uint32_t lt = uint32_cmplt(add, _a);
const uint32_t result = uint32_or(add, lt);
return result;
}
inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b)
{
const uint32_t sub = uint32_sub(_a, _b);
const uint32_t le = uint32_cmple(sub, _a);
const uint32_t result = uint32_and(sub, le);
return result;
}
inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b)
{
const uint64_t mul = (uint64_t)_a * (uint64_t)_b;
const uint32_t hi = mul >> 32;
const uint32_t nz = uint32_setnz(hi);
const uint32_t result = uint32_or(uint32_t(mul), nz);
return result;
}
inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)
{
const uint32_t mask = uint32_ext(test);
const uint32_t sel_a = uint32_and(_a, mask);
const uint32_t sel_b = uint32_andc(_b, mask);
const uint32_t result = uint32_or(sel_a, sel_b);
return (result);
}
inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)
{
const uint32_t sel_a = uint32_and(_a, _mask);
const uint32_t sel_b = uint32_andc(_b, _mask);
const uint32_t result = uint32_or(sel_a, sel_b);
return (result);
}
inline uint32_t uint32_imin(uint32_t _a, uint32_t _b)
{
const uint32_t a_sub_b = uint32_sub(_a, _b);
const uint32_t result = uint32_sels(a_sub_b, _a, _b);
return result;
}
inline uint32_t uint32_imax(uint32_t _a, uint32_t _b)
{
const uint32_t b_sub_a = uint32_sub(_b, _a);
const uint32_t result = uint32_sels(b_sub_a, _a, _b);
return result;
}
inline uint32_t uint32_min(uint32_t _a, uint32_t _b)
{
return _a > _b ? _b : _a;
}
inline uint32_t uint32_max(uint32_t _a, uint32_t _b)
{
return _a > _b ? _a : _b;
}
inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)
{
const uint32_t inc = uint32_inc(_val);
const uint32_t max_diff = uint32_sub(_max, _val);
const uint32_t neg_max_diff = uint32_neg(max_diff);
const uint32_t max_or = uint32_or(max_diff, neg_max_diff);
const uint32_t max_diff_nz = uint32_ext(max_or);
const uint32_t result = uint32_selb(max_diff_nz, inc, _min);
return result;
}
inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)
{
const uint32_t dec = uint32_dec(_val);
const uint32_t min_diff = uint32_sub(_min, _val);
const uint32_t neg_min_diff = uint32_neg(min_diff);
const uint32_t min_or = uint32_or(min_diff, neg_min_diff);
const uint32_t min_diff_nz = uint32_ext(min_or);
const uint32_t result = uint32_selb(min_diff_nz, dec, _max);
return result;
}
inline uint32_t uint32_cntbits_ref(uint32_t _val)
{
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
const uint32_t tmp2 = uint32_sub(_val, tmp1);
const uint32_t tmp3 = uint32_and(tmp2, 0xc30c30c3);
const uint32_t tmp4 = uint32_srl(tmp2, 2);
const uint32_t tmp5 = uint32_and(tmp4, 0xc30c30c3);
const uint32_t tmp6 = uint32_srl(tmp2, 4);
const uint32_t tmp7 = uint32_and(tmp6, 0xc30c30c3);
const uint32_t tmp8 = uint32_add(tmp3, tmp5);
const uint32_t tmp9 = uint32_add(tmp7, tmp8);
const uint32_t tmpA = uint32_srl(tmp9, 6);
const uint32_t tmpB = uint32_add(tmp9, tmpA);
const uint32_t tmpC = uint32_srl(tmpB, 12);
const uint32_t tmpD = uint32_srl(tmpB, 24);
const uint32_t tmpE = uint32_add(tmpB, tmpC);
const uint32_t tmpF = uint32_add(tmpD, tmpE);
const uint32_t result = uint32_and(tmpF, 0x3f);
return result;
}
/// Count number of bits set.
inline uint32_t uint32_cntbits(uint32_t _val)
{
#if BX_COMPILER_GCC
return __builtin_popcount(_val);
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
return __popcnt(_val);
#else
return uint32_cntbits_ref(_val);
#endif // BX_COMPILER_GCC
}
inline uint32_t uint32_cntlz_ref(uint32_t _val)
{
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_or(tmp0, _val);
const uint32_t tmp2 = uint32_srl(tmp1, 2);
const uint32_t tmp3 = uint32_or(tmp2, tmp1);
const uint32_t tmp4 = uint32_srl(tmp3, 4);
const uint32_t tmp5 = uint32_or(tmp4, tmp3);
const uint32_t tmp6 = uint32_srl(tmp5, 8);
const uint32_t tmp7 = uint32_or(tmp6, tmp5);
const uint32_t tmp8 = uint32_srl(tmp7, 16);
const uint32_t tmp9 = uint32_or(tmp8, tmp7);
const uint32_t tmpA = uint32_not(tmp9);
const uint32_t result = uint32_cntbits(tmpA);
return result;
}
/// Count number of leading zeros.
inline uint32_t uint32_cntlz(uint32_t _val)
{
#if BX_COMPILER_GCC
return __builtin_clz(_val);
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
unsigned long index;
_BitScanReverse(&index, _val);
return 31 - index;
#else
return uint32_cntlz_ref(_val);
#endif // BX_COMPILER_
}
inline uint32_t uint32_cnttz_ref(uint32_t _val)
{
const uint32_t tmp0 = uint32_not(_val);
const uint32_t tmp1 = uint32_dec(_val);
const uint32_t tmp2 = uint32_and(tmp0, tmp1);
const uint32_t result = uint32_cntbits(tmp2);
return result;
}
inline uint32_t uint32_cnttz(uint32_t _val)
{
#if BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
unsigned long index;
_BitScanForward(&index, _val);
return index;
#else
return uint32_cnttz_ref(_val);
#endif // BX_COMPILER_
}
// shuffle:
// ---- ---- ---- ---- fedc ba98 7654 3210
// to:
// -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
inline uint32_t uint32_part1by1(uint32_t _a)
{
const uint32_t val = uint32_and(_a, 0xffff);
const uint32_t tmp0 = uint32_sll(val, 8);
const uint32_t tmp1 = uint32_xor(val, tmp0);
const uint32_t tmp2 = uint32_and(tmp1, 0x00ff00ff);
const uint32_t tmp3 = uint32_sll(tmp2, 4);
const uint32_t tmp4 = uint32_xor(tmp2, tmp3);
const uint32_t tmp5 = uint32_and(tmp4, 0x0f0f0f0f);
const uint32_t tmp6 = uint32_sll(tmp5, 2);
const uint32_t tmp7 = uint32_xor(tmp5, tmp6);
const uint32_t tmp8 = uint32_and(tmp7, 0x33333333);
const uint32_t tmp9 = uint32_sll(tmp8, 1);
const uint32_t tmpA = uint32_xor(tmp8, tmp9);
const uint32_t result = uint32_and(tmpA, 0x55555555);
return result;
}
// shuffle:
// ---- ---- ---- ---- ---- --98 7654 3210
// to:
// ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
inline uint32_t uint32_part1by2(uint32_t _a)
{
const uint32_t val = uint32_and(_a, 0x3ff);
const uint32_t tmp0 = uint32_sll(val, 16);
const uint32_t tmp1 = uint32_xor(val, tmp0);
const uint32_t tmp2 = uint32_and(tmp1, 0xff0000ff);
const uint32_t tmp3 = uint32_sll(tmp2, 8);
const uint32_t tmp4 = uint32_xor(tmp2, tmp3);
const uint32_t tmp5 = uint32_and(tmp4, 0x0300f00f);
const uint32_t tmp6 = uint32_sll(tmp5, 4);
const uint32_t tmp7 = uint32_xor(tmp5, tmp6);
const uint32_t tmp8 = uint32_and(tmp7, 0x030c30c3);
const uint32_t tmp9 = uint32_sll(tmp8, 2);
const uint32_t tmpA = uint32_xor(tmp8, tmp9);
const uint32_t result = uint32_and(tmpA, 0x09249249);
return result;
}
inline uint32_t uint32_testpow2(uint32_t _a)
{
const uint32_t tmp0 = uint32_not(_a);
const uint32_t tmp1 = uint32_inc(tmp0);
const uint32_t tmp2 = uint32_and(_a, tmp1);
const uint32_t tmp3 = uint32_cmpeq(tmp2, _a);
const uint32_t tmp4 = uint32_cmpneq(_a, 0);
const uint32_t result = uint32_and(tmp3, tmp4);
return result;
}
inline uint32_t uint32_nextpow2(uint32_t _a)
{
const uint32_t tmp0 = uint32_dec(_a);
const uint32_t tmp1 = uint32_srl(tmp0, 1);
const uint32_t tmp2 = uint32_or(tmp0, tmp1);
const uint32_t tmp3 = uint32_srl(tmp2, 2);
const uint32_t tmp4 = uint32_or(tmp2, tmp3);
const uint32_t tmp5 = uint32_srl(tmp4, 4);
const uint32_t tmp6 = uint32_or(tmp4, tmp5);
const uint32_t tmp7 = uint32_srl(tmp6, 8);
const uint32_t tmp8 = uint32_or(tmp6, tmp7);
const uint32_t tmp9 = uint32_srl(tmp8, 16);
const uint32_t tmpA = uint32_or(tmp8, tmp9);
const uint32_t result = uint32_inc(tmpA);
return result;
}
} // namespace bx
#endif // __BX_UINT32_T_H__