Initial commit.

This commit is contained in:
Branimir Karadzic
2012-04-03 20:17:55 -07:00
commit 4eb80393d1
30 changed files with 4389 additions and 0 deletions

22
LICENSE Normal file
View File

@@ -0,0 +1,22 @@
Copyright 2010-2012 Branimir Karadzic. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

37
README.md Normal file
View File

@@ -0,0 +1,37 @@
bx
==
Base library.
Contact
-------
Twitter @bkaradzic
Web http://www.stuckingeometry.com
License
-------
Copyright 2010-2012 Branimir Karadzic. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

99
include/bx/blockalloc.h Normal file
View File

@@ -0,0 +1,99 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_BLOCKALLOC_H__
#define __BX_BLOCKALLOC_H__
#include "bx.h"
namespace bx
{
/// Fixed-size element allocator working on caller-provided storage.
///
/// The storage is treated as `_num` consecutive elements of `_size` bytes.
/// Free elements form an intrusive singly linked list: the first two bytes of
/// each free element store the 16-bit index of the next free element, and the
/// list ends with `invalidIndex`. The allocator never allocates or releases
/// the storage itself.
class BlockAlloc
{
public:
    /// Sentinel index: end of the free list / "no free element".
    static const uint16_t invalidIndex = 0xffff;

    /// Smallest element size (bytes) able to hold the 16-bit free-list link.
    /// NOTE(review): not enforced by this class; callers are expected to pass
    /// `_size >= minElementSize`.
    static const uint32_t minElementSize = 2;

    /// Creates an allocator without storage; alloc() always returns NULL.
    BlockAlloc()
        : m_data(NULL)
        , m_num(0)
        , m_size(0)
        , m_numFree(0)
        , m_freeIndex(invalidIndex)
    {
    }

    /// Builds the free list inside `_data`.
    ///
    /// @param _data Storage of at least `_num*_size` bytes (not owned).
    /// @param _num  Number of elements.
    /// @param _size Size of one element in bytes.
    BlockAlloc(void* _data, uint16_t _num, uint16_t _size)
        : m_data(_data)
        , m_num(_num)
        , m_size(_size)
        , m_numFree(_num)
        , m_freeIndex(0)
    {
        if (0 == m_num)
        {
            // No elements: do not touch _data and start with an empty free
            // list so alloc() reports exhaustion instead of handing out
            // element 0 of an empty pool.
            m_freeIndex = invalidIndex;
            return;
        }

        // Link element (ii-1) to element ii, then terminate the list.
        char* element = (char*)_data;
        for (uint16_t ii = 1; ii < m_num; ++ii)
        {
            *(uint16_t*)element = ii;
            element += m_size;
        }
        *(uint16_t*)element = invalidIndex;
    }

    ~BlockAlloc()
    {
    }

    /// Pops the head of the free list.
    /// @returns Pointer to the element, or NULL when none is free.
    void* alloc()
    {
        if (invalidIndex == m_freeIndex)
        {
            return NULL;
        }

        // Widen before multiplying: uint16_t*uint16_t is evaluated as int
        // and could overflow for large pools.
        void* obj = (char*)m_data + (uint32_t)m_freeIndex*m_size;
        m_freeIndex = *(uint16_t*)obj;
        --m_numFree;
        return obj;
    }

    /// Pushes `_obj` back onto the free list.
    /// `_obj` must have been returned by alloc() of this allocator; double
    /// frees are not detected.
    void free(void* _obj)
    {
        const uint16_t index = getIndex(_obj);
        // index is unsigned, so only the upper bound needs checking.
        BX_CHECK(index < m_num, "index %d, m_num %d", index, m_num);

        *(uint16_t*)_obj = m_freeIndex;
        m_freeIndex = index;
        ++m_numFree;
    }

    /// Converts an element pointer into its element index.
    /// The byte offset is divided by the element size and truncated to 16 bits.
    uint16_t getIndex(void* _obj) const
    {
        return (uint16_t)( ( (char*)_obj - (char*)m_data) / m_size);
    }

    /// Number of elements currently on the free list.
    uint16_t getNumFree() const
    {
        return m_numFree;
    }

    /// Converts an element index into a pointer (no bounds check).
    void* getFromIndex(uint16_t _index)
    {
        return (char*)m_data + (uint32_t)_index*m_size;
    }

private:
    void* m_data;         // caller-owned storage
    uint16_t m_num;       // total number of elements
    uint16_t m_size;      // element size in bytes
    uint16_t m_numFree;   // elements currently free
    uint16_t m_freeIndex; // head of the free list, or invalidIndex
};
} // namespace bx
#endif // __BX_BLOCKALLOC_H__

23
include/bx/bx.h Normal file
View File

@@ -0,0 +1,23 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_H__
#define __BX_H__
#include "platform.h"
#include "macros.h"
namespace bx
{
}// namespace bx
// Optional convenience switch:
//  - BX_NAMESPACE not defined -> it is defined here as 0 and nothing else happens.
//  - BX_NAMESPACE defined to a non-zero value before including this header ->
//    a using-directive for namespace bx is issued at this point.
#ifndef BX_NAMESPACE
# define BX_NAMESPACE 0
#elif BX_NAMESPACE
using namespace bx;
#endif // BX_NAMESPACE
#endif // __BX_H__

151
include/bx/commandline.h Normal file
View File

@@ -0,0 +1,151 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_COMMANDLINE_H__
#define __BX_COMMANDLINE_H__
#include "bx.h"
namespace bx
{
/// Minimal command line option lookup over an argc/argv pair.
///
/// Options are matched either by a single-character short name ("-x") or by
/// a case-insensitive long name ("--name"). A parameter is the argument that
/// directly follows the option; arguments starting with '-' are never
/// accepted as parameters (so negative numbers cannot be passed this way).
/// The argv array is not copied and must outlive this object.
class CommandLine
{
public:
    /// Uses the global __argc/__argv.
    /// NOTE(review): these look like the MSVC CRT globals - confirm that
    /// other toolchains provide them.
    CommandLine()
        : m_argc(__argc)
        , m_argv(__argv)
    {
    }

    /// Uses the given argument vector.
    CommandLine(int _argc, char const* const* _argv)
        : m_argc(_argc)
        , m_argv(_argv)
    {
    }

    /// Looks up an option.
    ///
    /// @param _short     Short option character ('\0' for none).
    /// @param _long      Long option name without leading dashes, or NULL.
    /// @param _numParams Number of parameters the option requires.
    /// @returns NULL if the option is absent or its parameter is missing,
    ///   "" if the option is present and `_numParams` is 0, otherwise the
    ///   first parameter.
    const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1) const
    {
        return _findOption(_short, _long, _numParams);
    }

    /// Returns true if the flag is present (no parameter expected).
    bool hasArg(const char _short, const char* _long = NULL) const
    {
        return NULL != findOption(_short, _long, 0);
    }

    /// Returns true if the long-only flag is present.
    bool hasArg(const char* _long) const
    {
        return NULL != findOption('\0', _long, 0);
    }

    /// Fetches a string parameter. `_value` is set to the parameter, or to
    /// NULL when the option is not found.
    bool hasArg(const char*& _value, const char _short, const char* _long = NULL) const
    {
        _value = findOption(_short, _long, 1);
        return NULL != _value;
    }

    /// Fetches an integer parameter (parsed with atoi). `_value` is left
    /// untouched when the option is not found.
    bool hasArg(int& _value, const char _short, const char* _long = NULL) const
    {
        const char* arg = findOption(_short, _long, 1);
        if (NULL == arg)
        {
            return false;
        }

        _value = atoi(arg);
        return true;
    }

    /// Fetches an unsigned parameter (parsed with atoi). `_value` is left
    /// untouched when the option is not found.
    bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL) const
    {
        const char* arg = findOption(_short, _long, 1);
        if (NULL == arg)
        {
            return false;
        }

        _value = atoi(arg);
        return true;
    }

    /// Fetches a boolean parameter. A parameter that starts with '0' or
    /// equals "false" (case-insensitive) yields false; anything else yields
    /// true. `_value` is left untouched when the option is not found.
    bool hasArg(bool& _value, const char _short, const char* _long = NULL) const
    {
        const char* arg = findOption(_short, _long, 1);
        if (NULL == arg)
        {
            return false;
        }

        // _stricmp returns 0 on a match, so it must be compared against 0.
        _value = !('0' == *arg || 0 == _stricmp(arg, "false") );
        return true;
    }

private:
    /// Result for an option matched at position `_index`: "" when no
    /// parameter is wanted, the following argument when it exists and does
    /// not start with '-', NULL otherwise.
    const char* paramAfter(int _index, int _numParams) const
    {
        if (0 == _numParams)
        {
            return "";
        }

        if (_index+_numParams < m_argc
        &&  '-' != *m_argv[_index+1])
        {
            return m_argv[_index+1];
        }

        return NULL;
    }

    /// Scans all arguments for the first occurrence of the option; see
    /// findOption() for the return contract.
    const char* _findOption(const char _short, const char* _long, int _numParams) const
    {
        for (int ii = 0; ii < m_argc; ++ii)
        {
            const char* arg = m_argv[ii];
            if ('-' != *arg)
            {
                continue;
            }

            ++arg; // skip the first dash
            if (_short == *arg)
            {
                // Only an exact "-x" counts as the short option.
                if (1 == strlen(arg) )
                {
                    return paramAfter(ii, _numParams);
                }
            }
            else if (NULL != _long
                 &&  '-' == *arg
                 &&  0 == _stricmp(arg+1, _long) )
            {
                return paramAfter(ii, _numParams);
            }
        }

        return NULL;
    }

    int m_argc;
    char const* const* m_argv;
};
} // namespace bx
#endif /// __BX_COMMANDLINE_H__

19
include/bx/countof.h Normal file
View File

@@ -0,0 +1,19 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_COUNTOF_H__
#define __BX_COUNTOF_H__
#include "bx.h"
namespace bx
{
// Type-safe element count for C arrays.
// http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/
//
// The function template below is only declared, never defined: it is used
// solely as the operand of sizeof. It accepts a reference to an array of N
// elements and "returns" a reference to char[N], so sizeof of the call is N.
// Passing something that is not an array (e.g. a pointer) fails template
// deduction and therefore fails to compile.
template<typename T, size_t N> char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N];
// NOTE: countof is a preprocessor macro, so it is visible globally even
// though it is defined between the braces of namespace bx.
#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) )
} // namespace bx
#endif // __BX_COUNTOF_H__

110
include/bx/cpu.h Normal file
View File

@@ -0,0 +1,110 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_CPU_H__
#define __BX_CPU_H__
#include "bx.h"
#if BX_COMPILER_MSVC
# if BX_PLATFORM_XBOX360
# include <ppcintrinsics.h>
# include <xtl.h>
# else
# include <math.h> // math.h is included because VS bitches:
// warning C4985: 'ceil': attributes not present on previous declaration.
// must be included before intrin.h.
# include <intrin.h>
# include <windows.h>
# endif // !BX_PLATFORM_XBOX360
extern "C" void _ReadBarrier();
extern "C" void _WriteBarrier();
extern "C" void _ReadWriteBarrier();
# pragma intrinsic(_ReadBarrier)
# pragma intrinsic(_WriteBarrier)
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
#endif // BX_COMPILER_MSVC
namespace bx
{
// BX_CACHE_LINE_ALIGN_MARKER() declares an empty anonymous struct aligned to
// BX_CACHE_LINE_SIZE. BX_CACHE_LINE_ALIGN(_def) places such a marker before
// and after the declaration `_def`.
#if BX_COMPILER_MSVC
#   define BX_CACHE_LINE_ALIGN_MARKER() __declspec(align(BX_CACHE_LINE_SIZE) ) struct {}
#else
#   define BX_CACHE_LINE_ALIGN_MARKER() struct {} __attribute__( (__aligned__(BX_CACHE_LINE_SIZE) ) )
#endif // BX_COMPILER_
#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER()

    /// Compiler barrier for reads. On GCC/Clang this is an empty asm
    /// statement with a "memory" clobber (no instruction is emitted); on
    /// MSVC it is the _ReadBarrier intrinsic. No-op on other compilers.
    inline void readBarrier()
    {
#if BX_COMPILER_MSVC
        _ReadBarrier();
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
        asm volatile("":::"memory");
#endif // BX_COMPILER
    }

    /// Compiler barrier for writes; see readBarrier().
    inline void writeBarrier()
    {
#if BX_COMPILER_MSVC
        _WriteBarrier();
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
        asm volatile("":::"memory");
#endif // BX_COMPILER
    }

    /// Compiler barrier for reads and writes; see readBarrier().
    inline void readWriteBarrier()
    {
#if BX_COMPILER_MSVC
        _ReadWriteBarrier();
#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
        asm volatile("":::"memory");
#endif // BX_COMPILER
    }

    /// Hardware memory barrier (lwsync on Xbox 360, mfence on MSVC,
    /// __sync_synchronize elsewhere).
    inline void memoryBarrier()
    {
#if BX_PLATFORM_XBOX360
        __lwsync();
#elif BX_COMPILER_MSVC
        _mm_mfence();
#else
        __sync_synchronize();
//      asm volatile("mfence":::"memory");
#endif // BX_COMPILER
    }

    /// Atomically increments the 32-bit integer at `_var`.
    /// @returns The value AFTER the increment, on every compiler.
    inline int32_t atomicIncr(volatile void* _var)
    {
#if BX_COMPILER_MSVC
        return _InterlockedIncrement( (volatile LONG*)(_var) );
#else
        // add_and_fetch (not fetch_and_add) so the result matches
        // _InterlockedIncrement, which returns the incremented value.
        return __sync_add_and_fetch( (volatile int32_t*)_var, 1);
#endif // BX_COMPILER
    }

    /// Atomically decrements the 32-bit integer at `_var`.
    /// @returns The value AFTER the decrement, on every compiler.
    inline int32_t atomicDecr(volatile void* _var)
    {
#if BX_COMPILER_MSVC
        return _InterlockedDecrement( (volatile LONG*)(_var) );
#else
        // sub_and_fetch so the result matches _InterlockedDecrement.
        return __sync_sub_and_fetch( (volatile int32_t*)_var, 1);
#endif // BX_COMPILER
    }

    /// Atomically stores `_ptr` into `*_target`.
    /// @returns The pointer previously stored in `*_target`.
    inline void* atomicExchangePtr(void** _target, void* _ptr)
    {
#if BX_COMPILER_MSVC
        return InterlockedExchangePointer(_target, _ptr);
#else
        // __sync_lock_test_and_set is only an acquire barrier; issue a full
        // barrier first so earlier stores are visible before the exchange.
        __sync_synchronize();
        return __sync_lock_test_and_set(_target, _ptr);
#endif // BX_COMPILER
    }
} // namespace bx
#endif // __BX_CPU_H__

31
include/bx/debug.h Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_DEBUG_H__
#define __BX_DEBUG_H__
#include "bx.h"
namespace bx
{
// Stops execution at the call site so an attached debugger can take over.
// The mechanism is selected at compile time:
//  - MSVC:                      __debugbreak() intrinsic
//  - ARM:                       "bkpt 0" instruction
//  - x86 with GCC/Clang (not NaCl): "int $3" instruction
//  - anything else:             deliberate write through an invalid pointer
inline void debugBreak()
{
#if BX_COMPILER_MSVC
__debugbreak();
#elif BX_CPU_ARM
asm("bkpt 0");
#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG)
// NaCl doesn't like int 3:
// NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules.
__asm__ ("int $3");
#else // cross platform implementation
// Fallback: store to address 3 to provoke a fault.
// NOTE(review): the pointer is not volatile, so an optimizing compiler may
// be allowed to drop or transform this store - confirm it still traps.
int* int3 = (int*)3L;
*int3 = 3;
#endif // BX
}
} // namespace bx
#endif // __BX_DEBUG_H__

71
include/bx/endian.h Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_ENDIAN_H__
#define __BX_ENDIAN_H__
#include "bx.h"
namespace bx
{
    /// Reverses the byte order of a 16-bit value.
    inline uint16_t endianSwap(uint16_t _in)
    {
        // The operands are promoted to int; the cast keeps the low 16 bits.
        return (uint16_t)( (_in>>8) | (_in<<8) );
    }

    /// Reverses the byte order of a 32-bit value.
    inline uint32_t endianSwap(uint32_t _in)
    {
        return (_in>>24) | (_in<<24)
             | ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8)
             ;
    }

    /// Reverses the byte order of a 64-bit value.
    inline uint64_t endianSwap(uint64_t _in)
    {
        return (_in>>56) | (_in<<56)
             | ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40)
             | ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24)
             | ( (_in&UINT64_C(0x000000ff00000000) )>>8)  | ( (_in&UINT64_C(0x00000000ff000000) )<<8)
             ;
    }

    /// Signed overloads: swap the bytes of the unsigned representation.
    inline int16_t endianSwap(int16_t _in)
    {
        return (int16_t)endianSwap( (uint16_t)_in);
    }

    inline int32_t endianSwap(int32_t _in)
    {
        return (int32_t)endianSwap( (uint32_t)_in);
    }

    inline int64_t endianSwap(int64_t _in)
    {
        return (int64_t)endianSwap( (uint64_t)_in);
    }

    /// Converts between host byte order and little-endian.
    /// Byte-swaps when BX_CPU_ENDIAN_BIG is set, otherwise returns `_in`
    /// unchanged. `_in` itself is never modified.
    template <typename Ty>
    inline Ty littleEndian(Ty& _in)
    {
#if BX_CPU_ENDIAN_BIG
        // The swapped value must be returned; without `return` control
        // would fall off the end of a non-void function.
        return endianSwap(_in);
#else
        return _in;
#endif // BX_CPU_ENDIAN_BIG
    }

    /// Converts between host byte order and big-endian.
    /// Byte-swaps when BX_CPU_ENDIAN_LITTLE is set, otherwise returns `_in`
    /// unchanged. `_in` itself is never modified.
    template <typename Ty>
    inline Ty bigEndian(Ty& _in)
    {
#if BX_CPU_ENDIAN_LITTLE
        return endianSwap(_in);
#else
        return _in;
#endif // BX_CPU_ENDIAN_LITTLE
    }
} // namespace bx
#endif // __BX_ENDIAN_H__

227
include/bx/float4_neon.h Normal file
View File

@@ -0,0 +1,227 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_NEON_H__
#define __BX_FLOAT4_NEON_H__
#include <arm_neon.h>
namespace bx
{
    // Reference:
    // http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
    // http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/
    // http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
    // http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/
    // http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/
    // http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/

    // Four packed 32-bit lanes, named x, y, z, w (lane 0..3). This is a
    // compiler vector type, not a struct/union: lanes must be accessed with
    // lane intrinsics, there are no members.
    typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) );

#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3

// Generates float4_swiz_<abcd>(_a), returning { _a.a, _a.b, _a.c, _a.d }.
// Lanes are moved as 32-bit integers so bit patterns are preserved exactly.
// The lane numbers are compile-time constants (ELEM*), as the lane
// intrinsics require.
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
            BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
            { \
                const uint32x4_t src = vreinterpretq_u32_f32(_a); \
                uint32x4_t dst = vdupq_n_u32(vgetq_lane_u32(src, ELEM##_x) ); \
                dst = vsetq_lane_u32(vgetq_lane_u32(src, ELEM##_y), dst, 1); \
                dst = vsetq_lane_u32(vgetq_lane_u32(src, ELEM##_z), dst, 2); \
                dst = vsetq_lane_u32(vgetq_lane_u32(src, ELEM##_w), dst, 3); \
                return vreinterpretq_f32_u32(dst); \
            }

#include "float4_swizzle.inl"

#undef IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx

    // Two-operand shuffles. In the names, lower-case x/y/z/w are lanes of
    // _a and upper-case A/B/C/D are lanes 0..3 of _b; the name spells the
    // result lanes in order.

    /// { a.x, a.y, b.x, b.y }
    BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
    {
        return vcombine_f32(vget_low_f32(_a), vget_low_f32(_b) );
    }

    /// { b.x, b.y, a.x, a.y }
    BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
    {
        return vcombine_f32(vget_low_f32(_b), vget_low_f32(_a) );
    }

    /// { b.z, b.w, a.z, a.w }
    BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
    {
        return vcombine_f32(vget_high_f32(_b), vget_high_f32(_a) );
    }

    /// { a.z, a.w, b.z, b.w }
    BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
    {
        return vcombine_f32(vget_high_f32(_a), vget_high_f32(_b) );
    }

    /// { a.x, b.x, a.y, b.y }
    BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
    {
        // vzipq interleaves: val[0] = { a0, b0, a1, b1 }.
        return vzipq_f32(_a, _b).val[0];
    }

    /// { a.y, b.y, a.x, b.x }
    BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
    {
        // Interleave the low lanes, then swap the two 64-bit halves.
        const float4_t xAyB = vzipq_f32(_a, _b).val[0];
        return vcombine_f32(vget_high_f32(xAyB), vget_low_f32(xAyB) );
    }

    /// { a.z, b.z, a.w, b.w }
    BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
    {
        // vzipq interleaves: val[1] = { a2, b2, a3, b3 }.
        return vzipq_f32(_a, _b).val[1];
    }

    /// { b.z, a.z, b.w, a.w }
    BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
    {
        return vzipq_f32(_b, _a).val[1];
    }

    /// Lane extraction.
    BX_FLOAT4_INLINE float float4_x(float4_t _a)
    {
        return vgetq_lane_f32(_a, 0);
    }

    BX_FLOAT4_INLINE float float4_y(float4_t _a)
    {
        return vgetq_lane_f32(_a, 1);
    }

    BX_FLOAT4_INLINE float float4_z(float4_t _a)
    {
        return vgetq_lane_f32(_a, 2);
    }

    BX_FLOAT4_INLINE float float4_w(float4_t _a)
    {
        return vgetq_lane_f32(_a, 3);
    }

    /// Builds a vector from four floats.
    BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
    {
        const float32_t val[4] = {_x, _y, _z, _w};
        return vld1q_f32(val);
    }

    /// Builds a vector from four raw 32-bit patterns.
    BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
    {
        const uint32_t val[4] = {_x, _y, _z, _w};
        return vreinterpretq_f32_u32(vld1q_u32(val) );
    }

    /// Replicates a float into all four lanes.
    BX_FLOAT4_INLINE float4_t float4_splat(float _a)
    {
        return vdupq_n_f32(_a);
    }

    /// Replicates a raw 32-bit pattern into all four lanes.
    BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
    {
        return vreinterpretq_f32_u32(vdupq_n_u32(_a) );
    }

    BX_FLOAT4_INLINE float4_t float4_zero()
    {
        return vdupq_n_f32(0.0f);
    }

    BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
    {
        return vaddq_f32(_a, _b);
    }

    BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
    {
        return vsubq_f32(_a, _b);
    }

    BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
    {
        return vmulq_f32(_a, _b);
    }

    /// Reciprocal estimate (low precision).
    BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
    {
        return vrecpeq_f32(_a);
    }

    /// Reciprocal square root estimate (low precision).
    BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
    {
        return vrsqrteq_f32(_a);
    }

    /// Bitwise _a & _b.
    BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
    {
        const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
        const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
        return vreinterpretq_f32_u32(vandq_u32(tmp0, tmp1) );
    }

    /// Bitwise _a & ~_b (vbicq clears in _a the bits that are set in _b).
    BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
    {
        const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
        const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
        return vreinterpretq_f32_u32(vbicq_u32(tmp0, tmp1) );
    }

    /// Bitwise _a | _b.
    BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
    {
        const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
        const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
        return vreinterpretq_f32_u32(vorrq_u32(tmp0, tmp1) );
    }

    /// Per-lane 32-bit integer add on the raw bit patterns.
    BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
    {
        const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
        const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
        const uint32x4_t add = vaddq_u32(tmp0, tmp1);
        const float4_t result = vreinterpretq_f32_u32(add);
        return result;
    }

    /// Per-lane 32-bit integer subtract on the raw bit patterns.
    BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
    {
        const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
        const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
        const uint32x4_t sub = vsubq_u32(tmp0, tmp1);
        const float4_t result = vreinterpretq_f32_u32(sub);
        return result;
    }

    // The shifts below use the register form of VSHL (vshlq_*), which takes
    // a per-lane signed count: positive shifts left, negative shifts right.
    // Unlike the *_n_* forms it does not need a compile-time constant, so it
    // is valid for the runtime `_count` parameter.

    /// Per-lane logical shift left by _count bits.
    BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
    {
        const uint32x4_t tmp = vreinterpretq_u32_f32(_a);
        const uint32x4_t shift = vshlq_u32(tmp, vdupq_n_s32(_count) );
        const float4_t result = vreinterpretq_f32_u32(shift);
        return result;
    }

    /// Per-lane logical (zero-filling) shift right by _count bits.
    BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
    {
        const uint32x4_t tmp = vreinterpretq_u32_f32(_a);
        const uint32x4_t shift = vshlq_u32(tmp, vdupq_n_s32(-_count) );
        const float4_t result = vreinterpretq_f32_u32(shift);
        return result;
    }

    /// Per-lane arithmetic (sign-extending) shift right by _count bits.
    BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
    {
        const int32x4_t a = vreinterpretq_s32_f32(_a);
        const int32x4_t shift = vshlq_s32(a, vdupq_n_s32(-_count) );
        const float4_t result = vreinterpretq_f32_s32(shift);
        return result;
    }
} // namespace bx
#define float4_div_nr float4_div_nr_ni
#define float4_div float4_div_nr_ni
#include "float4_ni.h"
#endif // __BX_FLOAT4_NEON_H__

407
include/bx/float4_ni.h Normal file
View File

@@ -0,0 +1,407 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_NI_H__
#define __BX_FLOAT4_NI_H__
// Generic ("_ni") implementations of float4 operations, written purely in
// terms of other float4_* functions. The unsuffixed names called below
// (float4_madd, float4_div, ...) are expected to be provided by the including
// platform header, either natively or as aliases of these _ni versions.
namespace bx
{
    /// { a.x, b.x, a.z, b.z }
    BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b)
    {
        const float4_t xAyB = float4_shuf_xAyB(_a, _b);
        const float4_t zCwD = float4_shuf_zCwD(_a, _b);
        const float4_t result = float4_shuf_xyAB(xAyB, zCwD);
        return result;
    }

    /// { a.y, b.y, a.w, b.w }
    BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b)
    {
        const float4_t xAyB = float4_shuf_xAyB(_a, _b);
        const float4_t zCwD = float4_shuf_zCwD(_a, _b);
        const float4_t result = float4_shuf_zwCD(xAyB, zCwD);
        return result;
    }

    /// _a*_b + _c
    BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c)
    {
        const float4_t mul = float4_mul(_a, _b);
        const float4_t result = float4_add(mul, _c);
        return result;
    }

    /// _c - _a*_b
    BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c)
    {
        const float4_t mul = float4_mul(_a, _b);
        const float4_t result = float4_sub(_c, mul);
        return result;
    }

    /// _a/_b from a reciprocal estimate refined with one correction step.
    BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b)
    {
        // 0x3f800001 is the float just above 1.0f.
        const float4_t oneish = float4_isplat(0x3f800001);
        const float4_t est = float4_rcp_est(_b);
        const float4_t iter0 = float4_mul(_a, est);
        const float4_t tmp1 = float4_nmsub(_b, est, oneish);
        const float4_t result = float4_madd(tmp1, iter0, iter0);
        return result;
    }

    /// 1/_a
    BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a)
    {
        const float4_t one = float4_splat(1.0f);
        const float4_t result = float4_div(one, _a);
        return result;
    }

    /// OR across lanes: result.x = a.x|a.y|a.z|a.w, the other lanes are 0.
    BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a)
    {
        const float4_t zwxy = float4_swiz_zwxy(_a);
        // tmp0 = { x|z, y|w, z|x, w|y }
        const float4_t tmp0 = float4_or(_a, zwxy);
        // Broadcast (y|w) from the partial result, not from _a, so that
        // lane w also contributes to the final value.
        const float4_t tmp1 = float4_swiz_yyyy(tmp0);
        const float4_t tmp2 = float4_or(tmp0, tmp1);
        const float4_t mf000 = float4_ild(-1, 0, 0, 0);
        const float4_t result = float4_and(tmp2, mf000);
        return result;
    }

    /// OR with complement: _a | ~_b (the counterpart of float4_andc).
    BX_FLOAT4_INLINE float4_t float4_orc_ni(float4_t _a, float4_t _b)
    {
        const float4_t mffff = float4_isplat(-1);
        const float4_t notb = float4_xor(_b, mffff);
        const float4_t result = float4_or(_a, notb);
        return result;
    }

    /// 0 - _a
    BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a)
    {
        const float4_t zero = float4_zero();
        const float4_t result = float4_sub(zero, _a);
        return result;
    }

    /// Bit select: takes bits of _a where _mask is 1 and bits of _b where it is 0.
    BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b)
    {
        const float4_t sel_a = float4_and(_a, _mask);
        const float4_t sel_b = float4_andc(_b, _mask);
        const float4_t result = float4_or(sel_a, sel_b);
        return result;
    }

    /// Sign select: per lane, picks _a where the sign bit of _test is set,
    /// otherwise _b (the sign bit is smeared into a full mask with sra 31).
    BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b)
    {
        const float4_t mask = float4_sra(_test, 31);
        const float4_t result = float4_selb(mask, _a, _b);
        return result;
    }

    /// Bitwise ~_a
    BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a)
    {
        const float4_t mffff = float4_isplat(-1);
        const float4_t result = float4_xor(_a, mffff);
        return result;
    }

    /// max(-_a, _a)
    BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a)
    {
        const float4_t a_neg = float4_neg(_a);
        const float4_t result = float4_max(a_neg, _a);
        return result;
    }

    /// max(min(_a, _max), _min)
    BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max)
    {
        const float4_t tmp = float4_min(_a, _max);
        const float4_t result = float4_max(tmp, _min);
        return result;
    }

    /// _a + _s*(_b - _a)
    BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s)
    {
        const float4_t ba = float4_sub(_b, _a);
        const float4_t result = float4_madd(_s, ba, _a);
        return result;
    }

    /// Square root from a reciprocal-sqrt estimate plus one refinement step.
    BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a)
    {
        const float4_t half = float4_splat(0.5f);
        const float4_t one = float4_splat(1.0f);
        const float4_t zero = float4_zero();
        const float4_t tmp0 = float4_rsqrt_est(_a);
        const float4_t tmp1 = float4_madd(tmp0, _a, zero);    // ~sqrt(a)
        const float4_t tmp2 = float4_madd(tmp1, half, zero);  // ~sqrt(a)/2
        const float4_t tmp3 = float4_nmsub(tmp0, tmp1, one);  // residual
        const float4_t result = float4_madd(tmp3, tmp2, tmp1);
        return result;
    }

    /// 1/sqrt(_a) computed with float4_sqrt and float4_div.
    BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)
    {
        const float4_t one = float4_splat(1.0f);
        const float4_t sqrt = float4_sqrt(_a);
        const float4_t result = float4_div(one, sqrt);
        return result;
    }

    /// 1/sqrt(_a) from an estimate e refined as 0.5*e*(3 - a*e*e).
    BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a)
    {
        const float4_t rsqrt = float4_rsqrt_est(_a);
        const float4_t iter0 = float4_mul(_a, rsqrt);
        const float4_t iter1 = float4_mul(iter0, rsqrt);
        const float4_t half = float4_splat(0.5f);
        const float4_t half_rsqrt = float4_mul(half, rsqrt);
        const float4_t three = float4_splat(3.0f);
        const float4_t three_sub_iter1 = float4_sub(three, iter1);
        const float4_t result = float4_mul(half_rsqrt, three_sub_iter1);
        return result;
    }

    /// 1/sqrt(_a) via the integer "magic constant" initial guess
    /// (0x5f3759df - (bits>>1)) followed by one refinement step.
    BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a)
    {
        const float4_t half = float4_splat(0.5f);
        const float4_t ah = float4_mul(half, _a);
        const float4_t ashift = float4_sra(_a, 1);
        const float4_t magic = float4_isplat(0x5f3759df);
        const float4_t msuba = float4_isub(magic, ashift);
        const float4_t msubasq = float4_mul(msuba, msuba);
        const float4_t tmp0 = float4_splat(1.5f);
        const float4_t tmp1 = float4_mul(ah, msubasq);
        const float4_t tmp2 = float4_sub(tmp0, tmp1);
        const float4_t result = float4_mul(msuba, tmp2);
        return result;
    }

    // Polynomial helpers for log2/exp2. float4_polyN(_a, c0, c1, ..., cN)
    // evaluates c0 + c1*a + ... + cN*a^N per lane using nested
    // multiply-adds (coefficients are given in ascending power order).
    namespace float4_logexp_detail
    {
        /// Constant polynomial; _a is intentionally unused.
        BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b)
        {
            return float4_splat(_b);
        }

        BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c)
        {
            const float4_t bbbb = float4_splat(_b);
            const float4_t poly0 = float4_poly0(_a, _c);
            const float4_t result = float4_madd(poly0, _a, bbbb);
            return result;
        }

        BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d)
        {
            const float4_t bbbb = float4_splat(_b);
            const float4_t poly = float4_poly1(_a, _c, _d);
            const float4_t result = float4_madd(poly, _a, bbbb);
            return result;
        }

        BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e)
        {
            const float4_t bbbb = float4_splat(_b);
            const float4_t poly = float4_poly2(_a, _c, _d, _e);
            const float4_t result = float4_madd(poly, _a, bbbb);
            return result;
        }

        BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f)
        {
            const float4_t bbbb = float4_splat(_b);
            const float4_t poly = float4_poly3(_a, _c, _d, _e, _f);
            const float4_t result = float4_madd(poly, _a, bbbb);
            return result;
        }

        BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g)
        {
            const float4_t bbbb = float4_splat(_b);
            const float4_t poly = float4_poly4(_a, _c, _d, _e, _f, _g);
            const float4_t result = float4_madd(poly, _a, bbbb);
            return result;
        }

        /// Polynomial used by float4_log2_ni on the mantissa. The #if chain
        /// selects the polynomial degree (5 is active; 4, 3, 2 are alternatives).
        BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a)
        {
#if 1
            const float4_t result = float4_poly5(_a
                , 3.11578814719469302614f, -3.32419399085241980044f
                , 2.59883907202499966007f, -1.23152682416275988241f
                , 0.318212422185251071475f, -0.0344359067839062357313f
                );
#elif 0
            const float4_t result = float4_poly4(_a
                , 2.8882704548164776201f, -2.52074962577807006663f
                , 1.48116647521213171641f, -0.465725644288844778798f
                , 0.0596515482674574969533f
                );
#elif 0
            const float4_t result = float4_poly3(_a
                , 2.61761038894603480148f, -1.75647175389045657003f
                , 0.688243882994381274313f, -0.107254423828329604454f
                );
#else
            const float4_t result = float4_poly2(_a
                , 2.28330284476918490682f, -1.04913055217340124191f
                , 0.204446009836232697516f
                );
#endif
            return result;
        }

        /// Polynomial used by float4_exp2_ni on the fractional part. The #if
        /// chain selects the polynomial degree (5 is active).
        BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a)
        {
#if 1
            const float4_t result = float4_poly5(_a
                , 9.9999994e-1f, 6.9315308e-1f
                , 2.4015361e-1f, 5.5826318e-2f
                , 8.9893397e-3f, 1.8775767e-3f
                );
#elif 0
            const float4_t result = float4_poly4(_a
                , 1.0000026f, 6.9300383e-1f
                , 2.4144275e-1f, 5.2011464e-2f
                , 1.3534167e-2f
                );
#elif 0
            const float4_t result = float4_poly3(_a
                , 9.9992520e-1f, 6.9583356e-1f
                , 2.2606716e-1f, 7.8024521e-2f
                );
#else
            const float4_t result = float4_poly2(_a
                , 1.0017247f, 6.5763628e-1f
                , 3.3718944e-1f
                );
#endif // 0
            return result;
        }
    } // namespace float4_logexp_detail

    /// Approximate log2(_a): the unbiased exponent is extracted with integer
    /// operations and added to logpoly(m)*(m - 1), where m is the mantissa
    /// rebuilt as a float in [1, 2).
    BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a)
    {
        const float4_t expmask = float4_isplat(0x7f800000);
        const float4_t mantmask = float4_isplat(0x007fffff);
        const float4_t one = float4_splat(1.0f);
        const float4_t c127 = float4_isplat(127);
        const float4_t aexp = float4_and(_a, expmask);
        const float4_t aexpsr = float4_srl(aexp, 23);
        const float4_t tmp0 = float4_isub(aexpsr, c127);
        const float4_t exp = float4_itof(tmp0);
        const float4_t amask = float4_and(_a, mantmask);
        const float4_t mant = float4_or(amask, one);
        const float4_t poly = float4_logexp_detail::float4_logpoly(mant);
        const float4_t mandiff = float4_sub(mant, one);
        const float4_t result = float4_madd(poly, mandiff, exp);
        return result;
    }

    /// Approximate 2^_a: the input is clamped to [-126.99999, 129], split
    /// into integer and fractional parts; the integer part is placed in the
    /// exponent bits and multiplied by exppoly(fraction).
    BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a)
    {
        const float4_t upper = float4_splat( 129.0f);
        const float4_t lower = float4_splat(-126.99999f);
        const float4_t tmp0 = float4_min(_a, upper);
        const float4_t aaaa = float4_max(tmp0, lower);
        const float4_t half = float4_splat(0.5f);
        const float4_t tmp2 = float4_sub(aaaa, half);
        const float4_t ipart = float4_ftoi(tmp2);
        const float4_t iround = float4_itof(ipart);
        const float4_t fpart = float4_sub(aaaa, iround);
        const float4_t c127 = float4_isplat(127);
        const float4_t tmp5 = float4_iadd(ipart, c127);
        const float4_t expipart = float4_sll(tmp5, 23);
        const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart);
        const float4_t result = float4_mul(expipart, expfpart);
        return result;
    }

    /// _a^_b computed as exp2(log2(_a)*_b).
    BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b)
    {
        const float4_t alog2 = float4_log2(_a);
        const float4_t alog2b = float4_mul(alog2, _b);
        const float4_t result = float4_exp2(alog2b);
        return result;
    }

    /// 3-component dot product, replicated into all four lanes.
    BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b)
    {
        const float4_t xyzw = float4_mul(_a, _b);
        const float4_t xxxx = float4_swiz_xxxx(xyzw);
        const float4_t yyyy = float4_swiz_yyyy(xyzw);
        const float4_t zzzz = float4_swiz_zzzz(xyzw);
        const float4_t tmp1 = float4_add(xxxx, yyyy);
        const float4_t result = float4_add(zzzz, tmp1);
        return result;
    }

    /// 3-component cross product: a.yzx*b.zxy - a.zxy*b.yzx.
    BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b)
    {
        const float4_t a_yzxw = float4_swiz_yzxw(_a);
        const float4_t a_zxyw = float4_swiz_zxyw(_a);
        const float4_t b_zxyw = float4_swiz_zxyw(_b);
        const float4_t b_yzxw = float4_swiz_yzxw(_b);
        const float4_t tmp = float4_mul(a_yzxw, b_zxyw);
        const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp);
        return result;
    }

    /// Scales _a by 1/sqrt(dot3(_a, _a)); no guard for zero length.
    BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a)
    {
        const float4_t dot3 = float4_dot3(_a, _a);
        const float4_t invSqrt = float4_rsqrt(dot3);
        const float4_t result = float4_mul(_a, invSqrt);
        return result;
    }

    /// 4-component dot product, replicated into all four lanes.
    BX_FLOAT4_INLINE float4_t float4_dot_ni(float4_t _a, float4_t _b)
    {
        const float4_t xyzw = float4_mul(_a, _b);
        const float4_t yzwx = float4_swiz_yzwx(xyzw);
        const float4_t tmp0 = float4_add(xyzw, yzwx);
        const float4_t zwxy = float4_swiz_zwxy(tmp0);
        const float4_t result = float4_add(tmp0, zwxy);
        return result;
    }
} // namespace bx
#endif // __BX_FLOAT4_NI_H__

522
include/bx/float4_ref.h Normal file
View File

@@ -0,0 +1,522 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_REF_H__
#define __BX_FLOAT4_REF_H__
#include <math.h> // sqrtf
namespace bx
{
// Reference (scalar) 4-lane vector: the same 16 bytes viewed as signed
// integers, unsigned integers or floats. Lane order is x, y, z, w.
union float4_t
{
    int32_t  ixyzw[4];
    uint32_t uxyzw[4];
    float    fxyzw[4];
};
// Swizzle generation. ELEMx..ELEMw map a lane letter to its array index.
// IMPLEMENT_SWIZZLE(_x, _y, _z, _w) defines float4_swiz_<_x><_y><_z><_w>(_a),
// which returns { _a[_x], _a[_y], _a[_z], _a[_w] }; lanes are copied through
// the integer view so bit patterns are preserved. float4_swizzle.inl is
// included while the macro is defined (presumably it invokes it once per
// lane combination - the file is not visible here). All helper macros are
// undefined again afterwards.
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
{ \
float4_t result; \
result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
return result; \
}
#include "float4_swizzle.inl"
#undef IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx
#define IMPLEMENT_TEST(_xyzw, _mask) \
BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
{ \
uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
| ( (_test.uxyzw[2]>>31)<<2) \
| ( (_test.uxyzw[1]>>31)<<1) \
| (_test.uxyzw[0]>>31) \
; \
return 0 != (tmp&(_mask) ); \
} \
\
BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
{ \
uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
| ( (_test.uxyzw[2]>>31)<<2) \
| ( (_test.uxyzw[1]>>31)<<1) \
| (_test.uxyzw[0]>>31) \
; \
return (_mask) == (tmp&(_mask) ); \
}
IMPLEMENT_TEST(x , 0x1);
IMPLEMENT_TEST(y , 0x2);
IMPLEMENT_TEST(xy , 0x3);
IMPLEMENT_TEST(z , 0x4);
IMPLEMENT_TEST(xz , 0x5);
IMPLEMENT_TEST(yz , 0x6);
IMPLEMENT_TEST(xyz , 0x7);
IMPLEMENT_TEST(w , 0x8);
IMPLEMENT_TEST(xw , 0x9);
IMPLEMENT_TEST(yw , 0xa);
IMPLEMENT_TEST(xyw , 0xb);
IMPLEMENT_TEST(zw , 0xc);
IMPLEMENT_TEST(xzw , 0xd);
IMPLEMENT_TEST(yzw , 0xe);
IMPLEMENT_TEST(xyzw , 0xf);
#undef IMPLEMENT_TEST
// Returns (_a.x, _a.y, _b.x, _b.y).
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
{
	// Start from _a (keeps x, y) and replace the upper two lanes.
	float4_t out = _a;
	out.uxyzw[2] = _b.uxyzw[0];
	out.uxyzw[3] = _b.uxyzw[1];
	return out;
}
// Returns (_b.x, _b.y, _a.x, _a.y).
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
{
	// Start from _b (keeps its x, y) and replace the upper two lanes.
	float4_t out = _b;
	out.uxyzw[2] = _a.uxyzw[0];
	out.uxyzw[3] = _a.uxyzw[1];
	return out;
}
// Returns (_b.z, _b.w, _a.z, _a.w).
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
{
	// Start from _a (keeps z, w) and replace the lower two lanes.
	float4_t out = _a;
	out.uxyzw[0] = _b.uxyzw[2];
	out.uxyzw[1] = _b.uxyzw[3];
	return out;
}
// Returns (_a.z, _a.w, _b.z, _b.w).
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
{
	// Start from _b (keeps its z, w) and replace the lower two lanes.
	float4_t out = _b;
	out.uxyzw[0] = _a.uxyzw[2];
	out.uxyzw[1] = _a.uxyzw[3];
	return out;
}
// Interleaves the low halves: returns (_a.x, _b.x, _a.y, _b.y).
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
{
	float4_t out;
	for (int pair = 0; pair < 2; ++pair)
	{
		out.uxyzw[2*pair    ] = _a.uxyzw[pair];
		out.uxyzw[2*pair + 1] = _b.uxyzw[pair];
	}
	return out;
}
// Interleaves the low halves in reverse pair order:
// returns (_a.y, _b.y, _a.x, _b.x).
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
{
	float4_t out;
	for (int pair = 0; pair < 2; ++pair)
	{
		const int src = 1 - pair; // y first, then x
		out.uxyzw[2*pair    ] = _a.uxyzw[src];
		out.uxyzw[2*pair + 1] = _b.uxyzw[src];
	}
	return out;
}
// Interleaves the high halves: returns (_a.z, _b.z, _a.w, _b.w).
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
{
	float4_t out;
	for (int pair = 0; pair < 2; ++pair)
	{
		out.uxyzw[2*pair    ] = _a.uxyzw[2 + pair];
		out.uxyzw[2*pair + 1] = _b.uxyzw[2 + pair];
	}
	return out;
}
// Interleaves the high halves, _b first: returns (_b.z, _a.z, _b.w, _a.w).
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
{
	float4_t out;
	for (int pair = 0; pair < 2; ++pair)
	{
		out.uxyzw[2*pair    ] = _b.uxyzw[2 + pair];
		out.uxyzw[2*pair + 1] = _a.uxyzw[2 + pair];
	}
	return out;
}
// Extracts lane 0 (x) as a float.
BX_FLOAT4_INLINE float float4_x(float4_t _a)
{
	const float x = _a.fxyzw[0];
	return x;
}
// Extracts lane 1 (y) as a float.
BX_FLOAT4_INLINE float float4_y(float4_t _a)
{
	const float y = _a.fxyzw[1];
	return y;
}
// Extracts lane 2 (z) as a float.
BX_FLOAT4_INLINE float float4_z(float4_t _a)
{
	const float z = _a.fxyzw[2];
	return z;
}
// Extracts lane 3 (w) as a float.
BX_FLOAT4_INLINE float float4_w(float4_t _a)
{
	const float w = _a.fxyzw[3];
	return w;
}
// Loads a whole float4_t (16 bytes) from the memory _ptr points at.
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
{
	const float4_t* src = reinterpret_cast<const float4_t*>(_ptr);
	return *src;
}
// Stores all four lanes of _a (16 bytes) to the memory _ptr points at.
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
{
	float4_t* dst = reinterpret_cast<float4_t*>(_ptr);
	*dst = _a;
}
// Builds a vector from four floats: lane 0 = _x ... lane 3 = _w.
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
{
	const float lanes[4] = { _x, _y, _z, _w };
	float4_t out;
	for (int lane = 0; lane < 4; ++lane)
	{
		out.fxyzw[lane] = lanes[lane];
	}
	return out;
}
// Builds a vector from four raw 32-bit patterns: lane 0 = _x ... lane 3 = _w.
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
{
	const uint32_t lanes[4] = { _x, _y, _z, _w };
	float4_t out;
	for (int lane = 0; lane < 4; ++lane)
	{
		out.uxyzw[lane] = lanes[lane];
	}
	return out;
}
// Reads one float from _ptr and replicates it into all four lanes.
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
{
	const float* src = reinterpret_cast<const float*>(_ptr);
	const float value = *src;
	float4_t out;
	for (int lane = 0; lane < 4; ++lane)
	{
		out.fxyzw[lane] = value;
	}
	return out;
}
// Replicates the float _a into all four lanes.
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
{
	float4_t out;
	for (int lane = 0; lane < 4; ++lane)
	{
		out.fxyzw[lane] = _a;
	}
	return out;
}
// Replicates the 32-bit pattern _a into all four lanes.
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
{
	float4_t out;
	for (int lane = 0; lane < 4; ++lane)
	{
		out.uxyzw[lane] = _a;
	}
	return out;
}
// Returns a vector with every lane's bits cleared (reads as 0.0f / 0).
BX_FLOAT4_INLINE float4_t float4_zero()
{
	const float4_t zero = float4_isplat(0);
	return zero;
}
// Converts each lane from a signed 32-bit integer to the float with the
// same numeric value (a value conversion, not a bit reinterpretation).
//
// _a: vector whose lanes are read as int32_t.
// Returns the vector of converted floats.
BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
{
	float4_t result;
	// Read from the argument; the previous code read the uninitialized
	// `result`, so the output was indeterminate and `_a` was ignored.
	result.fxyzw[0] = static_cast<float>(_a.ixyzw[0]);
	result.fxyzw[1] = static_cast<float>(_a.ixyzw[1]);
	result.fxyzw[2] = static_cast<float>(_a.ixyzw[2]);
	result.fxyzw[3] = static_cast<float>(_a.ixyzw[3]);
	return result;
}
// Converts each float lane to a signed 32-bit integer. The C++ cast
// truncates toward zero.
//
// _a: vector whose lanes are read as float.
// Returns the vector of converted integers (stored in the integer view).
BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
{
	float4_t result;
	// Read from the argument; the previous code read the uninitialized
	// `result`, so the output was indeterminate and `_a` was ignored.
	result.ixyzw[0] = static_cast<int32_t>(_a.fxyzw[0]);
	result.ixyzw[1] = static_cast<int32_t>(_a.fxyzw[1]);
	result.ixyzw[2] = static_cast<int32_t>(_a.fxyzw[2]);
	result.ixyzw[3] = static_cast<int32_t>(_a.fxyzw[3]);
	return result;
}
// Removes the fractional part of each lane by converting float -> int
// (float4_ftoi) and back to float (float4_itof).
BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
{
	return float4_itof(float4_ftoi(_a) );
}
// Lane-wise float addition: _a + _b.
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
{
	float4_t sum;
	for (int lane = 0; lane < 4; ++lane)
	{
		sum.fxyzw[lane] = _a.fxyzw[lane] + _b.fxyzw[lane];
	}
	return sum;
}
// Lane-wise float subtraction: _a - _b.
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
{
	float4_t diff;
	for (int lane = 0; lane < 4; ++lane)
	{
		diff.fxyzw[lane] = _a.fxyzw[lane] - _b.fxyzw[lane];
	}
	return diff;
}
// Lane-wise float multiplication: _a * _b.
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
{
	float4_t product;
	for (int lane = 0; lane < 4; ++lane)
	{
		product.fxyzw[lane] = _a.fxyzw[lane] * _b.fxyzw[lane];
	}
	return product;
}
// Lane-wise float division: _a / _b.
//
// _a: numerators, _b: denominators.
// Division by zero follows the platform's floating-point rules; no check is
// made here.
BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
{
	float4_t result;
	// The previous body multiplied the lanes (copied from float4_mul), which
	// disagreed with the SSE backend's _mm_div_ps.
	result.fxyzw[0] = _a.fxyzw[0] / _b.fxyzw[0];
	result.fxyzw[1] = _a.fxyzw[1] / _b.fxyzw[1];
	result.fxyzw[2] = _a.fxyzw[2] / _b.fxyzw[2];
	result.fxyzw[3] = _a.fxyzw[3] / _b.fxyzw[3];
	return result;
}
// Lane-wise reciprocal, 1 / _a. In this reference backend the "estimate"
// is a full-precision float division.
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
{
	float4_t rcp;
	for (int lane = 0; lane < 4; ++lane)
	{
		rcp.fxyzw[lane] = 1.0f / _a.fxyzw[lane];
	}
	return rcp;
}
// Lane-wise square root via sqrtf.
BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
{
	float4_t root;
	for (int lane = 0; lane < 4; ++lane)
	{
		root.fxyzw[lane] = sqrtf(_a.fxyzw[lane]);
	}
	return root;
}
// Lane-wise reciprocal square root, 1 / sqrtf(_a). In this reference
// backend the "estimate" is computed at full float precision.
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
{
	float4_t rsqrt;
	for (int lane = 0; lane < 4; ++lane)
	{
		rsqrt.fxyzw[lane] = 1.0f / sqrtf(_a.fxyzw[lane]);
	}
	return rsqrt;
}
// Lane-wise float compare: each result lane is all ones (0xffffffff) where
// _a == _b and all zeros otherwise.
BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
{
	float4_t mask;
	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] == _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}
	return mask;
}
// Lane-wise float compare: each result lane is all ones (0xffffffff) where
// _a < _b and all zeros otherwise.
BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
{
	float4_t mask;
	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] < _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}
	return mask;
}
// Lane-wise float compare: each result lane is all ones (0xffffffff) where
// _a <= _b and all zeros otherwise.
BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
{
	float4_t mask;
	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] <= _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}
	return mask;
}
// Lane-wise float compare: each result lane is all ones (0xffffffff) where
// _a > _b and all zeros otherwise.
BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
{
	float4_t mask;
	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] > _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}
	return mask;
}
// Lane-wise float compare: each result lane is all ones (0xffffffff) where
// _a >= _b and all zeros otherwise.
BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
{
	float4_t mask;
	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] >= _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}
	return mask;
}
// Lane-wise minimum. A lane takes _a only when _a < _b holds; otherwise
// (including when the comparison is false because of NaN) it takes _b.
BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
{
	float4_t lowest;
	for (int lane = 0; lane < 4; ++lane)
	{
		lowest.fxyzw[lane] = _a.fxyzw[lane] < _b.fxyzw[lane] ? _a.fxyzw[lane] : _b.fxyzw[lane];
	}
	return lowest;
}
// Lane-wise maximum. A lane takes _a only when _a > _b holds; otherwise
// (including when the comparison is false because of NaN) it takes _b.
BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
{
	float4_t highest;
	for (int lane = 0; lane < 4; ++lane)
	{
		highest.fxyzw[lane] = _a.fxyzw[lane] > _b.fxyzw[lane] ? _a.fxyzw[lane] : _b.fxyzw[lane];
	}
	return highest;
}
// Bitwise AND of the raw lane bits: _a & _b.
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
{
	float4_t bits;
	for (int lane = 0; lane < 4; ++lane)
	{
		bits.uxyzw[lane] = _a.uxyzw[lane] & _b.uxyzw[lane];
	}
	return bits;
}
// Bitwise AND with complement of the raw lane bits: _a & ~_b.
BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
{
	float4_t bits;
	for (int lane = 0; lane < 4; ++lane)
	{
		bits.uxyzw[lane] = _a.uxyzw[lane] & ~_b.uxyzw[lane];
	}
	return bits;
}
// Bitwise OR of the raw lane bits: _a | _b.
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
{
	float4_t bits;
	for (int lane = 0; lane < 4; ++lane)
	{
		bits.uxyzw[lane] = _a.uxyzw[lane] | _b.uxyzw[lane];
	}
	return bits;
}
// Bitwise XOR of the raw lane bits: _a ^ _b.
BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
{
	float4_t bits;
	for (int lane = 0; lane < 4; ++lane)
	{
		bits.uxyzw[lane] = _a.uxyzw[lane] ^ _b.uxyzw[lane];
	}
	return bits;
}
// Shifts the unsigned bits of every lane left by _count.
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
{
	float4_t shifted;
	for (int lane = 0; lane < 4; ++lane)
	{
		shifted.uxyzw[lane] = _a.uxyzw[lane] << _count;
	}
	return shifted;
}
// Logical right shift: shifts the unsigned bits of every lane right by
// _count, filling with zeros.
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
{
	float4_t shifted;
	for (int lane = 0; lane < 4; ++lane)
	{
		shifted.uxyzw[lane] = _a.uxyzw[lane] >> _count;
	}
	return shifted;
}
// Right shift of every lane viewed as a signed integer, using the
// compiler's signed `>>`.
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
{
	float4_t shifted;
	for (int lane = 0; lane < 4; ++lane)
	{
		shifted.ixyzw[lane] = _a.ixyzw[lane] >> _count;
	}
	return shifted;
}
// Lane-wise 32-bit integer addition with wraparound.
//
// The sum is computed on the unsigned view of the lanes: unsigned
// arithmetic wraps modulo 2^32, whereas adding the signed view overflows
// into undefined behavior. The resulting bit pattern equals the
// two's-complement signed sum, matching the SSE backend's _mm_add_epi32.
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
{
	float4_t result;
	result.uxyzw[0] = _a.uxyzw[0] + _b.uxyzw[0];
	result.uxyzw[1] = _a.uxyzw[1] + _b.uxyzw[1];
	result.uxyzw[2] = _a.uxyzw[2] + _b.uxyzw[2];
	result.uxyzw[3] = _a.uxyzw[3] + _b.uxyzw[3];
	return result;
}
// Lane-wise 32-bit integer subtraction with wraparound.
//
// The difference is computed on the unsigned view of the lanes: unsigned
// arithmetic wraps modulo 2^32, whereas subtracting the signed view
// overflows into undefined behavior. The resulting bit pattern equals the
// two's-complement signed difference, matching the SSE backend's
// _mm_sub_epi32.
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
{
	float4_t result;
	result.uxyzw[0] = _a.uxyzw[0] - _b.uxyzw[0];
	result.uxyzw[1] = _a.uxyzw[1] - _b.uxyzw[1];
	result.uxyzw[2] = _a.uxyzw[2] - _b.uxyzw[2];
	result.uxyzw[3] = _a.uxyzw[3] - _b.uxyzw[3];
	return result;
}
} // namespace bx
// This backend does not define the operations below itself. Each public name
// is aliased to its `_ni` counterpart, and float4_ni.h is included afterwards.
// NOTE(review): float4_ni.h presumably provides the `_ni` functions built on
// the primitives defined above — confirm against that header.
#define float4_shuf_xAzC float4_shuf_xAzC_ni
#define float4_shuf_yBwD float4_shuf_yBwD_ni
#define float4_rcp float4_rcp_ni
#define float4_orx float4_orx_ni
#define float4_orc float4_orc_ni
#define float4_neg float4_neg_ni
#define float4_madd float4_madd_ni
#define float4_nmsub float4_nmsub_ni
#define float4_div_nr float4_div_nr_ni
#define float4_selb float4_selb_ni
#define float4_sels float4_sels_ni
#define float4_not float4_not_ni
#define float4_abs float4_abs_ni
#define float4_clamp float4_clamp_ni
#define float4_lerp float4_lerp_ni
#define float4_rsqrt float4_rsqrt_ni
#define float4_rsqrt_nr float4_rsqrt_nr_ni
#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
#define float4_sqrt_nr float4_sqrt_nr_ni
#define float4_log2 float4_log2_ni
#define float4_exp2 float4_exp2_ni
#define float4_pow float4_pow_ni
#define float4_cross3 float4_cross3_ni
#define float4_normalize3 float4_normalize3_ni
#define float4_dot3 float4_dot3_ni
#define float4_dot float4_dot_ni
#include "float4_ni.h"
#endif // __BX_FLOAT4_REF_H__

400
include/bx/float4_sse.h Normal file
View File

@@ -0,0 +1,400 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_SSE_H__
#define __BX_FLOAT4_SSE_H__
#if !defined(__SSE2__)
# error "float4_t requires at least SSE2"
#endif // !defined(__SSE2__)
#include <stdint.h>
#include <emmintrin.h> // __m128i
#if defined(__SSE4_1__)
# include <smmintrin.h>
#endif // defined(__SSE4_1__)
#include <xmmintrin.h> // __m128
namespace bx
{
// SSE backend: float4_t is the native 128-bit SSE register type holding
// four 32-bit lanes.
typedef __m128 float4_t;
// Lane index for each component letter. These helpers exist only while the
// swizzle list is being expanded and are removed again below.
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3
// Expanded once per entry of float4_swizzle.inl: defines float4_swiz_<abcd>
// as a single _mm_shuffle_ps of _a with itself. _MM_SHUFFLE takes its
// selectors from the highest lane down, hence the reversed argument order.
#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
	BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
	{ \
		const float4_t swizzled = _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \
		return swizzled; \
	}
#include "float4_swizzle.inl"
#undef IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx
#define IMPLEMENT_TEST(_xyzw, _mask) \
BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
{ \
return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \
} \
\
BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
{ \
return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \
}
IMPLEMENT_TEST(x , 0x1);
IMPLEMENT_TEST(y , 0x2);
IMPLEMENT_TEST(xy , 0x3);
IMPLEMENT_TEST(z , 0x4);
IMPLEMENT_TEST(xz , 0x5);
IMPLEMENT_TEST(yz , 0x6);
IMPLEMENT_TEST(xyz , 0x7);
IMPLEMENT_TEST(w , 0x8);
IMPLEMENT_TEST(xw , 0x9);
IMPLEMENT_TEST(yw , 0xa);
IMPLEMENT_TEST(xyw , 0xb);
IMPLEMENT_TEST(zw , 0xc);
IMPLEMENT_TEST(xzw , 0xd);
IMPLEMENT_TEST(yzw , 0xe);
IMPLEMENT_TEST(xyzw , 0xf);
#undef IMPLEMENT_TEST
// Returns (_a.x, _a.y, _b.x, _b.y).
BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
{
	const float4_t xyAB = _mm_movelh_ps(_a, _b);
	return xyAB;
}
// Returns (_b.x, _b.y, _a.x, _a.y).
BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
{
	const float4_t ABxy = _mm_movelh_ps(_b, _a);
	return ABxy;
}
// Returns (_b.z, _b.w, _a.z, _a.w).
BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
{
	const float4_t CDzw = _mm_movehl_ps(_a, _b);
	return CDzw;
}
// Returns (_a.z, _a.w, _b.z, _b.w).
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
{
	const float4_t zwCD = _mm_movehl_ps(_b, _a);
	return zwCD;
}
// Interleaves the low halves: returns (_a.x, _b.x, _a.y, _b.y).
BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
{
	const float4_t xAyB = _mm_unpacklo_ps(_a, _b);
	return xAyB;
}
// Returns (_a.y, _b.y, _a.x, _b.x), as the name spells out and as the
// reference backend implements it.
//
// The previous body, _mm_unpacklo_ps(_b, _a), produced
// (_b.x, _a.x, _b.y, _a.y) instead, so the two backends disagreed.
BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
{
	// (a.x, b.x, a.y, b.y)
	const float4_t xAyB = _mm_unpacklo_ps(_a, _b);
	// Swap the two 64-bit halves -> (a.y, b.y, a.x, b.x).
	const float4_t result = _mm_shuffle_ps(xAyB, xAyB, _MM_SHUFFLE(1, 0, 3, 2) );
	return result;
}
// Interleaves the high halves: returns (_a.z, _b.z, _a.w, _b.w).
BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
{
	const float4_t zCwD = _mm_unpackhi_ps(_a, _b);
	return zCwD;
}
// Interleaves the high halves, _b first: returns (_b.z, _a.z, _b.w, _a.w).
BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
{
	const float4_t CzDw = _mm_unpackhi_ps(_b, _a);
	return CzDw;
}
// Extracts lane 0 (x) as a float.
BX_FLOAT4_INLINE float float4_x(float4_t _a)
{
	const float x = _mm_cvtss_f32(_a);
	return x;
}
// Extracts lane 1 (y): broadcast y to every lane, then read lane 0.
BX_FLOAT4_INLINE float float4_y(float4_t _a)
{
	return _mm_cvtss_f32(float4_swiz_yyyy(_a) );
}
// Extracts lane 2 (z): broadcast z to every lane, then read lane 0.
BX_FLOAT4_INLINE float float4_z(float4_t _a)
{
	return _mm_cvtss_f32(float4_swiz_zzzz(_a) );
}
// Extracts lane 3 (w): broadcast w to every lane, then read lane 0.
BX_FLOAT4_INLINE float float4_w(float4_t _a)
{
	return _mm_cvtss_f32(float4_swiz_wwww(_a) );
}
// Loads four floats from _ptr with _mm_load_ps, the aligned load: _ptr
// must be 16-byte aligned.
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
{
	const float* src = reinterpret_cast<const float*>(_ptr);
	return _mm_load_ps(src);
}
// Stores the four lanes of _a to _ptr with _mm_store_ps, the aligned
// store: _ptr must be 16-byte aligned.
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
{
	float* dst = reinterpret_cast<float*>(_ptr);
	_mm_store_ps(dst, _a);
}
// Builds a vector from four floats: lane 0 = _x ... lane 3 = _w.
// _mm_set_ps takes its arguments from the highest lane down.
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
{
	const float4_t xyzw = _mm_set_ps(_w, _z, _y, _x);
	return xyzw;
}
// Builds a vector from four raw 32-bit patterns: lane 0 = _x ... lane 3 = _w.
// The integer register is reinterpreted as floats without conversion.
BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
{
	return _mm_castsi128_ps(_mm_set_epi32(_w, _z, _y, _x) );
}
// Reads one float from _ptr into lane 0 and broadcasts it to all lanes.
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
{
	const float* src = reinterpret_cast<const float*>(_ptr);
	return float4_swiz_xxxx(_mm_load_ss(src) );
}
// Replicates the float _a into all four lanes.
BX_FLOAT4_INLINE float4_t float4_splat(float _a)
{
	const float4_t aaaa = _mm_set1_ps(_a);
	return aaaa;
}
// Replicates the 32-bit pattern _a into all four lanes; the integer
// register is reinterpreted as floats without conversion.
BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
{
	return _mm_castsi128_ps(_mm_set1_epi32(_a) );
}
// Returns a vector with all four lanes set to zero.
BX_FLOAT4_INLINE float4_t float4_zero()
{
	const float4_t zero = _mm_setzero_ps();
	return zero;
}
// Treats the lanes of _a as signed 32-bit integers and converts each one
// to the float with the same numeric value.
BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
{
	return _mm_cvtepi32_ps(_mm_castps_si128(_a) );
}
// Converts each float lane to a signed 32-bit integer with
// _mm_cvtps_epi32 (which rounds according to the current SSE rounding
// mode) and returns the integers in a float4_t without further conversion.
BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
{
	return _mm_castsi128_ps(_mm_cvtps_epi32(_a) );
}
// Rounds every lane to an integral float value.
// With SSE4.1 this is a single _mm_round_ps; otherwise the value is
// converted float -> int32 -> float.
BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
{
#if defined(__SSE4_1__)
	const float4_t rounded = _mm_round_ps(_a, _MM_FROUND_NINT);
	return rounded;
#else
	return _mm_cvtepi32_ps(_mm_cvtps_epi32(_a) );
#endif // defined(__SSE4_1__)
}
// Lane-wise float addition: _a + _b.
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
{
	const float4_t sum = _mm_add_ps(_a, _b);
	return sum;
}
// Lane-wise float subtraction: _a - _b.
BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
{
	const float4_t diff = _mm_sub_ps(_a, _b);
	return diff;
}
// Lane-wise float multiplication: _a * _b.
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
{
	const float4_t product = _mm_mul_ps(_a, _b);
	return product;
}
// Lane-wise float division: _a / _b.
BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
{
	const float4_t quotient = _mm_div_ps(_a, _b);
	return quotient;
}
// Lane-wise approximate reciprocal (hardware estimate, _mm_rcp_ps).
BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
{
	const float4_t estimate = _mm_rcp_ps(_a);
	return estimate;
}
// Lane-wise square root.
BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
{
	const float4_t root = _mm_sqrt_ps(_a);
	return root;
}
// Lane-wise approximate reciprocal square root (hardware estimate,
// _mm_rsqrt_ps).
BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
{
	const float4_t estimate = _mm_rsqrt_ps(_a);
	return estimate;
}
#if defined(__SSE4_1__)
// 3-component dot product using the SSE4.1 dot-product instruction.
// Mask 0x77: the high nibble sums the x, y, z products; the low nibble
// writes the sum to lanes x, y, z and zeroes lane w.
BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, float4_t _b)
{
	const float4_t dot3 = _mm_dp_ps(_a, _b, 0x77);
	return dot3;
}
// 4-component dot product using the SSE4.1 dot-product instruction.
// Mask 0xFF: all four products are summed and the sum is written to
// every lane.
BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b)
{
	const float4_t dot4 = _mm_dp_ps(_a, _b, 0xFF);
	return dot4;
}
#endif // defined(__SSE4_1__)
// Lane-wise compare: all bits set where _a == _b, all clear otherwise.
BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
{
	const float4_t mask = _mm_cmpeq_ps(_a, _b);
	return mask;
}
// Lane-wise compare: all bits set where _a < _b, all clear otherwise.
BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
{
	const float4_t mask = _mm_cmplt_ps(_a, _b);
	return mask;
}
// Lane-wise compare: all bits set where _a <= _b, all clear otherwise.
BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
{
	const float4_t mask = _mm_cmple_ps(_a, _b);
	return mask;
}
// Lane-wise compare: all bits set where _a > _b, all clear otherwise.
BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
{
	const float4_t mask = _mm_cmpgt_ps(_a, _b);
	return mask;
}
// Lane-wise compare: all bits set where _a >= _b, all clear otherwise.
BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
{
	const float4_t mask = _mm_cmpge_ps(_a, _b);
	return mask;
}
// Lane-wise minimum of _a and _b.
BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
{
	const float4_t lowest = _mm_min_ps(_a, _b);
	return lowest;
}
// Lane-wise maximum of _a and _b.
BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
{
	const float4_t highest = _mm_max_ps(_a, _b);
	return highest;
}
// Bitwise AND of the raw lane bits: _a & _b.
BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
{
	const float4_t bits = _mm_and_ps(_a, _b);
	return bits;
}
// Bitwise AND with complement: _a & ~_b.
// _mm_andnot_ps complements its FIRST operand, hence the swapped order.
BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
{
	const float4_t bits = _mm_andnot_ps(_b, _a);
	return bits;
}
// Bitwise OR of the raw lane bits: _a | _b.
BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
{
	const float4_t bits = _mm_or_ps(_a, _b);
	return bits;
}
// Bitwise XOR of the raw lane bits: _a ^ _b.
BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
{
	const float4_t bits = _mm_xor_ps(_a, _b);
	return bits;
}
// Shifts every 32-bit lane left by _count bits. The lanes are
// reinterpreted as integers for the shift and back as floats afterwards.
BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
{
	const __m128i lanes = _mm_castps_si128(_a);
	return _mm_castsi128_ps(_mm_slli_epi32(lanes, _count) );
}
// Logical right shift of every 32-bit lane by _count bits (zero fill).
BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
{
	const __m128i lanes = _mm_castps_si128(_a);
	return _mm_castsi128_ps(_mm_srli_epi32(lanes, _count) );
}
// Arithmetic right shift of every 32-bit lane by _count bits (sign fill).
BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
{
	const __m128i lanes = _mm_castps_si128(_a);
	return _mm_castsi128_ps(_mm_srai_epi32(lanes, _count) );
}
// Lane-wise 32-bit integer addition on the raw lane bits.
BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
{
	const __m128i lhs = _mm_castps_si128(_a);
	const __m128i rhs = _mm_castps_si128(_b);
	return _mm_castsi128_ps(_mm_add_epi32(lhs, rhs) );
}
// Lane-wise 32-bit integer subtraction on the raw lane bits.
BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
{
	const __m128i lhs = _mm_castps_si128(_a);
	const __m128i rhs = _mm_castps_si128(_b);
	return _mm_castsi128_ps(_mm_sub_epi32(lhs, rhs) );
}
} // namespace bx
// This backend does not define the operations below itself. Each public name
// is aliased to its `_ni` counterpart, and float4_ni.h is included afterwards.
// NOTE(review): float4_ni.h presumably provides the `_ni` functions built on
// the primitives defined above — confirm against that header.
#define float4_shuf_xAzC float4_shuf_xAzC_ni
#define float4_shuf_yBwD float4_shuf_yBwD_ni
#define float4_rcp float4_rcp_ni
#define float4_orx float4_orx_ni
#define float4_orc float4_orc_ni
#define float4_neg float4_neg_ni
#define float4_madd float4_madd_ni
#define float4_nmsub float4_nmsub_ni
#define float4_div_nr float4_div_nr_ni
#define float4_selb float4_selb_ni
#define float4_sels float4_sels_ni
#define float4_not float4_not_ni
#define float4_abs float4_abs_ni
#define float4_clamp float4_clamp_ni
#define float4_lerp float4_lerp_ni
#define float4_rsqrt float4_rsqrt_ni
#define float4_rsqrt_nr float4_rsqrt_nr_ni
#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
#define float4_sqrt_nr float4_sqrt_nr_ni
#define float4_log2 float4_log2_ni
#define float4_exp2 float4_exp2_ni
#define float4_pow float4_pow_ni
#define float4_cross3 float4_cross3_ni
#define float4_normalize3 float4_normalize3_ni
// With SSE4.1 the dot products are defined natively above (via _mm_dp_ps);
// without it they are aliased to the `_ni` versions like everything else.
#if !defined(__SSE4_1__)
#define float4_dot3 float4_dot3_ni
#define float4_dot float4_dot_ni
#endif // !defined(__SSE4_1__)
#include "float4_ni.h"
#endif // __BX_FLOAT4_SSE_H__

View File

@@ -0,0 +1,266 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
// X-macro list of swizzle permutations. The including backend header defines
// IMPLEMENT_SWIZZLE(_x, _y, _z, _w) before including this file, and each line
// below expands to one float4_swiz_<abcd> function.
// The check below rejects inclusion unless float4_t.h's guard macro is defined.
#ifndef __BX_FLOAT4_T_H__
# error "xmacro file, must be included from float4_*.h"
#endif // __BX_FLOAT4_T_H__
// Included by the backend headers (float4_ref.h, float4_sse.h), which are
// themselves included from float4_t.h.
IMPLEMENT_SWIZZLE(x, x, x, x)
IMPLEMENT_SWIZZLE(x, x, x, y)
IMPLEMENT_SWIZZLE(x, x, x, z)
IMPLEMENT_SWIZZLE(x, x, x, w)
IMPLEMENT_SWIZZLE(x, x, y, x)
IMPLEMENT_SWIZZLE(x, x, y, y)
IMPLEMENT_SWIZZLE(x, x, y, z)
IMPLEMENT_SWIZZLE(x, x, y, w)
IMPLEMENT_SWIZZLE(x, x, z, x)
IMPLEMENT_SWIZZLE(x, x, z, y)
IMPLEMENT_SWIZZLE(x, x, z, z)
IMPLEMENT_SWIZZLE(x, x, z, w)
IMPLEMENT_SWIZZLE(x, x, w, x)
IMPLEMENT_SWIZZLE(x, x, w, y)
IMPLEMENT_SWIZZLE(x, x, w, z)
IMPLEMENT_SWIZZLE(x, x, w, w)
IMPLEMENT_SWIZZLE(x, y, x, x)
IMPLEMENT_SWIZZLE(x, y, x, y)
IMPLEMENT_SWIZZLE(x, y, x, z)
IMPLEMENT_SWIZZLE(x, y, x, w)
IMPLEMENT_SWIZZLE(x, y, y, x)
IMPLEMENT_SWIZZLE(x, y, y, y)
IMPLEMENT_SWIZZLE(x, y, y, z)
IMPLEMENT_SWIZZLE(x, y, y, w)
IMPLEMENT_SWIZZLE(x, y, z, x)
IMPLEMENT_SWIZZLE(x, y, z, y)
IMPLEMENT_SWIZZLE(x, y, z, z)
// The identity permutation is commented out, so no float4_swiz_xyzw is generated.
// IMPLEMENT_SWIZZLE(x, y, z, w)
IMPLEMENT_SWIZZLE(x, y, w, x)
IMPLEMENT_SWIZZLE(x, y, w, y)
IMPLEMENT_SWIZZLE(x, y, w, z)
IMPLEMENT_SWIZZLE(x, y, w, w)
IMPLEMENT_SWIZZLE(x, z, x, x)
IMPLEMENT_SWIZZLE(x, z, x, y)
IMPLEMENT_SWIZZLE(x, z, x, z)
IMPLEMENT_SWIZZLE(x, z, x, w)
IMPLEMENT_SWIZZLE(x, z, y, x)
IMPLEMENT_SWIZZLE(x, z, y, y)
IMPLEMENT_SWIZZLE(x, z, y, z)
IMPLEMENT_SWIZZLE(x, z, y, w)
IMPLEMENT_SWIZZLE(x, z, z, x)
IMPLEMENT_SWIZZLE(x, z, z, y)
IMPLEMENT_SWIZZLE(x, z, z, z)
IMPLEMENT_SWIZZLE(x, z, z, w)
IMPLEMENT_SWIZZLE(x, z, w, x)
IMPLEMENT_SWIZZLE(x, z, w, y)
IMPLEMENT_SWIZZLE(x, z, w, z)
IMPLEMENT_SWIZZLE(x, z, w, w)
IMPLEMENT_SWIZZLE(x, w, x, x)
IMPLEMENT_SWIZZLE(x, w, x, y)
IMPLEMENT_SWIZZLE(x, w, x, z)
IMPLEMENT_SWIZZLE(x, w, x, w)
IMPLEMENT_SWIZZLE(x, w, y, x)
IMPLEMENT_SWIZZLE(x, w, y, y)
IMPLEMENT_SWIZZLE(x, w, y, z)
IMPLEMENT_SWIZZLE(x, w, y, w)
IMPLEMENT_SWIZZLE(x, w, z, x)
IMPLEMENT_SWIZZLE(x, w, z, y)
IMPLEMENT_SWIZZLE(x, w, z, z)
IMPLEMENT_SWIZZLE(x, w, z, w)
IMPLEMENT_SWIZZLE(x, w, w, x)
IMPLEMENT_SWIZZLE(x, w, w, y)
IMPLEMENT_SWIZZLE(x, w, w, z)
IMPLEMENT_SWIZZLE(x, w, w, w)
IMPLEMENT_SWIZZLE(y, x, x, x)
IMPLEMENT_SWIZZLE(y, x, x, y)
IMPLEMENT_SWIZZLE(y, x, x, z)
IMPLEMENT_SWIZZLE(y, x, x, w)
IMPLEMENT_SWIZZLE(y, x, y, x)
IMPLEMENT_SWIZZLE(y, x, y, y)
IMPLEMENT_SWIZZLE(y, x, y, z)
IMPLEMENT_SWIZZLE(y, x, y, w)
IMPLEMENT_SWIZZLE(y, x, z, x)
IMPLEMENT_SWIZZLE(y, x, z, y)
IMPLEMENT_SWIZZLE(y, x, z, z)
IMPLEMENT_SWIZZLE(y, x, z, w)
IMPLEMENT_SWIZZLE(y, x, w, x)
IMPLEMENT_SWIZZLE(y, x, w, y)
IMPLEMENT_SWIZZLE(y, x, w, z)
IMPLEMENT_SWIZZLE(y, x, w, w)
IMPLEMENT_SWIZZLE(y, y, x, x)
IMPLEMENT_SWIZZLE(y, y, x, y)
IMPLEMENT_SWIZZLE(y, y, x, z)
IMPLEMENT_SWIZZLE(y, y, x, w)
IMPLEMENT_SWIZZLE(y, y, y, x)
IMPLEMENT_SWIZZLE(y, y, y, y)
IMPLEMENT_SWIZZLE(y, y, y, z)
IMPLEMENT_SWIZZLE(y, y, y, w)
IMPLEMENT_SWIZZLE(y, y, z, x)
IMPLEMENT_SWIZZLE(y, y, z, y)
IMPLEMENT_SWIZZLE(y, y, z, z)
IMPLEMENT_SWIZZLE(y, y, z, w)
IMPLEMENT_SWIZZLE(y, y, w, x)
IMPLEMENT_SWIZZLE(y, y, w, y)
IMPLEMENT_SWIZZLE(y, y, w, z)
IMPLEMENT_SWIZZLE(y, y, w, w)
IMPLEMENT_SWIZZLE(y, z, x, x)
IMPLEMENT_SWIZZLE(y, z, x, y)
IMPLEMENT_SWIZZLE(y, z, x, z)
IMPLEMENT_SWIZZLE(y, z, x, w)
IMPLEMENT_SWIZZLE(y, z, y, x)
IMPLEMENT_SWIZZLE(y, z, y, y)
IMPLEMENT_SWIZZLE(y, z, y, z)
IMPLEMENT_SWIZZLE(y, z, y, w)
IMPLEMENT_SWIZZLE(y, z, z, x)
IMPLEMENT_SWIZZLE(y, z, z, y)
IMPLEMENT_SWIZZLE(y, z, z, z)
IMPLEMENT_SWIZZLE(y, z, z, w)
IMPLEMENT_SWIZZLE(y, z, w, x)
IMPLEMENT_SWIZZLE(y, z, w, y)
IMPLEMENT_SWIZZLE(y, z, w, z)
IMPLEMENT_SWIZZLE(y, z, w, w)
IMPLEMENT_SWIZZLE(y, w, x, x)
IMPLEMENT_SWIZZLE(y, w, x, y)
IMPLEMENT_SWIZZLE(y, w, x, z)
IMPLEMENT_SWIZZLE(y, w, x, w)
IMPLEMENT_SWIZZLE(y, w, y, x)
IMPLEMENT_SWIZZLE(y, w, y, y)
IMPLEMENT_SWIZZLE(y, w, y, z)
IMPLEMENT_SWIZZLE(y, w, y, w)
IMPLEMENT_SWIZZLE(y, w, z, x)
IMPLEMENT_SWIZZLE(y, w, z, y)
IMPLEMENT_SWIZZLE(y, w, z, z)
IMPLEMENT_SWIZZLE(y, w, z, w)
IMPLEMENT_SWIZZLE(y, w, w, x)
IMPLEMENT_SWIZZLE(y, w, w, y)
IMPLEMENT_SWIZZLE(y, w, w, z)
IMPLEMENT_SWIZZLE(y, w, w, w)
IMPLEMENT_SWIZZLE(z, x, x, x)
IMPLEMENT_SWIZZLE(z, x, x, y)
IMPLEMENT_SWIZZLE(z, x, x, z)
IMPLEMENT_SWIZZLE(z, x, x, w)
IMPLEMENT_SWIZZLE(z, x, y, x)
IMPLEMENT_SWIZZLE(z, x, y, y)
IMPLEMENT_SWIZZLE(z, x, y, z)
IMPLEMENT_SWIZZLE(z, x, y, w)
IMPLEMENT_SWIZZLE(z, x, z, x)
IMPLEMENT_SWIZZLE(z, x, z, y)
IMPLEMENT_SWIZZLE(z, x, z, z)
IMPLEMENT_SWIZZLE(z, x, z, w)
IMPLEMENT_SWIZZLE(z, x, w, x)
IMPLEMENT_SWIZZLE(z, x, w, y)
IMPLEMENT_SWIZZLE(z, x, w, z)
IMPLEMENT_SWIZZLE(z, x, w, w)
IMPLEMENT_SWIZZLE(z, y, x, x)
IMPLEMENT_SWIZZLE(z, y, x, y)
IMPLEMENT_SWIZZLE(z, y, x, z)
IMPLEMENT_SWIZZLE(z, y, x, w)
IMPLEMENT_SWIZZLE(z, y, y, x)
IMPLEMENT_SWIZZLE(z, y, y, y)
IMPLEMENT_SWIZZLE(z, y, y, z)
IMPLEMENT_SWIZZLE(z, y, y, w)
IMPLEMENT_SWIZZLE(z, y, z, x)
IMPLEMENT_SWIZZLE(z, y, z, y)
IMPLEMENT_SWIZZLE(z, y, z, z)
IMPLEMENT_SWIZZLE(z, y, z, w)
IMPLEMENT_SWIZZLE(z, y, w, x)
IMPLEMENT_SWIZZLE(z, y, w, y)
IMPLEMENT_SWIZZLE(z, y, w, z)
IMPLEMENT_SWIZZLE(z, y, w, w)
IMPLEMENT_SWIZZLE(z, z, x, x)
IMPLEMENT_SWIZZLE(z, z, x, y)
IMPLEMENT_SWIZZLE(z, z, x, z)
IMPLEMENT_SWIZZLE(z, z, x, w)
IMPLEMENT_SWIZZLE(z, z, y, x)
IMPLEMENT_SWIZZLE(z, z, y, y)
IMPLEMENT_SWIZZLE(z, z, y, z)
IMPLEMENT_SWIZZLE(z, z, y, w)
IMPLEMENT_SWIZZLE(z, z, z, x)
IMPLEMENT_SWIZZLE(z, z, z, y)
IMPLEMENT_SWIZZLE(z, z, z, z)
IMPLEMENT_SWIZZLE(z, z, z, w)
IMPLEMENT_SWIZZLE(z, z, w, x)
IMPLEMENT_SWIZZLE(z, z, w, y)
IMPLEMENT_SWIZZLE(z, z, w, z)
IMPLEMENT_SWIZZLE(z, z, w, w)
IMPLEMENT_SWIZZLE(z, w, x, x)
IMPLEMENT_SWIZZLE(z, w, x, y)
IMPLEMENT_SWIZZLE(z, w, x, z)
IMPLEMENT_SWIZZLE(z, w, x, w)
IMPLEMENT_SWIZZLE(z, w, y, x)
IMPLEMENT_SWIZZLE(z, w, y, y)
IMPLEMENT_SWIZZLE(z, w, y, z)
IMPLEMENT_SWIZZLE(z, w, y, w)
IMPLEMENT_SWIZZLE(z, w, z, x)
IMPLEMENT_SWIZZLE(z, w, z, y)
IMPLEMENT_SWIZZLE(z, w, z, z)
IMPLEMENT_SWIZZLE(z, w, z, w)
IMPLEMENT_SWIZZLE(z, w, w, x)
IMPLEMENT_SWIZZLE(z, w, w, y)
IMPLEMENT_SWIZZLE(z, w, w, z)
IMPLEMENT_SWIZZLE(z, w, w, w)
IMPLEMENT_SWIZZLE(w, x, x, x)
IMPLEMENT_SWIZZLE(w, x, x, y)
IMPLEMENT_SWIZZLE(w, x, x, z)
IMPLEMENT_SWIZZLE(w, x, x, w)
IMPLEMENT_SWIZZLE(w, x, y, x)
IMPLEMENT_SWIZZLE(w, x, y, y)
IMPLEMENT_SWIZZLE(w, x, y, z)
IMPLEMENT_SWIZZLE(w, x, y, w)
IMPLEMENT_SWIZZLE(w, x, z, x)
IMPLEMENT_SWIZZLE(w, x, z, y)
IMPLEMENT_SWIZZLE(w, x, z, z)
IMPLEMENT_SWIZZLE(w, x, z, w)
IMPLEMENT_SWIZZLE(w, x, w, x)
IMPLEMENT_SWIZZLE(w, x, w, y)
IMPLEMENT_SWIZZLE(w, x, w, z)
IMPLEMENT_SWIZZLE(w, x, w, w)
IMPLEMENT_SWIZZLE(w, y, x, x)
IMPLEMENT_SWIZZLE(w, y, x, y)
IMPLEMENT_SWIZZLE(w, y, x, z)
IMPLEMENT_SWIZZLE(w, y, x, w)
IMPLEMENT_SWIZZLE(w, y, y, x)
IMPLEMENT_SWIZZLE(w, y, y, y)
IMPLEMENT_SWIZZLE(w, y, y, z)
IMPLEMENT_SWIZZLE(w, y, y, w)
IMPLEMENT_SWIZZLE(w, y, z, x)
IMPLEMENT_SWIZZLE(w, y, z, y)
IMPLEMENT_SWIZZLE(w, y, z, z)
IMPLEMENT_SWIZZLE(w, y, z, w)
IMPLEMENT_SWIZZLE(w, y, w, x)
IMPLEMENT_SWIZZLE(w, y, w, y)
IMPLEMENT_SWIZZLE(w, y, w, z)
IMPLEMENT_SWIZZLE(w, y, w, w)
IMPLEMENT_SWIZZLE(w, z, x, x)
IMPLEMENT_SWIZZLE(w, z, x, y)
IMPLEMENT_SWIZZLE(w, z, x, z)
IMPLEMENT_SWIZZLE(w, z, x, w)
IMPLEMENT_SWIZZLE(w, z, y, x)
IMPLEMENT_SWIZZLE(w, z, y, y)
IMPLEMENT_SWIZZLE(w, z, y, z)
IMPLEMENT_SWIZZLE(w, z, y, w)
IMPLEMENT_SWIZZLE(w, z, z, x)
IMPLEMENT_SWIZZLE(w, z, z, y)
IMPLEMENT_SWIZZLE(w, z, z, z)
IMPLEMENT_SWIZZLE(w, z, z, w)
IMPLEMENT_SWIZZLE(w, z, w, x)
IMPLEMENT_SWIZZLE(w, z, w, y)
IMPLEMENT_SWIZZLE(w, z, w, z)
IMPLEMENT_SWIZZLE(w, z, w, w)
IMPLEMENT_SWIZZLE(w, w, x, x)
IMPLEMENT_SWIZZLE(w, w, x, y)
IMPLEMENT_SWIZZLE(w, w, x, z)
IMPLEMENT_SWIZZLE(w, w, x, w)
IMPLEMENT_SWIZZLE(w, w, y, x)
IMPLEMENT_SWIZZLE(w, w, y, y)
IMPLEMENT_SWIZZLE(w, w, y, z)
IMPLEMENT_SWIZZLE(w, w, y, w)
IMPLEMENT_SWIZZLE(w, w, z, x)
IMPLEMENT_SWIZZLE(w, w, z, y)
IMPLEMENT_SWIZZLE(w, w, z, z)
IMPLEMENT_SWIZZLE(w, w, z, w)
IMPLEMENT_SWIZZLE(w, w, w, x)
IMPLEMENT_SWIZZLE(w, w, w, y)
IMPLEMENT_SWIZZLE(w, w, w, z)
IMPLEMENT_SWIZZLE(w, w, w, w)

22
include/bx/float4_t.h Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FLOAT4_T_H__
#define __BX_FLOAT4_T_H__
#include <stdint.h>
#include "bx.h"
// Every float4 function is declared with BX_FLOAT4_INLINE, which maps to the
// library's force-inline attribute.
#define BX_FLOAT4_INLINE BX_FORCE_INLINE
// Backend selection. The SSE and NEON branches are currently disabled with
// `#if 0` (their intended feature tests are kept in the trailing comments),
// so the portable reference implementation is always used.
#if 0 // defined(__SSE2__)
# include "float4_sse.h"
#elif 0 // __ARM_NEON__
# include "float4_neon.h"
#else
# include "float4_ref.h"
#endif // backend selection
#endif // __BX_FLOAT4_T_H__

168
include/bx/float4x4_t.h Normal file
View File

@@ -0,0 +1,168 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
// Include guard. The macro defined here must be spelled exactly like the one
// tested: the previous `#define __BX_FLOAT4x4_H__` (lower-case x) never
// defined the tested macro, so a second inclusion re-defined everything.
#ifndef __BX_FLOAT4X4_H__
#define __BX_FLOAT4X4_H__
#include "float4_t.h"
namespace bx
{
// 4x4 matrix stored as four float4_t vectors, addressed as col[0..3].
// NOTE(review): BX_ALIGN_STRUCT_16 is defined elsewhere; presumably it forces
// 16-byte alignment of the struct — confirm against its definition.
typedef BX_ALIGN_STRUCT_16(struct)
{
float4_t col[4];
} float4x4_t;
// Transforms _a by _b treating _a.w as 1:
//   _b.col[0]*_a.x + _b.col[1]*_a.y + _b.col[2]*_a.z + _b.col[3]
// The w lane of _a is never read.
// NOTE(review): float4_madd(a, b, c) is defined in float4_ni.h and is
// presumed to compute a*b + c — confirm.
BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b)
{
	const float4_t xTerm  = float4_mul(_b.col[0], float4_swiz_xxxx(_a) );
	const float4_t yTerm  = float4_mul(_b.col[1], float4_swiz_yyyy(_a) );
	const float4_t xzSum  = float4_madd(_b.col[2], float4_swiz_zzzz(_a), xTerm);
	const float4_t yPlus3 = float4_add(_b.col[3], yTerm);
	return float4_add(xzSum, yPlus3);
}
// Transforms the 4-component vector _a by _b:
//   _b.col[0]*_a.x + _b.col[1]*_a.y + _b.col[2]*_a.z + _b.col[3]*_a.w
// NOTE(review): float4_madd(a, b, c) is defined in float4_ni.h and is
// presumed to compute a*b + c — confirm.
BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b)
{
	const float4_t xTerm = float4_mul(_b.col[0], float4_swiz_xxxx(_a) );
	const float4_t yTerm = float4_mul(_b.col[1], float4_swiz_yyyy(_a) );
	const float4_t xzSum = float4_madd(_b.col[2], float4_swiz_zzzz(_a), xTerm);
	const float4_t ywSum = float4_madd(_b.col[3], float4_swiz_wwww(_a), yTerm);
	return float4_add(xzSum, ywSum);
}
// Matrix product: each vector of _a is transformed by _b with
// float4_mul(vector, matrix) and stored at the same index of the result.
BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b)
{
	float4x4_t product;
	for (int ii = 0; ii < 4; ++ii)
	{
		product.col[ii] = float4_mul(_a.col[ii], _b);
	}
	return product;
}
// Transposes _mtx with two rounds of interleaving shuffles.
// Lane naming used in the comments:
//   col[0] = abcd, col[1] = efgh, col[2] = ijkl, col[3] = mnop
BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx)
{
	// Round 1: interleave columns 0/2 and 1/3.
	const float4_t lo02 = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // a i b j
	const float4_t lo13 = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // e m f n
	const float4_t hi02 = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // c k d l
	const float4_t hi13 = float4_shuf_zCwD(_mtx.col[1], _mtx.col[3]); // g o h p

	// Round 2: interleave the intermediates to form the transposed vectors.
	float4x4_t transposed;
	transposed.col[0] = float4_shuf_xAyB(lo02, lo13); // a e i m
	transposed.col[1] = float4_shuf_zCwD(lo02, lo13); // b f j n
	transposed.col[2] = float4_shuf_xAyB(hi02, hi13); // c g k o
	transposed.col[3] = float4_shuf_zCwD(hi02, hi13); // d h l p
	return transposed;
}
// Computes the inverse of _a as (accumulated cofactor vectors) * (1 / det).
// The four cofN vectors are built up term by term from pairwise products of
// the rearranged inputs t0..t3; the order of the madd/nmsub updates below is
// significant.
// The determinant is not tested against zero before float4_rcp is applied.
// NOTE(review): precision of the result depends on float4_rcp, which is
// defined outside this view - confirm whether it is exact or an estimate.
BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a)
{
// Rearrange the four input vectors into t0..t3 (note that t1 and t3 take
// their operands in swapped order: tmp3 first, tmp2 second).
const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]);
const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]);
const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]);
const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]);
const float4_t t0 = float4_shuf_xyAB(tmp0, tmp1);
const float4_t t1 = float4_shuf_xyAB(tmp3, tmp2);
const float4_t t2 = float4_shuf_zwCD(tmp0, tmp1);
const float4_t t3 = float4_shuf_zwCD(tmp3, tmp2);
// Terms built from t2*t3: start cof0 and cof1 from zero.
const float4_t t23 = float4_mul(t2, t3);
const float4_t t23_yxwz = float4_swiz_yxwz(t23);
const float4_t t23_wzyx = float4_swiz_wzyx(t23);
float4_t cof0, cof1, cof2, cof3;
const float4_t zero = float4_zero();
cof0 = float4_nmsub(t1, t23_yxwz, zero);
cof0 = float4_madd(t1, t23_wzyx, cof0);
cof1 = float4_nmsub(t0, t23_yxwz, zero);
cof1 = float4_madd(t0, t23_wzyx, cof1);
cof1 = float4_swiz_zwxy(cof1);
// Terms built from t1*t2: update cof0, start cof3.
const float4_t t12 = float4_mul(t1, t2);
const float4_t t12_yxwz = float4_swiz_yxwz(t12);
const float4_t t12_wzyx = float4_swiz_wzyx(t12);
cof0 = float4_madd(t3, t12_yxwz, cof0);
cof0 = float4_nmsub(t3, t12_wzyx, cof0);
cof3 = float4_mul(t0, t12_yxwz);
cof3 = float4_nmsub(t0, t12_wzyx, cof3);
cof3 = float4_swiz_zwxy(cof3);
// Terms built from swizzled t1 * t3: finish cof0, start cof2.
const float4_t t1_zwxy = float4_swiz_zwxy(t1);
const float4_t t2_zwxy = float4_swiz_zwxy(t2);
const float4_t t13 = float4_mul(t1_zwxy, t3);
const float4_t t13_yxwz = float4_swiz_yxwz(t13);
const float4_t t13_wzyx = float4_swiz_wzyx(t13);
cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0);
cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0);
cof2 = float4_mul(t0, t13_yxwz);
cof2 = float4_nmsub(t0, t13_wzyx, cof2);
cof2 = float4_swiz_zwxy(cof2);
// Terms built from t0*t1: update cof2 and cof3.
const float4_t t01 = float4_mul(t0, t1);
const float4_t t01_yxwz = float4_swiz_yxwz(t01);
const float4_t t01_wzyx = float4_swiz_wzyx(t01);
cof2 = float4_nmsub(t3, t01_yxwz, cof2);
cof2 = float4_madd(t3, t01_wzyx, cof2);
cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3);
cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3);
// Terms built from t0*t3: update cof1 and cof2.
const float4_t t03 = float4_mul(t0, t3);
const float4_t t03_yxwz = float4_swiz_yxwz(t03);
const float4_t t03_wzyx = float4_swiz_wzyx(t03);
cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1);
cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1);
cof2 = float4_madd(t1, t03_yxwz, cof2);
cof2 = float4_nmsub(t1, t03_wzyx, cof2);
// Terms built from t0 * swizzled t2: finish cof1 and cof3.
const float4_t t02 = float4_mul(t0, t2_zwxy);
const float4_t t02_yxwz = float4_swiz_yxwz(t02);
const float4_t t02_wzyx = float4_swiz_wzyx(t02);
cof1 = float4_madd(t3, t02_yxwz, cof1);
cof1 = float4_nmsub(t3, t02_wzyx, cof1);
cof3 = float4_nmsub(t1, t02_yxwz, cof3);
cof3 = float4_madd(t1, t02_wzyx, cof3);
// Determinant from t0 and its cofactor vector, then scale all four vectors
// by its reciprocal.
const float4_t det = float4_dot(t0, cof0);
const float4_t invdet = float4_rcp(det);
float4x4_t result;
result.col[0] = float4_mul(cof0, invdet);
result.col[1] = float4_mul(cof1, invdet);
result.col[2] = float4_mul(cof2, invdet);
result.col[3] = float4_mul(cof3, invdet);
return result;
}
} // namespace bx
#endif // __BX_FLOAT4X4_H__

71
include/bx/foreach.h Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_FOREACH_H__
#define __BX_FOREACH_H__
#include "bx.h"
namespace bx
{
	// Implementation details of the foreach() macro defined at the end of
	// this namespace.
	namespace foreach_ns
	{
		// Type-erased base so the macro can declare its loop state without
		// naming the container type.
		struct ContainerBase
		{
		};

		// Loop state for one foreach(): the current/end iterators plus the
		// m_break counter that couples the macro's two nested for-loops.
		// Members are mutable because the macro only reaches this object
		// through a const reference.
		template <typename Ty>
		class Container : public ContainerBase
		{
		public:
			inline Container(const Ty& _container)
				: m_container(_container)
				, m_break(0)
				, m_it( _container.begin() )
				, m_itEnd( _container.end() )
			{
			}

			// Outer-loop condition. True only when the previous body pass
			// ran to completion (m_break back at 0) and elements remain.
			// Always increments m_break; the inner loop decrements it again
			// unless the body left early via 'break'.
			inline bool condition() const
			{
				return (!m_break++ && m_it != m_itEnd);
			}

			const Ty& m_container;
			mutable int m_break;
			mutable typename Ty::const_iterator m_it;
			mutable typename Ty::const_iterator m_itEnd;
		};

		// Never dereferenced: yields a null Ty* whose only purpose is to
		// carry the container type into container().
		template <typename Ty>
		inline Ty* pointer(const Ty&)
		{
			return 0;
		}

		// Builds the loop state for _container.
		template <typename Ty>
		inline Container<Ty> containerNew(const Ty& _container)
		{
			return Container<Ty>(_container);
		}

		// Recovers the typed loop state from the type-erased base; the second
		// argument is used for template deduction only.
		template <typename Ty>
		inline const Container<Ty>* container(const ContainerBase* _base, const Ty*)
		{
			return static_cast<const Container<Ty>*>(_base);
		}

	} // namespace foreach_ns

// foreach(_variable, _container) { body }
//
// Iterates _container from begin() to end(), assigning each element to
// _variable (a declaration such as 'int value' or 'const Foo& foo').
// 'break' and 'continue' inside the body behave as in a normal loop.
// The iterators are captured once, so _container must outlive the loop.
//
// Every helper is referenced with full bx::foreach_ns:: qualification so the
// macro does not depend on argument-dependent lookup; an unqualified call
// fails to compile when the caller has a local named 'container'.
#define foreach(_variable, _container) \
	for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \
			bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->condition(); \
			++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_it) \
		for (_variable = *bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_it; \
				bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_break; \
				--bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container))->m_break)

} // namespace bx
#endif // __BX_FOREACH_H__

83
include/bx/handlealloc.h Normal file
View File

@@ -0,0 +1,83 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_HANDLE_ALLOC_H__
#define __BX_HANDLE_ALLOC_H__
#include "bx.h"
namespace bx
{
// Fixed-capacity allocator of uint16_t handles in the range [0, _maxHandles).
//
// m_dense[0 .. m_numHandles) lists the handles currently allocated and the
// rest of m_dense lists the free ones; m_sparse[handle] stores the position
// of an allocated handle inside m_dense. Both arrays live in one allocation.
// alloc() and free() run in constant time. free() performs no validation:
// passing a handle that is not currently allocated corrupts the bookkeeping.
class HandleAlloc
{
public:
	// Value returned by alloc() when every handle is in use.
	static const uint16_t invalid = 0xffff;

	HandleAlloc(uint16_t _maxHandles)
		: m_dense(new uint16_t[_maxHandles*2])
		, m_sparse(&m_dense[_maxHandles])
		, m_numHandles(0)
		, m_maxHandles(_maxHandles)
	{
		// Initially every handle is free and sits at its own index.
		for (uint16_t ii = 0; ii < _maxHandles; ++ii)
		{
			m_dense[ii] = ii;
		}
	}

	~HandleAlloc()
	{
		delete [] m_dense;
	}

	// The first getNumHandles() entries are the allocated handles.
	const uint16_t* getHandles() const
	{
		return m_dense;
	}

	uint16_t getNumHandles() const
	{
		return m_numHandles;
	}

	uint16_t getMaxHandles() const
	{
		return m_maxHandles;
	}

	// Returns a free handle, or 'invalid' when all handles are in use.
	uint16_t alloc()
	{
		if (m_numHandles < m_maxHandles)
		{
			uint16_t index = m_numHandles;
			++m_numHandles;

			uint16_t handle = m_dense[index];
			m_sparse[handle] = index;
			return handle;
		}

		return invalid;
	}

	// Releases _handle by swapping it with the last allocated entry of
	// m_dense, which keeps the allocated handles packed at the front.
	void free(uint16_t _handle)
	{
		uint16_t index = m_sparse[_handle];
		--m_numHandles;
		uint16_t temp = m_dense[m_numHandles];
		m_dense[m_numHandles] = _handle;
		m_sparse[temp] = index;
		m_dense[index] = temp;
	}

private:
	// The class owns m_dense; an implicit copy would delete it twice.
	HandleAlloc(const HandleAlloc& _rhs); // no copy constructor
	HandleAlloc& operator=(const HandleAlloc& _rhs); // no assignment operator

	uint16_t* m_dense;
	uint16_t* m_sparse;
	uint16_t m_numHandles;
	uint16_t m_maxHandles;
};
} // namespace bx
#endif // __BX_HANDLE_ALLOC_H__

90
include/bx/hash.h Normal file
View File

@@ -0,0 +1,90 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_HASH_H__
#define __BX_HASH_H__
#include "bx.h"
namespace bx
{
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#define MURMUR_M 0x5bd1e995
#define MURMUR_R 24
// One mixing round: scrambles _k and folds it into _h. Both arguments are
// modified in place.
#define mmix(_h, _k) { _k *= MURMUR_M; _k ^= _k >> MURMUR_R; _k *= MURMUR_M; _h *= MURMUR_M; _h ^= _k; }

// Incremental MurmurHash2A.
//
// Usage: begin(seed), any number of add() calls, then end() once.
// Bytes that do not fill a 32-bit word are carried in m_tail between add()
// calls, so how the input is split across calls does not affect the result
// on little-endian CPUs.
// NOTE(review): whole words are loaded in native byte order while the tail is
// assembled little-endian, so big-endian builds are presumably sensitive to
// how the input is chunked - confirm if that matters.
class HashMurmur2A
{
public:
	// Resets all state; must be called before the first add().
	void begin(uint32_t _seed = 0)
	{
		m_hash = _seed;
		m_tail = 0;
		m_count = 0;
		m_size = 0;
	}

	// Feeds _len bytes starting at _data into the hash.
	void add(const void* _data, int _len)
	{
		const uint8_t* data = (const uint8_t*)_data;
		m_size += _len;

		// Top up a partially filled tail word left by a previous add().
		mixTail(data, _len);

		while(_len >= 4)
		{
			// NOTE(review): direct 32-bit load; assumes the target tolerates
			// unaligned reads at 'data'.
			uint32_t kk = *(const uint32_t*)data;

			mmix(m_hash, kk);

			data += 4;
			_len -= 4;
		}

		// Stash the 0-3 leftover bytes.
		mixTail(data, _len);
	}

	// Finalizes and returns the hash. mmix modifies m_tail and m_size, so
	// begin() must be called again before the object is reused.
	uint32_t end()
	{
		mmix(m_hash, m_tail);
		mmix(m_hash, m_size);

		m_hash ^= m_hash >> 13;
		m_hash *= MURMUR_M;
		m_hash ^= m_hash >> 15;

		return m_hash;
	}

private:
	// Consumes bytes one at a time while fewer than four remain or a tail
	// word is already in progress; mixes the tail in once it holds 4 bytes.
	void mixTail(const uint8_t*& _data, int& _len)
	{
		while( _len && ((_len<4) || m_count) )
		{
			// Widen before shifting: a uint8_t promotes to int, and moving a
			// byte >= 0x80 into bits 24-31 would overflow a signed int.
			m_tail |= uint32_t(*_data++) << (m_count * 8);

			m_count++;
			_len--;

			if(m_count == 4)
			{
				mmix(m_hash, m_tail);
				m_tail = 0;
				m_count = 0;
			}
		}
	}

	uint32_t m_hash;
	uint32_t m_tail;  // pending bytes, first byte in the low bits
	uint32_t m_count; // number of bytes held in m_tail (0-3)
	uint32_t m_size;  // total number of bytes passed to add()
};
} // namespace bx
#endif // __BX_HASH_H__

62
include/bx/macros.h Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_MACROS_H__
#define __BX_MACROS_H__
#include "bx.h"
// BX_VA_ARGS_COUNT(...) expands to the number of arguments passed (1 to 16).
// The arguments push the descending number list to the right, so the value
// that lands in the _last slot equals the argument count. The trailing 0 only
// guarantees that the variadic part of the DETAIL macro is never empty.
#define BX_VA_ARGS_COUNT_DETAIL(_a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8, _a9, _a10, _a11, _a12, _a13, _a14, _a15, _a16, _last, ...) _last
#define BX_VA_ARGS_COUNT(...) BX_VA_ARGS_COUNT_DETAIL(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)

// BX_MACRO_DISPATCHER(_func, ...) expands to _func suffixed with the number
// of variadic arguments, e.g. BX_MACRO_DISPATCHER(FOO, a, b) -> FOO2.
// Typical use:
//   #define FOO(...) BX_MACRO_DISPATCHER(FOO, __VA_ARGS__)(__VA_ARGS__)
// DETAIL2 exists so the count is fully expanded before DETAIL1 pastes it.
#define BX_MACRO_DISPATCHER_DETAIL1(_func, _argCount) _func ## _argCount
#define BX_MACRO_DISPATCHER_DETAIL2(_func, _argCount) BX_MACRO_DISPATCHER_DETAIL1(_func, _argCount)
#define BX_MACRO_DISPATCHER(_func, ...) BX_MACRO_DISPATCHER_DETAIL2(_func, BX_VA_ARGS_COUNT(__VA_ARGS__) )
// Turns the macro-expanded value of _x into a string literal; the extra level
// of indirection lets arguments such as __LINE__ expand before '#' is applied.
#define BX_STRINGIZE(_x) BX_STRINGIZE_(_x)
#define BX_STRINGIZE_(_x) #_x
// String literal of the form "file(line): " for the current source location.
#define BX_FILE_LINE_LITERAL "" __FILE__ "(" BX_STRINGIZE(__LINE__) "): "
// Rounds _value up to the next multiple of (_mask + 1).
// NOTE(review): only meaningful when _mask is a power of two minus one.
#define BX_ALIGN_MASK(_value, _mask) ( ( (_value)+(_mask) ) & ( (~0)&(~(_mask) ) ) )
#define BX_ALIGN_16(_value) BX_ALIGN_MASK(_value, 0xf)
#define BX_ALIGN_256(_value) BX_ALIGN_MASK(_value, 0xff)
// Compiler-specific spellings for alignment, function name, inlining control
// and thread-local storage:
//   BX_ALIGN_STRUCT(_align, _struct) - declares _struct with the given alignment
//   BX_FUNCTION                      - name of the enclosing function
//   BX_NO_INLINE / BX_FORCE_INLINE   - inlining hints
//   BX_THREAD                        - thread-local storage specifier
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
# define BX_ALIGN_STRUCT(_align, _struct) _struct __attribute__( (aligned(_align) ) )
# define BX_FUNCTION __PRETTY_FUNCTION__
# define BX_NO_INLINE __attribute__( (noinline) )
// Note: this expansion includes 'static', unlike the MSVC variant below.
# define BX_FORCE_INLINE __extension__ static __inline __attribute__( (__always_inline__) )
# if BX_COMPILER_CLANG
// Expands to nothing, so variables marked BX_THREAD are ordinary (shared)
// variables when building with Clang.
# define BX_THREAD /* not supported right now */
# else
# define BX_THREAD __thread
# endif // BX_COMPILER_CLANG
#elif BX_COMPILER_MSVC
# define BX_ALIGN_STRUCT(_align, _struct) __declspec(align(_align) ) _struct
# define BX_FUNCTION __FUNCTION__
# define BX_NO_INLINE __declspec(noinline)
# define BX_FORCE_INLINE __forceinline
# define BX_THREAD __declspec(thread)
#else
# error "Unknown BX_COMPILER_?"
#endif
// Convenience wrappers for the two alignments used with BX_ALIGN_MASK above.
#define BX_ALIGN_STRUCT_16(_struct) BX_ALIGN_STRUCT(16, _struct)
#define BX_ALIGN_STRUCT_256(_struct) BX_ALIGN_STRUCT(256, _struct)
// Default hooks: unless the including project defines them first, BX_CHECK
// and BX_TRACE compile to empty statements and their arguments are discarded
// (never evaluated).
#ifndef BX_CHECK
# define BX_CHECK(...) do {} while(0)
#endif // BX_CHECK
#ifndef BX_TRACE
# define BX_TRACE(...) do {} while(0)
#endif // BX_TRACE
// Set to 1 to make SpScUnboundedQueue use the mutex + std::list implementation.
#ifndef BX_CONFIG_SPSCQUEUE_USE_NAIVE
# define BX_CONFIG_SPSCQUEUE_USE_NAIVE 0
#endif // BX_CONFIG_SPSCQUEUE_USE_NAIVE
#endif // __BX_MACROS_H__

29
include/bx/maputil.h Normal file
View File

@@ -0,0 +1,29 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_MAPUTIL_H__
#define __BX_MAPUTIL_H__
#include "bx.h"
namespace bx
{
// Sets _map[_key] to _value with a single tree search and without requiring
// mapped_type to be default-constructible.
// Returns an iterator to the element that was updated or inserted.
template<typename MapType>
typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value)
{
	// First element whose key is not less than _key; also the insertion hint.
	const typename MapType::iterator hint = _map.lower_bound(_key);

	// The key is already present when _key is not less than that element's key.
	const bool found = _map.end() != hint
		&& !_map.key_comp()(_key, hint->first);

	if (!found)
	{
		return _map.insert(hint, typename MapType::value_type(_key, _value) );
	}

	hint->second = _value;
	return hint;
}
} // namespace bx
#endif // __BX_MAPUTIL_H__

171
include/bx/mutex.h Normal file
View File

@@ -0,0 +1,171 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_MUTEX_H__
#define __BX_MUTEX_H__
#include "bx.h"
#include "cpu.h"
#include "sem.h"
#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID
# include <pthread.h>
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
# include <errno.h>
#endif // BX_PLATFORM_
namespace bx
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
// Minimal pthread_mutex_* look-alikes built on CRITICAL_SECTION, so that code
// written against the pthread mutex API compiles on Windows and Xbox 360.
// They live in namespace bx and, apart from trylock, always report success (0).
typedef CRITICAL_SECTION pthread_mutex_t;
typedef unsigned pthread_mutexattr_t;
inline int pthread_mutex_lock(pthread_mutex_t* _mutex)
{
EnterCriticalSection(_mutex);
return 0;
}
inline int pthread_mutex_unlock(pthread_mutex_t* _mutex)
{
LeaveCriticalSection(_mutex);
return 0;
}
// Returns 0 when the lock was taken and EBUSY (from <errno.h>) otherwise.
inline int pthread_mutex_trylock(pthread_mutex_t* _mutex)
{
return TryEnterCriticalSection(_mutex) ? 0 : EBUSY;
}
// The attribute argument is accepted for signature compatibility and ignored.
inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/)
{
InitializeCriticalSection(_mutex);
return 0;
}
inline int pthread_mutex_destroy(pthread_mutex_t* _mutex)
{
DeleteCriticalSection(_mutex);
return 0;
}
#endif // BX_PLATFORM_
// Non-copyable mutex. The handle is initialised with default attributes in
// the constructor and destroyed in the destructor; return codes of the
// underlying pthread_mutex_* calls are ignored.
class Mutex
{
public:
Mutex()
{
pthread_mutex_init(&m_handle, NULL);
}
~Mutex()
{
pthread_mutex_destroy(&m_handle);
}
// Blocks until the mutex is acquired.
void lock()
{
pthread_mutex_lock(&m_handle);
}
// Releases the mutex.
void unlock()
{
pthread_mutex_unlock(&m_handle);
}
private:
Mutex(const Mutex& _rhs); // no copy constructor
Mutex& operator=(const Mutex& _rhs); // no assignment operator
pthread_mutex_t m_handle;
};
// Scoped lock: acquires _mutex on construction and releases it when the
// object goes out of scope. Holds a reference, so the Mutex must outlive it.
class MutexScope
{
public:
MutexScope(Mutex& _mutex)
: m_mutex(_mutex)
{
m_mutex.lock();
}
~MutexScope()
{
m_mutex.unlock();
}
private:
MutexScope(); // no default constructor
MutexScope(const MutexScope& _rhs); // no copy constructor
MutexScope& operator=(const MutexScope& _rhs); // no assignment operator
Mutex& m_mutex;
};
// LwMutex is currently just an alias of Mutex. The #else branch keeps a
// disabled alternative that pairs an atomic counter with a Semaphore so the
// semaphore is only touched when lock() and unlock() actually contend.
#if 1
typedef Mutex LwMutex;
#else
class LwMutex
{
public:
LwMutex()
: m_count(0)
{
}
~LwMutex()
{
}
// Only waits on the semaphore when another holder is already counted.
void lock()
{
if (atomicIncr(&m_count) > 1)
{
m_sem.wait();
}
}
// Only posts the semaphore when another thread is still counted.
void unlock()
{
if (atomicDecr(&m_count) > 0)
{
m_sem.post();
}
}
private:
LwMutex(const LwMutex& _rhs); // no copy constructor
LwMutex& operator=(const LwMutex& _rhs); // no assignment operator
Semaphore m_sem;
volatile int32_t m_count;
};
#endif // 1
// Scoped lock for LwMutex: locks on construction, unlocks on destruction.
// Holds a reference, so the LwMutex must outlive it.
class LwMutexScope
{
public:
LwMutexScope(LwMutex& _mutex)
: m_mutex(_mutex)
{
m_mutex.lock();
}
~LwMutexScope()
{
m_mutex.unlock();
}
private:
LwMutexScope(); // no default constructor
LwMutexScope(const LwMutexScope& _rhs); // no copy constructor
LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator
LwMutex& m_mutex;
};
} // namespace bx
#endif // __BX_MUTEX_H__

46
include/bx/os.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_OS_H__
#define __BX_OS_H__
#include "bx.h"
#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX
# include <sched.h> // sched_yield
# if BX_PLATFORM_NACL
# include <sys/nacl_syscalls.h> // nanosleep
# else
# include <time.h> // nanosleep
# endif // BX_PLATFORM_NACL
#endif // BX_PLATFORM_
namespace bx
{
// Suspends the calling thread for _ms milliseconds using Sleep() on
// Windows/Xbox 360 and nanosleep() elsewhere. The nanosleep result is
// ignored, so a sleep cut short by a signal is not resumed.
inline void sleep(uint32_t _ms)
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
	Sleep(_ms);
#else
	// Assign the fields individually: brace-initialising timespec from
	// uint32_t expressions is a narrowing conversion (ill-formed since
	// C++11) on targets where time_t/long are 32 bits wide.
	timespec req;
	req.tv_sec = _ms/1000;
	req.tv_nsec = (_ms%1000)*1000000;
	timespec rem = {0, 0};
	nanosleep(&req, &rem);
#endif // BX_PLATFORM_
}
// Asks the scheduler to run another thread: SwitchToThread() on Windows,
// Sleep(0) on Xbox 360 and sched_yield() on the remaining platforms.
inline void yield()
{
#if BX_PLATFORM_WINDOWS
SwitchToThread();
#elif BX_PLATFORM_XBOX360
Sleep(0);
#else
sched_yield();
#endif // BX_PLATFORM_
}
} // namespace bx
#endif // __BX_OS_H__

86
include/bx/platform.h Normal file
View File

@@ -0,0 +1,86 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_PLATFORM_H__
#define __BX_PLATFORM_H__
// Every feature macro starts out as 0 so it can be used directly in #if
// expressions; the detection blocks that follow #undef the matching macro
// and redefine it to 1.
#define BX_COMPILER_CLANG 0
#define BX_COMPILER_GCC 0
#define BX_COMPILER_MSVC 0
#define BX_PLATFORM_ANDROID 0
#define BX_PLATFORM_LINUX 0
#define BX_PLATFORM_NACL 0
#define BX_PLATFORM_WINDOWS 0
#define BX_PLATFORM_XBOX360 0
#define BX_CPU_ARM 0
#define BX_CPU_PPC 0
#define BX_CPU_X86 0
#define BX_CPU_ENDIAN_BIG 0
#define BX_CPU_ENDIAN_LITTLE 0
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Compilers
// Compiler detection: exactly one BX_COMPILER_* ends up as 1. The order of
// the tests matters (see the note on clang below).
#if defined(_MSC_VER)
# undef BX_COMPILER_MSVC
# define BX_COMPILER_MSVC 1
#elif defined(__clang__)
// clang defines __GNUC__
# undef BX_COMPILER_CLANG
# define BX_COMPILER_CLANG 1
#elif defined(__GNUC__)
# undef BX_COMPILER_GCC
# define BX_COMPILER_GCC 1
#else
# error "BX_COMPILER_* is not defined!"
#endif //
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Operating_Systems
// Platform detection: exactly one BX_PLATFORM_* ends up as 1. More specific
// platforms are tested before the generic ones they also match (Xbox 360
// before Windows; NaCl and Android before Linux).
#if defined(_XBOX_VER)
# undef BX_PLATFORM_XBOX360
# define BX_PLATFORM_XBOX360 1
#elif defined(_WIN32) || defined(_WIN64)
# undef BX_PLATFORM_WINDOWS
# define BX_PLATFORM_WINDOWS 1
#elif defined(__native_client__)
// NaCl compiler defines __linux__
# undef BX_PLATFORM_NACL
# define BX_PLATFORM_NACL 1
#elif defined(__ANDROID__)
// Android compiler defines __linux__
# undef BX_PLATFORM_ANDROID
# define BX_PLATFORM_ANDROID 1
#elif defined(__linux__)
# undef BX_PLATFORM_LINUX
# define BX_PLATFORM_LINUX 1
#else
# error "BX_PLATFORM_* is not defined!"
#endif //
// http://sourceforge.net/apps/mediawiki/predef/index.php?title=Architectures
// CPU detection. Also defines BX_CACHE_LINE_SIZE for the detected family.
// There is no #else branch: on an unrecognised architecture all BX_CPU_*
// macros stay 0 and BX_CACHE_LINE_SIZE remains undefined.
#if defined(__arm__)
# undef BX_CPU_ARM
# define BX_CPU_ARM 1
# define BX_CACHE_LINE_SIZE 64
#elif defined(_M_PPC) || defined(__powerpc__) || defined(__powerpc64__)
# undef BX_CPU_PPC
# define BX_CPU_PPC 1
# define BX_CACHE_LINE_SIZE 128
#elif defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
# undef BX_CPU_X86
# define BX_CPU_X86 1
# define BX_CACHE_LINE_SIZE 64
#endif //
// Endianness is derived from the CPU family only: PPC is treated as
// big-endian and everything else (including an undetected CPU) as
// little-endian.
#if BX_CPU_PPC
# undef BX_CPU_ENDIAN_BIG
# define BX_CPU_ENDIAN_BIG 1
#else
# undef BX_CPU_ENDIAN_LITTLE
# define BX_CPU_ENDIAN_LITTLE 1
#endif // BX_CPU_PPC
#endif // __BX_PLATFORM_H__

313
include/bx/ringbuffer.h Normal file
View File

@@ -0,0 +1,313 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_RINGBUFFER_H__
#define __BX_RINGBUFFER_H__
#include "bx.h"
#include "cpu.h"
#include "uint32_t.h"
namespace bx
{
// Cursor bookkeeping for a ring buffer of m_size slots. The class owns no
// storage; it only tracks three positions, each kept in [0, m_size):
//   m_read    - advanced by consume()
//   m_current - advanced by commit(); available() is the distance
//               m_read -> m_current
//   m_write   - advanced by reserve()
// This variant contains no memory barriers.
// NOTE(review): uint32_sels is defined outside this view; the comments below
// assume it returns its second argument when the first has the sign bit set
// and its third argument otherwise - confirm against uint32_t.h.
class RingBufferControl
{
public:
RingBufferControl(uint32_t _size)
: m_size(_size)
, m_current(0)
, m_write(0)
, m_read(0)
{
}
~RingBufferControl()
{
}
// Number of committed slots that have not been consumed yet.
uint32_t available() const
{
return distance(m_read, m_current);
}
// Advances m_read by _size, limited to what is available.
// Returns the number of slots actually consumed.
uint32_t consume(uint32_t _size) // consumer only
{
const uint32_t maxSize = distance(m_read, m_current);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
// Sign bit of 'test' is set when the requested size is below maxSize.
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_read, size);
const uint32_t read = uint32_mod(advance, m_size);
m_read = read;
return size;
}
// Advances m_write by _size, limited to the free space. One slot is always
// left unused (at most m_size-1 slots can be reserved).
// Returns the number of slots actually reserved.
uint32_t reserve(uint32_t _size) // producer only
{
// When m_write == m_read the distance is 0 and 'dist' wraps around to
// 0xffffffff; the select below replaces that with m_size-1.
const uint32_t dist = distance(m_write, m_read)-1;
const uint32_t maxSize = uint32_sels(dist, m_size-1, dist);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_write, size);
const uint32_t write = uint32_mod(advance, m_size);
m_write = write;
return size;
}
// Advances m_current by _size, limited to what has been reserved but not
// yet committed. Returns the number of slots actually committed.
uint32_t commit(uint32_t _size) // producer only
{
const uint32_t maxSize = distance(m_current, m_write);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_current, size);
const uint32_t current = uint32_mod(advance, m_size);
m_current = current;
return size;
}
// Forward distance from _from to _to, adding m_size when the plain
// difference is negative (i.e. _to has wrapped around).
uint32_t distance(uint32_t _from, uint32_t _to) const // both
{
const uint32_t diff = uint32_sub(_to, _from);
const uint32_t le = uint32_add(m_size, diff);
const uint32_t result = uint32_sels(diff, le, diff);
return result;
}
const uint32_t m_size;
uint32_t m_current;
uint32_t m_write;
uint32_t m_read;
};
// Single-producer / single-consumer variant of the ring buffer cursor
// bookkeeping. The arithmetic matches RingBufferControl; the difference
// visible here is the memoryBarrier() issued in commit() before m_current is
// published.
//   m_read    - advanced by consume()   (consumer)
//   m_current - advanced by commit()    (producer)
//   m_write   - advanced by reserve()   (producer)
// NOTE(review): uint32_sels is defined outside this view; the comments below
// assume it returns its second argument when the first has the sign bit set
// and its third argument otherwise - confirm against uint32_t.h.
// NOTE(review): the consumer side reads m_current without a matching barrier
// in this class - confirm whether callers need one.
class SpScRingBufferControl
{
public:
SpScRingBufferControl(uint32_t _size)
: m_size(_size)
, m_current(0)
, m_write(0)
, m_read(0)
{
}
~SpScRingBufferControl()
{
}
// Number of committed slots that have not been consumed yet.
uint32_t available() const
{
return distance(m_read, m_current);
}
// Advances m_read by _size, limited to what is available.
// Returns the number of slots actually consumed.
uint32_t consume(uint32_t _size) // consumer only
{
const uint32_t maxSize = distance(m_read, m_current);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
// Sign bit of 'test' is set when the requested size is below maxSize.
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_read, size);
const uint32_t read = uint32_mod(advance, m_size);
m_read = read;
return size;
}
// Advances m_write by _size, limited to the free space. One slot is always
// left unused (at most m_size-1 slots can be reserved).
// Returns the number of slots actually reserved.
uint32_t reserve(uint32_t _size) // producer only
{
// When m_write == m_read the distance is 0 and 'dist' wraps around to
// 0xffffffff; the select below replaces that with m_size-1.
const uint32_t dist = distance(m_write, m_read)-1;
const uint32_t maxSize = uint32_sels(dist, m_size-1, dist);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_write, size);
const uint32_t write = uint32_mod(advance, m_size);
m_write = write;
return size;
}
// Advances m_current by _size, limited to what has been reserved but not
// yet committed. Returns the number of slots actually committed.
uint32_t commit(uint32_t _size) // producer only
{
const uint32_t maxSize = distance(m_current, m_write);
const uint32_t sizeNoSign = uint32_and(_size, 0x7FFFFFFF);
const uint32_t test = uint32_sub(sizeNoSign, maxSize);
const uint32_t size = uint32_sels(test, _size, maxSize);
const uint32_t advance = uint32_add(m_current, size);
const uint32_t current = uint32_mod(advance, m_size);
// must commit all memory writes before moving m_current pointer
// once m_current pointer moves data is used by consumer thread
memoryBarrier();
m_current = current;
return size;
}
// Forward distance from _from to _to, adding m_size when the plain
// difference is negative (i.e. _to has wrapped around).
uint32_t distance(uint32_t _from, uint32_t _to) const // both
{
const uint32_t diff = uint32_sub(_to, _from);
const uint32_t le = uint32_add(m_size, diff);
const uint32_t result = uint32_sels(diff, le, diff);
return result;
}
const uint32_t m_size;
uint32_t m_current;
uint32_t m_write;
uint32_t m_read;
};
// Read cursor over _size bytes of a ring buffer managed by a Control object
// (RingBufferControl or SpScRingBufferControl style interface).
//
// Reading starts at the control's current m_read position. read() and skip()
// only move this object's private cursor; the control is updated once, by
// end(), which consumes the whole _size region regardless of how much was
// actually read. The object keeps references to the control and the buffer,
// so both must outlive it. Not copyable.
template <typename Control>
class ReadRingBufferT
{
public:
	ReadRingBufferT(Control& _control, const char* _buffer, uint32_t _size)
		: m_control(_control)
		, m_read(_control.m_read)
		, m_end(m_read+_size)
		, m_size(_size)
		, m_buffer(_buffer)
	{
		BX_CHECK(_control.available() >= _size, "%d >= %d", _control.available(), _size);
	}

	~ReadRingBufferT()
	{
	}

	// Releases the region back to the control (consumes m_size bytes).
	void end()
	{
		m_control.consume(m_size);
	}

	// Copies _len bytes to _data and advances the cursor, splitting the copy
	// in two when the range runs past the end of the buffer.
	void read(char* _data, uint32_t _len)
	{
		const uint32_t end = (m_read + _len) % m_control.m_size;
		uint32_t wrap = 0;
		const char* from = &m_buffer[m_read];

		// The end position landing before the start means the range wraps.
		if (end < m_read)
		{
			wrap = m_control.m_size - m_read;
			memcpy(_data, from, wrap);
			_data += wrap;
			from = (const char*)&m_buffer[0];
		}

		memcpy(_data, from, _len-wrap);

		m_read = end;
	}

	// Advances the cursor by _len bytes without copying.
	void skip(uint32_t _len)
	{
		m_read += _len;
		m_read %= m_control.m_size;
	}

private:
	// WriteRingBufferT::write(ReadRingBufferT&, ...) reads the cursor directly.
	template <typename Ty>
	friend class WriteRingBufferT;

	ReadRingBufferT(); // no default constructor
	ReadRingBufferT(const ReadRingBufferT<Control>&); // no copy constructor
	void operator=(const ReadRingBufferT<Control>&); // no assignment operator

	Control& m_control;
	uint32_t m_read;
	uint32_t m_end;
	const uint32_t m_size;
	const char* m_buffer;
};
// Ready-made readers for the plain and the single-producer/single-consumer
// control classes.
typedef ReadRingBufferT<RingBufferControl> ReadRingBuffer;
typedef ReadRingBufferT<SpScRingBufferControl> SpScReadRingBuffer;
// Write cursor over _size bytes of a ring buffer managed by a Control object.
//
// The constructor reserves _size bytes from the control; write() and skip()
// only move this object's private cursor; end() commits the whole _size
// region. The object keeps references to the control and the buffer, so both
// must outlive it. Not copyable.
template <typename Control>
class WriteRingBufferT
{
public:
WriteRingBufferT(Control& _control, char* _buffer, uint32_t _size)
: m_control(_control)
, m_size(_size)
, m_buffer(_buffer)
{
uint32_t size = m_control.reserve(_size);
BX_CHECK(size == _size, "%d == %d", size, _size);
// NOTE(review): writing starts at m_current, which matches the start of
// the region just reserved only if every earlier reservation has already
// been committed - confirm this is the intended usage.
m_write = m_control.m_current;
m_end = m_write+_size;
}
~WriteRingBufferT()
{
}
// Publishes the region to the control (commits m_size bytes).
void end()
{
m_control.commit(m_size);
}
// Copies _len bytes from _data into the buffer and advances the cursor,
// splitting the copy in two when the range runs past the end of the buffer.
void write(const char* _data, uint32_t _len)
{
const uint32_t end = (m_write + _len) % m_control.m_size;
uint32_t wrap = 0;
char* to = &m_buffer[m_write];
// The end position landing before the start means the range wraps.
if (end < m_write)
{
wrap = m_control.m_size - m_write;
memcpy(to, _data, wrap);
_data += wrap;
to = (char*)&m_buffer[0];
}
memcpy(to, _data, _len-wrap);
m_write = end;
}
// Copies _len bytes from another ring buffer's read cursor into this one and
// advances both cursors. Wrap-around on the source side is handled here;
// wrap-around on the destination side is handled by write(const char*, ...).
void write(ReadRingBufferT<Control>& _read, uint32_t _len)
{
const uint32_t end = (_read.m_read + _len) % _read.m_control.m_size;
uint32_t wrap = 0;
const char* from = &_read.m_buffer[_read.m_read];
if (end < _read.m_read)
{
wrap = _read.m_control.m_size - _read.m_read;
write(from, wrap);
from = (const char*)&_read.m_buffer[0];
}
write(from, _len-wrap);
_read.m_read = end;
}
// Advances the cursor by _len bytes without writing.
void skip(uint32_t _len)
{
m_write += _len;
m_write %= m_control.m_size;
}
private:
WriteRingBufferT();
WriteRingBufferT(const WriteRingBufferT<Control>&);
void operator=(const WriteRingBufferT<Control>&);
Control& m_control;
uint32_t m_write;
uint32_t m_end;
const uint32_t m_size;
char* m_buffer;
};
// Ready-made writers for the plain and the single-producer/single-consumer
// control classes.
typedef WriteRingBufferT<RingBufferControl> WriteRingBuffer;
typedef WriteRingBufferT<SpScRingBufferControl> SpScWriteRingBuffer;
} // namespace bx
#endif // __BX_RINGBUFFER_H__

97
include/bx/rng.h Normal file
View File

@@ -0,0 +1,97 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_RNG_H__
#define __BX_RNG_H__
#include "bx.h"
namespace bx
{
// George Marsaglia's MWC
// Multiply-with-carry generator built from two 16-bit lag-1 generators whose
// states are combined into one 32-bit result per call.
class RngMwc
{
public:
	RngMwc(uint32_t _z = 12345, uint32_t _w = 65435)
	{
		reset(_z, _w);
	}

	// Restarts the sequence from the given pair of seeds.
	void reset(uint32_t _z = 12345, uint32_t _w = 65435)
	{
		m_z = _z;
		m_w = _w;
	}

	// Advances both halves and returns the next 32-bit value.
	uint32_t gen()
	{
		// For each half: the low 16 bits are the value, the high 16 bits
		// are the carry from the previous step.
		const uint32_t zValue = m_z & 65535;
		const uint32_t zCarry = m_z >> 16;
		m_z = 36969*zValue + zCarry;

		const uint32_t wValue = m_w & 65535;
		const uint32_t wCarry = m_w >> 16;
		m_w = 18000*wValue + wCarry;

		return (m_z<<16) + m_w;
	}

private:
	uint32_t m_z;
	uint32_t m_w;
};
// George Marsaglia's FIB
class RngFib
{
public:
RngFib()
: m_a(9983651)
, m_b(95746118)
{
}
void reset()
{
m_a = 9983651;
m_b = 95746118;
}
uint32_t gen()
{
m_b = m_a+m_b;
m_a = m_b-m_a;
return m_a;
}
private:
uint32_t m_a;
uint32_t m_b;
};
// George Marsaglia's SHR3
// Three-step xorshift generator (shifts 17 left, 13 right, 5 left) over a
// single 32-bit state word.
class RngShr3
{
public:
	RngShr3(uint32_t _jsr = 34221)
	{
		reset(_jsr);
	}

	// Restarts the sequence from the given seed.
	void reset(uint32_t _jsr = 34221)
	{
		m_jsr = _jsr;
	}

	// Advances the state and returns it.
	uint32_t gen()
	{
		uint32_t state = m_jsr;
		state ^= state<<17;
		state ^= state>>13;
		state ^= state<<5;
		m_jsr = state;
		return state;
	}

private:
	uint32_t m_jsr;
};
} // namespace bx
#endif // __BX_RNG_H__

107
include/bx/sem.h Normal file
View File

@@ -0,0 +1,107 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_SEM_H__
#define __BX_SEM_H__
#include "bx.h"
#define BX_SEM_CONFIG_POSIX (BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX)
#if BX_SEM_CONFIG_POSIX
# include <semaphore.h>
# include <time.h>
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
# include <limits.h>
#endif // BX_PLATFORM_
namespace bx
{
#if BX_SEM_CONFIG_POSIX
// Counting semaphore on top of POSIX sem_t. Starts with a count of 0 and is
// not shared between processes. Not copyable.
class Semaphore
{
public:
	Semaphore()
	{
		sem_init(&m_handle, 0, 0);
	}

	~Semaphore()
	{
		sem_destroy(&m_handle);
	}

	// Increments the count _count times.
	void post(uint32_t _count = 1)
	{
		for (uint32_t ii = 0; ii < _count; ++ii)
		{
			sem_post(&m_handle);
		}
	}

	// Decrements the count, waiting for it to become positive.
	// _msecs < 0 waits indefinitely; otherwise waits at most _msecs
	// milliseconds. Returns true when the semaphore was acquired and false
	// on timeout or any other failure of the underlying call.
	bool wait(int32_t _msecs = -1)
	{
#if BX_PLATFORM_NACL
		BX_CHECK(-1 == _msecs, "NaCl doesn't support sem_timedwait at this moment.");
		return 0 == sem_wait(&m_handle);
#else
		if (0 > _msecs)
		{
			return 0 == sem_wait(&m_handle);
		}

		// sem_timedwait takes an absolute CLOCK_REALTIME deadline, not a
		// relative timeout, so start from the current time and add _msecs.
		timespec ts;
		clock_gettime(CLOCK_REALTIME, &ts);
		ts.tv_sec += _msecs/1000;
		ts.tv_nsec += (_msecs%1000)*1000000; // milliseconds -> nanoseconds

		// Keep tv_nsec inside [0, 1e9); both addends are below 1e9, so one
		// carry is enough.
		if (ts.tv_nsec >= 1000000000)
		{
			ts.tv_sec += 1;
			ts.tv_nsec -= 1000000000;
		}

		return 0 == sem_timedwait(&m_handle, &ts);
#endif // BX_PLATFORM_
	}

private:
	Semaphore(const Semaphore& _rhs); // no copy constructor
	Semaphore& operator=(const Semaphore& _rhs); // no assignment operator

	sem_t m_handle;
};
#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
// Counting semaphore on top of a Win32 semaphore handle. Starts with a count
// of 0 and a maximum count of LONG_MAX. Not copyable.
class Semaphore
{
public:
Semaphore()
{
m_handle = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
BX_CHECK(NULL != m_handle, "Failed to create Semaphore!");
}
~Semaphore()
{
CloseHandle(m_handle);
}
// Increases the count by _count in a single call.
void post(uint32_t _count = 1) const
{
ReleaseSemaphore(m_handle, _count, NULL);
}
// Waits for the semaphore; a negative _msecs waits without a time limit,
// otherwise the wait is limited to _msecs milliseconds.
// Returns true only when the wait ended with WAIT_OBJECT_0.
bool wait(int32_t _msecs = -1) const
{
DWORD milliseconds = (0 > _msecs) ? INFINITE : _msecs;
return WAIT_OBJECT_0 == WaitForSingleObject(m_handle, milliseconds);
}
private:
Semaphore(const Semaphore& _rhs); // no copy constructor
Semaphore& operator=(const Semaphore& _rhs); // no assignment operator
HANDLE m_handle;
};
#endif // BX_PLATFORM_
} // namespace bx
#endif // __BX_SEM_H__

152
include/bx/spscqueue.h Normal file
View File

@@ -0,0 +1,152 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_SPSCQUEUE_H__
#define __BX_SPSCQUEUE_H__
#include <list>
#include "bx.h"
#include "cpu.h"
#include "mutex.h"
#include "uint32_t.h"
namespace bx
{
// http://drdobbs.com/article/print?articleId=210604448&siteSectionName=
// Unbounded single-producer / single-consumer queue of Ty* built on a singly
// linked list with three cursors:
//   m_first   - oldest node still allocated (touched by the producer only)
//   m_divider - last node already handed to the consumer; the next item to
//               pop is m_divider->m_next
//   m_last    - newest node
// The list always contains at least one node (a dummy created in the
// constructor). The queue stores raw pointers and never deletes the objects
// they point to.
// NOTE(review): cross-thread visibility relies on atomicExchangePtr, which is
// defined outside this view - confirm it provides the required ordering.
template <typename Ty>
class SpScUnboundedQueueOptimized
{
public:
SpScUnboundedQueueOptimized()
: m_first(new Node(NULL) )
, m_divider(m_first)
, m_last(m_first)
{
}
// Frees every remaining node; payload pointers are not deleted.
~SpScUnboundedQueueOptimized()
{
while (NULL != m_first)
{
Node* node = m_first;
m_first = node->m_next;
delete node;
}
}
// Appends _ptr, then frees the nodes the consumer has already moved past.
void push(Ty* _ptr) // producer only
{
m_last->m_next = new Node(_ptr);
// Publish the new node only after it has been linked in.
atomicExchangePtr((void**)&m_last, m_last->m_next);
// Lazy cleanup of consumed nodes (everything before m_divider).
while (m_first != m_divider)
{
Node* node = m_first;
m_first = m_first->m_next;
delete node;
}
}
// Returns the next item without removing it, or NULL when the queue is empty.
Ty* peek() // consumer only
{
if (m_divider != m_last)
{
Ty* ptr = m_divider->m_next->m_ptr;
return ptr;
}
return NULL;
}
// Removes and returns the next item, or NULL when the queue is empty.
Ty* pop() // consumer only
{
if (m_divider != m_last)
{
// Read the payload before advancing the divider; once the divider has
// moved, the producer is free to delete the node it left behind.
Ty* ptr = m_divider->m_next->m_ptr;
atomicExchangePtr((void**)&m_divider, m_divider->m_next);
return ptr;
}
return NULL;
}
private:
SpScUnboundedQueueOptimized(const SpScUnboundedQueueOptimized& _rhs); // no copy constructor
SpScUnboundedQueueOptimized& operator=(const SpScUnboundedQueueOptimized& _rhs); // no assignment operator
struct Node
{
Node(Ty* _ptr)
: m_ptr(_ptr)
, m_next(NULL)
{
}
Ty* m_ptr;
Node* m_next;
};
Node* m_first;
Node* m_divider;
Node* m_last;
};
// Reference implementation of the unbounded queue: a std::list of Ty*
// guarded by a mutex that is held for the duration of every operation.
// The queue stores raw pointers and does not own the objects.
template<typename Ty>
class SpScUnboundedQueueNaive
{
public:
	SpScUnboundedQueueNaive()
	{
	}

	// The queue is expected to have been drained before destruction.
	~SpScUnboundedQueueNaive()
	{
		BX_CHECK(m_queue.empty(), "Queue is not empty!");
	}

	// Appends _item at the back.
	void push(Ty* _item)
	{
		bx::LwMutexScope lock(m_mutex);
		m_queue.push_back(_item);
	}

	// Returns the front item without removing it, or NULL when empty.
	Ty* peek()
	{
		bx::LwMutexScope lock(m_mutex);

		if (m_queue.empty() )
		{
			return NULL;
		}

		return m_queue.front();
	}

	// Removes and returns the front item, or NULL when empty.
	Ty* pop()
	{
		bx::LwMutexScope lock(m_mutex);

		if (m_queue.empty() )
		{
			return NULL;
		}

		Ty* front = m_queue.front();
		m_queue.pop_front();
		return front;
	}

private:
	bx::LwMutex m_mutex;
	std::list<Ty*> m_queue;
};
// SpScUnboundedQueue names whichever implementation the build selected:
// the mutex-based one when BX_CONFIG_SPSCQUEUE_USE_NAIVE is non-zero,
// otherwise the lock-free one.
#if BX_CONFIG_SPSCQUEUE_USE_NAIVE
# define SpScUnboundedQueue SpScUnboundedQueueNaive
#else
# define SpScUnboundedQueue SpScUnboundedQueueOptimized
#endif // BX_CONFIG_SPSCQUEUE_USE_NAIVE
} // namespace bx
#endif // __BX_SPSCQUEUE_H__

53
include/bx/timer.h Normal file
View File

@@ -0,0 +1,53 @@
/*
* Copyright 2010-2011 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#ifndef __BX_TIMER_H__
#define __BX_TIMER_H__
#include "bx.h"
#if BX_PLATFORM_ANDROID
# include <time.h> // clock, clock_gettime
#elif BX_PLATFORM_NACL | BX_PLATFORM_LINUX
# include <sys/time.h> // gettimeofday
#endif // BX_PLATFORM_
namespace bx
{
// Returns a tick count relative to the first call of this function (so the
// first call returns 0). Divide by getHPFrequency() to convert to seconds.
// Source per platform: QueryPerformanceCounter on Windows/Xbox 360, clock()
// on Android, gettimeofday() (microseconds) elsewhere.
inline int64_t getHPCounter()
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
	LARGE_INTEGER li;
	// Performance counter value may unexpectedly leap forward
	// http://support.microsoft.com/kb/274323
	QueryPerformanceCounter(&li);
	int64_t i64 = li.QuadPart;
#elif BX_PLATFORM_ANDROID
	int64_t i64 = clock();
#else
	struct timeval now;
	gettimeofday(&now, 0);
	// Widen before multiplying: where tv_sec is a 32-bit type the product
	// tv_sec*1000000 would otherwise overflow.
	int64_t i64 = static_cast<int64_t>(now.tv_sec)*1000000 + now.tv_usec;
#endif // BX_PLATFORM_
	// Captured once, on the first call.
	static int64_t offset = i64;
	return i64 - offset;
}
// Returns the number of getHPCounter() ticks per second:
// QueryPerformanceFrequency on Windows/Xbox 360, CLOCKS_PER_SEC on Android
// and 1000000 (microseconds) on the remaining platforms.
inline int64_t getHPFrequency()
{
#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
LARGE_INTEGER li;
QueryPerformanceFrequency(&li);
return li.QuadPart;
#elif BX_PLATFORM_ANDROID
return CLOCKS_PER_SEC;
#else
return 1000000;
#endif // BX_PLATFORM_
}
} // namespace bx
#endif // __BX_TIMER_H__

454
include/bx/uint32_t.h Normal file
View File

@@ -0,0 +1,454 @@
/*
* Copyright 2010-2012 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
// Copyright 2006 Mike Acton <macton@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
#ifndef __BX_UINT32_T_H__
#define __BX_UINT32_T_H__
#include "bx.h"
#if BX_COMPILER_MSVC
# if BX_PLATFORM_WINDOWS
# include <math.h> // math.h is included because VS bitches:
// warning C4985: 'ceil': attributes not present on previous declaration.
// must be included before intrin.h.
# include <intrin.h>
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# endif // BX_PLATFORM_WINDOWS
#endif // BX_COMPILER_MSVC
namespace bx
{
/// Returns `_a` unchanged.
inline uint32_t uint32_li(uint32_t _a)
{
	const uint32_t result = _a;
	return result;
}
/// Returns `_a` minus one, wrapping from 0 to 0xffffffff.
inline uint32_t uint32_dec(uint32_t _a)
{
	--_a;
	return _a;
}
/// Returns `_a` plus one, wrapping from 0xffffffff to 0.
inline uint32_t uint32_inc(uint32_t _a)
{
	++_a;
	return _a;
}
/// Returns the bitwise complement of `_a`.
inline uint32_t uint32_not(uint32_t _a)
{
	const uint32_t flipped = ~_a;
	return flipped;
}
/// Returns the two's complement negation of `_a` (i.e. `0 - _a` modulo 2^32).
///
/// @param _a Value to negate.
/// @returns `0 - _a`, wrapping; 0 maps to 0 and 0x80000000 maps to itself.
inline uint32_t uint32_neg(uint32_t _a)
{
	// Negate in unsigned arithmetic: negating the signed value would
	// overflow (undefined behaviour) for 0x80000000.
	return 0u - _a;
}
/// Extends the sign bit of `_a` across the whole word.
///
/// @param _a Value whose most significant bit is tested.
/// @returns 0xffffffff when bit 31 of `_a` is set, 0 otherwise.
inline uint32_t uint32_ext(uint32_t _a)
{
	// Isolate bit 31 as 0 or 1 and negate it in unsigned arithmetic. This
	// avoids depending on the implementation-defined result of converting
	// to a signed type and right-shifting a negative value.
	return 0u - (_a >> 31);
}
/// Returns the bitwise AND of `_a` and `_b`.
inline uint32_t uint32_and(uint32_t _a, uint32_t _b)
{
	_a &= _b;
	return _a;
}
/// Returns the bitwise exclusive OR of `_a` and `_b`.
inline uint32_t uint32_xor(uint32_t _a, uint32_t _b)
{
	_a ^= _b;
	return _a;
}
/// Logical exclusive OR.
///
/// @returns 1 when exactly one of `_a` and `_b` is non-zero, 0 otherwise.
inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b)
{
	const bool lhsIsZero = (0 == _a);
	const bool rhsIsZero = (0 == _b);
	return lhsIsZero != rhsIsZero;
}
/// AND with complement: keeps the bits of `_a` that are clear in `_b`.
inline uint32_t uint32_andc(uint32_t _a, uint32_t _b)
{
	const uint32_t keep = ~_b;
	return _a & keep;
}
/// Returns the bitwise OR of `_a` and `_b`.
inline uint32_t uint32_or(uint32_t _a, uint32_t _b)
{
	_a |= _b;
	return _a;
}
/// Shift left logical: shifts `_a` left by `_sa` bits, filling with zeros.
inline uint32_t uint32_sll(uint32_t _a, int _sa)
{
	_a <<= _sa;
	return _a;
}
/// Shift right logical: shifts `_a` right by `_sa` bits, filling with zeros.
inline uint32_t uint32_srl(uint32_t _a, int _sa)
{
	_a >>= _sa;
	return _a;
}
/// Shift right arithmetic: reinterprets `_a` as a signed 32-bit value and
/// shifts it right by `_sa` bits.
inline uint32_t uint32_sra(uint32_t _a, int _sa)
{
	const int32_t asSigned = (int32_t)_a;
	const int32_t shifted  = asSigned >> _sa;
	return shifted;
}
/// Rotates `_a` left by `_sa` bits.
///
/// @param _a  Value to rotate.
/// @param _sa Rotation amount; only the low 5 bits are used, so a rotation
///            by 0 or 32 returns `_a` unchanged.
/// @returns `_a` rotated left.
inline uint32_t uint32_rol(uint32_t _a, int _sa)
{
	// Both shift counts are masked into 0..31. An unmasked `32 - _sa`
	// would shift by the full word width when `_sa` is 0, which is
	// undefined behaviour.
	const uint32_t left  = uint32_t(_sa) & 31u;
	const uint32_t right = (32u - left) & 31u;
	return (_a << left) | (_a >> right);
}
/// Rotates `_a` right by `_sa` bits.
///
/// @param _a  Value to rotate.
/// @param _sa Rotation amount; only the low 5 bits are used, so a rotation
///            by 0 or 32 returns `_a` unchanged.
/// @returns `_a` rotated right.
inline uint32_t uint32_ror(uint32_t _a, int _sa)
{
	// Both shift counts are masked into 0..31. An unmasked `32 - _sa`
	// would shift by the full word width when `_sa` is 0, which is
	// undefined behaviour.
	const uint32_t right = uint32_t(_sa) & 31u;
	const uint32_t left  = (32u - right) & 31u;
	return (_a >> right) | (_a << left);
}
/// Returns `_a + _b`, wrapping modulo 2^32.
inline uint32_t uint32_add(uint32_t _a, uint32_t _b)
{
	_a += _b;
	return _a;
}
/// Returns `_a - _b`, wrapping modulo 2^32.
inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)
{
	_a -= _b;
	return _a;
}
/// Returns the low 32 bits of `_a * _b`.
inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)
{
	_a *= _b;
	return _a;
}
/// Returns the unsigned quotient `_a / _b`. `_b` must not be zero.
inline uint32_t uint32_div(uint32_t _a, uint32_t _b)
{
	const uint32_t quotient = _a / _b;
	return quotient;
}
/// Returns the unsigned remainder `_a % _b`. `_b` must not be zero.
inline uint32_t uint32_mod(uint32_t _a, uint32_t _b)
{
	const uint32_t remainder = _a % _b;
	return remainder;
}
/// Equality compare producing a mask.
///
/// @returns 0xffffffff when `_a == _b`, 0 otherwise.
inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)
{
	const uint32_t bit = (_a == _b);
	return 0u - bit;
}
/// Inequality compare producing a mask.
///
/// @returns 0xffffffff when `_a != _b`, 0 otherwise.
inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)
{
	const uint32_t bit = (_a != _b);
	return 0u - bit;
}
/// Unsigned less-than compare producing a mask.
///
/// @returns 0xffffffff when `_a < _b`, 0 otherwise.
inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)
{
	const uint32_t bit = (_a < _b);
	return 0u - bit;
}
/// Unsigned less-or-equal compare producing a mask.
///
/// @returns 0xffffffff when `_a <= _b`, 0 otherwise.
inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b)
{
	const uint32_t bit = (_a <= _b);
	return 0u - bit;
}
/// Unsigned greater-than compare producing a mask.
///
/// @returns 0xffffffff when `_a > _b`, 0 otherwise.
inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)
{
	const uint32_t bit = (_a > _b);
	return 0u - bit;
}
/// Unsigned greater-or-equal compare producing a mask.
///
/// @returns 0xffffffff when `_a >= _b`, 0 otherwise.
inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)
{
	const uint32_t bit = (_a >= _b);
	return 0u - bit;
}
/// Set if non-zero.
///
/// @returns 0xffffffff when `_a` is non-zero, 0 when `_a` is zero.
inline uint32_t uint32_setnz(uint32_t _a)
{
	const uint32_t bit = (0 != _a);
	return 0u - bit;
}
/// Saturating add.
///
/// @returns `_a + _b`, or 0xffffffff when the sum does not fit in 32 bits.
inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
{
	const uint32_t sum = uint32_add(_a, _b);

	// A wrapped sum is smaller than either operand; the compare yields an
	// all-ones mask in that case, which forces the result to the maximum.
	const uint32_t overflowMask = uint32_cmplt(sum, _a);

	return uint32_or(sum, overflowMask);
}
/// Saturating subtract.
///
/// @returns `_a - _b`, or 0 when `_b` is greater than `_a`.
inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b)
{
	const uint32_t diff = uint32_sub(_a, _b);

	// Without a borrow the difference cannot exceed `_a`; the mask is all
	// ones then and zero when the subtraction wrapped.
	const uint32_t noBorrowMask = uint32_cmple(diff, _a);

	return uint32_and(diff, noBorrowMask);
}
/// Saturating multiply.
///
/// @returns `_a * _b`, or 0xffffffff when the product does not fit in 32 bits.
inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b)
{
	// Compute the full 64-bit product so the overflow is visible in the
	// upper half.
	const uint64_t product = uint64_t(_a) * uint64_t(_b);
	const uint32_t upper   = uint32_t(product >> 32);
	const uint32_t lower   = uint32_t(product);

	// Any bit in the upper half turns the result into all ones.
	return uint32_or(lower, uint32_setnz(upper) );
}
/// Select on sign bit.
///
/// @returns `_a` when bit 31 of `test` is set, `_b` otherwise.
inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)
{
	// Replicate the sign bit of `test` into a full-width select mask.
	const uint32_t signMask = uint32_ext(test);
	const uint32_t fromA    = uint32_and(_a, signMask);
	const uint32_t fromB    = uint32_andc(_b, signMask);
	return uint32_or(fromA, fromB);
}
/// Select bits.
///
/// @returns A value that takes each bit from `_a` where `_mask` has a 1 and
///          from `_b` where `_mask` has a 0.
inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)
{
	const uint32_t fromA = uint32_and(_a, _mask);
	const uint32_t fromB = uint32_andc(_b, _mask);
	return uint32_or(fromA, fromB);
}
/// Branch-free minimum based on the sign of `_a - _b`.
///
/// @returns `_a` when bit 31 of `_a - _b` is set, `_b` otherwise. This is the
///          signed minimum as long as the difference does not overflow.
inline uint32_t uint32_imin(uint32_t _a, uint32_t _b)
{
	const uint32_t diff = uint32_sub(_a, _b);
	return uint32_sels(diff, _a, _b);
}
/// Branch-free maximum based on the sign of `_b - _a`.
///
/// @returns `_a` when bit 31 of `_b - _a` is set, `_b` otherwise. This is the
///          signed maximum as long as the difference does not overflow.
inline uint32_t uint32_imax(uint32_t _a, uint32_t _b)
{
	const uint32_t diff = uint32_sub(_b, _a);
	return uint32_sels(diff, _a, _b);
}
/// Returns the smaller of `_a` and `_b`, compared as unsigned values.
inline uint32_t uint32_min(uint32_t _a, uint32_t _b)
{
	if (_a > _b)
	{
		return _b;
	}

	return _a;
}
/// Returns the larger of `_a` and `_b`, compared as unsigned values.
inline uint32_t uint32_max(uint32_t _a, uint32_t _b)
{
	if (_a > _b)
	{
		return _a;
	}

	return _b;
}
/// Increment with wrap-around.
///
/// @returns `_min` when `_val` equals `_max`, `_val + 1` otherwise.
inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)
{
	const uint32_t next     = uint32_inc(_val);
	const uint32_t distance = uint32_sub(_max, _val);

	// (x | -x) has its top bit set exactly when x is non-zero, so the
	// extended mask is all ones while `_val` has not reached `_max`.
	const uint32_t signProbe = uint32_or(distance, uint32_neg(distance) );
	const uint32_t notAtMax  = uint32_ext(signProbe);

	return uint32_selb(notAtMax, next, _min);
}
/// Decrement with wrap-around.
///
/// @returns `_max` when `_val` equals `_min`, `_val - 1` otherwise.
inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)
{
	const uint32_t prev     = uint32_dec(_val);
	const uint32_t distance = uint32_sub(_min, _val);

	// (x | -x) has its top bit set exactly when x is non-zero, so the
	// extended mask is all ones while `_val` has not reached `_min`.
	const uint32_t signProbe = uint32_or(distance, uint32_neg(distance) );
	const uint32_t notAtMin  = uint32_ext(signProbe);

	return uint32_selb(notAtMin, prev, _max);
}
/// Reference (portable) implementation of the set-bit count.
///
/// @returns Number of bits set in `_val` (0..32).
inline uint32_t uint32_cntbits_ref(uint32_t _val)
{
	// Turn every 2-bit pair into the number of bits set in that pair.
	const uint32_t halved = uint32_and(uint32_srl(_val, 1), 0x55555555);
	const uint32_t pairs  = uint32_sub(_val, halved);

	// Add three neighbouring pair counts into one 6-bit wide field.
	const uint32_t field0 = uint32_and(pairs, 0xc30c30c3);
	const uint32_t field1 = uint32_and(uint32_srl(pairs, 2), 0xc30c30c3);
	const uint32_t field2 = uint32_and(uint32_srl(pairs, 4), 0xc30c30c3);
	const uint32_t sixes  = uint32_add(field2, uint32_add(field0, field1) );

	// Fold the 6-bit fields together so the lowest one holds the total.
	const uint32_t folded = uint32_add(sixes, uint32_srl(sixes, 6) );
	const uint32_t lower  = uint32_add(folded, uint32_srl(folded, 12) );
	const uint32_t total  = uint32_add(uint32_srl(folded, 24), lower);

	// Only the low 6 bits carry the count; discard partial sums above.
	return uint32_and(total, 0x3f);
}
/// Count number of bits set.
///
/// Uses the compiler intrinsic when one is selected by the BX_COMPILER_* /
/// BX_PLATFORM_* configuration and falls back to uint32_cntbits_ref otherwise.
inline uint32_t uint32_cntbits(uint32_t _val)
{
	uint32_t count;

#if BX_COMPILER_GCC
	count = __builtin_popcount(_val);
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
	count = __popcnt(_val);
#else
	count = uint32_cntbits_ref(_val);
#endif // BX_COMPILER_GCC

	return count;
}
/// Reference (portable) implementation of the leading-zero count.
///
/// @returns Number of zero bits above the highest set bit of `_val`;
///          32 when `_val` is zero.
inline uint32_t uint32_cntlz_ref(uint32_t _val)
{
	// Smear the highest set bit into every lower position.
	uint32_t smear = uint32_or(uint32_srl(_val, 1), _val);
	smear = uint32_or(uint32_srl(smear,  2), smear);
	smear = uint32_or(uint32_srl(smear,  4), smear);
	smear = uint32_or(uint32_srl(smear,  8), smear);
	smear = uint32_or(uint32_srl(smear, 16), smear);

	// The bits still clear are exactly the leading zeros.
	return uint32_cntbits(uint32_not(smear) );
}
/// Count number of leading zeros.
///
/// @param _val Value to inspect.
/// @returns Number of zero bits above the highest set bit of `_val`. Returns
///          32 when `_val` is zero on every code path, matching
///          uint32_cntlz_ref.
inline uint32_t uint32_cntlz(uint32_t _val)
{
#if BX_COMPILER_GCC
	// The result of __builtin_clz is undefined for 0, so handle it here.
	return 0 == _val ? 32 : __builtin_clz(_val);
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
	// _BitScanReverse returns 0 and leaves index unset when no bit is set.
	unsigned long index;
	return 0 != _BitScanReverse(&index, _val) ? uint32_t(31 - index) : 32;
#else
	return uint32_cntlz_ref(_val);
#endif // BX_COMPILER_
}
/// Reference (portable) implementation of the trailing-zero count.
///
/// @returns Number of zero bits below the lowest set bit of `_val`;
///          32 when `_val` is zero.
inline uint32_t uint32_cnttz_ref(uint32_t _val)
{
	// ~x & (x - 1) keeps only the bits below the lowest set bit of x.
	const uint32_t belowLowest = uint32_and(uint32_not(_val), uint32_dec(_val) );
	return uint32_cntbits(belowLowest);
}
/// Count number of trailing zeros.
///
/// @param _val Value to inspect.
/// @returns Number of zero bits below the lowest set bit of `_val`. Returns
///          32 when `_val` is zero on every code path, matching
///          uint32_cnttz_ref.
inline uint32_t uint32_cnttz(uint32_t _val)
{
#if BX_COMPILER_GCC
	// The result of __builtin_ctz is undefined for 0, so handle it here.
	return 0 == _val ? 32 : __builtin_ctz(_val);
#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
	// _BitScanForward returns 0 and leaves index unset when no bit is set.
	unsigned long index;
	return 0 != _BitScanForward(&index, _val) ? uint32_t(index) : 32;
#else
	return uint32_cnttz_ref(_val);
#endif // BX_COMPILER_
}
/// Spreads the low 16 bits of `_a` so that one zero bit separates each of
/// them (input bit i ends up at bit 2*i). Upper 16 bits of `_a` are ignored.
///
/// shuffle:
/// ---- ---- ---- ---- fedc ba98 7654 3210
/// to:
/// -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
inline uint32_t uint32_part1by1(uint32_t _a)
{
	uint32_t bits = uint32_and(_a, 0xffff);

	// Each step moves the upper half of every group away from the lower
	// half, then masks off the stale copies left behind.
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits, 8) ), 0x00ff00ff);
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits, 4) ), 0x0f0f0f0f);
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits, 2) ), 0x33333333);
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits, 1) ), 0x55555555);

	return bits;
}
/// Spreads the low 10 bits of `_a` so that two zero bits separate each of
/// them (input bit i ends up at bit 3*i). Upper 22 bits of `_a` are ignored.
///
/// shuffle:
/// ---- ---- ---- ---- ---- --98 7654 3210
/// to:
/// ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
inline uint32_t uint32_part1by2(uint32_t _a)
{
	uint32_t bits = uint32_and(_a, 0x3ff);

	// Each step moves the upper part of every group further away, then
	// masks off the stale copies left behind.
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits, 16) ), 0xff0000ff);
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits,  8) ), 0x0300f00f);
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits,  4) ), 0x030c30c3);
	bits = uint32_and(uint32_xor(bits, uint32_sll(bits,  2) ), 0x09249249);

	return bits;
}
/// Tests whether `_a` is a power of two.
///
/// @returns 0xffffffff when exactly one bit of `_a` is set, 0 otherwise
///          (including for `_a == 0`).
inline uint32_t uint32_testpow2(uint32_t _a)
{
	// ~a + 1 is the two's complement negation; a & -a isolates the lowest
	// set bit, which equals `_a` only when no other bit is set.
	const uint32_t negated   = uint32_inc(uint32_not(_a) );
	const uint32_t lowestBit = uint32_and(_a, negated);
	const uint32_t singleBit = uint32_cmpeq(lowestBit, _a);

	// Zero also satisfies the check above, so rule it out explicitly.
	const uint32_t nonZero = uint32_cmpneq(_a, 0);

	return uint32_and(singleBit, nonZero);
}
/// Rounds `_a` up to the next power of two.
///
/// @returns `_a` itself when it already is a power of two, otherwise the
///          smallest power of two above it. Wraps to 0 when `_a` is 0 or
///          greater than 0x80000000.
inline uint32_t uint32_nextpow2(uint32_t _a)
{
	// Start one below so exact powers of two map onto themselves.
	uint32_t bits = uint32_dec(_a);

	// Smear the highest set bit into every lower position.
	bits = uint32_or(bits, uint32_srl(bits,  1) );
	bits = uint32_or(bits, uint32_srl(bits,  2) );
	bits = uint32_or(bits, uint32_srl(bits,  4) );
	bits = uint32_or(bits, uint32_srl(bits,  8) );
	bits = uint32_or(bits, uint32_srl(bits, 16) );

	// All-ones below the top bit plus one yields the next power of two.
	return uint32_inc(bits);
}
} // namespace bx
#endif // __BX_UINT32_T_H__