diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ddb52c3 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +*.cpp eol=lf +*.h eol=lf +*.sc eol=lf +*.sh eol=lf +*.md eol=lf +*.lua eol=lf diff --git a/README.md b/README.md index 08cc33b..075a723 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,39 @@ -bx -== - -Base library. - -Contact -------- - -[@bkaradzic](https://twitter.com/bkaradzic) -http://www.stuckingeometry.com - -Project page -https://github.com/bkaradzic/bx - -License -------- - -Copyright 2010-2012 Branimir Karadzic. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. +bx +== + +Base library. 
+ +Contact +------- + +[@bkaradzic](https://twitter.com/bkaradzic) +http://www.stuckingeometry.com + +Project page +https://github.com/bkaradzic/bx + +License +------- + +Copyright 2010-2012 Branimir Karadzic. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/include/bx/bx.h b/include/bx/bx.h index 3429a42..4543e50 100644 --- a/include/bx/bx.h +++ b/include/bx/bx.h @@ -1,23 +1,23 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_H__ -#define __BX_H__ - -#include -#include "platform.h" -#include "macros.h" - -namespace bx -{ -}// namespace bx - -#ifndef BX_NAMESPACE -# define BX_NAMESPACE 0 -#elif BX_NAMESPACE -using namespace bx; -#endif // BX_NAMESPACE - -#endif // __BX_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_H__ +#define __BX_H__ + +#include +#include "platform.h" +#include "macros.h" + +namespace bx +{ +}// namespace bx + +#ifndef BX_NAMESPACE +# define BX_NAMESPACE 0 +#elif BX_NAMESPACE +using namespace bx; +#endif // BX_NAMESPACE + +#endif // __BX_H__ diff --git a/include/bx/commandline.h b/include/bx/commandline.h index 522c86f..c472baa 100644 --- a/include/bx/commandline.h +++ b/include/bx/commandline.h @@ -1,164 +1,164 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_COMMANDLINE_H__ -#define __BX_COMMANDLINE_H__ - -#include "bx.h" -#include "string.h" - -namespace bx -{ - class CommandLine - { - public: - CommandLine(int _argc, char const* const* _argv) - : m_argc(_argc) - , m_argv(_argv) - { - } - - const char* findOption(const char* _long, const char* _default) const - { - const char* result = find('\0', _long, 1); - return result == NULL ? _default : result; - } - - const char* findOption(const char _short, const char* _long, const char* _default) const - { - const char* result = find(_short, _long, 1); - return result == NULL ? 
_default : result; - } - - const char* findOption(const char* _long, int _numParams = 1) const - { - const char* result = find('\0', _long, _numParams); - return result; - } - - const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1) const - { - const char* result = find(_short, _long, _numParams); - return result; - } - - bool hasArg(const char _short, const char* _long = NULL) const - { - const char* arg = findOption(_short, _long, 0); - return NULL != arg; - } - - bool hasArg(const char* _long) const - { - const char* arg = findOption('\0', _long, 0); - return NULL != arg; - } - - bool hasArg(const char*& _value, const char _short, const char* _long = NULL) const - { - const char* arg = findOption(_short, _long, 1); - _value = arg; - return NULL != arg; - } - - bool hasArg(int& _value, const char _short, const char* _long = NULL) const - { - const char* arg = findOption(_short, _long, 1); - if (NULL != arg) - { - _value = atoi(arg); - return true; - } - - return false; - } - - bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL) const - { - const char* arg = findOption(_short, _long, 1); - if (NULL != arg) - { - _value = atoi(arg); - return true; - } - - return false; - } - - bool hasArg(bool& _value, const char _short, const char* _long = NULL) const - { - const char* arg = findOption(_short, _long, 1); - if (NULL != arg) - { - if ('0' == *arg || stricmp(arg, "false") ) - { - _value = false; - } - else if ('0' != *arg || stricmp(arg, "true") ) - { - _value = true; - } - - return true; - } - - return false; - } - - private: - const char* find(const char _short, const char* _long, int _numParams) const - { - for (int ii = 0; ii < m_argc; ++ii) - { - const char* arg = m_argv[ii]; - if ('-' == *arg) - { - ++arg; - if (_short == *arg) - { - if (1 == strlen(arg) ) - { - if (0 == _numParams) - { - return ""; - } - else if (ii+_numParams < m_argc - && '-' != *m_argv[ii+1] ) - { - return m_argv[ii+1]; - } - - 
return NULL; - } - } - else if (NULL != _long - && '-' == *arg - && 0 == stricmp(arg+1, _long) ) - { - if (0 == _numParams) - { - return ""; - } - else if (ii+_numParams < m_argc - && '-' != *m_argv[ii+1] ) - { - return m_argv[ii+1]; - } - - return NULL; - } - } - } - - return NULL; - } - - int m_argc; - char const* const* m_argv; - }; - -} // namespace bx - -#endif /// __BX_COMMANDLINE_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_COMMANDLINE_H__ +#define __BX_COMMANDLINE_H__ + +#include "bx.h" +#include "string.h" + +namespace bx +{ + class CommandLine + { + public: + CommandLine(int _argc, char const* const* _argv) + : m_argc(_argc) + , m_argv(_argv) + { + } + + const char* findOption(const char* _long, const char* _default) const + { + const char* result = find('\0', _long, 1); + return result == NULL ? _default : result; + } + + const char* findOption(const char _short, const char* _long, const char* _default) const + { + const char* result = find(_short, _long, 1); + return result == NULL ? 
_default : result; + } + + const char* findOption(const char* _long, int _numParams = 1) const + { + const char* result = find('\0', _long, _numParams); + return result; + } + + const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1) const + { + const char* result = find(_short, _long, _numParams); + return result; + } + + bool hasArg(const char _short, const char* _long = NULL) const + { + const char* arg = findOption(_short, _long, 0); + return NULL != arg; + } + + bool hasArg(const char* _long) const + { + const char* arg = findOption('\0', _long, 0); + return NULL != arg; + } + + bool hasArg(const char*& _value, const char _short, const char* _long = NULL) const + { + const char* arg = findOption(_short, _long, 1); + _value = arg; + return NULL != arg; + } + + bool hasArg(int& _value, const char _short, const char* _long = NULL) const + { + const char* arg = findOption(_short, _long, 1); + if (NULL != arg) + { + _value = atoi(arg); + return true; + } + + return false; + } + + bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL) const + { + const char* arg = findOption(_short, _long, 1); + if (NULL != arg) + { + _value = atoi(arg); + return true; + } + + return false; + } + + bool hasArg(bool& _value, const char _short, const char* _long = NULL) const + { + const char* arg = findOption(_short, _long, 1); + if (NULL != arg) + { + if ('0' == *arg || stricmp(arg, "false") ) + { + _value = false; + } + else if ('0' != *arg || stricmp(arg, "true") ) + { + _value = true; + } + + return true; + } + + return false; + } + + private: + const char* find(const char _short, const char* _long, int _numParams) const + { + for (int ii = 0; ii < m_argc; ++ii) + { + const char* arg = m_argv[ii]; + if ('-' == *arg) + { + ++arg; + if (_short == *arg) + { + if (1 == strlen(arg) ) + { + if (0 == _numParams) + { + return ""; + } + else if (ii+_numParams < m_argc + && '-' != *m_argv[ii+1] ) + { + return m_argv[ii+1]; + } + + 
return NULL; + } + } + else if (NULL != _long + && '-' == *arg + && 0 == stricmp(arg+1, _long) ) + { + if (0 == _numParams) + { + return ""; + } + else if (ii+_numParams < m_argc + && '-' != *m_argv[ii+1] ) + { + return m_argv[ii+1]; + } + + return NULL; + } + } + } + + return NULL; + } + + int m_argc; + char const* const* m_argv; + }; + +} // namespace bx + +#endif /// __BX_COMMANDLINE_H__ diff --git a/include/bx/countof.h b/include/bx/countof.h index 128e427..86d6207 100644 --- a/include/bx/countof.h +++ b/include/bx/countof.h @@ -1,19 +1,19 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_COUNTOF_H__ -#define __BX_COUNTOF_H__ - -#include "bx.h" - -namespace bx -{ - // http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/ - template char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N]; -#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) ) - -} // namespace bx - -#endif // __BX_COUNTOF_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_COUNTOF_H__ +#define __BX_COUNTOF_H__ + +#include "bx.h" + +namespace bx +{ + // http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/ + template char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N]; +#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) ) + +} // namespace bx + +#endif // __BX_COUNTOF_H__ diff --git a/include/bx/cpu.h b/include/bx/cpu.h index 90d3204..be70b35 100644 --- a/include/bx/cpu.h +++ b/include/bx/cpu.h @@ -1,102 +1,102 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_CPU_H__ -#define __BX_CPU_H__ - -#include "bx.h" - -#if BX_COMPILER_MSVC -# if BX_PLATFORM_XBOX360 -# include -# include -# else -# include // math.h is included because VS bitches: - // warning C4985: 'ceil': attributes not present on previous declaration. - // must be included before intrin.h. -# include -# include -# endif // !BX_PLATFORM_XBOX360 -extern "C" void _ReadBarrier(); -extern "C" void _WriteBarrier(); -extern "C" void _ReadWriteBarrier(); -# pragma intrinsic(_ReadBarrier) -# pragma intrinsic(_WriteBarrier) -# pragma intrinsic(_ReadWriteBarrier) -# pragma intrinsic(_InterlockedIncrement) -# pragma intrinsic(_InterlockedDecrement) -#endif // BX_COMPILER_MSVC - -namespace bx -{ - inline void readBarrier() - { -#if BX_COMPILER_MSVC - _ReadBarrier(); -#elif BX_COMPILER_GCC || BX_COMPILER_CLANG - asm volatile("":::"memory"); -#endif // BX_COMPILER - } - - inline void writeBarrier() - { -#if BX_COMPILER_MSVC - _WriteBarrier(); -#elif BX_COMPILER_GCC || BX_COMPILER_CLANG - asm volatile("":::"memory"); -#endif // BX_COMPILER - } - - inline void readWriteBarrier() - { -#if BX_COMPILER_MSVC - _ReadWriteBarrier(); -#elif BX_COMPILER_GCC || BX_COMPILER_CLANG - asm volatile("":::"memory"); -#endif // BX_COMPILER - } - - inline void memoryBarrier() - { -#if BX_PLATFORM_XBOX360 - __lwsync(); -#elif BX_COMPILER_MSVC - _mm_mfence(); -#else - __sync_synchronize(); -// asm volatile("mfence":::"memory"); -#endif // BX_COMPILER - } - - inline int32_t atomicIncr(volatile void* _var) - { -#if BX_COMPILER_MSVC - return _InterlockedIncrement( (volatile LONG*)(_var) ); -#elif BX_COMPILER_GCC || BX_COMPILER_CLANG - return __sync_fetch_and_add( (volatile int32_t*)_var, 1); -#endif // BX_COMPILER - } - - inline int32_t atomicDecr(volatile void* _var) - { -#if BX_COMPILER_MSVC - return _InterlockedDecrement( (volatile LONG*)(_var) ); -#elif BX_COMPILER_GCC || BX_COMPILER_CLANG - return 
__sync_fetch_and_sub( (volatile int32_t*)_var, 1); -#endif // BX_COMPILER - } - - inline void* atomicExchangePtr(void** _target, void* _ptr) - { -#if BX_COMPILER_MSVC - return InterlockedExchangePointer(_target, _ptr); -#elif BX_COMPILER_GCC || BX_COMPILER_CLANG - return __sync_lock_test_and_set(_target, _ptr); -#endif // BX_COMPILER - } - -} // namespace bx - -#endif // __BX_CPU_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_CPU_H__ +#define __BX_CPU_H__ + +#include "bx.h" + +#if BX_COMPILER_MSVC +# if BX_PLATFORM_XBOX360 +# include +# include +# else +# include // math.h is included because VS bitches: + // warning C4985: 'ceil': attributes not present on previous declaration. + // must be included before intrin.h. +# include +# include +# endif // !BX_PLATFORM_XBOX360 +extern "C" void _ReadBarrier(); +extern "C" void _WriteBarrier(); +extern "C" void _ReadWriteBarrier(); +# pragma intrinsic(_ReadBarrier) +# pragma intrinsic(_WriteBarrier) +# pragma intrinsic(_ReadWriteBarrier) +# pragma intrinsic(_InterlockedIncrement) +# pragma intrinsic(_InterlockedDecrement) +#endif // BX_COMPILER_MSVC + +namespace bx +{ + inline void readBarrier() + { +#if BX_COMPILER_MSVC + _ReadBarrier(); +#elif BX_COMPILER_GCC || BX_COMPILER_CLANG + asm volatile("":::"memory"); +#endif // BX_COMPILER + } + + inline void writeBarrier() + { +#if BX_COMPILER_MSVC + _WriteBarrier(); +#elif BX_COMPILER_GCC || BX_COMPILER_CLANG + asm volatile("":::"memory"); +#endif // BX_COMPILER + } + + inline void readWriteBarrier() + { +#if BX_COMPILER_MSVC + _ReadWriteBarrier(); +#elif BX_COMPILER_GCC || BX_COMPILER_CLANG + asm volatile("":::"memory"); +#endif // BX_COMPILER + } + + inline void memoryBarrier() + { +#if BX_PLATFORM_XBOX360 + __lwsync(); +#elif BX_COMPILER_MSVC + _mm_mfence(); +#else + __sync_synchronize(); +// asm volatile("mfence":::"memory"); +#endif // BX_COMPILER + } + + inline 
int32_t atomicIncr(volatile void* _var) + { +#if BX_COMPILER_MSVC + return _InterlockedIncrement( (volatile LONG*)(_var) ); +#elif BX_COMPILER_GCC || BX_COMPILER_CLANG + return __sync_fetch_and_add( (volatile int32_t*)_var, 1); +#endif // BX_COMPILER + } + + inline int32_t atomicDecr(volatile void* _var) + { +#if BX_COMPILER_MSVC + return _InterlockedDecrement( (volatile LONG*)(_var) ); +#elif BX_COMPILER_GCC || BX_COMPILER_CLANG + return __sync_fetch_and_sub( (volatile int32_t*)_var, 1); +#endif // BX_COMPILER + } + + inline void* atomicExchangePtr(void** _target, void* _ptr) + { +#if BX_COMPILER_MSVC + return InterlockedExchangePointer(_target, _ptr); +#elif BX_COMPILER_GCC || BX_COMPILER_CLANG + return __sync_lock_test_and_set(_target, _ptr); +#endif // BX_COMPILER + } + +} // namespace bx + +#endif // __BX_CPU_H__ diff --git a/include/bx/debug.h b/include/bx/debug.h index 1ff7a1e..8464c0c 100644 --- a/include/bx/debug.h +++ b/include/bx/debug.h @@ -1,31 +1,31 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_DEBUG_H__ -#define __BX_DEBUG_H__ - -#include "bx.h" - -namespace bx -{ - inline void debugBreak() - { -#if BX_COMPILER_MSVC - __debugbreak(); -#elif BX_CPU_ARM - asm("bkpt 0"); -#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG) - // NaCl doesn't like int 3: - // NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules. - __asm__ ("int $3"); -#else // cross platform implementation - int* int3 = (int*)3L; - *int3 = 3; -#endif // BX - } - -} // namespace bx - -#endif // __BX_DEBUG_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_DEBUG_H__ +#define __BX_DEBUG_H__ + +#include "bx.h" + +namespace bx +{ + inline void debugBreak() + { +#if BX_COMPILER_MSVC + __debugbreak(); +#elif BX_CPU_ARM + asm("bkpt 0"); +#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG) + // NaCl doesn't like int 3: + // NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules. + __asm__ ("int $3"); +#else // cross platform implementation + int* int3 = (int*)3L; + *int3 = 3; +#endif // BX + } + +} // namespace bx + +#endif // __BX_DEBUG_H__ diff --git a/include/bx/endian.h b/include/bx/endian.h index 4056dce..5e34841 100644 --- a/include/bx/endian.h +++ b/include/bx/endian.h @@ -1,71 +1,71 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_ENDIAN_H__ -#define __BX_ENDIAN_H__ - -#include "bx.h" - -namespace bx -{ - inline uint16_t endianSwap(uint16_t _in) - { - return (_in>>8) | (_in<<8); - } - - inline uint32_t endianSwap(uint32_t _in) - { - return (_in>>24) | (_in<<24) - | ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8) - ; - } - - inline uint64_t endianSwap(uint64_t _in) - { - return (_in>>56) | (_in<<56) - | ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40) - | ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24) - | ( (_in&UINT64_C(0x000000ff00000000) )>>8) | ( (_in&UINT64_C(0x00000000ff000000) )<<8) - ; - } - - inline int16_t endianSwap(int16_t _in) - { - return (int16_t)endianSwap( (uint16_t)_in); - } - - inline int32_t endianSwap(int32_t _in) - { - return (int32_t)endianSwap( (uint32_t)_in); - } - - inline int64_t endianSwap(int64_t _in) - { - return (int64_t)endianSwap( (uint64_t)_in); - } - - template - inline Ty littleEndian(Ty& _in) - { -#if BX_CPU_ENDIAN_BIG - endianSwap(_in); -#else - 
return _in; -#endif // BX_CPU_ENDIAN_BIG - } - - template - inline Ty bigEndian(Ty& _in) - { -#if BX_CPU_ENDIAN_LITTLE - return endianSwap(_in); -#else - return _in; -#endif // BX_CPU_ENDIAN_LITTLE - } - -} // namespace bx - -#endif // __BX_ENDIAN_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_ENDIAN_H__ +#define __BX_ENDIAN_H__ + +#include "bx.h" + +namespace bx +{ + inline uint16_t endianSwap(uint16_t _in) + { + return (_in>>8) | (_in<<8); + } + + inline uint32_t endianSwap(uint32_t _in) + { + return (_in>>24) | (_in<<24) + | ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8) + ; + } + + inline uint64_t endianSwap(uint64_t _in) + { + return (_in>>56) | (_in<<56) + | ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40) + | ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24) + | ( (_in&UINT64_C(0x000000ff00000000) )>>8) | ( (_in&UINT64_C(0x00000000ff000000) )<<8) + ; + } + + inline int16_t endianSwap(int16_t _in) + { + return (int16_t)endianSwap( (uint16_t)_in); + } + + inline int32_t endianSwap(int32_t _in) + { + return (int32_t)endianSwap( (uint32_t)_in); + } + + inline int64_t endianSwap(int64_t _in) + { + return (int64_t)endianSwap( (uint64_t)_in); + } + + template + inline Ty littleEndian(Ty& _in) + { +#if BX_CPU_ENDIAN_BIG + endianSwap(_in); +#else + return _in; +#endif // BX_CPU_ENDIAN_BIG + } + + template + inline Ty bigEndian(Ty& _in) + { +#if BX_CPU_ENDIAN_LITTLE + return endianSwap(_in); +#else + return _in; +#endif // BX_CPU_ENDIAN_LITTLE + } + +} // namespace bx + +#endif // __BX_ENDIAN_H__ diff --git a/include/bx/float4_neon.h b/include/bx/float4_neon.h index d22e76a..f2668a3 100644 --- a/include/bx/float4_neon.h +++ b/include/bx/float4_neon.h @@ -1,244 +1,244 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FLOAT4_NEON_H__ -#define __BX_FLOAT4_NEON_H__ - -#include - -namespace bx -{ - -// Reference: -// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html -// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/ -// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ -// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/ -// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/ -// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/ - - typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) ); - -#define ELEMx 0 -#define ELEMy 1 -#define ELEMz 2 -#define ELEMw 3 -#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ - BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ - { \ - float4_t result; \ - result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \ - result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \ - result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \ - result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \ - return result; \ - } - -#include "float4_swizzle.inl" - -#undef IMPLEMENT_SWIZZLE -#undef ELEMw -#undef ELEMz -#undef ELEMy -#undef ELEMx - - BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) - { - return _a; //_mm_movelh_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b) - { - return _a; //_mm_movelh_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) - { - return _a; //_mm_movehl_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) - { - return _a; //_mm_movehl_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) - { - return _a; //_mm_unpacklo_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) - { - return _a; 
//_mm_unpacklo_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) - { - return _a; //_mm_unpackhi_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) - { - return _a; //_mm_unpackhi_ps(_b, _a); - } - - BX_FLOAT4_INLINE float float4_x(float4_t _a) - { - return _a.fxyzw[0]; - } - - BX_FLOAT4_INLINE float float4_y(float4_t _a) - { - return _a.fxyzw[1]; - } - - BX_FLOAT4_INLINE float float4_z(float4_t _a) - { - return _a.fxyzw[2]; - } - - BX_FLOAT4_INLINE float float4_w(float4_t _a) - { - return _a.fxyzw[3]; - } - -// BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) -// { -// return _mm_load_ps(reinterpret_cast(_ptr) ); -// } - -// BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) -// { -// _mm_store_ps(reinterpret_cast(_ptr), _a); -// } - -// BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) -// { -// _mm_stream_ps(reinterpret_cast(_ptr), _a); -// } - - BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) - { - const float32_t val[4] = {_x, _y, _z, _w}; - return __builtin_neon_vld1v4sf(val); - } - - BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) - { - const uint32_t val[4] = {_x, _y, _z, _w}; - return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val); - } - - BX_FLOAT4_INLINE float4_t float4_splat(float _a) - { - return __builtin_neon_vdup_nv4sf(_a); - } - - BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) - { - return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a); - } - - BX_FLOAT4_INLINE float4_t float4_zero() - { - return vdupq_n_f32(0.0f); - } - - BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) - { - return vaddq_f32(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) - { - return vsubq_f32(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) - { - return vmulq_f32(_a, _b); - } - - BX_FLOAT4_INLINE 
float4_t float4_rcp_est(float4_t _a) - { - return vrecpeq_f32(_a); - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) - { - return vrsqrteq_f32(_a); - } - - BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) - { - return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0); - } - - //BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) - //{ - // return _mm_andnot_ps(_b, _a); - //} - - BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) - { - return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0); - } - - BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) - { - const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a); - const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b); - const uint32x4_t add = vaddq_u32(tmp0, tmp1); - const float4_t result = vreinterpretq_f32_u32(add); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) - { - const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a); - const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b); - const uint32x4_t sub = vsubq_u32(tmp0, tmp1); - const float4_t result = vreinterpretq_f32_u32(sub); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) - { - const uint32x4_t tmp = vreinterpretq_u32_f32(_a); - const uint32x4_t shift = vshlq_n_u32(tmp, _count); - const float4_t result = vreinterpretq_f32_u32(shift); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) - { - const uint32x4_t tmp = vreinterpretq_i32_f32(_a); - const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0); - const float4_t result = vreinterpretq_f32_u32(shift); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) - { - const int32x4_t a = vreinterpretq_s32_f32(_a); - const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1); - const float4_t result = vreinterpretq_f32_s32(shift); - - return 
result; - } - -} // namespace bx - -#define float4_div_nr float4_div_nr_ni -#define float4_div float4_div_nr_ni -#define float4_ceil float4_ceil_ni -#define float4_floor float4_floor_ni -#include "float4_ni.h" - -#endif // __BX_FLOAT4_NEON_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FLOAT4_NEON_H__ +#define __BX_FLOAT4_NEON_H__ + +#include + +namespace bx +{ + +// Reference: +// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html +// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/ +// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ +// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/ +// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/ +// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/ + + typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) ); + +#define ELEMx 0 +#define ELEMy 1 +#define ELEMz 2 +#define ELEMw 3 +#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ + BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ + { \ + float4_t result; \ + result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \ + result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \ + result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \ + result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \ + return result; \ + } + +#include "float4_swizzle.inl" + +#undef IMPLEMENT_SWIZZLE +#undef ELEMw +#undef ELEMz +#undef ELEMy +#undef ELEMx + + BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) + { + return _a; //_mm_movelh_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b) + { + return _a; //_mm_movelh_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) + { + return _a; //_mm_movehl_ps(_a, _b); + } + + 
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) + { + return _a; //_mm_movehl_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) + { + return _a; //_mm_unpacklo_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) + { + return _a; //_mm_unpacklo_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) + { + return _a; //_mm_unpackhi_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) + { + return _a; //_mm_unpackhi_ps(_b, _a); + } + + BX_FLOAT4_INLINE float float4_x(float4_t _a) + { + return _a.fxyzw[0]; + } + + BX_FLOAT4_INLINE float float4_y(float4_t _a) + { + return _a.fxyzw[1]; + } + + BX_FLOAT4_INLINE float float4_z(float4_t _a) + { + return _a.fxyzw[2]; + } + + BX_FLOAT4_INLINE float float4_w(float4_t _a) + { + return _a.fxyzw[3]; + } + +// BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) +// { +// return _mm_load_ps(reinterpret_cast(_ptr) ); +// } + +// BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) +// { +// _mm_store_ps(reinterpret_cast(_ptr), _a); +// } + +// BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) +// { +// _mm_stream_ps(reinterpret_cast(_ptr), _a); +// } + + BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) + { + const float32_t val[4] = {_x, _y, _z, _w}; + return __builtin_neon_vld1v4sf(val); + } + + BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) + { + const uint32_t val[4] = {_x, _y, _z, _w}; + return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val); + } + + BX_FLOAT4_INLINE float4_t float4_splat(float _a) + { + return __builtin_neon_vdup_nv4sf(_a); + } + + BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) + { + return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a); + } + + BX_FLOAT4_INLINE float4_t float4_zero() + { + return vdupq_n_f32(0.0f); + } + + 
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) + { + return vaddq_f32(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) + { + return vsubq_f32(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) + { + return vmulq_f32(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) + { + return vrecpeq_f32(_a); + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) + { + return vrsqrteq_f32(_a); + } + + BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) + { + return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0); + } + + //BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) + //{ + // return _mm_andnot_ps(_b, _a); + //} + + BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) + { + return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0); + } + + BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) + { + const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a); + const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b); + const uint32x4_t add = vaddq_u32(tmp0, tmp1); + const float4_t result = vreinterpretq_f32_u32(add); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) + { + const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a); + const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b); + const uint32x4_t sub = vsubq_u32(tmp0, tmp1); + const float4_t result = vreinterpretq_f32_u32(sub); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) + { + const uint32x4_t tmp = vreinterpretq_u32_f32(_a); + const uint32x4_t shift = vshlq_n_u32(tmp, _count); + const float4_t result = vreinterpretq_f32_u32(shift); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) + { + const uint32x4_t tmp = vreinterpretq_i32_f32(_a); + const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0); + const 
float4_t result = vreinterpretq_f32_u32(shift); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) + { + const int32x4_t a = vreinterpretq_s32_f32(_a); + const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1); + const float4_t result = vreinterpretq_f32_s32(shift); + + return result; + } + +} // namespace bx + +#define float4_div_nr float4_div_nr_ni +#define float4_div float4_div_nr_ni +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni +#include "float4_ni.h" + +#endif // __BX_FLOAT4_NEON_H__ diff --git a/include/bx/float4_ni.h b/include/bx/float4_ni.h index 7d998de..d423545 100644 --- a/include/bx/float4_ni.h +++ b/include/bx/float4_ni.h @@ -1,431 +1,431 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FLOAT4_NI_H__ -#define __BX_FLOAT4_NI_H__ - -namespace bx -{ - BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b) - { - const float4_t xAyB = float4_shuf_xAyB(_a, _b); - const float4_t zCwD = float4_shuf_zCwD(_a, _b); - const float4_t result = float4_shuf_xyAB(xAyB, zCwD); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b) - { - const float4_t xAyB = float4_shuf_xAyB(_a, _b); - const float4_t zCwD = float4_shuf_zCwD(_a, _b); - const float4_t result = float4_shuf_zwCD(xAyB, zCwD); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c) - { - const float4_t mul = float4_mul(_a, _b); - const float4_t result = float4_add(mul, _c); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c) - { - const float4_t mul = float4_mul(_a, _b); - const float4_t result = float4_sub(_c, mul); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b) - { - const float4_t oneish = float4_isplat(0x3f800001); - 
const float4_t est = float4_rcp_est(_b); - const float4_t iter0 = float4_mul(_a, est); - const float4_t tmp1 = float4_nmsub(_b, est, oneish); - const float4_t result = float4_madd(tmp1, iter0, iter0); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a) - { - const float4_t one = float4_splat(1.0f); - const float4_t result = float4_div(one, _a); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a) - { - const float4_t zwxy = float4_swiz_zwxy(_a); - const float4_t tmp0 = float4_or(_a, zwxy); - const float4_t tmp1 = float4_swiz_yyyy(_a); - const float4_t tmp2 = float4_or(tmp0, tmp1); - const float4_t mf000 = float4_ild(-1, 0, 0, 0); - const float4_t result = float4_and(tmp2, mf000); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_orc_ni(float4_t _a, float4_t _b) - { - const float4_t aorb = float4_or(_a, _b); - const float4_t mffff = float4_isplat(-1); - const float4_t result = float4_xor(aorb, mffff); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a) - { - const float4_t zero = float4_zero(); - const float4_t result = float4_sub(zero, _a); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b) - { - const float4_t sel_a = float4_and(_a, _mask); - const float4_t sel_b = float4_andc(_b, _mask); - const float4_t result = float4_or(sel_a, sel_b); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b) - { - const float4_t mask = float4_sra(_test, 31); - const float4_t result = float4_selb(mask, _a, _b); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a) - { - const float4_t mffff = float4_isplat(-1); - const float4_t result = float4_xor(_a, mffff); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a) - { - const float4_t a_neg = float4_neg(_a); - const float4_t result = float4_max(a_neg, _a); - - return result; - } 
- - BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max) - { - const float4_t tmp = float4_min(_a, _max); - const float4_t result = float4_max(tmp, _min); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s) - { - const float4_t ba = float4_sub(_b, _a); - const float4_t result = float4_madd(_s, ba, _a); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a) - { - const float4_t half = float4_splat(0.5f); - const float4_t one = float4_splat(1.0f); - const float4_t zero = float4_zero(); - const float4_t tmp0 = float4_rsqrt_est(_a); - const float4_t tmp1 = float4_madd(tmp0, _a, zero); - const float4_t tmp2 = float4_madd(tmp1, half, zero); - const float4_t tmp3 = float4_nmsub(tmp0, tmp1, one); - const float4_t result = float4_madd(tmp3, tmp2, tmp1); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a) - { - const float4_t one = float4_splat(1.0f); - const float4_t sqrt = float4_sqrt(_a); - const float4_t result = float4_div(one, sqrt); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a) - { - const float4_t rsqrt = float4_rsqrt_est(_a); - const float4_t iter0 = float4_mul(_a, rsqrt); - const float4_t iter1 = float4_mul(iter0, rsqrt); - const float4_t half = float4_splat(0.5f); - const float4_t half_rsqrt = float4_mul(half, rsqrt); - const float4_t three = float4_splat(3.0f); - const float4_t three_sub_iter1 = float4_sub(three, iter1); - const float4_t result = float4_mul(half_rsqrt, three_sub_iter1); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a) - { - const float4_t half = float4_splat(0.5f); - const float4_t ah = float4_mul(half, _a); - const float4_t ashift = float4_sra(_a, 1); - const float4_t magic = float4_isplat(0x5f3759df); - const float4_t msuba = float4_isub(magic, ashift); - const float4_t msubasq = float4_mul(msuba, msuba); - const float4_t 
tmp0 = float4_splat(1.5f); - const float4_t tmp1 = float4_mul(ah, msubasq); - const float4_t tmp2 = float4_sub(tmp0, tmp1); - const float4_t result = float4_mul(msuba, tmp2); - - return result; - } - - namespace float4_logexp_detail - { - BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b) - { - return float4_splat(_b); - } - - BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c) - { - const float4_t bbbb = float4_splat(_b); - const float4_t poly0 = float4_poly0(_a, _c); - const float4_t result = float4_madd(poly0, _a, bbbb); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d) - { - const float4_t bbbb = float4_splat(_b); - const float4_t poly = float4_poly1(_a, _c, _d); - const float4_t result = float4_madd(poly, _a, bbbb); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e) - { - const float4_t bbbb = float4_splat(_b); - const float4_t poly = float4_poly2(_a, _c, _d, _e); - const float4_t result = float4_madd(poly, _a, bbbb); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f) - { - const float4_t bbbb = float4_splat(_b); - const float4_t poly = float4_poly3(_a, _c, _d, _e, _f); - const float4_t result = float4_madd(poly, _a, bbbb); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g) - { - const float4_t bbbb = float4_splat(_b); - const float4_t poly = float4_poly4(_a, _c, _d, _e, _f, _g); - const float4_t result = float4_madd(poly, _a, bbbb); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a) - { -#if 1 - const float4_t result = float4_poly5(_a - , 3.11578814719469302614f, -3.32419399085241980044f - , 2.59883907202499966007f, -1.23152682416275988241f - , 0.318212422185251071475f, -0.0344359067839062357313f - ); -#elif 0 - 
const float4_t result = float4_poly4(_a - , 2.8882704548164776201f, -2.52074962577807006663f - , 1.48116647521213171641f, -0.465725644288844778798f - , 0.0596515482674574969533f - ); -#elif 0 - const float4_t result = float4_poly3(_a - , 2.61761038894603480148f, -1.75647175389045657003f - , 0.688243882994381274313f, -0.107254423828329604454f - ); -#else - const float4_t result = float4_poly2(_a - , 2.28330284476918490682f, -1.04913055217340124191f - , 0.204446009836232697516f - ); -#endif - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a) - { -#if 1 - const float4_t result = float4_poly5(_a - , 9.9999994e-1f, 6.9315308e-1f - , 2.4015361e-1f, 5.5826318e-2f - , 8.9893397e-3f, 1.8775767e-3f - ); -#elif 0 - const float4_t result = float4_poly4(_a - , 1.0000026f, 6.9300383e-1f - , 2.4144275e-1f, 5.2011464e-2f - , 1.3534167e-2f - ); -#elif 0 - const float4_t result = float4_poly3(_a - , 9.9992520e-1f, 6.9583356e-1f - , 2.2606716e-1f, 7.8024521e-2f - ); -#else - const float4_t result = float4_poly2(_a - , 1.0017247f, 6.5763628e-1f - , 3.3718944e-1f - ); -#endif // 0 - - return result; - } - } // namespace float4_internal - - BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a) - { - const float4_t expmask = float4_isplat(0x7f800000); - const float4_t mantmask = float4_isplat(0x007fffff); - const float4_t one = float4_splat(1.0f); - - const float4_t c127 = float4_isplat(127); - const float4_t aexp = float4_and(_a, expmask); - const float4_t aexpsr = float4_srl(aexp, 23); - const float4_t tmp0 = float4_isub(aexpsr, c127); - const float4_t exp = float4_itof(tmp0); - - const float4_t amask = float4_and(_a, mantmask); - const float4_t mant = float4_or(amask, one); - - const float4_t poly = float4_logexp_detail::float4_logpoly(mant); - - const float4_t mandiff = float4_sub(mant, one); - const float4_t result = float4_madd(poly, mandiff, exp); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a) - { - const float4_t min 
= float4_splat( 129.0f); - const float4_t max = float4_splat(-126.99999f); - const float4_t tmp0 = float4_min(_a, min); - const float4_t aaaa = float4_max(tmp0, max); - - const float4_t half = float4_splat(0.5f); - const float4_t tmp2 = float4_sub(aaaa, half); - const float4_t ipart = float4_ftoi(tmp2); - const float4_t iround = float4_itof(ipart); - const float4_t fpart = float4_sub(aaaa, iround); - - const float4_t c127 = float4_isplat(127); - const float4_t tmp5 = float4_iadd(ipart, c127); - const float4_t expipart = float4_sll(tmp5, 23); - - const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart); - - const float4_t result = float4_mul(expipart, expfpart); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b) - { - const float4_t alog2 = float4_log2(_a); - const float4_t alog2b = float4_mul(alog2, _b); - const float4_t result = float4_exp2(alog2b); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b) - { - const float4_t xyzw = float4_mul(_a, _b); - const float4_t xxxx = float4_swiz_xxxx(xyzw); - const float4_t yyyy = float4_swiz_yyyy(xyzw); - const float4_t zzzz = float4_swiz_zzzz(xyzw); - const float4_t tmp1 = float4_add(xxxx, yyyy); - const float4_t result = float4_add(zzzz, tmp1); - return result; - } - - BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b) - { - const float4_t a_yzxw = float4_swiz_yzxw(_a); - const float4_t a_zxyw = float4_swiz_zxyw(_a); - const float4_t b_zxyw = float4_swiz_zxyw(_b); - const float4_t b_yzxw = float4_swiz_yzxw(_b); - const float4_t tmp = float4_mul(a_yzxw, b_zxyw); - const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a) - { - const float4_t dot3 = float4_dot3(_a, _a); - const float4_t invSqrt = float4_rsqrt(dot3); - const float4_t result = float4_mul(_a, invSqrt); - - return result; - } - - BX_FLOAT4_INLINE float4_t 
float4_dot_ni(float4_t _a, float4_t _b) - { - const float4_t xyzw = float4_mul(_a, _b); - const float4_t yzwx = float4_swiz_yzwx(xyzw); - const float4_t tmp0 = float4_add(xyzw, yzwx); - const float4_t zwxy = float4_swiz_zwxy(tmp0); - const float4_t result = float4_add(tmp0, zwxy); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_ceil_ni(float4_t _a) - { - const float4_t tmp0 = float4_ftoi(_a); - const float4_t tmp1 = float4_itof(tmp0); - const float4_t mask = float4_cmplt(tmp1, _a); - const float4_t one = float4_splat(1.0f); - const float4_t tmp2 = float4_and(one, mask); - const float4_t result = float4_add(tmp1, tmp2); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_floor_ni(float4_t _a) - { - const float4_t tmp0 = float4_ftoi(_a); - const float4_t tmp1 = float4_itof(tmp0); - const float4_t mask = float4_cmpgt(tmp1, _a); - const float4_t one = float4_splat(1.0f); - const float4_t tmp2 = float4_and(one, mask); - const float4_t result = float4_sub(tmp1, tmp2); - - return result; - } - -} // namespace bx - -#endif // __BX_FLOAT4_NI_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FLOAT4_NI_H__ +#define __BX_FLOAT4_NI_H__ + +namespace bx +{ + BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b) + { + const float4_t xAyB = float4_shuf_xAyB(_a, _b); + const float4_t zCwD = float4_shuf_zCwD(_a, _b); + const float4_t result = float4_shuf_xyAB(xAyB, zCwD); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b) + { + const float4_t xAyB = float4_shuf_xAyB(_a, _b); + const float4_t zCwD = float4_shuf_zCwD(_a, _b); + const float4_t result = float4_shuf_zwCD(xAyB, zCwD); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c) + { + const float4_t mul = float4_mul(_a, _b); + const float4_t result = float4_add(mul, _c); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c) + { + const float4_t mul = float4_mul(_a, _b); + const float4_t result = float4_sub(_c, mul); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b) + { + const float4_t oneish = float4_isplat(0x3f800001); + const float4_t est = float4_rcp_est(_b); + const float4_t iter0 = float4_mul(_a, est); + const float4_t tmp1 = float4_nmsub(_b, est, oneish); + const float4_t result = float4_madd(tmp1, iter0, iter0); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a) + { + const float4_t one = float4_splat(1.0f); + const float4_t result = float4_div(one, _a); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a) + { + const float4_t zwxy = float4_swiz_zwxy(_a); + const float4_t tmp0 = float4_or(_a, zwxy); + const float4_t tmp1 = float4_swiz_yyyy(_a); + const float4_t tmp2 = float4_or(tmp0, tmp1); + const float4_t mf000 = float4_ild(-1, 0, 0, 0); + const float4_t result = float4_and(tmp2, mf000); + + return result; + } + + BX_FLOAT4_INLINE float4_t 
float4_orc_ni(float4_t _a, float4_t _b) + { + const float4_t aorb = float4_or(_a, _b); + const float4_t mffff = float4_isplat(-1); + const float4_t result = float4_xor(aorb, mffff); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a) + { + const float4_t zero = float4_zero(); + const float4_t result = float4_sub(zero, _a); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b) + { + const float4_t sel_a = float4_and(_a, _mask); + const float4_t sel_b = float4_andc(_b, _mask); + const float4_t result = float4_or(sel_a, sel_b); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b) + { + const float4_t mask = float4_sra(_test, 31); + const float4_t result = float4_selb(mask, _a, _b); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a) + { + const float4_t mffff = float4_isplat(-1); + const float4_t result = float4_xor(_a, mffff); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a) + { + const float4_t a_neg = float4_neg(_a); + const float4_t result = float4_max(a_neg, _a); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max) + { + const float4_t tmp = float4_min(_a, _max); + const float4_t result = float4_max(tmp, _min); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s) + { + const float4_t ba = float4_sub(_b, _a); + const float4_t result = float4_madd(_s, ba, _a); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a) + { + const float4_t half = float4_splat(0.5f); + const float4_t one = float4_splat(1.0f); + const float4_t zero = float4_zero(); + const float4_t tmp0 = float4_rsqrt_est(_a); + const float4_t tmp1 = float4_madd(tmp0, _a, zero); + const float4_t tmp2 = float4_madd(tmp1, half, zero); + const float4_t tmp3 = 
float4_nmsub(tmp0, tmp1, one); + const float4_t result = float4_madd(tmp3, tmp2, tmp1); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a) + { + const float4_t one = float4_splat(1.0f); + const float4_t sqrt = float4_sqrt(_a); + const float4_t result = float4_div(one, sqrt); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a) + { + const float4_t rsqrt = float4_rsqrt_est(_a); + const float4_t iter0 = float4_mul(_a, rsqrt); + const float4_t iter1 = float4_mul(iter0, rsqrt); + const float4_t half = float4_splat(0.5f); + const float4_t half_rsqrt = float4_mul(half, rsqrt); + const float4_t three = float4_splat(3.0f); + const float4_t three_sub_iter1 = float4_sub(three, iter1); + const float4_t result = float4_mul(half_rsqrt, three_sub_iter1); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a) + { + const float4_t half = float4_splat(0.5f); + const float4_t ah = float4_mul(half, _a); + const float4_t ashift = float4_sra(_a, 1); + const float4_t magic = float4_isplat(0x5f3759df); + const float4_t msuba = float4_isub(magic, ashift); + const float4_t msubasq = float4_mul(msuba, msuba); + const float4_t tmp0 = float4_splat(1.5f); + const float4_t tmp1 = float4_mul(ah, msubasq); + const float4_t tmp2 = float4_sub(tmp0, tmp1); + const float4_t result = float4_mul(msuba, tmp2); + + return result; + } + + namespace float4_logexp_detail + { + BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b) + { + return float4_splat(_b); + } + + BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c) + { + const float4_t bbbb = float4_splat(_b); + const float4_t poly0 = float4_poly0(_a, _c); + const float4_t result = float4_madd(poly0, _a, bbbb); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d) + { + const float4_t bbbb = float4_splat(_b); + const float4_t poly = float4_poly1(_a, _c, _d); + const float4_t 
result = float4_madd(poly, _a, bbbb); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e) + { + const float4_t bbbb = float4_splat(_b); + const float4_t poly = float4_poly2(_a, _c, _d, _e); + const float4_t result = float4_madd(poly, _a, bbbb); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f) + { + const float4_t bbbb = float4_splat(_b); + const float4_t poly = float4_poly3(_a, _c, _d, _e, _f); + const float4_t result = float4_madd(poly, _a, bbbb); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g) + { + const float4_t bbbb = float4_splat(_b); + const float4_t poly = float4_poly4(_a, _c, _d, _e, _f, _g); + const float4_t result = float4_madd(poly, _a, bbbb); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a) + { +#if 1 + const float4_t result = float4_poly5(_a + , 3.11578814719469302614f, -3.32419399085241980044f + , 2.59883907202499966007f, -1.23152682416275988241f + , 0.318212422185251071475f, -0.0344359067839062357313f + ); +#elif 0 + const float4_t result = float4_poly4(_a + , 2.8882704548164776201f, -2.52074962577807006663f + , 1.48116647521213171641f, -0.465725644288844778798f + , 0.0596515482674574969533f + ); +#elif 0 + const float4_t result = float4_poly3(_a + , 2.61761038894603480148f, -1.75647175389045657003f + , 0.688243882994381274313f, -0.107254423828329604454f + ); +#else + const float4_t result = float4_poly2(_a + , 2.28330284476918490682f, -1.04913055217340124191f + , 0.204446009836232697516f + ); +#endif + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a) + { +#if 1 + const float4_t result = float4_poly5(_a + , 9.9999994e-1f, 6.9315308e-1f + , 2.4015361e-1f, 5.5826318e-2f + , 8.9893397e-3f, 1.8775767e-3f + ); +#elif 0 + const float4_t result = float4_poly4(_a 
+ , 1.0000026f, 6.9300383e-1f + , 2.4144275e-1f, 5.2011464e-2f + , 1.3534167e-2f + ); +#elif 0 + const float4_t result = float4_poly3(_a + , 9.9992520e-1f, 6.9583356e-1f + , 2.2606716e-1f, 7.8024521e-2f + ); +#else + const float4_t result = float4_poly2(_a + , 1.0017247f, 6.5763628e-1f + , 3.3718944e-1f + ); +#endif // 0 + + return result; + } + } // namespace float4_internal + + BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a) + { + const float4_t expmask = float4_isplat(0x7f800000); + const float4_t mantmask = float4_isplat(0x007fffff); + const float4_t one = float4_splat(1.0f); + + const float4_t c127 = float4_isplat(127); + const float4_t aexp = float4_and(_a, expmask); + const float4_t aexpsr = float4_srl(aexp, 23); + const float4_t tmp0 = float4_isub(aexpsr, c127); + const float4_t exp = float4_itof(tmp0); + + const float4_t amask = float4_and(_a, mantmask); + const float4_t mant = float4_or(amask, one); + + const float4_t poly = float4_logexp_detail::float4_logpoly(mant); + + const float4_t mandiff = float4_sub(mant, one); + const float4_t result = float4_madd(poly, mandiff, exp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a) + { + const float4_t min = float4_splat( 129.0f); + const float4_t max = float4_splat(-126.99999f); + const float4_t tmp0 = float4_min(_a, min); + const float4_t aaaa = float4_max(tmp0, max); + + const float4_t half = float4_splat(0.5f); + const float4_t tmp2 = float4_sub(aaaa, half); + const float4_t ipart = float4_ftoi(tmp2); + const float4_t iround = float4_itof(ipart); + const float4_t fpart = float4_sub(aaaa, iround); + + const float4_t c127 = float4_isplat(127); + const float4_t tmp5 = float4_iadd(ipart, c127); + const float4_t expipart = float4_sll(tmp5, 23); + + const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart); + + const float4_t result = float4_mul(expipart, expfpart); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b) + { + 
const float4_t alog2 = float4_log2(_a); + const float4_t alog2b = float4_mul(alog2, _b); + const float4_t result = float4_exp2(alog2b); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b) + { + const float4_t xyzw = float4_mul(_a, _b); + const float4_t xxxx = float4_swiz_xxxx(xyzw); + const float4_t yyyy = float4_swiz_yyyy(xyzw); + const float4_t zzzz = float4_swiz_zzzz(xyzw); + const float4_t tmp1 = float4_add(xxxx, yyyy); + const float4_t result = float4_add(zzzz, tmp1); + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b) + { + const float4_t a_yzxw = float4_swiz_yzxw(_a); + const float4_t a_zxyw = float4_swiz_zxyw(_a); + const float4_t b_zxyw = float4_swiz_zxyw(_b); + const float4_t b_yzxw = float4_swiz_yzxw(_b); + const float4_t tmp = float4_mul(a_yzxw, b_zxyw); + const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a) + { + const float4_t dot3 = float4_dot3(_a, _a); + const float4_t invSqrt = float4_rsqrt(dot3); + const float4_t result = float4_mul(_a, invSqrt); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_dot_ni(float4_t _a, float4_t _b) + { + const float4_t xyzw = float4_mul(_a, _b); + const float4_t yzwx = float4_swiz_yzwx(xyzw); + const float4_t tmp0 = float4_add(xyzw, yzwx); + const float4_t zwxy = float4_swiz_zwxy(tmp0); + const float4_t result = float4_add(tmp0, zwxy); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_ceil_ni(float4_t _a) + { + const float4_t tmp0 = float4_ftoi(_a); + const float4_t tmp1 = float4_itof(tmp0); + const float4_t mask = float4_cmplt(tmp1, _a); + const float4_t one = float4_splat(1.0f); + const float4_t tmp2 = float4_and(one, mask); + const float4_t result = float4_add(tmp1, tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_floor_ni(float4_t _a) + { + const float4_t tmp0 = float4_ftoi(_a); + const float4_t tmp1 = 
float4_itof(tmp0); + const float4_t mask = float4_cmpgt(tmp1, _a); + const float4_t one = float4_splat(1.0f); + const float4_t tmp2 = float4_and(one, mask); + const float4_t result = float4_sub(tmp1, tmp2); + + return result; + } + +} // namespace bx + +#endif // __BX_FLOAT4_NI_H__ diff --git a/include/bx/float4_ref.h b/include/bx/float4_ref.h index e9dde1a..4a9cf04 100644 --- a/include/bx/float4_ref.h +++ b/include/bx/float4_ref.h @@ -1,529 +1,529 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FLOAT4_REF_H__ -#define __BX_FLOAT4_REF_H__ - -#include // sqrtf - -namespace bx -{ - typedef union float4_t - { - int32_t ixyzw[4]; - uint32_t uxyzw[4]; - float fxyzw[4]; - - } float4_t; - -#define ELEMx 0 -#define ELEMy 1 -#define ELEMz 2 -#define ELEMw 3 -#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ - BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ - { \ - float4_t result; \ - result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \ - result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \ - result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \ - result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \ - return result; \ - } - -#include "float4_swizzle.inl" - -#undef IMPLEMENT_SWIZZLE -#undef ELEMw -#undef ELEMz -#undef ELEMy -#undef ELEMx - -#define IMPLEMENT_TEST(_xyzw, _mask) \ - BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \ - { \ - uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \ - | ( (_test.uxyzw[2]>>31)<<2) \ - | ( (_test.uxyzw[1]>>31)<<1) \ - | (_test.uxyzw[0]>>31) \ - ; \ - return 0 != (tmp&(_mask) ); \ - } \ - \ - BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \ - { \ - uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \ - | ( (_test.uxyzw[2]>>31)<<2) \ - | ( (_test.uxyzw[1]>>31)<<1) \ - | (_test.uxyzw[0]>>31) \ - ; \ - return (_mask) == (tmp&(_mask) ); \ - } - -IMPLEMENT_TEST(x , 0x1); -IMPLEMENT_TEST(y , 0x2); -IMPLEMENT_TEST(xy , 0x3); -IMPLEMENT_TEST(z , 0x4); 
-IMPLEMENT_TEST(xz , 0x5); -IMPLEMENT_TEST(yz , 0x6); -IMPLEMENT_TEST(xyz , 0x7); -IMPLEMENT_TEST(w , 0x8); -IMPLEMENT_TEST(xw , 0x9); -IMPLEMENT_TEST(yw , 0xa); -IMPLEMENT_TEST(xyw , 0xb); -IMPLEMENT_TEST(zw , 0xc); -IMPLEMENT_TEST(xzw , 0xd); -IMPLEMENT_TEST(yzw , 0xe); -IMPLEMENT_TEST(xyzw , 0xf); - -#undef IMPLEMENT_TEST - - BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0]; - result.uxyzw[1] = _a.uxyzw[1]; - result.uxyzw[2] = _b.uxyzw[0]; - result.uxyzw[3] = _b.uxyzw[1]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _b.uxyzw[0]; - result.uxyzw[1] = _b.uxyzw[1]; - result.uxyzw[2] = _a.uxyzw[0]; - result.uxyzw[3] = _a.uxyzw[1]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _b.uxyzw[2]; - result.uxyzw[1] = _b.uxyzw[3]; - result.uxyzw[2] = _a.uxyzw[2]; - result.uxyzw[3] = _a.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[2]; - result.uxyzw[1] = _a.uxyzw[3]; - result.uxyzw[2] = _b.uxyzw[2]; - result.uxyzw[3] = _b.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0]; - result.uxyzw[1] = _b.uxyzw[0]; - result.uxyzw[2] = _a.uxyzw[1]; - result.uxyzw[3] = _b.uxyzw[1]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[1]; - result.uxyzw[1] = _b.uxyzw[1]; - result.uxyzw[2] = _a.uxyzw[0]; - result.uxyzw[3] = _b.uxyzw[0]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[2]; - result.uxyzw[1] = _b.uxyzw[2]; - 
result.uxyzw[2] = _a.uxyzw[3]; - result.uxyzw[3] = _b.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _b.uxyzw[2]; - result.uxyzw[1] = _a.uxyzw[2]; - result.uxyzw[2] = _b.uxyzw[3]; - result.uxyzw[3] = _a.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float float4_x(float4_t _a) - { - return _a.fxyzw[0]; - } - - BX_FLOAT4_INLINE float float4_y(float4_t _a) - { - return _a.fxyzw[1]; - } - - BX_FLOAT4_INLINE float float4_z(float4_t _a) - { - return _a.fxyzw[2]; - } - - BX_FLOAT4_INLINE float float4_w(float4_t _a) - { - return _a.fxyzw[3]; - } - - BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) - { - return *reinterpret_cast(_ptr); - } - - BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) - { - *reinterpret_cast(_ptr) = _a; - } - - BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) - { - *reinterpret_cast(_ptr) = _a; - } - - BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) - { - float4_t result; - result.fxyzw[0] = _x; - result.fxyzw[1] = _y; - result.fxyzw[2] = _z; - result.fxyzw[3] = _w; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) - { - float4_t result; - result.uxyzw[0] = _x; - result.uxyzw[1] = _y; - result.uxyzw[2] = _z; - result.uxyzw[3] = _w; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr) - { - float val = *reinterpret_cast(_ptr); - return float4_ld(val, val, val, val); - } - - BX_FLOAT4_INLINE float4_t float4_splat(float _a) - { - return float4_ld(_a, _a, _a, _a); - } - - BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) - { - return float4_ild(_a, _a, _a, _a); - } - - BX_FLOAT4_INLINE float4_t float4_zero() - { - return float4_ild(0, 0, 0, 0); - } - - BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a) - { - float4_t result; - result.fxyzw[0] = (float)result.ixyzw[0]; - result.fxyzw[1] = 
(float)result.ixyzw[1]; - result.fxyzw[2] = (float)result.ixyzw[2]; - result.fxyzw[3] = (float)result.ixyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a) - { - float4_t result; - result.ixyzw[0] = (int)result.fxyzw[0]; - result.ixyzw[1] = (int)result.fxyzw[1]; - result.ixyzw[2] = (int)result.fxyzw[2]; - result.ixyzw[3] = (int)result.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_round(float4_t _a) - { - const float4_t tmp = float4_ftoi(_a); - const float4_t result = float4_itof(tmp); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) - { - float4_t result; - result.fxyzw[0] = _a.fxyzw[0] + _b.fxyzw[0]; - result.fxyzw[1] = _a.fxyzw[1] + _b.fxyzw[1]; - result.fxyzw[2] = _a.fxyzw[2] + _b.fxyzw[2]; - result.fxyzw[3] = _a.fxyzw[3] + _b.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) - { - float4_t result; - result.fxyzw[0] = _a.fxyzw[0] - _b.fxyzw[0]; - result.fxyzw[1] = _a.fxyzw[1] - _b.fxyzw[1]; - result.fxyzw[2] = _a.fxyzw[2] - _b.fxyzw[2]; - result.fxyzw[3] = _a.fxyzw[3] - _b.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) - { - float4_t result; - result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0]; - result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1]; - result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2]; - result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b) - { - float4_t result; - result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0]; - result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1]; - result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2]; - result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) - { - float4_t result; - result.fxyzw[0] = 1.0f / _a.fxyzw[0]; - result.fxyzw[1] = 1.0f / _a.fxyzw[1]; - result.fxyzw[2] = 1.0f / _a.fxyzw[2]; - result.fxyzw[3] = 1.0f / 
_a.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a) - { - float4_t result; - result.fxyzw[0] = sqrtf(_a.fxyzw[0]); - result.fxyzw[1] = sqrtf(_a.fxyzw[1]); - result.fxyzw[2] = sqrtf(_a.fxyzw[2]); - result.fxyzw[3] = sqrtf(_a.fxyzw[3]); - return result; - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) - { - float4_t result; - result.fxyzw[0] = 1.0f / sqrtf(_a.fxyzw[0]); - result.fxyzw[1] = 1.0f / sqrtf(_a.fxyzw[1]); - result.fxyzw[2] = 1.0f / sqrtf(_a.fxyzw[2]); - result.fxyzw[3] = 1.0f / sqrtf(_a.fxyzw[3]); - return result; - } - - BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.fxyzw[0] == _b.fxyzw[0] ? 0xffffffff : 0x0; - result.ixyzw[1] = _a.fxyzw[1] == _b.fxyzw[1] ? 0xffffffff : 0x0; - result.ixyzw[2] = _a.fxyzw[2] == _b.fxyzw[2] ? 0xffffffff : 0x0; - result.ixyzw[3] = _a.fxyzw[3] == _b.fxyzw[3] ? 0xffffffff : 0x0; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? 0xffffffff : 0x0; - result.ixyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? 0xffffffff : 0x0; - result.ixyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? 0xffffffff : 0x0; - result.ixyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? 0xffffffff : 0x0; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.fxyzw[0] <= _b.fxyzw[0] ? 0xffffffff : 0x0; - result.ixyzw[1] = _a.fxyzw[1] <= _b.fxyzw[1] ? 0xffffffff : 0x0; - result.ixyzw[2] = _a.fxyzw[2] <= _b.fxyzw[2] ? 0xffffffff : 0x0; - result.ixyzw[3] = _a.fxyzw[3] <= _b.fxyzw[3] ? 0xffffffff : 0x0; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? 0xffffffff : 0x0; - result.ixyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? 0xffffffff : 0x0; - result.ixyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? 
0xffffffff : 0x0; - result.ixyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 0xffffffff : 0x0; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0; - result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0; - result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0; - result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 0xffffffff : 0x0; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b) - { - float4_t result; - result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; - result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1]; - result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2]; - result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b) - { - float4_t result; - result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; - result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1]; - result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2]; - result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 
_a.fxyzw[3] : _b.fxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0]; - result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1]; - result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2]; - result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0]; - result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1]; - result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2]; - result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0]; - result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1]; - result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2]; - result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0]; - result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1]; - result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2]; - result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0] << _count; - result.uxyzw[1] = _a.uxyzw[1] << _count; - result.uxyzw[2] = _a.uxyzw[2] << _count; - result.uxyzw[3] = _a.uxyzw[3] << _count; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) - { - float4_t result; - result.uxyzw[0] = _a.uxyzw[0] >> _count; - result.uxyzw[1] = _a.uxyzw[1] >> _count; - result.uxyzw[2] = _a.uxyzw[2] >> _count; - result.uxyzw[3] = _a.uxyzw[3] >> _count; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) - { - float4_t result; - result.ixyzw[0] = _a.ixyzw[0] >> _count; - result.ixyzw[1] = _a.ixyzw[1] >> _count; - 
result.ixyzw[2] = _a.ixyzw[2] >> _count; - result.ixyzw[3] = _a.ixyzw[3] >> _count; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0]; - result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1]; - result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2]; - result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3]; - return result; - } - - BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) - { - float4_t result; - result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0]; - result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1]; - result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2]; - result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3]; - return result; - } - -} // namespace bx - -#define float4_shuf_xAzC float4_shuf_xAzC_ni -#define float4_shuf_yBwD float4_shuf_yBwD_ni -#define float4_rcp float4_rcp_ni -#define float4_orx float4_orx_ni -#define float4_orc float4_orc_ni -#define float4_neg float4_neg_ni -#define float4_madd float4_madd_ni -#define float4_nmsub float4_nmsub_ni -#define float4_div_nr float4_div_nr_ni -#define float4_selb float4_selb_ni -#define float4_sels float4_sels_ni -#define float4_not float4_not_ni -#define float4_abs float4_abs_ni -#define float4_clamp float4_clamp_ni -#define float4_lerp float4_lerp_ni -#define float4_rsqrt float4_rsqrt_ni -#define float4_rsqrt_nr float4_rsqrt_nr_ni -#define float4_rsqrt_carmack float4_rsqrt_carmack_ni -#define float4_sqrt_nr float4_sqrt_nr_ni -#define float4_log2 float4_log2_ni -#define float4_exp2 float4_exp2_ni -#define float4_pow float4_pow_ni -#define float4_cross3 float4_cross3_ni -#define float4_normalize3 float4_normalize3_ni -#define float4_dot3 float4_dot3_ni -#define float4_dot float4_dot_ni -#define float4_ceil float4_ceil_ni -#define float4_floor float4_floor_ni -#include "float4_ni.h" - -#endif // __BX_FLOAT4_REF_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FLOAT4_REF_H__ +#define __BX_FLOAT4_REF_H__ + +#include // sqrtf + +namespace bx +{ + typedef union float4_t + { + int32_t ixyzw[4]; + uint32_t uxyzw[4]; + float fxyzw[4]; + + } float4_t; + +#define ELEMx 0 +#define ELEMy 1 +#define ELEMz 2 +#define ELEMw 3 +#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ + BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ + { \ + float4_t result; \ + result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \ + result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \ + result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \ + result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \ + return result; \ + } + +#include "float4_swizzle.inl" + +#undef IMPLEMENT_SWIZZLE +#undef ELEMw +#undef ELEMz +#undef ELEMy +#undef ELEMx + +#define IMPLEMENT_TEST(_xyzw, _mask) \ + BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \ + { \ + uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \ + | ( (_test.uxyzw[2]>>31)<<2) \ + | ( (_test.uxyzw[1]>>31)<<1) \ + | (_test.uxyzw[0]>>31) \ + ; \ + return 0 != (tmp&(_mask) ); \ + } \ + \ + BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \ + { \ + uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \ + | ( (_test.uxyzw[2]>>31)<<2) \ + | ( (_test.uxyzw[1]>>31)<<1) \ + | (_test.uxyzw[0]>>31) \ + ; \ + return (_mask) == (tmp&(_mask) ); \ + } + +IMPLEMENT_TEST(x , 0x1); +IMPLEMENT_TEST(y , 0x2); +IMPLEMENT_TEST(xy , 0x3); +IMPLEMENT_TEST(z , 0x4); +IMPLEMENT_TEST(xz , 0x5); +IMPLEMENT_TEST(yz , 0x6); +IMPLEMENT_TEST(xyz , 0x7); +IMPLEMENT_TEST(w , 0x8); +IMPLEMENT_TEST(xw , 0x9); +IMPLEMENT_TEST(yw , 0xa); +IMPLEMENT_TEST(xyw , 0xb); +IMPLEMENT_TEST(zw , 0xc); +IMPLEMENT_TEST(xzw , 0xd); +IMPLEMENT_TEST(yzw , 0xe); +IMPLEMENT_TEST(xyzw , 0xf); + +#undef IMPLEMENT_TEST + + BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0]; + result.uxyzw[1] = _a.uxyzw[1]; + result.uxyzw[2] = _b.uxyzw[0]; + result.uxyzw[3] 
= _b.uxyzw[1]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _b.uxyzw[0]; + result.uxyzw[1] = _b.uxyzw[1]; + result.uxyzw[2] = _a.uxyzw[0]; + result.uxyzw[3] = _a.uxyzw[1]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _b.uxyzw[2]; + result.uxyzw[1] = _b.uxyzw[3]; + result.uxyzw[2] = _a.uxyzw[2]; + result.uxyzw[3] = _a.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[2]; + result.uxyzw[1] = _a.uxyzw[3]; + result.uxyzw[2] = _b.uxyzw[2]; + result.uxyzw[3] = _b.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0]; + result.uxyzw[1] = _b.uxyzw[0]; + result.uxyzw[2] = _a.uxyzw[1]; + result.uxyzw[3] = _b.uxyzw[1]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[1]; + result.uxyzw[1] = _b.uxyzw[1]; + result.uxyzw[2] = _a.uxyzw[0]; + result.uxyzw[3] = _b.uxyzw[0]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[2]; + result.uxyzw[1] = _b.uxyzw[2]; + result.uxyzw[2] = _a.uxyzw[3]; + result.uxyzw[3] = _b.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _b.uxyzw[2]; + result.uxyzw[1] = _a.uxyzw[2]; + result.uxyzw[2] = _b.uxyzw[3]; + result.uxyzw[3] = _a.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float float4_x(float4_t _a) + { + return _a.fxyzw[0]; + } + + BX_FLOAT4_INLINE float float4_y(float4_t _a) + { + return _a.fxyzw[1]; + } + + BX_FLOAT4_INLINE float float4_z(float4_t _a) + { + return 
_a.fxyzw[2]; + } + + BX_FLOAT4_INLINE float float4_w(float4_t _a) + { + return _a.fxyzw[3]; + } + + BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) + { + return *reinterpret_cast(_ptr); + } + + BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) + { + *reinterpret_cast(_ptr) = _a; + } + + BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) + { + *reinterpret_cast(_ptr) = _a; + } + + BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) + { + float4_t result; + result.fxyzw[0] = _x; + result.fxyzw[1] = _y; + result.fxyzw[2] = _z; + result.fxyzw[3] = _w; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) + { + float4_t result; + result.uxyzw[0] = _x; + result.uxyzw[1] = _y; + result.uxyzw[2] = _z; + result.uxyzw[3] = _w; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr) + { + float val = *reinterpret_cast(_ptr); + return float4_ld(val, val, val, val); + } + + BX_FLOAT4_INLINE float4_t float4_splat(float _a) + { + return float4_ld(_a, _a, _a, _a); + } + + BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) + { + return float4_ild(_a, _a, _a, _a); + } + + BX_FLOAT4_INLINE float4_t float4_zero() + { + return float4_ild(0, 0, 0, 0); + } + + BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a) + { + float4_t result; + result.fxyzw[0] = (float)result.ixyzw[0]; + result.fxyzw[1] = (float)result.ixyzw[1]; + result.fxyzw[2] = (float)result.ixyzw[2]; + result.fxyzw[3] = (float)result.ixyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a) + { + float4_t result; + result.ixyzw[0] = (int)result.fxyzw[0]; + result.ixyzw[1] = (int)result.fxyzw[1]; + result.ixyzw[2] = (int)result.fxyzw[2]; + result.ixyzw[3] = (int)result.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_round(float4_t _a) + { + const float4_t tmp = float4_ftoi(_a); + const float4_t result = float4_itof(tmp); + + return result; + } + + 
BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) + { + float4_t result; + result.fxyzw[0] = _a.fxyzw[0] + _b.fxyzw[0]; + result.fxyzw[1] = _a.fxyzw[1] + _b.fxyzw[1]; + result.fxyzw[2] = _a.fxyzw[2] + _b.fxyzw[2]; + result.fxyzw[3] = _a.fxyzw[3] + _b.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) + { + float4_t result; + result.fxyzw[0] = _a.fxyzw[0] - _b.fxyzw[0]; + result.fxyzw[1] = _a.fxyzw[1] - _b.fxyzw[1]; + result.fxyzw[2] = _a.fxyzw[2] - _b.fxyzw[2]; + result.fxyzw[3] = _a.fxyzw[3] - _b.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) + { + float4_t result; + result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0]; + result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1]; + result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2]; + result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b) + { + float4_t result; + result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0]; + result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1]; + result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2]; + result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) + { + float4_t result; + result.fxyzw[0] = 1.0f / _a.fxyzw[0]; + result.fxyzw[1] = 1.0f / _a.fxyzw[1]; + result.fxyzw[2] = 1.0f / _a.fxyzw[2]; + result.fxyzw[3] = 1.0f / _a.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a) + { + float4_t result; + result.fxyzw[0] = sqrtf(_a.fxyzw[0]); + result.fxyzw[1] = sqrtf(_a.fxyzw[1]); + result.fxyzw[2] = sqrtf(_a.fxyzw[2]); + result.fxyzw[3] = sqrtf(_a.fxyzw[3]); + return result; + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) + { + float4_t result; + result.fxyzw[0] = 1.0f / sqrtf(_a.fxyzw[0]); + result.fxyzw[1] = 1.0f / sqrtf(_a.fxyzw[1]); + result.fxyzw[2] = 1.0f / sqrtf(_a.fxyzw[2]); + result.fxyzw[3] = 1.0f / sqrtf(_a.fxyzw[3]); + return 
result; + } + + BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.fxyzw[0] == _b.fxyzw[0] ? 0xffffffff : 0x0; + result.ixyzw[1] = _a.fxyzw[1] == _b.fxyzw[1] ? 0xffffffff : 0x0; + result.ixyzw[2] = _a.fxyzw[2] == _b.fxyzw[2] ? 0xffffffff : 0x0; + result.ixyzw[3] = _a.fxyzw[3] == _b.fxyzw[3] ? 0xffffffff : 0x0; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? 0xffffffff : 0x0; + result.ixyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? 0xffffffff : 0x0; + result.ixyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? 0xffffffff : 0x0; + result.ixyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? 0xffffffff : 0x0; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.fxyzw[0] <= _b.fxyzw[0] ? 0xffffffff : 0x0; + result.ixyzw[1] = _a.fxyzw[1] <= _b.fxyzw[1] ? 0xffffffff : 0x0; + result.ixyzw[2] = _a.fxyzw[2] <= _b.fxyzw[2] ? 0xffffffff : 0x0; + result.ixyzw[3] = _a.fxyzw[3] <= _b.fxyzw[3] ? 0xffffffff : 0x0; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? 0xffffffff : 0x0; + result.ixyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? 0xffffffff : 0x0; + result.ixyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? 0xffffffff : 0x0; + result.ixyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 0xffffffff : 0x0; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0; + result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0; + result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0; + result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 
0xffffffff : 0x0; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b) + { + float4_t result; + result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; + result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1]; + result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2]; + result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b) + { + float4_t result; + result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; + result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1]; + result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2]; + result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0]; + result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1]; + result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2]; + result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0]; + result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1]; + result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2]; + result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0]; + result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1]; + result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2]; + result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0]; + result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1]; + result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2]; + 
result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0] << _count; + result.uxyzw[1] = _a.uxyzw[1] << _count; + result.uxyzw[2] = _a.uxyzw[2] << _count; + result.uxyzw[3] = _a.uxyzw[3] << _count; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) + { + float4_t result; + result.uxyzw[0] = _a.uxyzw[0] >> _count; + result.uxyzw[1] = _a.uxyzw[1] >> _count; + result.uxyzw[2] = _a.uxyzw[2] >> _count; + result.uxyzw[3] = _a.uxyzw[3] >> _count; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) + { + float4_t result; + result.ixyzw[0] = _a.ixyzw[0] >> _count; + result.ixyzw[1] = _a.ixyzw[1] >> _count; + result.ixyzw[2] = _a.ixyzw[2] >> _count; + result.ixyzw[3] = _a.ixyzw[3] >> _count; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0]; + result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1]; + result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2]; + result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3]; + return result; + } + + BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) + { + float4_t result; + result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0]; + result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1]; + result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2]; + result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3]; + return result; + } + +} // namespace bx + +#define float4_shuf_xAzC float4_shuf_xAzC_ni +#define float4_shuf_yBwD float4_shuf_yBwD_ni +#define float4_rcp float4_rcp_ni +#define float4_orx float4_orx_ni +#define float4_orc float4_orc_ni +#define float4_neg float4_neg_ni +#define float4_madd float4_madd_ni +#define float4_nmsub float4_nmsub_ni +#define float4_div_nr float4_div_nr_ni +#define float4_selb float4_selb_ni +#define float4_sels float4_sels_ni +#define float4_not float4_not_ni +#define 
float4_abs float4_abs_ni +#define float4_clamp float4_clamp_ni +#define float4_lerp float4_lerp_ni +#define float4_rsqrt float4_rsqrt_ni +#define float4_rsqrt_nr float4_rsqrt_nr_ni +#define float4_rsqrt_carmack float4_rsqrt_carmack_ni +#define float4_sqrt_nr float4_sqrt_nr_ni +#define float4_log2 float4_log2_ni +#define float4_exp2 float4_exp2_ni +#define float4_pow float4_pow_ni +#define float4_cross3 float4_cross3_ni +#define float4_normalize3 float4_normalize3_ni +#define float4_dot3 float4_dot3_ni +#define float4_dot float4_dot_ni +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni +#include "float4_ni.h" + +#endif // __BX_FLOAT4_REF_H__ diff --git a/include/bx/float4_sse.h b/include/bx/float4_sse.h index 7936298..4e33781 100644 --- a/include/bx/float4_sse.h +++ b/include/bx/float4_sse.h @@ -1,401 +1,401 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FLOAT4_SSE_H__ -#define __BX_FLOAT4_SSE_H__ - -#include // __m128i -#if defined(__SSE4_1__) -# include -#endif // defined(__SSE4_1__) -#include // __m128 - -namespace bx -{ - - typedef __m128 float4_t; - -#define ELEMx 0 -#define ELEMy 1 -#define ELEMz 2 -#define ELEMw 3 -#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ - BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ - { \ - return _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \ - } - -#include "float4_swizzle.inl" - -#undef IMPLEMENT_SWIZZLE -#undef ELEMw -#undef ELEMz -#undef ELEMy -#undef ELEMx - -#define IMPLEMENT_TEST(_xyzw, _mask) \ - BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \ - { \ - return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \ - } \ - \ - BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \ - { \ - return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \ - } - -IMPLEMENT_TEST(x , 0x1); -IMPLEMENT_TEST(y , 0x2); -IMPLEMENT_TEST(xy , 0x3); 
-IMPLEMENT_TEST(z , 0x4); -IMPLEMENT_TEST(xz , 0x5); -IMPLEMENT_TEST(yz , 0x6); -IMPLEMENT_TEST(xyz , 0x7); -IMPLEMENT_TEST(w , 0x8); -IMPLEMENT_TEST(xw , 0x9); -IMPLEMENT_TEST(yw , 0xa); -IMPLEMENT_TEST(xyw , 0xb); -IMPLEMENT_TEST(zw , 0xc); -IMPLEMENT_TEST(xzw , 0xd); -IMPLEMENT_TEST(yzw , 0xe); -IMPLEMENT_TEST(xyzw , 0xf); - -#undef IMPLEMENT_TEST - - BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) - { - return _mm_movelh_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b) - { - return _mm_movelh_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) - { - return _mm_movehl_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) - { - return _mm_movehl_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) - { - return _mm_unpacklo_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) - { - return _mm_unpacklo_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) - { - return _mm_unpackhi_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) - { - return _mm_unpackhi_ps(_b, _a); - } - - BX_FLOAT4_INLINE float float4_x(float4_t _a) - { - return _mm_cvtss_f32(_a); - } - - BX_FLOAT4_INLINE float float4_y(float4_t _a) - { - const float4_t yyyy = float4_swiz_yyyy(_a); - const float result = _mm_cvtss_f32(yyyy); - - return result; - } - - BX_FLOAT4_INLINE float float4_z(float4_t _a) - { - const float4_t zzzz = float4_swiz_zzzz(_a); - const float result = _mm_cvtss_f32(zzzz); - - return result; - } - - BX_FLOAT4_INLINE float float4_w(float4_t _a) - { - const float4_t wwww = float4_swiz_wwww(_a); - const float result = _mm_cvtss_f32(wwww); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) - { - return _mm_load_ps(reinterpret_cast(_ptr) ); - } - - BX_FLOAT4_INLINE void 
float4_st(void* _ptr, float4_t _a) - { - _mm_store_ps(reinterpret_cast(_ptr), _a); - } - - BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) - { - _mm_stream_ps(reinterpret_cast(_ptr), _a); - } - - BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) - { - return _mm_set_ps(_w, _z, _y, _x); - } - - BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) - { - const __m128i set = _mm_set_epi32(_w, _z, _y, _x); - const float4_t result = _mm_castsi128_ps(set); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr) - { - const float4_t x___ = _mm_load_ss(reinterpret_cast(_ptr) ); - const float4_t result = float4_swiz_xxxx(x___); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_splat(float _a) - { - return _mm_set1_ps(_a); - } - - BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) - { - const __m128i splat = _mm_set1_epi32(_a); - const float4_t result = _mm_castsi128_ps(splat); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_zero() - { - return _mm_setzero_ps(); - } - - BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a) - { - const __m128i itof = _mm_castps_si128(_a); - const float4_t result = _mm_cvtepi32_ps(itof); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a) - { - const __m128i ftoi = _mm_cvtps_epi32(_a); - const float4_t result = _mm_castsi128_ps(ftoi); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_round(float4_t _a) - { -#if defined(__SSE4_1__) - return _mm_round_ps(_a, _MM_FROUND_NINT); -#else - const __m128i round = _mm_cvtps_epi32(_a); - const float4_t result = _mm_cvtepi32_ps(round); - - return result; -#endif // defined(__SSE4_1__) - } - - BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) - { - return _mm_add_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) - { - return _mm_sub_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, 
float4_t _b) - { - return _mm_mul_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b) - { - return _mm_div_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) - { - return _mm_rcp_ps(_a); - } - - BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a) - { - return _mm_sqrt_ps(_a); - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) - { - return _mm_rsqrt_ps(_a); - } - -#if defined(__SSE4_1__) - BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, float4_t _b) - { - return _mm_dp_ps(_a, _b, 0x77); - } - - BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b) - { - return _mm_dp_ps(_a, _b, 0xFF); - } -#endif // defined(__SSE4__) - - BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b) - { - return _mm_cmpeq_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b) - { - return _mm_cmplt_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b) - { - return _mm_cmple_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b) - { - return _mm_cmpgt_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b) - { - return _mm_cmpge_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b) - { - return _mm_min_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b) - { - return _mm_max_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) - { - return _mm_and_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) - { - return _mm_andnot_ps(_b, _a); - } - - BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) - { - return _mm_or_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b) - { - return _mm_xor_ps(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) - { - const __m128i a = _mm_castps_si128(_a); - const __m128i 
shift = _mm_slli_epi32(a, _count); - const float4_t result = _mm_castsi128_ps(shift); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) - { - const __m128i a = _mm_castps_si128(_a); - const __m128i shift = _mm_srli_epi32(a, _count); - const float4_t result = _mm_castsi128_ps(shift); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) - { - const __m128i a = _mm_castps_si128(_a); - const __m128i shift = _mm_srai_epi32(a, _count); - const float4_t result = _mm_castsi128_ps(shift); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) - { - const __m128i a = _mm_castps_si128(_a); - const __m128i b = _mm_castps_si128(_b); - const __m128i add = _mm_add_epi32(a, b); - const float4_t result = _mm_castsi128_ps(add); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) - { - const __m128i a = _mm_castps_si128(_a); - const __m128i b = _mm_castps_si128(_b); - const __m128i sub = _mm_sub_epi32(a, b); - const float4_t result = _mm_castsi128_ps(sub); - - return result; - } - -} // namespace bx - -#define float4_shuf_xAzC float4_shuf_xAzC_ni -#define float4_shuf_yBwD float4_shuf_yBwD_ni -#define float4_rcp float4_rcp_ni -#define float4_orx float4_orx_ni -#define float4_orc float4_orc_ni -#define float4_neg float4_neg_ni -#define float4_madd float4_madd_ni -#define float4_nmsub float4_nmsub_ni -#define float4_div_nr float4_div_nr_ni -#define float4_selb float4_selb_ni -#define float4_sels float4_sels_ni -#define float4_not float4_not_ni -#define float4_abs float4_abs_ni -#define float4_clamp float4_clamp_ni -#define float4_lerp float4_lerp_ni -#define float4_rsqrt float4_rsqrt_ni -#define float4_rsqrt_nr float4_rsqrt_nr_ni -#define float4_rsqrt_carmack float4_rsqrt_carmack_ni -#define float4_sqrt_nr float4_sqrt_nr_ni -#define float4_log2 float4_log2_ni -#define float4_exp2 float4_exp2_ni -#define float4_pow float4_pow_ni -#define 
float4_cross3 float4_cross3_ni -#define float4_normalize3 float4_normalize3_ni -#if !defined(__SSE4_1__) -#define float4_dot3 float4_dot3_ni -#define float4_dot float4_dot_ni -#endif // defined(__SSE4_1__) -#define float4_ceil float4_ceil_ni -#define float4_floor float4_floor_ni -#include "float4_ni.h" - -#endif // __FLOAT4_SSE_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FLOAT4_SSE_H__ +#define __BX_FLOAT4_SSE_H__ + +#include // __m128i +#if defined(__SSE4_1__) +# include +#endif // defined(__SSE4_1__) +#include // __m128 + +namespace bx +{ + + typedef __m128 float4_t; + +#define ELEMx 0 +#define ELEMy 1 +#define ELEMz 2 +#define ELEMw 3 +#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ + BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ + { \ + return _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \ + } + +#include "float4_swizzle.inl" + +#undef IMPLEMENT_SWIZZLE +#undef ELEMw +#undef ELEMz +#undef ELEMy +#undef ELEMx + +#define IMPLEMENT_TEST(_xyzw, _mask) \ + BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \ + { \ + return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \ + } \ + \ + BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \ + { \ + return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \ + } + +IMPLEMENT_TEST(x , 0x1); +IMPLEMENT_TEST(y , 0x2); +IMPLEMENT_TEST(xy , 0x3); +IMPLEMENT_TEST(z , 0x4); +IMPLEMENT_TEST(xz , 0x5); +IMPLEMENT_TEST(yz , 0x6); +IMPLEMENT_TEST(xyz , 0x7); +IMPLEMENT_TEST(w , 0x8); +IMPLEMENT_TEST(xw , 0x9); +IMPLEMENT_TEST(yw , 0xa); +IMPLEMENT_TEST(xyw , 0xb); +IMPLEMENT_TEST(zw , 0xc); +IMPLEMENT_TEST(xzw , 0xd); +IMPLEMENT_TEST(yzw , 0xe); +IMPLEMENT_TEST(xyzw , 0xf); + +#undef IMPLEMENT_TEST + + BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) + { + return _mm_movelh_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t 
float4_shuf_ABxy(float4_t _a, float4_t _b) + { + return _mm_movelh_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) + { + return _mm_movehl_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) + { + return _mm_movehl_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) + { + return _mm_unpacklo_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) + { + return _mm_unpacklo_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) + { + return _mm_unpackhi_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) + { + return _mm_unpackhi_ps(_b, _a); + } + + BX_FLOAT4_INLINE float float4_x(float4_t _a) + { + return _mm_cvtss_f32(_a); + } + + BX_FLOAT4_INLINE float float4_y(float4_t _a) + { + const float4_t yyyy = float4_swiz_yyyy(_a); + const float result = _mm_cvtss_f32(yyyy); + + return result; + } + + BX_FLOAT4_INLINE float float4_z(float4_t _a) + { + const float4_t zzzz = float4_swiz_zzzz(_a); + const float result = _mm_cvtss_f32(zzzz); + + return result; + } + + BX_FLOAT4_INLINE float float4_w(float4_t _a) + { + const float4_t wwww = float4_swiz_wwww(_a); + const float result = _mm_cvtss_f32(wwww); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) + { + return _mm_load_ps(reinterpret_cast(_ptr) ); + } + + BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) + { + _mm_store_ps(reinterpret_cast(_ptr), _a); + } + + BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) + { + _mm_stream_ps(reinterpret_cast(_ptr), _a); + } + + BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) + { + return _mm_set_ps(_w, _z, _y, _x); + } + + BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) + { + const __m128i set = _mm_set_epi32(_w, _z, _y, _x); + const float4_t result = 
_mm_castsi128_ps(set); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr) + { + const float4_t x___ = _mm_load_ss(reinterpret_cast(_ptr) ); + const float4_t result = float4_swiz_xxxx(x___); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_splat(float _a) + { + return _mm_set1_ps(_a); + } + + BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) + { + const __m128i splat = _mm_set1_epi32(_a); + const float4_t result = _mm_castsi128_ps(splat); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_zero() + { + return _mm_setzero_ps(); + } + + BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a) + { + const __m128i itof = _mm_castps_si128(_a); + const float4_t result = _mm_cvtepi32_ps(itof); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a) + { + const __m128i ftoi = _mm_cvtps_epi32(_a); + const float4_t result = _mm_castsi128_ps(ftoi); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_round(float4_t _a) + { +#if defined(__SSE4_1__) + return _mm_round_ps(_a, _MM_FROUND_NINT); +#else + const __m128i round = _mm_cvtps_epi32(_a); + const float4_t result = _mm_cvtepi32_ps(round); + + return result; +#endif // defined(__SSE4_1__) + } + + BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) + { + return _mm_add_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) + { + return _mm_sub_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) + { + return _mm_mul_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b) + { + return _mm_div_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) + { + return _mm_rcp_ps(_a); + } + + BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a) + { + return _mm_sqrt_ps(_a); + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) + { + return _mm_rsqrt_ps(_a); + } + +#if defined(__SSE4_1__) + BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, 
float4_t _b) + { + return _mm_dp_ps(_a, _b, 0x77); + } + + BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b) + { + return _mm_dp_ps(_a, _b, 0xFF); + } +#endif // defined(__SSE4__) + + BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b) + { + return _mm_cmpeq_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b) + { + return _mm_cmplt_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b) + { + return _mm_cmple_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b) + { + return _mm_cmpgt_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b) + { + return _mm_cmpge_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b) + { + return _mm_min_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b) + { + return _mm_max_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) + { + return _mm_and_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) + { + return _mm_andnot_ps(_b, _a); + } + + BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) + { + return _mm_or_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b) + { + return _mm_xor_ps(_a, _b); + } + + BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) + { + const __m128i a = _mm_castps_si128(_a); + const __m128i shift = _mm_slli_epi32(a, _count); + const float4_t result = _mm_castsi128_ps(shift); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) + { + const __m128i a = _mm_castps_si128(_a); + const __m128i shift = _mm_srli_epi32(a, _count); + const float4_t result = _mm_castsi128_ps(shift); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) + { + const __m128i a = _mm_castps_si128(_a); + const __m128i shift = _mm_srai_epi32(a, _count); + 
const float4_t result = _mm_castsi128_ps(shift); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) + { + const __m128i a = _mm_castps_si128(_a); + const __m128i b = _mm_castps_si128(_b); + const __m128i add = _mm_add_epi32(a, b); + const float4_t result = _mm_castsi128_ps(add); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) + { + const __m128i a = _mm_castps_si128(_a); + const __m128i b = _mm_castps_si128(_b); + const __m128i sub = _mm_sub_epi32(a, b); + const float4_t result = _mm_castsi128_ps(sub); + + return result; + } + +} // namespace bx + +#define float4_shuf_xAzC float4_shuf_xAzC_ni +#define float4_shuf_yBwD float4_shuf_yBwD_ni +#define float4_rcp float4_rcp_ni +#define float4_orx float4_orx_ni +#define float4_orc float4_orc_ni +#define float4_neg float4_neg_ni +#define float4_madd float4_madd_ni +#define float4_nmsub float4_nmsub_ni +#define float4_div_nr float4_div_nr_ni +#define float4_selb float4_selb_ni +#define float4_sels float4_sels_ni +#define float4_not float4_not_ni +#define float4_abs float4_abs_ni +#define float4_clamp float4_clamp_ni +#define float4_lerp float4_lerp_ni +#define float4_rsqrt float4_rsqrt_ni +#define float4_rsqrt_nr float4_rsqrt_nr_ni +#define float4_rsqrt_carmack float4_rsqrt_carmack_ni +#define float4_sqrt_nr float4_sqrt_nr_ni +#define float4_log2 float4_log2_ni +#define float4_exp2 float4_exp2_ni +#define float4_pow float4_pow_ni +#define float4_cross3 float4_cross3_ni +#define float4_normalize3 float4_normalize3_ni +#if !defined(__SSE4_1__) +#define float4_dot3 float4_dot3_ni +#define float4_dot float4_dot_ni +#endif // defined(__SSE4_1__) +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni +#include "float4_ni.h" + +#endif // __FLOAT4_SSE_H__ diff --git a/include/bx/float4_t.h b/include/bx/float4_t.h index 83a0775..21d001f 100644 --- a/include/bx/float4_t.h +++ b/include/bx/float4_t.h @@ -1,21 +1,21 @@ -/* - * 
Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FLOAT4_T_H__ -#define __BX_FLOAT4_T_H__ - -#include "bx.h" - -#define BX_FLOAT4_INLINE BX_FORCE_INLINE - -#if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) ) -# include "float4_sse.h" -#elif 0 // __ARM_NEON__ -# include "float4_neon.h" -#else -# include "float4_ref.h" -#endif // - -#endif // __BX_FLOAT4_T_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FLOAT4_T_H__ +#define __BX_FLOAT4_T_H__ + +#include "bx.h" + +#define BX_FLOAT4_INLINE BX_FORCE_INLINE + +#if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) ) +# include "float4_sse.h" +#elif 0 // __ARM_NEON__ +# include "float4_neon.h" +#else +# include "float4_ref.h" +#endif // + +#endif // __BX_FLOAT4_T_H__ diff --git a/include/bx/float4x4_t.h b/include/bx/float4x4_t.h index a552425..d99c3c1 100644 --- a/include/bx/float4x4_t.h +++ b/include/bx/float4x4_t.h @@ -1,168 +1,168 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FLOAT4X4_H__ -#define __BX_FLOAT4x4_H__ - -#include "float4_t.h" - -namespace bx -{ - typedef BX_ALIGN_STRUCT_16(struct) - { - float4_t col[4]; - - } float4x4_t; - - BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b) - { - const float4_t xxxx = float4_swiz_xxxx(_a); - const float4_t yyyy = float4_swiz_yyyy(_a); - const float4_t zzzz = float4_swiz_zzzz(_a); - const float4_t col0 = float4_mul(_b.col[0], xxxx); - const float4_t col1 = float4_mul(_b.col[1], yyyy); - const float4_t col2 = float4_madd(_b.col[2], zzzz, col0); - const float4_t col3 = float4_add(_b.col[3], col1); - const float4_t result = float4_add(col2, col3); - - return result; - } - - BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b) - { - const float4_t xxxx = float4_swiz_xxxx(_a); - const float4_t yyyy = float4_swiz_yyyy(_a); - const float4_t zzzz = float4_swiz_zzzz(_a); - const float4_t wwww = float4_swiz_wwww(_a); - const float4_t col0 = float4_mul(_b.col[0], xxxx); - const float4_t col1 = float4_mul(_b.col[1], yyyy); - const float4_t col2 = float4_madd(_b.col[2], zzzz, col0); - const float4_t col3 = float4_madd(_b.col[3], wwww, col1); - const float4_t result = float4_add(col2, col3); - - return result; - } - - BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b) - { - float4x4_t result; - result.col[0] = float4_mul(_a.col[0], _b); - result.col[1] = float4_mul(_a.col[1], _b); - result.col[2] = float4_mul(_a.col[2], _b); - result.col[3] = float4_mul(_a.col[3], _b); - - return result; - } - - BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx) - { - const float4_t aibj = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // aibj - const float4_t emfn = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // emfn - const float4_t ckdl = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // ckdl - const float4_t gohp = float4_shuf_zCwD(_mtx.col[1], 
_mtx.col[3]); // gohp - float4x4_t result; - result.col[0] = float4_shuf_xAyB(aibj, emfn); // aeim - result.col[1] = float4_shuf_zCwD(aibj, emfn); // bfjn - result.col[2] = float4_shuf_xAyB(ckdl, gohp); // cgko - result.col[3] = float4_shuf_zCwD(ckdl, gohp); // dhlp - - return result; - } - - BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a) - { - const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]); - const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]); - const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]); - const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]); - const float4_t t0 = float4_shuf_xyAB(tmp0, tmp1); - const float4_t t1 = float4_shuf_xyAB(tmp3, tmp2); - const float4_t t2 = float4_shuf_zwCD(tmp0, tmp1); - const float4_t t3 = float4_shuf_zwCD(tmp3, tmp2); - - const float4_t t23 = float4_mul(t2, t3); - const float4_t t23_yxwz = float4_swiz_yxwz(t23); - const float4_t t23_wzyx = float4_swiz_wzyx(t23); - - float4_t cof0, cof1, cof2, cof3; - - const float4_t zero = float4_zero(); - cof0 = float4_nmsub(t1, t23_yxwz, zero); - cof0 = float4_madd(t1, t23_wzyx, cof0); - - cof1 = float4_nmsub(t0, t23_yxwz, zero); - cof1 = float4_madd(t0, t23_wzyx, cof1); - cof1 = float4_swiz_zwxy(cof1); - - const float4_t t12 = float4_mul(t1, t2); - const float4_t t12_yxwz = float4_swiz_yxwz(t12); - const float4_t t12_wzyx = float4_swiz_wzyx(t12); - - cof0 = float4_madd(t3, t12_yxwz, cof0); - cof0 = float4_nmsub(t3, t12_wzyx, cof0); - - cof3 = float4_mul(t0, t12_yxwz); - cof3 = float4_nmsub(t0, t12_wzyx, cof3); - cof3 = float4_swiz_zwxy(cof3); - - const float4_t t1_zwxy = float4_swiz_zwxy(t1); - const float4_t t2_zwxy = float4_swiz_zwxy(t2); - - const float4_t t13 = float4_mul(t1_zwxy, t3); - const float4_t t13_yxwz = float4_swiz_yxwz(t13); - const float4_t t13_wzyx = float4_swiz_wzyx(t13); - - cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0); - cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0); - - cof2 = float4_mul(t0, t13_yxwz); - cof2 
= float4_nmsub(t0, t13_wzyx, cof2); - cof2 = float4_swiz_zwxy(cof2); - - const float4_t t01 = float4_mul(t0, t1); - const float4_t t01_yxwz = float4_swiz_yxwz(t01); - const float4_t t01_wzyx = float4_swiz_wzyx(t01); - - cof2 = float4_nmsub(t3, t01_yxwz, cof2); - cof2 = float4_madd(t3, t01_wzyx, cof2); - - cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3); - cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3); - - const float4_t t03 = float4_mul(t0, t3); - const float4_t t03_yxwz = float4_swiz_yxwz(t03); - const float4_t t03_wzyx = float4_swiz_wzyx(t03); - - cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1); - cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1); - - cof2 = float4_madd(t1, t03_yxwz, cof2); - cof2 = float4_nmsub(t1, t03_wzyx, cof2); - - const float4_t t02 = float4_mul(t0, t2_zwxy); - const float4_t t02_yxwz = float4_swiz_yxwz(t02); - const float4_t t02_wzyx = float4_swiz_wzyx(t02); - - cof1 = float4_madd(t3, t02_yxwz, cof1); - cof1 = float4_nmsub(t3, t02_wzyx, cof1); - - cof3 = float4_nmsub(t1, t02_yxwz, cof3); - cof3 = float4_madd(t1, t02_wzyx, cof3); - - const float4_t det = float4_dot(t0, cof0); - const float4_t invdet = float4_rcp(det); - - float4x4_t result; - result.col[0] = float4_mul(cof0, invdet); - result.col[1] = float4_mul(cof1, invdet); - result.col[2] = float4_mul(cof2, invdet); - result.col[3] = float4_mul(cof3, invdet); - - return result; - } - -} // namespace bx - -#endif // __BX_FLOAT4X4_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FLOAT4X4_H__ +#define __BX_FLOAT4x4_H__ + +#include "float4_t.h" + +namespace bx +{ + typedef BX_ALIGN_STRUCT_16(struct) + { + float4_t col[4]; + + } float4x4_t; + + BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b) + { + const float4_t xxxx = float4_swiz_xxxx(_a); + const float4_t yyyy = float4_swiz_yyyy(_a); + const float4_t zzzz = float4_swiz_zzzz(_a); + const float4_t col0 = float4_mul(_b.col[0], xxxx); + const float4_t col1 = float4_mul(_b.col[1], yyyy); + const float4_t col2 = float4_madd(_b.col[2], zzzz, col0); + const float4_t col3 = float4_add(_b.col[3], col1); + const float4_t result = float4_add(col2, col3); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b) + { + const float4_t xxxx = float4_swiz_xxxx(_a); + const float4_t yyyy = float4_swiz_yyyy(_a); + const float4_t zzzz = float4_swiz_zzzz(_a); + const float4_t wwww = float4_swiz_wwww(_a); + const float4_t col0 = float4_mul(_b.col[0], xxxx); + const float4_t col1 = float4_mul(_b.col[1], yyyy); + const float4_t col2 = float4_madd(_b.col[2], zzzz, col0); + const float4_t col3 = float4_madd(_b.col[3], wwww, col1); + const float4_t result = float4_add(col2, col3); + + return result; + } + + BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b) + { + float4x4_t result; + result.col[0] = float4_mul(_a.col[0], _b); + result.col[1] = float4_mul(_a.col[1], _b); + result.col[2] = float4_mul(_a.col[2], _b); + result.col[3] = float4_mul(_a.col[3], _b); + + return result; + } + + BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx) + { + const float4_t aibj = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // aibj + const float4_t emfn = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // emfn + const float4_t ckdl = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // ckdl + const float4_t gohp = float4_shuf_zCwD(_mtx.col[1], 
_mtx.col[3]); // gohp + float4x4_t result; + result.col[0] = float4_shuf_xAyB(aibj, emfn); // aeim + result.col[1] = float4_shuf_zCwD(aibj, emfn); // bfjn + result.col[2] = float4_shuf_xAyB(ckdl, gohp); // cgko + result.col[3] = float4_shuf_zCwD(ckdl, gohp); // dhlp + + return result; + } + + BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a) + { + const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]); + const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]); + const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]); + const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]); + const float4_t t0 = float4_shuf_xyAB(tmp0, tmp1); + const float4_t t1 = float4_shuf_xyAB(tmp3, tmp2); + const float4_t t2 = float4_shuf_zwCD(tmp0, tmp1); + const float4_t t3 = float4_shuf_zwCD(tmp3, tmp2); + + const float4_t t23 = float4_mul(t2, t3); + const float4_t t23_yxwz = float4_swiz_yxwz(t23); + const float4_t t23_wzyx = float4_swiz_wzyx(t23); + + float4_t cof0, cof1, cof2, cof3; + + const float4_t zero = float4_zero(); + cof0 = float4_nmsub(t1, t23_yxwz, zero); + cof0 = float4_madd(t1, t23_wzyx, cof0); + + cof1 = float4_nmsub(t0, t23_yxwz, zero); + cof1 = float4_madd(t0, t23_wzyx, cof1); + cof1 = float4_swiz_zwxy(cof1); + + const float4_t t12 = float4_mul(t1, t2); + const float4_t t12_yxwz = float4_swiz_yxwz(t12); + const float4_t t12_wzyx = float4_swiz_wzyx(t12); + + cof0 = float4_madd(t3, t12_yxwz, cof0); + cof0 = float4_nmsub(t3, t12_wzyx, cof0); + + cof3 = float4_mul(t0, t12_yxwz); + cof3 = float4_nmsub(t0, t12_wzyx, cof3); + cof3 = float4_swiz_zwxy(cof3); + + const float4_t t1_zwxy = float4_swiz_zwxy(t1); + const float4_t t2_zwxy = float4_swiz_zwxy(t2); + + const float4_t t13 = float4_mul(t1_zwxy, t3); + const float4_t t13_yxwz = float4_swiz_yxwz(t13); + const float4_t t13_wzyx = float4_swiz_wzyx(t13); + + cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0); + cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0); + + cof2 = float4_mul(t0, t13_yxwz); + cof2 
= float4_nmsub(t0, t13_wzyx, cof2); + cof2 = float4_swiz_zwxy(cof2); + + const float4_t t01 = float4_mul(t0, t1); + const float4_t t01_yxwz = float4_swiz_yxwz(t01); + const float4_t t01_wzyx = float4_swiz_wzyx(t01); + + cof2 = float4_nmsub(t3, t01_yxwz, cof2); + cof2 = float4_madd(t3, t01_wzyx, cof2); + + cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3); + cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3); + + const float4_t t03 = float4_mul(t0, t3); + const float4_t t03_yxwz = float4_swiz_yxwz(t03); + const float4_t t03_wzyx = float4_swiz_wzyx(t03); + + cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1); + cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1); + + cof2 = float4_madd(t1, t03_yxwz, cof2); + cof2 = float4_nmsub(t1, t03_wzyx, cof2); + + const float4_t t02 = float4_mul(t0, t2_zwxy); + const float4_t t02_yxwz = float4_swiz_yxwz(t02); + const float4_t t02_wzyx = float4_swiz_wzyx(t02); + + cof1 = float4_madd(t3, t02_yxwz, cof1); + cof1 = float4_nmsub(t3, t02_wzyx, cof1); + + cof3 = float4_nmsub(t1, t02_yxwz, cof3); + cof3 = float4_madd(t1, t02_wzyx, cof3); + + const float4_t det = float4_dot(t0, cof0); + const float4_t invdet = float4_rcp(det); + + float4x4_t result; + result.col[0] = float4_mul(cof0, invdet); + result.col[1] = float4_mul(cof1, invdet); + result.col[2] = float4_mul(cof2, invdet); + result.col[3] = float4_mul(cof3, invdet); + + return result; + } + +} // namespace bx + +#endif // __BX_FLOAT4X4_H__ diff --git a/include/bx/foreach.h b/include/bx/foreach.h index 76ba6fb..00aeb56 100644 --- a/include/bx/foreach.h +++ b/include/bx/foreach.h @@ -1,71 +1,71 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_FOREACH_H__ -#define __BX_FOREACH_H__ - -#include "bx.h" - -namespace bx -{ - namespace foreach_ns - { - struct ContainerBase - { - }; - - template - class Container : public ContainerBase - { - public: - inline Container(const Ty& _container) - : m_container(_container) - , m_break(0) - , m_it( _container.begin() ) - , m_itEnd( _container.end() ) - { - } - - inline bool condition() const - { - return (!m_break++ && m_it != m_itEnd); - } - - const Ty& m_container; - mutable int m_break; - mutable typename Ty::const_iterator m_it; - mutable typename Ty::const_iterator m_itEnd; - }; - - template - inline Ty* pointer(const Ty&) - { - return 0; - } - - template - inline Container containerNew(const Ty& _container) - { - return Container(_container); - } - - template - inline const Container* container(const ContainerBase* _base, const Ty*) - { - return static_cast*>(_base); - } - } // namespace foreach_ns - -#define foreach(_variable, _container) \ - for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \ - bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->condition(); \ - ++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it) \ - for (_variable = *container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it; \ - bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break; \ - --bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break) - -} // namespace bx - -#endif // __BX_FOREACH_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_FOREACH_H__ +#define __BX_FOREACH_H__ + +#include "bx.h" + +namespace bx +{ + namespace foreach_ns + { + struct ContainerBase + { + }; + + template + class Container : public ContainerBase + { + public: + inline Container(const Ty& _container) + : m_container(_container) + , m_break(0) + , m_it( _container.begin() ) + , m_itEnd( _container.end() ) + { + } + + inline bool condition() const + { + return (!m_break++ && m_it != m_itEnd); + } + + const Ty& m_container; + mutable int m_break; + mutable typename Ty::const_iterator m_it; + mutable typename Ty::const_iterator m_itEnd; + }; + + template + inline Ty* pointer(const Ty&) + { + return 0; + } + + template + inline Container containerNew(const Ty& _container) + { + return Container(_container); + } + + template + inline const Container* container(const ContainerBase* _base, const Ty*) + { + return static_cast*>(_base); + } + } // namespace foreach_ns + +#define foreach(_variable, _container) \ + for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \ + bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->condition(); \ + ++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it) \ + for (_variable = *container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it; \ + bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break; \ + --bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break) + +} // namespace bx + +#endif // __BX_FOREACH_H__ diff --git a/include/bx/handlealloc.h b/include/bx/handlealloc.h index fd66642..276eea0 100644 --- a/include/bx/handlealloc.h +++ b/include/bx/handlealloc.h @@ -1,88 +1,88 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_HANDLE_ALLOC_H__ -#define __BX_HANDLE_ALLOC_H__ - -#include "bx.h" - -namespace bx -{ - class HandleAlloc - { - public: - static const uint16_t invalid = 0xffff; - - HandleAlloc(uint16_t _maxHandles) - : m_dense(new uint16_t[_maxHandles*2]) - , m_sparse(&m_dense[_maxHandles]) - , m_numHandles(0) - , m_maxHandles(_maxHandles) - { - for (uint16_t ii = 0; ii < _maxHandles; ++ii) - { - m_dense[ii] = ii; - } - } - - ~HandleAlloc() - { - delete [] m_dense; - } - - const uint16_t* getHandles() const - { - return m_dense; - } - - uint16_t getHandleAt(uint16_t _at) const - { - return m_dense[_at]; - } - - uint16_t getNumHandles() const - { - return m_numHandles; - } - - uint16_t getMaxHandles() const - { - return m_maxHandles; - } - - uint16_t alloc() - { - if (m_numHandles < m_maxHandles) - { - uint16_t index = m_numHandles; - ++m_numHandles; - - uint16_t handle = m_dense[index]; - m_sparse[handle] = index; - return handle; - } - - return invalid; - } - - void free(uint16_t _handle) - { - uint16_t index = m_sparse[_handle]; - --m_numHandles; - uint16_t temp = m_dense[m_numHandles]; - m_dense[m_numHandles] = _handle; - m_sparse[temp] = index; - m_dense[index] = temp; - } - - private: - uint16_t* m_dense; - uint16_t* m_sparse; - uint16_t m_numHandles; - uint16_t m_maxHandles; - }; -} // namespace bx - -#endif // __HANDLE_ALLOC_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_HANDLE_ALLOC_H__ +#define __BX_HANDLE_ALLOC_H__ + +#include "bx.h" + +namespace bx +{ + class HandleAlloc + { + public: + static const uint16_t invalid = 0xffff; + + HandleAlloc(uint16_t _maxHandles) + : m_dense(new uint16_t[_maxHandles*2]) + , m_sparse(&m_dense[_maxHandles]) + , m_numHandles(0) + , m_maxHandles(_maxHandles) + { + for (uint16_t ii = 0; ii < _maxHandles; ++ii) + { + m_dense[ii] = ii; + } + } + + ~HandleAlloc() + { + delete [] m_dense; + } + + const uint16_t* getHandles() const + { + return m_dense; + } + + uint16_t getHandleAt(uint16_t _at) const + { + return m_dense[_at]; + } + + uint16_t getNumHandles() const + { + return m_numHandles; + } + + uint16_t getMaxHandles() const + { + return m_maxHandles; + } + + uint16_t alloc() + { + if (m_numHandles < m_maxHandles) + { + uint16_t index = m_numHandles; + ++m_numHandles; + + uint16_t handle = m_dense[index]; + m_sparse[handle] = index; + return handle; + } + + return invalid; + } + + void free(uint16_t _handle) + { + uint16_t index = m_sparse[_handle]; + --m_numHandles; + uint16_t temp = m_dense[m_numHandles]; + m_dense[m_numHandles] = _handle; + m_sparse[temp] = index; + m_dense[index] = temp; + } + + private: + uint16_t* m_dense; + uint16_t* m_sparse; + uint16_t m_numHandles; + uint16_t m_maxHandles; + }; +} // namespace bx + +#endif // __HANDLE_ALLOC_H__ diff --git a/include/bx/maputil.h b/include/bx/maputil.h index b92c9cc..b6e69ee 100644 --- a/include/bx/maputil.h +++ b/include/bx/maputil.h @@ -1,29 +1,29 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_MAPUTIL_H__ -#define __BX_MAPUTIL_H__ - -#include "bx.h" - -namespace bx -{ - template - typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value) - { - typename MapType::iterator it = _map.lower_bound(_key); - if (it != _map.end() - && !_map.key_comp()(_key, it->first) ) - { - it->second = _value; - return it; - } - - typename MapType::value_type pair(_key, _value); - return _map.insert(it, pair); - } -} // namespace bx - -#endif // __BX_MAPUTIL_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_MAPUTIL_H__ +#define __BX_MAPUTIL_H__ + +#include "bx.h" + +namespace bx +{ + template + typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value) + { + typename MapType::iterator it = _map.lower_bound(_key); + if (it != _map.end() + && !_map.key_comp()(_key, it->first) ) + { + it->second = _value; + return it; + } + + typename MapType::value_type pair(_key, _value); + return _map.insert(it, pair); + } +} // namespace bx + +#endif // __BX_MAPUTIL_H__ diff --git a/include/bx/mutex.h b/include/bx/mutex.h index b20bb9a..73d55f7 100644 --- a/include/bx/mutex.h +++ b/include/bx/mutex.h @@ -1,171 +1,171 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_MUTEX_H__ -#define __BX_MUTEX_H__ - -#include "bx.h" -#include "cpu.h" -#include "sem.h" - -#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID || BX_PLATFORM_OSX -# include -#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360 -# include -#endif // BX_PLATFORM_ - -namespace bx -{ -#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360 - typedef CRITICAL_SECTION pthread_mutex_t; - typedef unsigned pthread_mutexattr_t; - - inline int pthread_mutex_lock(pthread_mutex_t* _mutex) - { - EnterCriticalSection(_mutex); - return 0; - } - - inline int pthread_mutex_unlock(pthread_mutex_t* _mutex) - { - LeaveCriticalSection(_mutex); - return 0; - } - - inline int pthread_mutex_trylock(pthread_mutex_t* _mutex) - { - return TryEnterCriticalSection(_mutex) ? 0 : EBUSY; - } - - inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/) - { - InitializeCriticalSection(_mutex); - return 0; - } - - inline int pthread_mutex_destroy(pthread_mutex_t* _mutex) - { - DeleteCriticalSection(_mutex); - return 0; - } -#endif // BX_PLATFORM_ - - class Mutex - { - public: - Mutex() - { - pthread_mutex_init(&m_handle, NULL); - } - - ~Mutex() - { - pthread_mutex_destroy(&m_handle); - } - - void lock() - { - pthread_mutex_lock(&m_handle); - } - - void unlock() - { - pthread_mutex_unlock(&m_handle); - } - - private: - Mutex(const Mutex& _rhs); // no copy constructor - Mutex& operator=(const Mutex& _rhs); // no assignment operator - - pthread_mutex_t m_handle; - }; - - class MutexScope - { - public: - MutexScope(Mutex& _mutex) - : m_mutex(_mutex) - { - m_mutex.lock(); - } - - ~MutexScope() - { - m_mutex.unlock(); - } - - private: - MutexScope(); // no default constructor - MutexScope(const MutexScope& _rhs); // no copy constructor - MutexScope& operator=(const MutexScope& _rhs); // no assignment operator - - Mutex& m_mutex; - }; - -#if 1 - typedef Mutex LwMutex; -#else - class LwMutex - { - 
public: - LwMutex() - : m_count(0) - { - } - - ~LwMutex() - { - } - - void lock() - { - if (atomicIncr(&m_count) > 1) - { - m_sem.wait(); - } - } - - void unlock() - { - if (atomicDecr(&m_count) > 0) - { - m_sem.post(); - } - } - - private: - LwMutex(const LwMutex& _rhs); // no copy constructor - LwMutex& operator=(const LwMutex& _rhs); // no assignment operator - - Semaphore m_sem; - volatile int32_t m_count; - }; -#endif // 0 - - class LwMutexScope - { - public: - LwMutexScope(LwMutex& _mutex) - : m_mutex(_mutex) - { - m_mutex.lock(); - } - - ~LwMutexScope() - { - m_mutex.unlock(); - } - - private: - LwMutexScope(); // no default constructor - LwMutexScope(const LwMutexScope& _rhs); // no copy constructor - LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator - - LwMutex& m_mutex; - }; - -} // namespace bx - -#endif // __BX_MUTEX_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_MUTEX_H__ +#define __BX_MUTEX_H__ + +#include "bx.h" +#include "cpu.h" +#include "sem.h" + +#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID || BX_PLATFORM_OSX +# include +#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360 +# include +#endif // BX_PLATFORM_ + +namespace bx +{ +#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360 + typedef CRITICAL_SECTION pthread_mutex_t; + typedef unsigned pthread_mutexattr_t; + + inline int pthread_mutex_lock(pthread_mutex_t* _mutex) + { + EnterCriticalSection(_mutex); + return 0; + } + + inline int pthread_mutex_unlock(pthread_mutex_t* _mutex) + { + LeaveCriticalSection(_mutex); + return 0; + } + + inline int pthread_mutex_trylock(pthread_mutex_t* _mutex) + { + return TryEnterCriticalSection(_mutex) ? 
0 : EBUSY; + } + + inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/) + { + InitializeCriticalSection(_mutex); + return 0; + } + + inline int pthread_mutex_destroy(pthread_mutex_t* _mutex) + { + DeleteCriticalSection(_mutex); + return 0; + } +#endif // BX_PLATFORM_ + + class Mutex + { + public: + Mutex() + { + pthread_mutex_init(&m_handle, NULL); + } + + ~Mutex() + { + pthread_mutex_destroy(&m_handle); + } + + void lock() + { + pthread_mutex_lock(&m_handle); + } + + void unlock() + { + pthread_mutex_unlock(&m_handle); + } + + private: + Mutex(const Mutex& _rhs); // no copy constructor + Mutex& operator=(const Mutex& _rhs); // no assignment operator + + pthread_mutex_t m_handle; + }; + + class MutexScope + { + public: + MutexScope(Mutex& _mutex) + : m_mutex(_mutex) + { + m_mutex.lock(); + } + + ~MutexScope() + { + m_mutex.unlock(); + } + + private: + MutexScope(); // no default constructor + MutexScope(const MutexScope& _rhs); // no copy constructor + MutexScope& operator=(const MutexScope& _rhs); // no assignment operator + + Mutex& m_mutex; + }; + +#if 1 + typedef Mutex LwMutex; +#else + class LwMutex + { + public: + LwMutex() + : m_count(0) + { + } + + ~LwMutex() + { + } + + void lock() + { + if (atomicIncr(&m_count) > 1) + { + m_sem.wait(); + } + } + + void unlock() + { + if (atomicDecr(&m_count) > 0) + { + m_sem.post(); + } + } + + private: + LwMutex(const LwMutex& _rhs); // no copy constructor + LwMutex& operator=(const LwMutex& _rhs); // no assignment operator + + Semaphore m_sem; + volatile int32_t m_count; + }; +#endif // 0 + + class LwMutexScope + { + public: + LwMutexScope(LwMutex& _mutex) + : m_mutex(_mutex) + { + m_mutex.lock(); + } + + ~LwMutexScope() + { + m_mutex.unlock(); + } + + private: + LwMutexScope(); // no default constructor + LwMutexScope(const LwMutexScope& _rhs); // no copy constructor + LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator + + LwMutex& m_mutex; + }; + +} // 
namespace bx + +#endif // __BX_MUTEX_H__ diff --git a/include/bx/os.h b/include/bx/os.h index 9d073b8..447ee94 100644 --- a/include/bx/os.h +++ b/include/bx/os.h @@ -1,46 +1,46 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_OS_H__ -#define __BX_OS_H__ - -#include "bx.h" - -#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX || BX_PLATFORM_OSX -# include // sched_yield -# if BX_PLATFORM_NACL -# include // nanosleep -# else -# include // nanosleep -# endif // BX_PLATFORM_NACL -#endif // BX_PLATFORM_ - -namespace bx -{ - inline void sleep(uint32_t _ms) - { -#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360 - Sleep(_ms); -#else - timespec req = {(time_t)_ms/1000, (long)((_ms%1000)*1000000)}; - timespec rem = {0, 0}; - nanosleep(&req, &rem); -#endif // BX_PLATFORM_ - } - - inline void yield() - { -#if BX_PLATFORM_WINDOWS - SwitchToThread(); -#elif BX_PLATFORM_XBOX360 - Sleep(0); -#else - sched_yield(); -#endif // BX_PLATFORM_ - } - -} // namespace bx - -#endif // __BX_OS_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_OS_H__ +#define __BX_OS_H__ + +#include "bx.h" + +#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX || BX_PLATFORM_OSX +# include // sched_yield +# if BX_PLATFORM_NACL +# include // nanosleep +# else +# include // nanosleep +# endif // BX_PLATFORM_NACL +#endif // BX_PLATFORM_ + +namespace bx +{ + inline void sleep(uint32_t _ms) + { +#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360 + Sleep(_ms); +#else + timespec req = {(time_t)_ms/1000, (long)((_ms%1000)*1000000)}; + timespec rem = {0, 0}; + nanosleep(&req, &rem); +#endif // BX_PLATFORM_ + } + + inline void yield() + { +#if BX_PLATFORM_WINDOWS + SwitchToThread(); +#elif BX_PLATFORM_XBOX360 + Sleep(0); +#else + sched_yield(); +#endif // BX_PLATFORM_ + } + +} // namespace bx + +#endif // __BX_OS_H__ diff --git a/include/bx/platform.h b/include/bx/platform.h index b0b0798..50f1834 100644 --- a/include/bx/platform.h +++ b/include/bx/platform.h @@ -119,17 +119,17 @@ #if BX_CONFIG_ENABLE_MSVC_LEVEL4_WARNINGS && BX_COMPILER_MSVC # pragma warning(error:4062) // ENABLE warning C4062: enumerator'...' in switch of enum '...' 
is not handled -# pragma warning(error:4121) // ENABLE warning C4121: 'symbol' : alignment of a member was sensitive to packing -# pragma warning(error:4130) // ENABLE warning C4130: 'operator' : logical operation on address of string constant -# pragma warning(error:4239) // ENABLE warning C4239: nonstandard extension used : 'argument' : conversion from '*' to '* &' A non-const reference may only be bound to an lvalue -//# pragma warning(error:4244) // ENABLE warning C4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data -# pragma warning(error:4263) // ENABLE warning C4263: 'function' : member function does not override any base class virtual member function -# pragma warning(error:4265) // ENABLE warning C4265: class has virtual functions, but destructor is not virtual -# pragma warning(error:4431) // ENABLE warning C4431: missing type specifier - int assumed. Note: C no longer supports default-int -# pragma warning(error:4545) // ENABLE warning C4545: expression before comma evaluates to a function which is missing an argument list -# pragma warning(error:4549) // ENABLE warning C4549: 'operator' : operator before comma has no effect; did you intend 'operator'? 
-# pragma warning(error:4701) // ENABLE warning C4701: potentially uninitialized local variable 'name' used -# pragma warning(error:4706) // ENABLE warning C4706: assignment within conditional expression +# pragma warning(error:4121) // ENABLE warning C4121: 'symbol' : alignment of a member was sensitive to packing +# pragma warning(error:4130) // ENABLE warning C4130: 'operator' : logical operation on address of string constant +# pragma warning(error:4239) // ENABLE warning C4239: nonstandard extension used : 'argument' : conversion from '*' to '* &' A non-const reference may only be bound to an lvalue +//# pragma warning(error:4244) // ENABLE warning C4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data +# pragma warning(error:4263) // ENABLE warning C4263: 'function' : member function does not override any base class virtual member function +# pragma warning(error:4265) // ENABLE warning C4265: class has virtual functions, but destructor is not virtual +# pragma warning(error:4431) // ENABLE warning C4431: missing type specifier - int assumed. Note: C no longer supports default-int +# pragma warning(error:4545) // ENABLE warning C4545: expression before comma evaluates to a function which is missing an argument list +# pragma warning(error:4549) // ENABLE warning C4549: 'operator' : operator before comma has no effect; did you intend 'operator'? +# pragma warning(error:4701) // ENABLE warning C4701: potentially uninitialized local variable 'name' used +# pragma warning(error:4706) // ENABLE warning C4706: assignment within conditional expression #endif // BX_CONFIG_ENABLE_MSVC_LEVEL4_WARNINGS && BX_COMPILER_MSVC #endif // __BX_PLATFORM_H__ diff --git a/include/bx/radixsort.h b/include/bx/radixsort.h index f5ab322..d77e210 100644 --- a/include/bx/radixsort.h +++ b/include/bx/radixsort.h @@ -1,111 +1,111 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_RADIXSORT_H__ -#define __BX_RADIXSORT_H__ - -#include "bx.h" - -namespace bx -{ -#define BX_RADIXSORT_BITS 11 -#define BX_RADIXSORT_HISTOGRAM_SIZE (1< - void radixSort32(uint32_t* _keys, uint32_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size) - { - uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; - uint16_t shift = 0; - for (uint32_t pass = 0; pass < 3; ++pass) - { - memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE); - for (uint32_t ii = 0; ii < _size; ++ii) - { - uint32_t key = _keys[ii]; - uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; - ++histogram[index]; - } - - uint16_t offset = 0; - for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) - { - uint16_t count = histogram[ii]; - histogram[ii] = offset; - offset += count; - } - - for (uint32_t ii = 0; ii < _size; ++ii) - { - uint32_t key = _keys[ii]; - uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; - uint16_t dest = histogram[index]++; - _tempKeys[dest] = key; - _tempValues[dest] = _values[ii]; - } - - uint32_t* swapKeys = _tempKeys; - _tempKeys = _keys; - _keys = swapKeys; - - Ty* swapValues = _tempValues; - _tempValues = _values; - _values = swapValues; - - shift += BX_RADIXSORT_BITS; - } - } - - template - void radixSort64(uint64_t* _keys, uint64_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size) - { - uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; - uint16_t shift = 0; - for (uint32_t pass = 0; pass < 6; ++pass) - { - memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE); - for (uint32_t ii = 0; ii < _size; ++ii) - { - uint64_t key = _keys[ii]; - uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; - ++histogram[index]; - } - - uint16_t offset = 0; - for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) - { - uint16_t count = histogram[ii]; - histogram[ii] = offset; - offset += count; - } - - for (uint32_t ii = 0; ii < _size; ++ii) - { - uint64_t key 
= _keys[ii]; - uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; - uint16_t dest = histogram[index]++; - _tempKeys[dest] = key; - _tempValues[dest] = _values[ii]; - } - - uint64_t* swapKeys = _tempKeys; - _tempKeys = _keys; - _keys = swapKeys; - - Ty* swapValues = _tempValues; - _tempValues = _values; - _values = swapValues; - - shift += BX_RADIXSORT_BITS; - } - } - -#undef BX_RADIXSORT_BITS -#undef BX_RADIXSORT_HISTOGRAM_SIZE -#undef BX_RADIXSORT_BIT_MASK - -} // namespace bx - -#endif // __BX_RADIXSORT_H__ +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_RADIXSORT_H__ +#define __BX_RADIXSORT_H__ + +#include "bx.h" + +namespace bx +{ +#define BX_RADIXSORT_BITS 11 +#define BX_RADIXSORT_HISTOGRAM_SIZE (1< + void radixSort32(uint32_t* _keys, uint32_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size) + { + uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; + uint16_t shift = 0; + for (uint32_t pass = 0; pass < 3; ++pass) + { + memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE); + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint32_t key = _keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + ++histogram[index]; + } + + uint16_t offset = 0; + for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) + { + uint16_t count = histogram[ii]; + histogram[ii] = offset; + offset += count; + } + + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint32_t key = _keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + uint16_t dest = histogram[index]++; + _tempKeys[dest] = key; + _tempValues[dest] = _values[ii]; + } + + uint32_t* swapKeys = _tempKeys; + _tempKeys = _keys; + _keys = swapKeys; + + Ty* swapValues = _tempValues; + _tempValues = _values; + _values = swapValues; + + shift += BX_RADIXSORT_BITS; + } + } + + template + void radixSort64(uint64_t* _keys, uint64_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size) + { 
+ uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; + uint16_t shift = 0; + for (uint32_t pass = 0; pass < 6; ++pass) + { + memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE); + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint64_t key = _keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + ++histogram[index]; + } + + uint16_t offset = 0; + for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) + { + uint16_t count = histogram[ii]; + histogram[ii] = offset; + offset += count; + } + + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint64_t key = _keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + uint16_t dest = histogram[index]++; + _tempKeys[dest] = key; + _tempValues[dest] = _values[ii]; + } + + uint64_t* swapKeys = _tempKeys; + _tempKeys = _keys; + _keys = swapKeys; + + Ty* swapValues = _tempValues; + _tempValues = _values; + _values = swapValues; + + shift += BX_RADIXSORT_BITS; + } + } + +#undef BX_RADIXSORT_BITS +#undef BX_RADIXSORT_HISTOGRAM_SIZE +#undef BX_RADIXSORT_BIT_MASK + +} // namespace bx + +#endif // __BX_RADIXSORT_H__ diff --git a/include/bx/readerwriter.h b/include/bx/readerwriter.h index c343d84..ff01213 100644 --- a/include/bx/readerwriter.h +++ b/include/bx/readerwriter.h @@ -1,270 +1,270 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
- * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -#ifndef __BX_READERWRITER_H__ -#define __BX_READERWRITER_H__ - -#include -#include - -#include "bx.h" -#include "uint32_t.h" - -#if BX_COMPILER_MSVC -# define fseeko64 _fseeki64 -# define ftello64 _ftelli64 -#elif BX_PLATFORM_OSX -# define fseeko64 fseeko -# define ftello64 ftello -#endif // BX_ - -namespace bx -{ - struct Whence - { - enum Enum - { - Begin, - Current, - End, - }; - }; - - struct BX_NO_VTABLE ReaderI - { - virtual ~ReaderI() = 0; - virtual int32_t read(void* _data, int32_t _size) = 0; - }; - - inline ReaderI::~ReaderI() - { - } - - struct BX_NO_VTABLE WriterI - { - virtual ~WriterI() = 0; - virtual int32_t write(const void* _data, int32_t _size) = 0; - }; - - inline WriterI::~WriterI() - { - } - - struct BX_NO_VTABLE SeekerI - { - virtual ~SeekerI() = 0; - virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) = 0; - }; - - inline SeekerI::~SeekerI() - { - } - - inline int32_t read(ReaderI* _reader, void* _data, int32_t _size) - { - return _reader->read(_data, _size); - } - - template - inline int32_t read(ReaderI* _reader, Ty& _value) - { - return _reader->read(&_value, sizeof(Ty) ); - } - - inline int32_t write(WriterI* _writer, const void* _data, int32_t _size) - { - return _writer->write(_data, _size); - } - - template - inline int32_t write(WriterI* _writer, const Ty& _value) - { - return _writer->write(&_value, sizeof(Ty) ); - } - - inline int64_t skip(SeekerI* _seeker, int64_t _offset) - { - return _seeker->seek(_offset, Whence::Current); - } - - inline int64_t getSize(SeekerI* _seeker) - { - int64_t offset = _seeker->seek(); - int64_t size = _seeker->seek(0, Whence::End); - _seeker->seek(offset, Whence::Begin); - return size; - } - - struct BX_NO_VTABLE ReaderSeekerI : public ReaderI, public SeekerI - { - }; - - struct BX_NO_VTABLE WriterSeekerI : public WriterI, public SeekerI - { - }; - - struct BX_NO_VTABLE FileReaderI : public ReaderSeekerI - 
{ - virtual int32_t open(const char* _filePath) = 0; - virtual int32_t close() = 0; - }; - - struct BX_NO_VTABLE FileWriterI : public WriterSeekerI - { - virtual int32_t open(const char* _filePath, bool _append = false) = 0; - virtual int32_t close() = 0; - }; - - struct BX_NO_VTABLE MemoryBlockI - { - virtual void* more(uint32_t _size = 0) = 0; - virtual uint32_t getSize() = 0; - }; - - class StaticMemoryBlock : public MemoryBlockI - { - public: - StaticMemoryBlock(void* _data, uint32_t _size) - : m_data(_data) - , m_size(_size) - { - } - - virtual ~StaticMemoryBlock() - { - } - - virtual void* more(uint32_t /*_size*/ = 0) BX_OVERRIDE - { - return m_data; - } - - virtual uint32_t getSize() BX_OVERRIDE - { - return m_size; - } - - private: - void* m_data; - uint32_t m_size; - }; - - inline int64_t int64_min(int64_t _a, int64_t _b) - { - return _a < _b ? _a : _b; - } - - inline int64_t int64_max(int64_t _a, int64_t _b) - { - return _a > _b ? _a : _b; - } - - inline int64_t int64_clamp(int64_t _a, int64_t _min, int64_t _max) - { - const int64_t min = int64_min(_a, _max); - const int64_t result = int64_max(_min, min); - - return result; - } - - class SizerWriter : public WriterSeekerI - { - public: - SizerWriter() - : m_pos(0) - , m_top(0) - { - } - - virtual ~SizerWriter() - { - } - - virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE - { - switch (_whence) - { - case Whence::Begin: - m_pos = _offset; - break; - - case Whence::Current: - m_pos = int64_clamp(m_pos + _offset, 0, m_top); - break; - - case Whence::End: - m_pos = int64_clamp(m_top - _offset, 0, m_top); - break; - } - - return m_pos; - } - - virtual int32_t write(const void* /*_data*/, int32_t _size) BX_OVERRIDE - { - int32_t morecore = int32_t(m_pos - m_top) + _size; - - if (0 < morecore) - { - m_top += morecore; - } - - int64_t reminder = m_top-m_pos; - int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? 
INT32_MAX : reminder) ); - m_pos += size; - return size; - } - - private: - int64_t m_pos; - int64_t m_top; - }; - - class MemoryReader : public ReaderSeekerI - { - public: - MemoryReader(const void* _data, uint32_t _size) - : m_data( (const uint8_t*)_data) - , m_pos(0) - , m_top(_size) - { - } - - virtual ~MemoryReader() - { - } - - virtual int64_t seek(int64_t _offset, Whence::Enum _whence) BX_OVERRIDE - { - switch (_whence) - { - case Whence::Begin: - m_pos = _offset; - break; - - case Whence::Current: - m_pos = int64_clamp(m_pos + _offset, 0, m_top); - break; - - case Whence::End: - m_pos = int64_clamp(m_top - _offset, 0, m_top); - break; - } - - return m_pos; - } - - virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE - { - int64_t reminder = m_top-m_pos; - int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) ); - memcpy(_data, &m_data[m_pos], size); - m_pos += size; - return size; - } - +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef __BX_READERWRITER_H__ +#define __BX_READERWRITER_H__ + +#include +#include + +#include "bx.h" +#include "uint32_t.h" + +#if BX_COMPILER_MSVC +# define fseeko64 _fseeki64 +# define ftello64 _ftelli64 +#elif BX_PLATFORM_OSX +# define fseeko64 fseeko +# define ftello64 ftello +#endif // BX_ + +namespace bx +{ + struct Whence + { + enum Enum + { + Begin, + Current, + End, + }; + }; + + struct BX_NO_VTABLE ReaderI + { + virtual ~ReaderI() = 0; + virtual int32_t read(void* _data, int32_t _size) = 0; + }; + + inline ReaderI::~ReaderI() + { + } + + struct BX_NO_VTABLE WriterI + { + virtual ~WriterI() = 0; + virtual int32_t write(const void* _data, int32_t _size) = 0; + }; + + inline WriterI::~WriterI() + { + } + + struct BX_NO_VTABLE SeekerI + { + virtual ~SeekerI() = 0; + virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) = 0; + }; + + inline SeekerI::~SeekerI() + { + } + + inline int32_t read(ReaderI* _reader, void* _data, int32_t _size) + { + return _reader->read(_data, _size); + } + + template + inline int32_t read(ReaderI* _reader, Ty& _value) + { + return _reader->read(&_value, sizeof(Ty) ); + } + + inline int32_t write(WriterI* _writer, const void* _data, int32_t _size) + { + return _writer->write(_data, _size); + } + + template + inline int32_t write(WriterI* _writer, const Ty& _value) + { + return _writer->write(&_value, sizeof(Ty) ); + } + + inline int64_t skip(SeekerI* _seeker, int64_t _offset) + { + return _seeker->seek(_offset, Whence::Current); + } + + inline int64_t getSize(SeekerI* _seeker) + { + int64_t offset = _seeker->seek(); + int64_t size = _seeker->seek(0, Whence::End); + _seeker->seek(offset, Whence::Begin); + return size; + } + + struct BX_NO_VTABLE ReaderSeekerI : public ReaderI, public SeekerI + { + }; + + struct BX_NO_VTABLE WriterSeekerI : public WriterI, public SeekerI + { + }; + + struct BX_NO_VTABLE FileReaderI : public ReaderSeekerI + 
{ + virtual int32_t open(const char* _filePath) = 0; + virtual int32_t close() = 0; + }; + + struct BX_NO_VTABLE FileWriterI : public WriterSeekerI + { + virtual int32_t open(const char* _filePath, bool _append = false) = 0; + virtual int32_t close() = 0; + }; + + struct BX_NO_VTABLE MemoryBlockI + { + virtual void* more(uint32_t _size = 0) = 0; + virtual uint32_t getSize() = 0; + }; + + class StaticMemoryBlock : public MemoryBlockI + { + public: + StaticMemoryBlock(void* _data, uint32_t _size) + : m_data(_data) + , m_size(_size) + { + } + + virtual ~StaticMemoryBlock() + { + } + + virtual void* more(uint32_t /*_size*/ = 0) BX_OVERRIDE + { + return m_data; + } + + virtual uint32_t getSize() BX_OVERRIDE + { + return m_size; + } + + private: + void* m_data; + uint32_t m_size; + }; + + inline int64_t int64_min(int64_t _a, int64_t _b) + { + return _a < _b ? _a : _b; + } + + inline int64_t int64_max(int64_t _a, int64_t _b) + { + return _a > _b ? _a : _b; + } + + inline int64_t int64_clamp(int64_t _a, int64_t _min, int64_t _max) + { + const int64_t min = int64_min(_a, _max); + const int64_t result = int64_max(_min, min); + + return result; + } + + class SizerWriter : public WriterSeekerI + { + public: + SizerWriter() + : m_pos(0) + , m_top(0) + { + } + + virtual ~SizerWriter() + { + } + + virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE + { + switch (_whence) + { + case Whence::Begin: + m_pos = _offset; + break; + + case Whence::Current: + m_pos = int64_clamp(m_pos + _offset, 0, m_top); + break; + + case Whence::End: + m_pos = int64_clamp(m_top - _offset, 0, m_top); + break; + } + + return m_pos; + } + + virtual int32_t write(const void* /*_data*/, int32_t _size) BX_OVERRIDE + { + int32_t morecore = int32_t(m_pos - m_top) + _size; + + if (0 < morecore) + { + m_top += morecore; + } + + int64_t reminder = m_top-m_pos; + int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? 
INT32_MAX : reminder) ); + m_pos += size; + return size; + } + + private: + int64_t m_pos; + int64_t m_top; + }; + + class MemoryReader : public ReaderSeekerI + { + public: + MemoryReader(const void* _data, uint32_t _size) + : m_data( (const uint8_t*)_data) + , m_pos(0) + , m_top(_size) + { + } + + virtual ~MemoryReader() + { + } + + virtual int64_t seek(int64_t _offset, Whence::Enum _whence) BX_OVERRIDE + { + switch (_whence) + { + case Whence::Begin: + m_pos = _offset; + break; + + case Whence::Current: + m_pos = int64_clamp(m_pos + _offset, 0, m_top); + break; + + case Whence::End: + m_pos = int64_clamp(m_top - _offset, 0, m_top); + break; + } + + return m_pos; + } + + virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE + { + int64_t reminder = m_top-m_pos; + int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) ); + memcpy(_data, &m_data[m_pos], size); + m_pos += size; + return size; + } + const uint8_t* getDataPtr() const { return &m_data[m_pos]; @@ -280,180 +280,180 @@ namespace bx return m_top-m_pos; } - private: - const uint8_t* m_data; - int64_t m_pos; - int64_t m_top; - }; - - class MemoryWriter : public WriterSeekerI - { - public: - MemoryWriter(MemoryBlockI* _memBlock) - : m_memBlock(_memBlock) - , m_data(NULL) - , m_pos(0) - , m_top(0) - , m_size(0) - { - } - - virtual ~MemoryWriter() - { - } - - virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE - { - switch (_whence) - { - case Whence::Begin: - m_pos = _offset; - break; - - case Whence::Current: - m_pos = int64_clamp(m_pos + _offset, 0, m_top); - break; - - case Whence::End: - m_pos = int64_clamp(m_top - _offset, 0, m_top); - break; - } - - return m_pos; - } - - virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE - { - int32_t morecore = int32_t(m_pos - m_size) + _size; - - if (0 < morecore) - { - morecore = BX_ALIGN_MASK(morecore, 0xfff); - m_data = (uint8_t*)m_memBlock->more(morecore); - m_size = 
m_memBlock->getSize(); - } - - int64_t reminder = m_size-m_pos; - int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) ); - memcpy(&m_data[m_pos], _data, size); - m_pos += size; - m_top = int64_max(m_top, m_pos); - return size; - } - - private: - MemoryBlockI* m_memBlock; - uint8_t* m_data; - int64_t m_pos; - int64_t m_top; - int64_t m_size; - }; - - class StaticMemoryBlockWriter : public MemoryWriter - { - public: - StaticMemoryBlockWriter(void* _data, uint32_t _size) - : MemoryWriter(&m_smb) - , m_smb(_data, _size) - { - } - - ~StaticMemoryBlockWriter() - { - } - - private: - StaticMemoryBlock m_smb; - }; - -#if BX_CONFIG_CRT_FILE_READER_WRITER - class CrtFileReader : public FileReaderI - { - public: - CrtFileReader() - : m_file(NULL) - { - } - - virtual ~CrtFileReader() - { - } - - virtual int32_t open(const char* _filePath) BX_OVERRIDE - { - m_file = fopen(_filePath, "rb"); - return NULL == m_file; - } - - virtual int32_t close() BX_OVERRIDE - { - fclose(m_file); - return 0; - } - - virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE - { - fseeko64(m_file, _offset, _whence); - return ftello64(m_file); - } - - virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE - { - return (int32_t)fread(_data, 1, _size, m_file); - } - - private: - FILE* m_file; - }; - - class CrtFileWriter : public FileWriterI - { - public: - CrtFileWriter() - : m_file(NULL) - { - } - - virtual ~CrtFileWriter() - { - } - - virtual int32_t open(const char* _filePath, bool _append = false) BX_OVERRIDE - { - if (_append) - { - m_file = fopen(_filePath, "ab"); - } - else - { - m_file = fopen(_filePath, "wb"); - } - - return NULL == m_file; - } - - virtual int32_t close() BX_OVERRIDE - { - fclose(m_file); - return 0; - } - - virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE - { - fseeko64(m_file, _offset, _whence); - return ftello64(m_file); - } - - virtual int32_t 
write(const void* _data, int32_t _size) BX_OVERRIDE - { - return (int32_t)fwrite(_data, 1, _size, m_file); - } - - private: - FILE* m_file; - }; -#endif // BX_CONFIG_CRT_FILE_READER_WRITER - -} // namespace bx - -#endif // __BX_READERWRITER_H__ + private: + const uint8_t* m_data; + int64_t m_pos; + int64_t m_top; + }; + + class MemoryWriter : public WriterSeekerI + { + public: + MemoryWriter(MemoryBlockI* _memBlock) + : m_memBlock(_memBlock) + , m_data(NULL) + , m_pos(0) + , m_top(0) + , m_size(0) + { + } + + virtual ~MemoryWriter() + { + } + + virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE + { + switch (_whence) + { + case Whence::Begin: + m_pos = _offset; + break; + + case Whence::Current: + m_pos = int64_clamp(m_pos + _offset, 0, m_top); + break; + + case Whence::End: + m_pos = int64_clamp(m_top - _offset, 0, m_top); + break; + } + + return m_pos; + } + + virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE + { + int32_t morecore = int32_t(m_pos - m_size) + _size; + + if (0 < morecore) + { + morecore = BX_ALIGN_MASK(morecore, 0xfff); + m_data = (uint8_t*)m_memBlock->more(morecore); + m_size = m_memBlock->getSize(); + } + + int64_t reminder = m_size-m_pos; + int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? 
INT32_MAX : reminder) ); + memcpy(&m_data[m_pos], _data, size); + m_pos += size; + m_top = int64_max(m_top, m_pos); + return size; + } + + private: + MemoryBlockI* m_memBlock; + uint8_t* m_data; + int64_t m_pos; + int64_t m_top; + int64_t m_size; + }; + + class StaticMemoryBlockWriter : public MemoryWriter + { + public: + StaticMemoryBlockWriter(void* _data, uint32_t _size) + : MemoryWriter(&m_smb) + , m_smb(_data, _size) + { + } + + ~StaticMemoryBlockWriter() + { + } + + private: + StaticMemoryBlock m_smb; + }; + +#if BX_CONFIG_CRT_FILE_READER_WRITER + class CrtFileReader : public FileReaderI + { + public: + CrtFileReader() + : m_file(NULL) + { + } + + virtual ~CrtFileReader() + { + } + + virtual int32_t open(const char* _filePath) BX_OVERRIDE + { + m_file = fopen(_filePath, "rb"); + return NULL == m_file; + } + + virtual int32_t close() BX_OVERRIDE + { + fclose(m_file); + return 0; + } + + virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE + { + fseeko64(m_file, _offset, _whence); + return ftello64(m_file); + } + + virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE + { + return (int32_t)fread(_data, 1, _size, m_file); + } + + private: + FILE* m_file; + }; + + class CrtFileWriter : public FileWriterI + { + public: + CrtFileWriter() + : m_file(NULL) + { + } + + virtual ~CrtFileWriter() + { + } + + virtual int32_t open(const char* _filePath, bool _append = false) BX_OVERRIDE + { + if (_append) + { + m_file = fopen(_filePath, "ab"); + } + else + { + m_file = fopen(_filePath, "wb"); + } + + return NULL == m_file; + } + + virtual int32_t close() BX_OVERRIDE + { + fclose(m_file); + return 0; + } + + virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE + { + fseeko64(m_file, _offset, _whence); + return ftello64(m_file); + } + + virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE + { + return (int32_t)fwrite(_data, 1, _size, m_file); + } + + private: + FILE* 
m_file; + }; +#endif // BX_CONFIG_CRT_FILE_READER_WRITER + +} // namespace bx + +#endif // __BX_READERWRITER_H__ diff --git a/include/bx/uint32_t.h b/include/bx/uint32_t.h index 2bcab80..1b13a72 100644 --- a/include/bx/uint32_t.h +++ b/include/bx/uint32_t.h @@ -1,455 +1,455 @@ -/* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. - * License: http://www.opensource.org/licenses/BSD-2-Clause - */ - -// Copyright 2006 Mike Acton -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE - -#ifndef __BX_UINT32_T_H__ -#define __BX_UINT32_T_H__ - -#include "bx.h" - -#if BX_COMPILER_MSVC -# if BX_PLATFORM_WINDOWS -# include // math.h is included because VS bitches: - // warning C4985: 'ceil': attributes not present on previous declaration. - // must be included before intrin.h. 
-# include -# pragma intrinsic(_BitScanForward) -# pragma intrinsic(_BitScanReverse) -# endif // BX_PLATFORM_WINDOWS -#endif // BX_COMPILER_MSVC - -namespace bx -{ - inline uint32_t uint32_li(uint32_t _a) - { - return _a; - } - - inline uint32_t uint32_dec(uint32_t _a) - { - return _a - 1; - } - - inline uint32_t uint32_inc(uint32_t _a) - { - return _a + 1; - } - - inline uint32_t uint32_not(uint32_t _a) - { - return ~_a; - } - - inline uint32_t uint32_neg(uint32_t _a) - { - return -(int32_t)_a; - } - - inline uint32_t uint32_ext(uint32_t _a) - { - return ( (int32_t)_a)>>31; - } - - inline uint32_t uint32_and(uint32_t _a, uint32_t _b) - { - return _a & _b; - } - - inline uint32_t uint32_xor(uint32_t _a, uint32_t _b) - { - return _a ^ _b; - } - - inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b) - { - return !_a != !_b; - } - - inline uint32_t uint32_andc(uint32_t _a, uint32_t _b) - { - return _a & ~_b; - } - - inline uint32_t uint32_or(uint32_t _a, uint32_t _b) - { - return _a | _b; - } - - inline uint32_t uint32_sll(uint32_t _a, int _sa) - { - return _a << _sa; - } - - inline uint32_t uint32_srl(uint32_t _a, int _sa) - { - return _a >> _sa; - } - - inline uint32_t uint32_sra(uint32_t _a, int _sa) - { - return ( (int32_t)_a) >> _sa; - } - - inline uint32_t uint32_rol(uint32_t _a, int _sa) - { - return ( _a << _sa) | (_a >> (32-_sa) ); - } - - inline uint32_t uint32_ror(uint32_t _a, int _sa) - { - return ( _a >> _sa) | (_a << (32-_sa) ); - } - - inline uint32_t uint32_add(uint32_t _a, uint32_t _b) - { - return _a + _b; - } - - inline uint32_t uint32_sub(uint32_t _a, uint32_t _b) - { - return _a - _b; - } - - inline uint32_t uint32_mul(uint32_t _a, uint32_t _b) - { - return _a * _b; - } - - inline uint32_t uint32_div(uint32_t _a, uint32_t _b) - { - return (_a / _b); - } - - inline uint32_t uint32_mod(uint32_t _a, uint32_t _b) - { - return (_a % _b); - } - - inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b) - { - return -(_a == _b); - } - - inline uint32_t 
uint32_cmpneq(uint32_t _a, uint32_t _b) - { - return -(_a != _b); - } - - inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b) - { - return -(_a < _b); - } - - inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b) - { - return -(_a <= _b); - } - - inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b) - { - return -(_a > _b); - } - - inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b) - { - return -(_a >= _b); - } - - inline uint32_t uint32_setnz(uint32_t _a) - { - return -!!_a; - } - - inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b) - { - const uint32_t add = uint32_add(_a, _b); - const uint32_t lt = uint32_cmplt(add, _a); - const uint32_t result = uint32_or(add, lt); - - return result; - } - - inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b) - { - const uint32_t sub = uint32_sub(_a, _b); - const uint32_t le = uint32_cmple(sub, _a); - const uint32_t result = uint32_and(sub, le); - - return result; - } - - inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b) - { - const uint64_t mul = (uint64_t)_a * (uint64_t)_b; - const uint32_t hi = mul >> 32; - const uint32_t nz = uint32_setnz(hi); - const uint32_t result = uint32_or(uint32_t(mul), nz); - - return result; - } - - inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b) - { - const uint32_t mask = uint32_ext(test); - const uint32_t sel_a = uint32_and(_a, mask); - const uint32_t sel_b = uint32_andc(_b, mask); - const uint32_t result = uint32_or(sel_a, sel_b); - - return (result); - } - - inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b) - { - const uint32_t sel_a = uint32_and(_a, _mask); - const uint32_t sel_b = uint32_andc(_b, _mask); - const uint32_t result = uint32_or(sel_a, sel_b); - - return (result); - } - - inline uint32_t uint32_imin(uint32_t _a, uint32_t _b) - { - const uint32_t a_sub_b = uint32_sub(_a, _b); - const uint32_t result = uint32_sels(a_sub_b, _a, _b); - - return result; - } - - inline uint32_t uint32_imax(uint32_t _a, uint32_t _b) - { - 
const uint32_t b_sub_a = uint32_sub(_b, _a); - const uint32_t result = uint32_sels(b_sub_a, _a, _b); - - return result; - } - - inline uint32_t uint32_min(uint32_t _a, uint32_t _b) - { - return _a > _b ? _b : _a; - } - - inline uint32_t uint32_max(uint32_t _a, uint32_t _b) - { - return _a > _b ? _a : _b; - } - - inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max) - { - const uint32_t inc = uint32_inc(_val); - const uint32_t max_diff = uint32_sub(_max, _val); - const uint32_t neg_max_diff = uint32_neg(max_diff); - const uint32_t max_or = uint32_or(max_diff, neg_max_diff); - const uint32_t max_diff_nz = uint32_ext(max_or); - const uint32_t result = uint32_selb(max_diff_nz, inc, _min); - - return result; - } - - inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max) - { - const uint32_t dec = uint32_dec(_val); - const uint32_t min_diff = uint32_sub(_min, _val); - const uint32_t neg_min_diff = uint32_neg(min_diff); - const uint32_t min_or = uint32_or(min_diff, neg_min_diff); - const uint32_t min_diff_nz = uint32_ext(min_or); - const uint32_t result = uint32_selb(min_diff_nz, dec, _max); - - return result; - } - - inline uint32_t uint32_cntbits_ref(uint32_t _val) - { - const uint32_t tmp0 = uint32_srl(_val, 1); - const uint32_t tmp1 = uint32_and(tmp0, 0x55555555); - const uint32_t tmp2 = uint32_sub(_val, tmp1); - const uint32_t tmp3 = uint32_and(tmp2, 0xc30c30c3); - const uint32_t tmp4 = uint32_srl(tmp2, 2); - const uint32_t tmp5 = uint32_and(tmp4, 0xc30c30c3); - const uint32_t tmp6 = uint32_srl(tmp2, 4); - const uint32_t tmp7 = uint32_and(tmp6, 0xc30c30c3); - const uint32_t tmp8 = uint32_add(tmp3, tmp5); - const uint32_t tmp9 = uint32_add(tmp7, tmp8); - const uint32_t tmpA = uint32_srl(tmp9, 6); - const uint32_t tmpB = uint32_add(tmp9, tmpA); - const uint32_t tmpC = uint32_srl(tmpB, 12); - const uint32_t tmpD = uint32_srl(tmpB, 24); - const uint32_t tmpE = uint32_add(tmpB, tmpC); - const uint32_t tmpF = uint32_add(tmpD, 
tmpE); - const uint32_t result = uint32_and(tmpF, 0x3f); - - return result; - } - - /// Count number of bits set. - inline uint32_t uint32_cntbits(uint32_t _val) - { -#if BX_COMPILER_GCC - return __builtin_popcount(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS - return __popcnt(_val); -#else - return uint32_cntbits_ref(_val); -#endif // BX_COMPILER_GCC - } - - inline uint32_t uint32_cntlz_ref(uint32_t _val) - { - const uint32_t tmp0 = uint32_srl(_val, 1); - const uint32_t tmp1 = uint32_or(tmp0, _val); - const uint32_t tmp2 = uint32_srl(tmp1, 2); - const uint32_t tmp3 = uint32_or(tmp2, tmp1); - const uint32_t tmp4 = uint32_srl(tmp3, 4); - const uint32_t tmp5 = uint32_or(tmp4, tmp3); - const uint32_t tmp6 = uint32_srl(tmp5, 8); - const uint32_t tmp7 = uint32_or(tmp6, tmp5); - const uint32_t tmp8 = uint32_srl(tmp7, 16); - const uint32_t tmp9 = uint32_or(tmp8, tmp7); - const uint32_t tmpA = uint32_not(tmp9); - const uint32_t result = uint32_cntbits(tmpA); - - return result; - } - - /// Count number of leading zeros. 
- inline uint32_t uint32_cntlz(uint32_t _val) - { -#if BX_COMPILER_GCC - return __builtin_clz(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS - unsigned long index; - _BitScanReverse(&index, _val); - return 31 - index; -#else - return uint32_cntlz_ref(_val); -#endif // BX_COMPILER_ - } - - inline uint32_t uint32_cnttz_ref(uint32_t _val) - { - const uint32_t tmp0 = uint32_not(_val); - const uint32_t tmp1 = uint32_dec(_val); - const uint32_t tmp2 = uint32_and(tmp0, tmp1); - const uint32_t result = uint32_cntbits(tmp2); - - return result; - } - - inline uint32_t uint32_cnttz(uint32_t _val) - { -#if BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS - unsigned long index; - _BitScanForward(&index, _val); - return index; -#else - return uint32_cnttz_ref(_val); -#endif // BX_COMPILER_ - } - - // shuffle: - // ---- ---- ---- ---- fedc ba98 7654 3210 - // to: - // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 - inline uint32_t uint32_part1by1(uint32_t _a) - { - const uint32_t val = uint32_and(_a, 0xffff); - - const uint32_t tmp0 = uint32_sll(val, 8); - const uint32_t tmp1 = uint32_xor(val, tmp0); - const uint32_t tmp2 = uint32_and(tmp1, 0x00ff00ff); - - const uint32_t tmp3 = uint32_sll(tmp2, 4); - const uint32_t tmp4 = uint32_xor(tmp2, tmp3); - const uint32_t tmp5 = uint32_and(tmp4, 0x0f0f0f0f); - - const uint32_t tmp6 = uint32_sll(tmp5, 2); - const uint32_t tmp7 = uint32_xor(tmp5, tmp6); - const uint32_t tmp8 = uint32_and(tmp7, 0x33333333); - - const uint32_t tmp9 = uint32_sll(tmp8, 1); - const uint32_t tmpA = uint32_xor(tmp8, tmp9); - const uint32_t result = uint32_and(tmpA, 0x55555555); - - return result; - } - - // shuffle: - // ---- ---- ---- ---- ---- --98 7654 3210 - // to: - // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 - inline uint32_t uint32_part1by2(uint32_t _a) - { - const uint32_t val = uint32_and(_a, 0x3ff); - - const uint32_t tmp0 = uint32_sll(val, 16); - const uint32_t tmp1 = uint32_xor(val, tmp0); - const uint32_t tmp2 = uint32_and(tmp1, 0xff0000ff); - - const uint32_t 
tmp3 = uint32_sll(tmp2, 8); - const uint32_t tmp4 = uint32_xor(tmp2, tmp3); - const uint32_t tmp5 = uint32_and(tmp4, 0x0300f00f); - - const uint32_t tmp6 = uint32_sll(tmp5, 4); - const uint32_t tmp7 = uint32_xor(tmp5, tmp6); - const uint32_t tmp8 = uint32_and(tmp7, 0x030c30c3); - - const uint32_t tmp9 = uint32_sll(tmp8, 2); - const uint32_t tmpA = uint32_xor(tmp8, tmp9); - const uint32_t result = uint32_and(tmpA, 0x09249249); - - return result; - } - - inline uint32_t uint32_testpow2(uint32_t _a) - { - const uint32_t tmp0 = uint32_not(_a); - const uint32_t tmp1 = uint32_inc(tmp0); - const uint32_t tmp2 = uint32_and(_a, tmp1); - const uint32_t tmp3 = uint32_cmpeq(tmp2, _a); - const uint32_t tmp4 = uint32_cmpneq(_a, 0); - const uint32_t result = uint32_and(tmp3, tmp4); - - return result; - } - - inline uint32_t uint32_nextpow2(uint32_t _a) - { - const uint32_t tmp0 = uint32_dec(_a); - const uint32_t tmp1 = uint32_srl(tmp0, 1); - const uint32_t tmp2 = uint32_or(tmp0, tmp1); - const uint32_t tmp3 = uint32_srl(tmp2, 2); - const uint32_t tmp4 = uint32_or(tmp2, tmp3); - const uint32_t tmp5 = uint32_srl(tmp4, 4); - const uint32_t tmp6 = uint32_or(tmp4, tmp5); - const uint32_t tmp7 = uint32_srl(tmp6, 8); - const uint32_t tmp8 = uint32_or(tmp6, tmp7); - const uint32_t tmp9 = uint32_srl(tmp8, 16); - const uint32_t tmpA = uint32_or(tmp8, tmp9); - const uint32_t result = uint32_inc(tmpA); - - return result; - } - +/* + * Copyright 2010-2012 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +// Copyright 2006 Mike Acton +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE + +#ifndef __BX_UINT32_T_H__ +#define __BX_UINT32_T_H__ + +#include "bx.h" + +#if BX_COMPILER_MSVC +# if BX_PLATFORM_WINDOWS +# include // math.h is included because VS bitches: + // warning C4985: 'ceil': attributes not present on previous declaration. + // must be included before intrin.h. 
+# include +# pragma intrinsic(_BitScanForward) +# pragma intrinsic(_BitScanReverse) +# endif // BX_PLATFORM_WINDOWS +#endif // BX_COMPILER_MSVC + +namespace bx +{ + inline uint32_t uint32_li(uint32_t _a) + { + return _a; + } + + inline uint32_t uint32_dec(uint32_t _a) + { + return _a - 1; + } + + inline uint32_t uint32_inc(uint32_t _a) + { + return _a + 1; + } + + inline uint32_t uint32_not(uint32_t _a) + { + return ~_a; + } + + inline uint32_t uint32_neg(uint32_t _a) + { + return -(int32_t)_a; + } + + inline uint32_t uint32_ext(uint32_t _a) + { + return ( (int32_t)_a)>>31; + } + + inline uint32_t uint32_and(uint32_t _a, uint32_t _b) + { + return _a & _b; + } + + inline uint32_t uint32_xor(uint32_t _a, uint32_t _b) + { + return _a ^ _b; + } + + inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b) + { + return !_a != !_b; + } + + inline uint32_t uint32_andc(uint32_t _a, uint32_t _b) + { + return _a & ~_b; + } + + inline uint32_t uint32_or(uint32_t _a, uint32_t _b) + { + return _a | _b; + } + + inline uint32_t uint32_sll(uint32_t _a, int _sa) + { + return _a << _sa; + } + + inline uint32_t uint32_srl(uint32_t _a, int _sa) + { + return _a >> _sa; + } + + inline uint32_t uint32_sra(uint32_t _a, int _sa) + { + return ( (int32_t)_a) >> _sa; + } + + inline uint32_t uint32_rol(uint32_t _a, int _sa) + { + return ( _a << _sa) | (_a >> (32-_sa) ); + } + + inline uint32_t uint32_ror(uint32_t _a, int _sa) + { + return ( _a >> _sa) | (_a << (32-_sa) ); + } + + inline uint32_t uint32_add(uint32_t _a, uint32_t _b) + { + return _a + _b; + } + + inline uint32_t uint32_sub(uint32_t _a, uint32_t _b) + { + return _a - _b; + } + + inline uint32_t uint32_mul(uint32_t _a, uint32_t _b) + { + return _a * _b; + } + + inline uint32_t uint32_div(uint32_t _a, uint32_t _b) + { + return (_a / _b); + } + + inline uint32_t uint32_mod(uint32_t _a, uint32_t _b) + { + return (_a % _b); + } + + inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b) + { + return -(_a == _b); + } + + inline uint32_t 
uint32_cmpneq(uint32_t _a, uint32_t _b) + { + return -(_a != _b); + } + + inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b) + { + return -(_a < _b); + } + + inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b) + { + return -(_a <= _b); + } + + inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b) + { + return -(_a > _b); + } + + inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b) + { + return -(_a >= _b); + } + + inline uint32_t uint32_setnz(uint32_t _a) + { + return -!!_a; + } + + inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b) + { + const uint32_t add = uint32_add(_a, _b); + const uint32_t lt = uint32_cmplt(add, _a); + const uint32_t result = uint32_or(add, lt); + + return result; + } + + inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b) + { + const uint32_t sub = uint32_sub(_a, _b); + const uint32_t le = uint32_cmple(sub, _a); + const uint32_t result = uint32_and(sub, le); + + return result; + } + + inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b) + { + const uint64_t mul = (uint64_t)_a * (uint64_t)_b; + const uint32_t hi = mul >> 32; + const uint32_t nz = uint32_setnz(hi); + const uint32_t result = uint32_or(uint32_t(mul), nz); + + return result; + } + + inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b) + { + const uint32_t mask = uint32_ext(test); + const uint32_t sel_a = uint32_and(_a, mask); + const uint32_t sel_b = uint32_andc(_b, mask); + const uint32_t result = uint32_or(sel_a, sel_b); + + return (result); + } + + inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b) + { + const uint32_t sel_a = uint32_and(_a, _mask); + const uint32_t sel_b = uint32_andc(_b, _mask); + const uint32_t result = uint32_or(sel_a, sel_b); + + return (result); + } + + inline uint32_t uint32_imin(uint32_t _a, uint32_t _b) + { + const uint32_t a_sub_b = uint32_sub(_a, _b); + const uint32_t result = uint32_sels(a_sub_b, _a, _b); + + return result; + } + + inline uint32_t uint32_imax(uint32_t _a, uint32_t _b) + { + 
const uint32_t b_sub_a = uint32_sub(_b, _a); + const uint32_t result = uint32_sels(b_sub_a, _a, _b); + + return result; + } + + inline uint32_t uint32_min(uint32_t _a, uint32_t _b) + { + return _a > _b ? _b : _a; + } + + inline uint32_t uint32_max(uint32_t _a, uint32_t _b) + { + return _a > _b ? _a : _b; + } + + inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max) + { + const uint32_t inc = uint32_inc(_val); + const uint32_t max_diff = uint32_sub(_max, _val); + const uint32_t neg_max_diff = uint32_neg(max_diff); + const uint32_t max_or = uint32_or(max_diff, neg_max_diff); + const uint32_t max_diff_nz = uint32_ext(max_or); + const uint32_t result = uint32_selb(max_diff_nz, inc, _min); + + return result; + } + + inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max) + { + const uint32_t dec = uint32_dec(_val); + const uint32_t min_diff = uint32_sub(_min, _val); + const uint32_t neg_min_diff = uint32_neg(min_diff); + const uint32_t min_or = uint32_or(min_diff, neg_min_diff); + const uint32_t min_diff_nz = uint32_ext(min_or); + const uint32_t result = uint32_selb(min_diff_nz, dec, _max); + + return result; + } + + inline uint32_t uint32_cntbits_ref(uint32_t _val) + { + const uint32_t tmp0 = uint32_srl(_val, 1); + const uint32_t tmp1 = uint32_and(tmp0, 0x55555555); + const uint32_t tmp2 = uint32_sub(_val, tmp1); + const uint32_t tmp3 = uint32_and(tmp2, 0xc30c30c3); + const uint32_t tmp4 = uint32_srl(tmp2, 2); + const uint32_t tmp5 = uint32_and(tmp4, 0xc30c30c3); + const uint32_t tmp6 = uint32_srl(tmp2, 4); + const uint32_t tmp7 = uint32_and(tmp6, 0xc30c30c3); + const uint32_t tmp8 = uint32_add(tmp3, tmp5); + const uint32_t tmp9 = uint32_add(tmp7, tmp8); + const uint32_t tmpA = uint32_srl(tmp9, 6); + const uint32_t tmpB = uint32_add(tmp9, tmpA); + const uint32_t tmpC = uint32_srl(tmpB, 12); + const uint32_t tmpD = uint32_srl(tmpB, 24); + const uint32_t tmpE = uint32_add(tmpB, tmpC); + const uint32_t tmpF = uint32_add(tmpD, 
tmpE); + const uint32_t result = uint32_and(tmpF, 0x3f); + + return result; + } + + /// Count number of bits set. + inline uint32_t uint32_cntbits(uint32_t _val) + { +#if BX_COMPILER_GCC + return __builtin_popcount(_val); +#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS + return __popcnt(_val); +#else + return uint32_cntbits_ref(_val); +#endif // BX_COMPILER_GCC + } + + inline uint32_t uint32_cntlz_ref(uint32_t _val) + { + const uint32_t tmp0 = uint32_srl(_val, 1); + const uint32_t tmp1 = uint32_or(tmp0, _val); + const uint32_t tmp2 = uint32_srl(tmp1, 2); + const uint32_t tmp3 = uint32_or(tmp2, tmp1); + const uint32_t tmp4 = uint32_srl(tmp3, 4); + const uint32_t tmp5 = uint32_or(tmp4, tmp3); + const uint32_t tmp6 = uint32_srl(tmp5, 8); + const uint32_t tmp7 = uint32_or(tmp6, tmp5); + const uint32_t tmp8 = uint32_srl(tmp7, 16); + const uint32_t tmp9 = uint32_or(tmp8, tmp7); + const uint32_t tmpA = uint32_not(tmp9); + const uint32_t result = uint32_cntbits(tmpA); + + return result; + } + + /// Count number of leading zeros. 
+ inline uint32_t uint32_cntlz(uint32_t _val) + { +#if BX_COMPILER_GCC + return __builtin_clz(_val); +#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS + unsigned long index; + _BitScanReverse(&index, _val); + return 31 - index; +#else + return uint32_cntlz_ref(_val); +#endif // BX_COMPILER_ + } + + inline uint32_t uint32_cnttz_ref(uint32_t _val) + { + const uint32_t tmp0 = uint32_not(_val); + const uint32_t tmp1 = uint32_dec(_val); + const uint32_t tmp2 = uint32_and(tmp0, tmp1); + const uint32_t result = uint32_cntbits(tmp2); + + return result; + } + + inline uint32_t uint32_cnttz(uint32_t _val) + { +#if BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS + unsigned long index; + _BitScanForward(&index, _val); + return index; +#else + return uint32_cnttz_ref(_val); +#endif // BX_COMPILER_ + } + + // shuffle: + // ---- ---- ---- ---- fedc ba98 7654 3210 + // to: + // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 + inline uint32_t uint32_part1by1(uint32_t _a) + { + const uint32_t val = uint32_and(_a, 0xffff); + + const uint32_t tmp0 = uint32_sll(val, 8); + const uint32_t tmp1 = uint32_xor(val, tmp0); + const uint32_t tmp2 = uint32_and(tmp1, 0x00ff00ff); + + const uint32_t tmp3 = uint32_sll(tmp2, 4); + const uint32_t tmp4 = uint32_xor(tmp2, tmp3); + const uint32_t tmp5 = uint32_and(tmp4, 0x0f0f0f0f); + + const uint32_t tmp6 = uint32_sll(tmp5, 2); + const uint32_t tmp7 = uint32_xor(tmp5, tmp6); + const uint32_t tmp8 = uint32_and(tmp7, 0x33333333); + + const uint32_t tmp9 = uint32_sll(tmp8, 1); + const uint32_t tmpA = uint32_xor(tmp8, tmp9); + const uint32_t result = uint32_and(tmpA, 0x55555555); + + return result; + } + + // shuffle: + // ---- ---- ---- ---- ---- --98 7654 3210 + // to: + // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + inline uint32_t uint32_part1by2(uint32_t _a) + { + const uint32_t val = uint32_and(_a, 0x3ff); + + const uint32_t tmp0 = uint32_sll(val, 16); + const uint32_t tmp1 = uint32_xor(val, tmp0); + const uint32_t tmp2 = uint32_and(tmp1, 0xff0000ff); + + const uint32_t 
tmp3 = uint32_sll(tmp2, 8); + const uint32_t tmp4 = uint32_xor(tmp2, tmp3); + const uint32_t tmp5 = uint32_and(tmp4, 0x0300f00f); + + const uint32_t tmp6 = uint32_sll(tmp5, 4); + const uint32_t tmp7 = uint32_xor(tmp5, tmp6); + const uint32_t tmp8 = uint32_and(tmp7, 0x030c30c3); + + const uint32_t tmp9 = uint32_sll(tmp8, 2); + const uint32_t tmpA = uint32_xor(tmp8, tmp9); + const uint32_t result = uint32_and(tmpA, 0x09249249); + + return result; + } + + inline uint32_t uint32_testpow2(uint32_t _a) + { + const uint32_t tmp0 = uint32_not(_a); + const uint32_t tmp1 = uint32_inc(tmp0); + const uint32_t tmp2 = uint32_and(_a, tmp1); + const uint32_t tmp3 = uint32_cmpeq(tmp2, _a); + const uint32_t tmp4 = uint32_cmpneq(_a, 0); + const uint32_t result = uint32_and(tmp3, tmp4); + + return result; + } + + inline uint32_t uint32_nextpow2(uint32_t _a) + { + const uint32_t tmp0 = uint32_dec(_a); + const uint32_t tmp1 = uint32_srl(tmp0, 1); + const uint32_t tmp2 = uint32_or(tmp0, tmp1); + const uint32_t tmp3 = uint32_srl(tmp2, 2); + const uint32_t tmp4 = uint32_or(tmp2, tmp3); + const uint32_t tmp5 = uint32_srl(tmp4, 4); + const uint32_t tmp6 = uint32_or(tmp4, tmp5); + const uint32_t tmp7 = uint32_srl(tmp6, 8); + const uint32_t tmp8 = uint32_or(tmp6, tmp7); + const uint32_t tmp9 = uint32_srl(tmp8, 16); + const uint32_t tmpA = uint32_or(tmp8, tmp9); + const uint32_t result = uint32_inc(tmpA); + + return result; + } + inline uint16_t halfFromFloat(float _a) { union { uint32_t ui; float flt; } ftou; @@ -564,8 +564,8 @@ namespace bx union { uint32_t ui; float flt; } utof; utof.ui = f_result; return utof.flt; - } - -} // namespace bx - -#endif // __BX_UINT32_T_H__ + } + +} // namespace bx + +#endif // __BX_UINT32_T_H__ diff --git a/include/compat/mingw/alloca.h b/include/compat/mingw/alloca.h index 2da04de..196379c 100644 --- a/include/compat/mingw/alloca.h +++ b/include/compat/mingw/alloca.h @@ -1,6 +1,6 @@ -#ifndef __MINGW32__ALLOCA_H__ -#define __MINGW32__ALLOCA_H__ - -#include - 
-#endif // __MINGW32__ALLOCA_H__ +#ifndef __MINGW32__ALLOCA_H__ +#define __MINGW32__ALLOCA_H__ + +#include + +#endif // __MINGW32__ALLOCA_H__ diff --git a/include/compat/mingw/sal.h b/include/compat/mingw/sal.h index 26e4aec..a2165e8 100644 --- a/include/compat/mingw/sal.h +++ b/include/compat/mingw/sal.h @@ -1,253 +1,253 @@ -#pragma once - -#if __GNUC__ >=3 -#pragma GCC system_header -#endif - -//#define __null // << Conflicts with GCC internal type __null -#define __notnull -#define __maybenull -#define __readonly -#define __notreadonly -#define __maybereadonly -#define __valid -#define __notvalid -#define __maybevalid -#define __readableTo(extent) -#define __elem_readableTo(size) -#define __byte_readableTo(size) -#define __writableTo(size) -#define __elem_writableTo(size) -#define __byte_writableTo(size) -#define __deref -#define __pre -#define __post -#define __precond(expr) -#define __postcond(expr) -#define __exceptthat -#define __execeptthat -#define __inner_success(expr) -#define __inner_checkReturn -#define __inner_typefix(ctype) -#define __inner_override -#define __inner_callback -#define __inner_blocksOn(resource) -#define __inner_fallthrough_dec -#define __inner_fallthrough -#define __refparam -#define __inner_control_entrypoint(category) -#define __inner_data_entrypoint(category) - -#define __ecount(size) -#define __bcount(size) -#define __in -#define __in_ecount(size) -#define __in_bcount(size) -#define __in_z -#define __in_ecount_z(size) -#define __in_bcount_z(size) -#define __in_nz -#define __in_ecount_nz(size) -#define __in_bcount_nz(size) -#define __in_xcount_opt(size) -#define __out -#define __out_ecount(size) -#define __out_bcount(size) -#define __out_ecount_part(size,length) -#define __out_bcount_part(size,length) -#define __out_ecount_full(size) -#define __out_bcount_full(size) -#define __out_z -#define __out_z_opt -#define __out_ecount_z(size) -#define __out_bcount_z(size) -#define __out_ecount_part_z(size,length) -#define 
__out_bcount_part_z(size,length) -#define __out_ecount_full_z(size) -#define __out_bcount_full_z(size) -#define __out_nz -#define __out_nz_opt -#define __out_ecount_nz(size) -#define __out_bcount_nz(size) -#define __inout -#define __inout_ecount(size) -#define __inout_bcount(size) -#define __inout_ecount_part(size,length) -#define __inout_bcount_part(size,length) -#define __inout_ecount_full(size) -#define __inout_bcount_full(size) -#define __inout_z -#define __inout_ecount_z(size) -#define __inout_bcount_z(size) -#define __inout_nz -#define __inout_ecount_nz(size) -#define __inout_bcount_nz(size) -#define __ecount_opt(size) -#define __bcount_opt(size) -#define __in_opt -#define __in_ecount_opt(size) -#define __in_bcount_opt(size) -#define __in_z_opt -#define __in_ecount_z_opt(size) -#define __in_bcount_z_opt(size) -#define __in_nz_opt -#define __in_ecount_nz_opt(size) -#define __in_bcount_nz_opt(size) -#define __out_opt -#define __out_ecount_opt(size) -#define __out_bcount_opt(size) -#define __out_ecount_part_opt(size,length) -#define __out_bcount_part_opt(size,length) -#define __out_ecount_full_opt(size) -#define __out_bcount_full_opt(size) -#define __out_ecount_z_opt(size) -#define __out_bcount_z_opt(size) -#define __out_ecount_part_z_opt(size,length) -#define __out_bcount_part_z_opt(size,length) -#define __out_ecount_full_z_opt(size) -#define __out_bcount_full_z_opt(size) -#define __out_ecount_nz_opt(size) -#define __out_bcount_nz_opt(size) -#define __inout_opt -#define __inout_ecount_opt(size) -#define __inout_bcount_opt(size) -#define __inout_ecount_part_opt(size,length) -#define __inout_bcount_part_opt(size,length) -#define __inout_ecount_full_opt(size) -#define __inout_bcount_full_opt(size) -#define __inout_z_opt -#define __inout_ecount_z_opt(size) -#define __inout_ecount_z_opt(size) -#define __inout_bcount_z_opt(size) -#define __inout_nz_opt -#define __inout_ecount_nz_opt(size) -#define __inout_bcount_nz_opt(size) -#define __deref_ecount(size) -#define 
__deref_bcount(size) -#define __deref_out -#define __deref_out_ecount(size) -#define __deref_out_bcount(size) -#define __deref_out_ecount_part(size,length) -#define __deref_out_bcount_part(size,length) -#define __deref_out_ecount_full(size) -#define __deref_out_bcount_full(size) -#define __deref_out_z -#define __deref_out_ecount_z(size) -#define __deref_out_bcount_z(size) -#define __deref_out_nz -#define __deref_out_ecount_nz(size) -#define __deref_out_bcount_nz(size) -#define __deref_inout -#define __deref_inout_z -#define __deref_inout_ecount(size) -#define __deref_inout_bcount(size) -#define __deref_inout_ecount_part(size,length) -#define __deref_inout_bcount_part(size,length) -#define __deref_inout_ecount_full(size) -#define __deref_inout_bcount_full(size) -#define __deref_inout_z -#define __deref_inout_ecount_z(size) -#define __deref_inout_bcount_z(size) -#define __deref_inout_nz -#define __deref_inout_ecount_nz(size) -#define __deref_inout_bcount_nz(size) -#define __deref_ecount_opt(size) -#define __deref_bcount_opt(size) -#define __deref_out_opt -#define __deref_out_ecount_opt(size) -#define __deref_out_bcount_opt(size) -#define __deref_out_ecount_part_opt(size,length) -#define __deref_out_bcount_part_opt(size,length) -#define __deref_out_ecount_full_opt(size) -#define __deref_out_bcount_full_opt(size) -#define __deref_out_z_opt -#define __deref_out_ecount_z_opt(size) -#define __deref_out_bcount_z_opt(size) -#define __deref_out_nz_opt -#define __deref_out_ecount_nz_opt(size) -#define __deref_out_bcount_nz_opt(size) -#define __deref_inout_opt -#define __deref_inout_ecount_opt(size) -#define __deref_inout_bcount_opt(size) -#define __deref_inout_ecount_part_opt(size,length) -#define __deref_inout_bcount_part_opt(size,length) -#define __deref_inout_ecount_full_opt(size) -#define __deref_inout_bcount_full_opt(size) -#define __deref_inout_z_opt -#define __deref_inout_ecount_z_opt(size) -#define __deref_inout_bcount_z_opt(size) -#define __deref_inout_nz_opt 
-#define __deref_inout_ecount_nz_opt(size) -#define __deref_inout_bcount_nz_opt(size) -#define __deref_opt_ecount(size) -#define __deref_opt_bcount(size) -#define __deref_opt_out -#define __deref_opt_out_z -#define __deref_opt_out_ecount(size) -#define __deref_opt_out_bcount(size) -#define __deref_opt_out_ecount_part(size,length) -#define __deref_opt_out_bcount_part(size,length) -#define __deref_opt_out_ecount_full(size) -#define __deref_opt_out_bcount_full(size) -#define __deref_opt_inout -#define __deref_opt_inout_ecount(size) -#define __deref_opt_inout_bcount(size) -#define __deref_opt_inout_ecount_part(size,length) -#define __deref_opt_inout_bcount_part(size,length) -#define __deref_opt_inout_ecount_full(size) -#define __deref_opt_inout_bcount_full(size) -#define __deref_opt_inout_z -#define __deref_opt_inout_ecount_z(size) -#define __deref_opt_inout_bcount_z(size) -#define __deref_opt_inout_nz -#define __deref_opt_inout_ecount_nz(size) -#define __deref_opt_inout_bcount_nz(size) -#define __deref_opt_ecount_opt(size) -#define __deref_opt_bcount_opt(size) -#define __deref_opt_out_opt -#define __deref_opt_out_ecount_opt(size) -#define __deref_opt_out_bcount_opt(size) -#define __deref_opt_out_ecount_part_opt(size,length) -#define __deref_opt_out_bcount_part_opt(size,length) -#define __deref_opt_out_ecount_full_opt(size) -#define __deref_opt_out_bcount_full_opt(size) -#define __deref_opt_out_z_opt -#define __deref_opt_out_ecount_z_opt(size) -#define __deref_opt_out_bcount_z_opt(size) -#define __deref_opt_out_nz_opt -#define __deref_opt_out_ecount_nz_opt(size) -#define __deref_opt_out_bcount_nz_opt(size) -#define __deref_opt_inout_opt -#define __deref_opt_inout_ecount_opt(size) -#define __deref_opt_inout_bcount_opt(size) -#define __deref_opt_inout_ecount_part_opt(size,length) -#define __deref_opt_inout_bcount_part_opt(size,length) -#define __deref_opt_inout_ecount_full_opt(size) -#define __deref_opt_inout_bcount_full_opt(size) -#define __deref_opt_inout_z_opt 
-#define __deref_opt_inout_ecount_z_opt(size) -#define __deref_opt_inout_bcount_z_opt(size) -#define __deref_opt_inout_nz_opt -#define __deref_opt_inout_ecount_nz_opt(size) -#define __deref_opt_inout_bcount_nz_opt(size) - -#define __success(expr) -#define __nullterminated -#define __nullnullterminated -#define __reserved -#define __checkReturn -#define __typefix(ctype) -#define __override -#define __callback -#define __format_string -#define __blocksOn(resource) -#define __control_entrypoint(category) -#define __data_entrypoint(category) - -#ifndef __fallthrough - #define __fallthrough __inner_fallthrough -#endif - -#ifndef __analysis_assume - #define __analysis_assume(expr) -#endif +#pragma once + +#if __GNUC__ >=3 +#pragma GCC system_header +#endif + +//#define __null // << Conflicts with GCC internal type __null +#define __notnull +#define __maybenull +#define __readonly +#define __notreadonly +#define __maybereadonly +#define __valid +#define __notvalid +#define __maybevalid +#define __readableTo(extent) +#define __elem_readableTo(size) +#define __byte_readableTo(size) +#define __writableTo(size) +#define __elem_writableTo(size) +#define __byte_writableTo(size) +#define __deref +#define __pre +#define __post +#define __precond(expr) +#define __postcond(expr) +#define __exceptthat +#define __execeptthat +#define __inner_success(expr) +#define __inner_checkReturn +#define __inner_typefix(ctype) +#define __inner_override +#define __inner_callback +#define __inner_blocksOn(resource) +#define __inner_fallthrough_dec +#define __inner_fallthrough +#define __refparam +#define __inner_control_entrypoint(category) +#define __inner_data_entrypoint(category) + +#define __ecount(size) +#define __bcount(size) +#define __in +#define __in_ecount(size) +#define __in_bcount(size) +#define __in_z +#define __in_ecount_z(size) +#define __in_bcount_z(size) +#define __in_nz +#define __in_ecount_nz(size) +#define __in_bcount_nz(size) +#define __in_xcount_opt(size) +#define __out 
+#define __out_ecount(size) +#define __out_bcount(size) +#define __out_ecount_part(size,length) +#define __out_bcount_part(size,length) +#define __out_ecount_full(size) +#define __out_bcount_full(size) +#define __out_z +#define __out_z_opt +#define __out_ecount_z(size) +#define __out_bcount_z(size) +#define __out_ecount_part_z(size,length) +#define __out_bcount_part_z(size,length) +#define __out_ecount_full_z(size) +#define __out_bcount_full_z(size) +#define __out_nz +#define __out_nz_opt +#define __out_ecount_nz(size) +#define __out_bcount_nz(size) +#define __inout +#define __inout_ecount(size) +#define __inout_bcount(size) +#define __inout_ecount_part(size,length) +#define __inout_bcount_part(size,length) +#define __inout_ecount_full(size) +#define __inout_bcount_full(size) +#define __inout_z +#define __inout_ecount_z(size) +#define __inout_bcount_z(size) +#define __inout_nz +#define __inout_ecount_nz(size) +#define __inout_bcount_nz(size) +#define __ecount_opt(size) +#define __bcount_opt(size) +#define __in_opt +#define __in_ecount_opt(size) +#define __in_bcount_opt(size) +#define __in_z_opt +#define __in_ecount_z_opt(size) +#define __in_bcount_z_opt(size) +#define __in_nz_opt +#define __in_ecount_nz_opt(size) +#define __in_bcount_nz_opt(size) +#define __out_opt +#define __out_ecount_opt(size) +#define __out_bcount_opt(size) +#define __out_ecount_part_opt(size,length) +#define __out_bcount_part_opt(size,length) +#define __out_ecount_full_opt(size) +#define __out_bcount_full_opt(size) +#define __out_ecount_z_opt(size) +#define __out_bcount_z_opt(size) +#define __out_ecount_part_z_opt(size,length) +#define __out_bcount_part_z_opt(size,length) +#define __out_ecount_full_z_opt(size) +#define __out_bcount_full_z_opt(size) +#define __out_ecount_nz_opt(size) +#define __out_bcount_nz_opt(size) +#define __inout_opt +#define __inout_ecount_opt(size) +#define __inout_bcount_opt(size) +#define __inout_ecount_part_opt(size,length) +#define 
__inout_bcount_part_opt(size,length) +#define __inout_ecount_full_opt(size) +#define __inout_bcount_full_opt(size) +#define __inout_z_opt +#define __inout_ecount_z_opt(size) +#define __inout_ecount_z_opt(size) +#define __inout_bcount_z_opt(size) +#define __inout_nz_opt +#define __inout_ecount_nz_opt(size) +#define __inout_bcount_nz_opt(size) +#define __deref_ecount(size) +#define __deref_bcount(size) +#define __deref_out +#define __deref_out_ecount(size) +#define __deref_out_bcount(size) +#define __deref_out_ecount_part(size,length) +#define __deref_out_bcount_part(size,length) +#define __deref_out_ecount_full(size) +#define __deref_out_bcount_full(size) +#define __deref_out_z +#define __deref_out_ecount_z(size) +#define __deref_out_bcount_z(size) +#define __deref_out_nz +#define __deref_out_ecount_nz(size) +#define __deref_out_bcount_nz(size) +#define __deref_inout +#define __deref_inout_z +#define __deref_inout_ecount(size) +#define __deref_inout_bcount(size) +#define __deref_inout_ecount_part(size,length) +#define __deref_inout_bcount_part(size,length) +#define __deref_inout_ecount_full(size) +#define __deref_inout_bcount_full(size) +#define __deref_inout_z +#define __deref_inout_ecount_z(size) +#define __deref_inout_bcount_z(size) +#define __deref_inout_nz +#define __deref_inout_ecount_nz(size) +#define __deref_inout_bcount_nz(size) +#define __deref_ecount_opt(size) +#define __deref_bcount_opt(size) +#define __deref_out_opt +#define __deref_out_ecount_opt(size) +#define __deref_out_bcount_opt(size) +#define __deref_out_ecount_part_opt(size,length) +#define __deref_out_bcount_part_opt(size,length) +#define __deref_out_ecount_full_opt(size) +#define __deref_out_bcount_full_opt(size) +#define __deref_out_z_opt +#define __deref_out_ecount_z_opt(size) +#define __deref_out_bcount_z_opt(size) +#define __deref_out_nz_opt +#define __deref_out_ecount_nz_opt(size) +#define __deref_out_bcount_nz_opt(size) +#define __deref_inout_opt +#define __deref_inout_ecount_opt(size) 
+#define __deref_inout_bcount_opt(size) +#define __deref_inout_ecount_part_opt(size,length) +#define __deref_inout_bcount_part_opt(size,length) +#define __deref_inout_ecount_full_opt(size) +#define __deref_inout_bcount_full_opt(size) +#define __deref_inout_z_opt +#define __deref_inout_ecount_z_opt(size) +#define __deref_inout_bcount_z_opt(size) +#define __deref_inout_nz_opt +#define __deref_inout_ecount_nz_opt(size) +#define __deref_inout_bcount_nz_opt(size) +#define __deref_opt_ecount(size) +#define __deref_opt_bcount(size) +#define __deref_opt_out +#define __deref_opt_out_z +#define __deref_opt_out_ecount(size) +#define __deref_opt_out_bcount(size) +#define __deref_opt_out_ecount_part(size,length) +#define __deref_opt_out_bcount_part(size,length) +#define __deref_opt_out_ecount_full(size) +#define __deref_opt_out_bcount_full(size) +#define __deref_opt_inout +#define __deref_opt_inout_ecount(size) +#define __deref_opt_inout_bcount(size) +#define __deref_opt_inout_ecount_part(size,length) +#define __deref_opt_inout_bcount_part(size,length) +#define __deref_opt_inout_ecount_full(size) +#define __deref_opt_inout_bcount_full(size) +#define __deref_opt_inout_z +#define __deref_opt_inout_ecount_z(size) +#define __deref_opt_inout_bcount_z(size) +#define __deref_opt_inout_nz +#define __deref_opt_inout_ecount_nz(size) +#define __deref_opt_inout_bcount_nz(size) +#define __deref_opt_ecount_opt(size) +#define __deref_opt_bcount_opt(size) +#define __deref_opt_out_opt +#define __deref_opt_out_ecount_opt(size) +#define __deref_opt_out_bcount_opt(size) +#define __deref_opt_out_ecount_part_opt(size,length) +#define __deref_opt_out_bcount_part_opt(size,length) +#define __deref_opt_out_ecount_full_opt(size) +#define __deref_opt_out_bcount_full_opt(size) +#define __deref_opt_out_z_opt +#define __deref_opt_out_ecount_z_opt(size) +#define __deref_opt_out_bcount_z_opt(size) +#define __deref_opt_out_nz_opt +#define __deref_opt_out_ecount_nz_opt(size) +#define 
__deref_opt_out_bcount_nz_opt(size) +#define __deref_opt_inout_opt +#define __deref_opt_inout_ecount_opt(size) +#define __deref_opt_inout_bcount_opt(size) +#define __deref_opt_inout_ecount_part_opt(size,length) +#define __deref_opt_inout_bcount_part_opt(size,length) +#define __deref_opt_inout_ecount_full_opt(size) +#define __deref_opt_inout_bcount_full_opt(size) +#define __deref_opt_inout_z_opt +#define __deref_opt_inout_ecount_z_opt(size) +#define __deref_opt_inout_bcount_z_opt(size) +#define __deref_opt_inout_nz_opt +#define __deref_opt_inout_ecount_nz_opt(size) +#define __deref_opt_inout_bcount_nz_opt(size) + +#define __success(expr) +#define __nullterminated +#define __nullnullterminated +#define __reserved +#define __checkReturn +#define __typefix(ctype) +#define __override +#define __callback +#define __format_string +#define __blocksOn(resource) +#define __control_entrypoint(category) +#define __data_entrypoint(category) + +#ifndef __fallthrough + #define __fallthrough __inner_fallthrough +#endif + +#ifndef __analysis_assume + #define __analysis_assume(expr) +#endif diff --git a/include/compat/mingw/specstrings_strict.h b/include/compat/mingw/specstrings_strict.h index 982dd20..bb2b90c 100644 --- a/include/compat/mingw/specstrings_strict.h +++ b/include/compat/mingw/specstrings_strict.h @@ -1 +1 @@ -#define __reserved +#define __reserved diff --git a/include/compat/mingw/specstrings_undef.h b/include/compat/mingw/specstrings_undef.h index 69d4315..82ed3f7 100644 --- a/include/compat/mingw/specstrings_undef.h +++ b/include/compat/mingw/specstrings_undef.h @@ -1,2 +1,2 @@ -#undef __reserved - +#undef __reserved + diff --git a/include/compat/msvc/alloca.h b/include/compat/msvc/alloca.h index f8fa6f1..c0d7985 100644 --- a/include/compat/msvc/alloca.h +++ b/include/compat/msvc/alloca.h @@ -1 +1 @@ -#include +#include diff --git a/include/compat/msvc/inttypes.h b/include/compat/msvc/inttypes.h index 2554277..4b3828a 100644 --- a/include/compat/msvc/inttypes.h +++ 
b/include/compat/msvc/inttypes.h @@ -1,305 +1,305 @@ -// ISO C9x compliant inttypes.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// -// Copyright (c) 2006 Alexander Chemeris -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. The name of the author may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" 
-#endif // _MSC_VER ] - -#ifndef _MSC_INTTYPES_H_ // [ -#define _MSC_INTTYPES_H_ - -#if _MSC_VER > 1000 -#pragma once -#endif - -#include "stdint.h" - -// 7.8 Format conversion of integer types - -typedef struct { - intmax_t quot; - intmax_t rem; -} imaxdiv_t; - -// 7.8.1 Macros for format specifiers - -#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 - -// The fprintf macros for signed integers are: -#define PRId8 "d" -#define PRIi8 "i" -#define PRIdLEAST8 "d" -#define PRIiLEAST8 "i" -#define PRIdFAST8 "d" -#define PRIiFAST8 "i" - -#define PRId16 "hd" -#define PRIi16 "hi" -#define PRIdLEAST16 "hd" -#define PRIiLEAST16 "hi" -#define PRIdFAST16 "hd" -#define PRIiFAST16 "hi" - -#define PRId32 "I32d" -#define PRIi32 "I32i" -#define PRIdLEAST32 "I32d" -#define PRIiLEAST32 "I32i" -#define PRIdFAST32 "I32d" -#define PRIiFAST32 "I32i" - -#define PRId64 "I64d" -#define PRIi64 "I64i" -#define PRIdLEAST64 "I64d" -#define PRIiLEAST64 "I64i" -#define PRIdFAST64 "I64d" -#define PRIiFAST64 "I64i" - -#define PRIdMAX "I64d" -#define PRIiMAX "I64i" - -#define PRIdPTR "Id" -#define PRIiPTR "Ii" - -// The fprintf macros for unsigned integers are: -#define PRIo8 "o" -#define PRIu8 "u" -#define PRIx8 "x" -#define PRIX8 "X" -#define PRIoLEAST8 "o" -#define PRIuLEAST8 "u" -#define PRIxLEAST8 "x" -#define PRIXLEAST8 "X" -#define PRIoFAST8 "o" -#define PRIuFAST8 "u" -#define PRIxFAST8 "x" -#define PRIXFAST8 "X" - -#define PRIo16 "ho" -#define PRIu16 "hu" -#define PRIx16 "hx" -#define PRIX16 "hX" -#define PRIoLEAST16 "ho" -#define PRIuLEAST16 "hu" -#define PRIxLEAST16 "hx" -#define PRIXLEAST16 "hX" -#define PRIoFAST16 "ho" -#define PRIuFAST16 "hu" -#define PRIxFAST16 "hx" -#define PRIXFAST16 "hX" - -#define PRIo32 "I32o" -#define PRIu32 "I32u" -#define PRIx32 "I32x" -#define PRIX32 "I32X" -#define PRIoLEAST32 "I32o" -#define PRIuLEAST32 "I32u" -#define PRIxLEAST32 "I32x" -#define PRIXLEAST32 "I32X" -#define PRIoFAST32 "I32o" -#define PRIuFAST32 
"I32u" -#define PRIxFAST32 "I32x" -#define PRIXFAST32 "I32X" - -#define PRIo64 "I64o" -#define PRIu64 "I64u" -#define PRIx64 "I64x" -#define PRIX64 "I64X" -#define PRIoLEAST64 "I64o" -#define PRIuLEAST64 "I64u" -#define PRIxLEAST64 "I64x" -#define PRIXLEAST64 "I64X" -#define PRIoFAST64 "I64o" -#define PRIuFAST64 "I64u" -#define PRIxFAST64 "I64x" -#define PRIXFAST64 "I64X" - -#define PRIoMAX "I64o" -#define PRIuMAX "I64u" -#define PRIxMAX "I64x" -#define PRIXMAX "I64X" - -#define PRIoPTR "Io" -#define PRIuPTR "Iu" -#define PRIxPTR "Ix" -#define PRIXPTR "IX" - -// The fscanf macros for signed integers are: -#define SCNd8 "d" -#define SCNi8 "i" -#define SCNdLEAST8 "d" -#define SCNiLEAST8 "i" -#define SCNdFAST8 "d" -#define SCNiFAST8 "i" - -#define SCNd16 "hd" -#define SCNi16 "hi" -#define SCNdLEAST16 "hd" -#define SCNiLEAST16 "hi" -#define SCNdFAST16 "hd" -#define SCNiFAST16 "hi" - -#define SCNd32 "ld" -#define SCNi32 "li" -#define SCNdLEAST32 "ld" -#define SCNiLEAST32 "li" -#define SCNdFAST32 "ld" -#define SCNiFAST32 "li" - -#define SCNd64 "I64d" -#define SCNi64 "I64i" -#define SCNdLEAST64 "I64d" -#define SCNiLEAST64 "I64i" -#define SCNdFAST64 "I64d" -#define SCNiFAST64 "I64i" - -#define SCNdMAX "I64d" -#define SCNiMAX "I64i" - -#ifdef _WIN64 // [ -# define SCNdPTR "I64d" -# define SCNiPTR "I64i" -#else // _WIN64 ][ -# define SCNdPTR "ld" -# define SCNiPTR "li" -#endif // _WIN64 ] - -// The fscanf macros for unsigned integers are: -#define SCNo8 "o" -#define SCNu8 "u" -#define SCNx8 "x" -#define SCNX8 "X" -#define SCNoLEAST8 "o" -#define SCNuLEAST8 "u" -#define SCNxLEAST8 "x" -#define SCNXLEAST8 "X" -#define SCNoFAST8 "o" -#define SCNuFAST8 "u" -#define SCNxFAST8 "x" -#define SCNXFAST8 "X" - -#define SCNo16 "ho" -#define SCNu16 "hu" -#define SCNx16 "hx" -#define SCNX16 "hX" -#define SCNoLEAST16 "ho" -#define SCNuLEAST16 "hu" -#define SCNxLEAST16 "hx" -#define SCNXLEAST16 "hX" -#define SCNoFAST16 "ho" -#define SCNuFAST16 "hu" -#define SCNxFAST16 "hx" -#define 
SCNXFAST16 "hX" - -#define SCNo32 "lo" -#define SCNu32 "lu" -#define SCNx32 "lx" -#define SCNX32 "lX" -#define SCNoLEAST32 "lo" -#define SCNuLEAST32 "lu" -#define SCNxLEAST32 "lx" -#define SCNXLEAST32 "lX" -#define SCNoFAST32 "lo" -#define SCNuFAST32 "lu" -#define SCNxFAST32 "lx" -#define SCNXFAST32 "lX" - -#define SCNo64 "I64o" -#define SCNu64 "I64u" -#define SCNx64 "I64x" -#define SCNX64 "I64X" -#define SCNoLEAST64 "I64o" -#define SCNuLEAST64 "I64u" -#define SCNxLEAST64 "I64x" -#define SCNXLEAST64 "I64X" -#define SCNoFAST64 "I64o" -#define SCNuFAST64 "I64u" -#define SCNxFAST64 "I64x" -#define SCNXFAST64 "I64X" - -#define SCNoMAX "I64o" -#define SCNuMAX "I64u" -#define SCNxMAX "I64x" -#define SCNXMAX "I64X" - -#ifdef _WIN64 // [ -# define SCNoPTR "I64o" -# define SCNuPTR "I64u" -# define SCNxPTR "I64x" -# define SCNXPTR "I64X" -#else // _WIN64 ][ -# define SCNoPTR "lo" -# define SCNuPTR "lu" -# define SCNxPTR "lx" -# define SCNXPTR "lX" -#endif // _WIN64 ] - -#endif // __STDC_FORMAT_MACROS ] - -// 7.8.2 Functions for greatest-width integer types - -// 7.8.2.1 The imaxabs function -#define imaxabs _abs64 - -// 7.8.2.2 The imaxdiv function - -// This is modified version of div() function from Microsoft's div.c found -// in %MSVC.NET%\crt\src\div.c -#ifdef STATIC_IMAXDIV // [ -static -#else // STATIC_IMAXDIV ][ -_inline -#endif // STATIC_IMAXDIV ] -imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) -{ - imaxdiv_t result; - - result.quot = numer / denom; - result.rem = numer % denom; - - if (numer < 0 && result.rem > 0) { - // did division wrong; must fix up - ++result.quot; - result.rem -= denom; - } - - return result; -} - -// 7.8.2.3 The strtoimax and strtoumax functions -#define strtoimax _strtoi64 -#define strtoumax _strtoui64 - -// 7.8.2.4 The wcstoimax and wcstoumax functions -#define wcstoimax _wcstoi64 -#define wcstoumax _wcstoui64 - - -#endif // _MSC_INTTYPES_H_ ] +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 
9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 
"I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define 
SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/include/compat/msvc/stdint.h b/include/compat/msvc/stdint.h index 
59d0673..d02608a 100644 --- a/include/compat/msvc/stdint.h +++ b/include/compat/msvc/stdint.h @@ -1,247 +1,247 @@ -// ISO C9x compliant stdint.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// -// Copyright (c) 2006-2008 Alexander Chemeris -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. The name of the author may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" 
-#endif // _MSC_VER ] - -#ifndef _MSC_STDINT_H_ // [ -#define _MSC_STDINT_H_ - -#if _MSC_VER > 1000 -#pragma once -#endif - -#include - -// For Visual Studio 6 in C++ mode and for many Visual Studio versions when -// compiling for ARM we should wrap include with 'extern "C++" {}' -// or compiler give many errors like this: -// error C2733: second C linkage of overloaded function 'wmemchr' not allowed -#ifdef __cplusplus -extern "C" { -#endif -# include -#ifdef __cplusplus -} -#endif - -// Define _W64 macros to mark types changing their size, like intptr_t. -#ifndef _W64 -# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 -# define _W64 __w64 -# else -# define _W64 -# endif -#endif - - -// 7.18.1 Integer types - -// 7.18.1.1 Exact-width integer types - -// Visual Studio 6 and Embedded Visual C++ 4 doesn't -// realize that, e.g. char has the same size as __int8 -// so we give up on __intX for them. -#if (_MSC_VER < 1300) - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; -#else - typedef signed __int8 int8_t; - typedef signed __int16 int16_t; - typedef signed __int32 int32_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -#endif -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - - -// 7.18.1.2 Minimum-width integer types -typedef int8_t int_least8_t; -typedef int16_t int_least16_t; -typedef int32_t int_least32_t; -typedef int64_t int_least64_t; -typedef uint8_t uint_least8_t; -typedef uint16_t uint_least16_t; -typedef uint32_t uint_least32_t; -typedef uint64_t uint_least64_t; - -// 7.18.1.3 Fastest minimum-width integer types -typedef int8_t int_fast8_t; -typedef int16_t int_fast16_t; -typedef int32_t int_fast32_t; -typedef int64_t int_fast64_t; -typedef uint8_t uint_fast8_t; -typedef uint16_t uint_fast16_t; 
-typedef uint32_t uint_fast32_t; -typedef uint64_t uint_fast64_t; - -// 7.18.1.4 Integer types capable of holding object pointers -#ifdef _WIN64 // [ - typedef signed __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -#else // _WIN64 ][ - typedef _W64 signed int intptr_t; - typedef _W64 unsigned int uintptr_t; -#endif // _WIN64 ] - -// 7.18.1.5 Greatest-width integer types -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; - - -// 7.18.2 Limits of specified-width integer types - -#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 - -// 7.18.2.1 Limits of exact-width integer types -#define INT8_MIN ((int8_t)_I8_MIN) -#define INT8_MAX _I8_MAX -#define INT16_MIN ((int16_t)_I16_MIN) -#define INT16_MAX _I16_MAX -#define INT32_MIN ((int32_t)_I32_MIN) -#define INT32_MAX _I32_MAX -#define INT64_MIN ((int64_t)_I64_MIN) -#define INT64_MAX _I64_MAX -#define UINT8_MAX _UI8_MAX -#define UINT16_MAX _UI16_MAX -#define UINT32_MAX _UI32_MAX -#define UINT64_MAX _UI64_MAX - -// 7.18.2.2 Limits of minimum-width integer types -#define INT_LEAST8_MIN INT8_MIN -#define INT_LEAST8_MAX INT8_MAX -#define INT_LEAST16_MIN INT16_MIN -#define INT_LEAST16_MAX INT16_MAX -#define INT_LEAST32_MIN INT32_MIN -#define INT_LEAST32_MAX INT32_MAX -#define INT_LEAST64_MIN INT64_MIN -#define INT_LEAST64_MAX INT64_MAX -#define UINT_LEAST8_MAX UINT8_MAX -#define UINT_LEAST16_MAX UINT16_MAX -#define UINT_LEAST32_MAX UINT32_MAX -#define UINT_LEAST64_MAX UINT64_MAX - -// 7.18.2.3 Limits of fastest minimum-width integer types -#define INT_FAST8_MIN INT8_MIN -#define INT_FAST8_MAX INT8_MAX -#define INT_FAST16_MIN INT16_MIN -#define INT_FAST16_MAX INT16_MAX -#define INT_FAST32_MIN INT32_MIN -#define INT_FAST32_MAX INT32_MAX -#define INT_FAST64_MIN INT64_MIN -#define INT_FAST64_MAX INT64_MAX -#define UINT_FAST8_MAX UINT8_MAX -#define UINT_FAST16_MAX UINT16_MAX -#define UINT_FAST32_MAX UINT32_MAX -#define UINT_FAST64_MAX UINT64_MAX - 
-// 7.18.2.4 Limits of integer types capable of holding object pointers -#ifdef _WIN64 // [ -# define INTPTR_MIN INT64_MIN -# define INTPTR_MAX INT64_MAX -# define UINTPTR_MAX UINT64_MAX -#else // _WIN64 ][ -# define INTPTR_MIN INT32_MIN -# define INTPTR_MAX INT32_MAX -# define UINTPTR_MAX UINT32_MAX -#endif // _WIN64 ] - -// 7.18.2.5 Limits of greatest-width integer types -#define INTMAX_MIN INT64_MIN -#define INTMAX_MAX INT64_MAX -#define UINTMAX_MAX UINT64_MAX - -// 7.18.3 Limits of other integer types - -#ifdef _WIN64 // [ -# define PTRDIFF_MIN _I64_MIN -# define PTRDIFF_MAX _I64_MAX -#else // _WIN64 ][ -# define PTRDIFF_MIN _I32_MIN -# define PTRDIFF_MAX _I32_MAX -#endif // _WIN64 ] - -#define SIG_ATOMIC_MIN INT_MIN -#define SIG_ATOMIC_MAX INT_MAX - -#ifndef SIZE_MAX // [ -# ifdef _WIN64 // [ -# define SIZE_MAX _UI64_MAX -# else // _WIN64 ][ -# define SIZE_MAX _UI32_MAX -# endif // _WIN64 ] -#endif // SIZE_MAX ] - -// WCHAR_MIN and WCHAR_MAX are also defined in -#ifndef WCHAR_MIN // [ -# define WCHAR_MIN 0 -#endif // WCHAR_MIN ] -#ifndef WCHAR_MAX // [ -# define WCHAR_MAX _UI16_MAX -#endif // WCHAR_MAX ] - -#define WINT_MIN 0 -#define WINT_MAX _UI16_MAX - -#endif // __STDC_LIMIT_MACROS ] - - -// 7.18.4 Limits of other integer types - -#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 - -// 7.18.4.1 Macros for minimum-width integer constants - -#define INT8_C(val) val##i8 -#define INT16_C(val) val##i16 -#define INT32_C(val) val##i32 -#define INT64_C(val) val##i64 - -#define UINT8_C(val) val##ui8 -#define UINT16_C(val) val##ui16 -#define UINT32_C(val) val##ui32 -#define UINT64_C(val) val##ui64 - -// 7.18.4.2 Macros for greatest-width integer constants -#define INTMAX_C INT64_C -#define UINTMAX_C UINT64_C - -#endif // __STDC_CONSTANT_MACROS ] - - -#endif // _MSC_STDINT_H_ ] +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright 
(c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; 
+typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + 
+// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in wchar.h +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Macros for integer constants + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/include/compat/nacl/memory.h b/include/compat/nacl/memory.h index 8e69c1f..3b2f590 100644 --- a/include/compat/nacl/memory.h +++ 
b/include/compat/nacl/memory.h @@ -1 +1 @@ -#include +#include diff --git a/premake/bx.lua b/premake/bx.lua index 53d1cb8..ce1fd0d 100644 --- a/premake/bx.lua +++ b/premake/bx.lua @@ -1,7 +1,7 @@ -project "bx" - uuid "4db0b09e-d6df-11e1-a0ec-65ccdd6a022f" - kind "StaticLib" - - files { - "../include/**.h", - } +project "bx" + uuid "4db0b09e-d6df-11e1-a0ec-65ccdd6a022f" + kind "StaticLib" + + files { + "../include/**.h", + }