diff --git a/include/bx/simd256_avx.inl b/include/bx/simd256_avx.inl index 905a6cc..33f5af7 100644 --- a/include/bx/simd256_avx.inl +++ b/include/bx/simd256_avx.inl @@ -1,9 +1,45 @@ -/* - * Copyright 2010-2016 Branimir Karadzic. All rights reserved. - * License: https://github.com/bkaradzic/bx#license-bsd-2-clause - */ - -#ifndef BX_SIMD256_AVX_H_HEADER_GUARD -#define BX_SIMD256_AVX_H_HEADER_GUARD - -#endif // BX_SIMD256_AVX_H_HEADER_GUARD +/* + * Copyright 2010-2016 Branimir Karadzic. All rights reserved. + * License: https://github.com/bkaradzic/bx#license-bsd-2-clause + */ + +#ifndef BX_SIMD256_AVX_H_HEADER_GUARD +#define BX_SIMD256_AVX_H_HEADER_GUARD + +#include "simd_ni.inl" + +namespace bx +{ + + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t simd_ld(const void* _ptr) + { + return _mm256_load_ps(reinterpret_cast(_ptr) ); + } + + template<> + BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_avx_t _a) + { + _mm256_store_ps(reinterpret_cast(_ptr), _a); + } + + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t simd_ld(float _x, float _y, float _z, float _w, float _A, float _B, float _C, float _D) + { + return _mm256_set_ps(_D, _C, _B, _A, _w, _z, _y, _x); + } + + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _A, uint32_t _B, uint32_t _C, uint32_t _D) + { + const __m256i set = _mm256_set_epi32(_D, _C, _B, _A, _w, _z, _y, _x); + const simd256_avx_t result = _mm256_castsi256_ps(set); + + return result; + } + + typedef simd256_avx_t simd256_t; + +} // namespace bx + +#endif // BX_SIMD256_AVX_H_HEADER_GUARD diff --git a/include/bx/simd256_ref.inl b/include/bx/simd256_ref.inl index b05ec3e..15e2754 100644 --- a/include/bx/simd256_ref.inl +++ b/include/bx/simd256_ref.inl @@ -1,9 +1,51 @@ -/* - * Copyright 2010-2016 Branimir Karadzic. All rights reserved. - * License: https://github.com/bkaradzic/bx#license-bsd-2-clause - */ - -#ifndef BX_SIMD256_REF_H_HEADER_GUARD -#define BX_SIMD256_REF_H_HEADER_GUARD - -#endif // BX_SIMD256_REF_H_HEADER_GUARD +/* + * Copyright 2010-2016 Branimir Karadzic. All rights reserved. + * License: https://github.com/bkaradzic/bx#license-bsd-2-clause + */ + +#ifndef BX_SIMD256_REF_H_HEADER_GUARD +#define BX_SIMD256_REF_H_HEADER_GUARD + +#include "simd_ni.inl" + +namespace bx +{ + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr) + { + const simd128_t* ptr = reinterpret_cast(_ptr); + simd256_ref_t result; + result.simd128[0] = simd_ld(&ptr[0]); + result.simd128[1] = simd_ld(&ptr[1]); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t _a) + { + simd128_t* result = reinterpret_cast(_ptr); + simd_st(&result[0], _a.simd128[0]); + simd_st(&result[1], _a.simd128[1]); + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _A, float _B, float _C, float _D) + { + simd256_ref_t result; + result.simd128[0] = simd_ld(_x, _y, _z, _w); + result.simd128[1] = simd_ld(_A, _B, _C, _D); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _A, uint32_t _B, uint32_t _C, uint32_t _D) + { + simd256_ref_t result; + result.simd128[0] = simd_ild(_x, _y, _z, _w); + result.simd128[1] = simd_ild(_A, _B, _C, _D); + return result; + } + +} // namespace bx + +#endif // BX_SIMD256_REF_H_HEADER_GUARD diff --git a/include/bx/simd_t.h b/include/bx/simd_t.h index a2884f6..c10a337 100644 --- a/include/bx/simd_t.h +++ b/include/bx/simd_t.h @@ -135,9 +135,15 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw); template BX_SIMD_FORCE_INLINE Ty simd_ld(float _x, float _y, float _z, float _w); + template + BX_SIMD_FORCE_INLINE Ty simd_ld(float _x, float _y, float _z, float _w, float _A, float _B, float _C, float _D); + template BX_SIMD_FORCE_INLINE Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w); + template + BX_SIMD_FORCE_INLINE Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _A, uint32_t _B, uint32_t _C, uint32_t _D); + template BX_SIMD_FORCE_INLINE Ty simd_splat(const void* _ptr); @@ -352,14 +358,6 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw); typedef __m128 simd128_sse_t; #endif // BX_SIMD_SSE - union simd128_ref_t - { - float fxyzw[4]; - int32_t ixyzw[4]; - uint32_t uxyzw[4]; - - }; - } // namespace bx #if BX_SIMD_AVX @@ -378,27 +376,50 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw); # include "simd128_sse.inl" #endif // BX_SIMD_SSE -#include "simd128_ref.inl" -#include "simd256_ref.inl" - namespace bx { -#if !( BX_SIMD_AVX \ - || BX_SIMD_LANGEXT \ + union simd128_ref_t + { + float fxyzw[4]; + int32_t ixyzw[4]; + uint32_t uxyzw[4]; + }; + +#ifndef BX_SIMD_WARN_REFERENCE_IMPL +# define BX_SIMD_WARN_REFERENCE_IMPL 0 +#endif // BX_SIMD_WARN_REFERENCE_IMPL + +#if !( BX_SIMD_LANGEXT \ || BX_SIMD_NEON \ || BX_SIMD_SSE \ ) -# ifndef BX_SIMD_WARN_REFERENCE_IMPL -# define BX_SIMD_WARN_REFERENCE_IMPL 0 -# endif // BX_SIMD_WARN_REFERENCE_IMPL - # if BX_SIMD_WARN_REFERENCE_IMPL -# pragma message("************************************\nUsing SIMD reference implementation!\n************************************") +# pragma message("*** Using SIMD128 reference implementation! ***") # endif // BX_SIMD_WARN_REFERENCE_IMPL typedef simd128_ref_t simd128_t; #endif // + union simd256_ref_t + { + simd128_t simd128[2]; + }; + +#if !BX_SIMD_AVX +# if BX_SIMD_WARN_REFERENCE_IMPL +# pragma message("*** Using SIMD256 reference implementation! ***") +# endif // BX_SIMD_WARN_REFERENCE_IMPL + + typedef simd256_ref_t simd256_t; +#endif // !BX_SIMD_AVX + +} // namespace bx + +#include "simd128_ref.inl" +#include "simd256_ref.inl" + +namespace bx +{ BX_SIMD_FORCE_INLINE simd128_t simd_zero() { return simd_zero(); diff --git a/scripts/toolchain.lua b/scripts/toolchain.lua index d1c1452..7a03e29 100755 --- a/scripts/toolchain.lua +++ b/scripts/toolchain.lua @@ -100,6 +100,11 @@ function toolchain(_buildDir, _libDir) description = "Use 32-bit compiler instead 64-bit.", } + newoption { + trigger = "with-avx", + description = "Use AVX extension.", + } + -- Avoid error when invoking genie --help. if (_ACTION == nil) then return false end @@ -460,6 +465,10 @@ function toolchain(_buildDir, _libDir) flags { "StaticRuntime" } end + if _OPTIONS["with-avx"] then + flags { "EnableAVX" } + end + flags { "NoPCH", "NativeWChar", diff --git a/tests/simd_t.cpp b/tests/simd_t.cpp index 999438a..05790d1 100644 --- a/tests/simd_t.cpp +++ b/tests/simd_t.cpp @@ -12,11 +12,12 @@ using namespace bx; union simd_cast { - bx::simd128_t f4; - float f[4]; - uint32_t ui[4]; - int32_t i[4]; - char c[16]; + bx::simd256_t simd256; + bx::simd128_t simd128; + float f[8]; + uint32_t ui[8]; + int32_t i[8]; + char c[32]; }; void simd_check_bool(const char* _str, bool _a, bool _0) @@ -30,9 +31,16 @@ void simd_check_bool(const char* _str, bool _a, bool _0) CHECK_EQUAL(_a, _0); } -void simd_check_int32(const char* _str, bx::simd128_t _a, int32_t _0, int32_t _1, int32_t _2, int32_t _3) +void simd_check_int32( + const char* _str + , bx::simd128_t _a + , int32_t _0 + , int32_t _1 + , int32_t _2 + , int32_t _3 + ) { - simd_cast c; c.f4 = _a; + simd_cast c; c.simd128 = _a; DBG("%s (%d, %d, %d, %d) == (%d, %d, %d, %d)" , _str , c.i[0], c.i[1], c.i[2], c.i[3] @@ -45,9 +53,46 @@ void simd_check_int32(const char* _str, bx::simd128_t _a, int32_t _0, int32_t _1 CHECK_EQUAL(c.i[3], _3); } -void simd_check_uint32(const char* _str, bx::simd128_t _a, uint32_t _0, uint32_t _1, uint32_t _2, uint32_t _3) +void simd_check_int32( + const char* _str + , bx::simd256_t _a + , int32_t _0 + , int32_t _1 + , int32_t _2 + , int32_t _3 + , int32_t _4 + , int32_t _5 + , int32_t _6 + , int32_t _7 + ) { - simd_cast c; c.f4 = _a; + simd_cast c; c.simd256 = _a; + DBG("%s (%d, %d, %d, %d, %d, %d, %d, %d) == (%d, %d, %d, %d, %d, %d, %d, %d)" + , _str + , c.i[0], c.i[1], c.i[2], c.i[3], c.i[4], c.i[5], c.i[6], c.i[7] + , _0, _1, _2, _3, _4, _5, _6, _7 + ); + + CHECK_EQUAL(c.i[0], _0); + CHECK_EQUAL(c.i[1], _1); + CHECK_EQUAL(c.i[2], _2); + CHECK_EQUAL(c.i[3], _3); + CHECK_EQUAL(c.i[4], _4); + CHECK_EQUAL(c.i[5], _5); + CHECK_EQUAL(c.i[6], _6); + CHECK_EQUAL(c.i[7], _7); +} + +void simd_check_uint32( + const char* _str + , bx::simd128_t _a + , uint32_t _0 + , uint32_t _1 + , uint32_t _2 + , uint32_t _3 + ) +{ + simd_cast c; c.simd128 = _a; DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x)" , _str @@ -61,9 +106,47 @@ void simd_check_uint32(const char* _str, bx::simd128_t _a, uint32_t _0, uint32_t CHECK_EQUAL(c.ui[3], _3); } -void simd_check_float(const char* _str, bx::simd128_t _a, float _0, float _1, float _2, float _3) +void simd_check_uint32( + const char* _str + , bx::simd256_t _a + , uint32_t _0 + , uint32_t _1 + , uint32_t _2 + , uint32_t _3 + , uint32_t _4 + , uint32_t _5 + , uint32_t _6 + , uint32_t _7 + ) { - simd_cast c; c.f4 = _a; + simd_cast c; c.simd256 = _a; + + DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x)" + , _str + , c.ui[0], c.ui[1], c.ui[2], c.ui[3], c.ui[4], c.ui[5], c.ui[6], c.ui[7] + , _0, _1, _2, _3, _4, _5, _6, _7 + ); + + CHECK_EQUAL(c.ui[0], _0); + CHECK_EQUAL(c.ui[1], _1); + CHECK_EQUAL(c.ui[2], _2); + CHECK_EQUAL(c.ui[3], _3); + CHECK_EQUAL(c.ui[4], _4); + CHECK_EQUAL(c.ui[5], _5); + CHECK_EQUAL(c.ui[6], _6); + CHECK_EQUAL(c.ui[7], _7); +} + +void simd_check_float( + const char* _str + , bx::simd128_t _a + , float _0 + , float _1 + , float _2 + , float _3 + ) +{ + simd_cast c; c.simd128 = _a; DBG("%s (%f, %f, %f, %f) == (%f, %f, %f, %f)" , _str @@ -77,9 +160,40 @@ void simd_check_float(const char* _str, bx::simd128_t _a, float _0, float _1, fl CHECK(bx::fequal(c.f[3], _3, 0.0001f) ); } +void simd_check_float( + const char* _str + , bx::simd256_t _a + , float _0 + , float _1 + , float _2 + , float _3 + , float _4 + , float _5 + , float _6 + , float _7 + ) +{ + simd_cast c; c.simd256 = _a; + + DBG("%s (%f, %f, %f, %f, %f, %f, %f, %f) == (%f, %f, %f, %f, %f, %f, %f, %f)" + , _str + , c.f[0], c.f[1], c.f[2], c.f[3], c.f[4], c.f[5], c.f[6], c.f[7] + , _0, _1, _2, _3, _4, _5, _6, _7 + ); + + CHECK(bx::fequal(c.f[0], _0, 0.0001f) ); + CHECK(bx::fequal(c.f[1], _1, 0.0001f) ); + CHECK(bx::fequal(c.f[2], _2, 0.0001f) ); + CHECK(bx::fequal(c.f[3], _3, 0.0001f) ); + CHECK(bx::fequal(c.f[4], _4, 0.0001f) ); + CHECK(bx::fequal(c.f[5], _5, 0.0001f) ); + CHECK(bx::fequal(c.f[6], _6, 0.0001f) ); + CHECK(bx::fequal(c.f[7], _7, 0.0001f) ); +} + void simd_check_string(const char* _str, bx::simd128_t _a) { - simd_cast c; c.f4 = _a; + simd_cast c; c.simd128 = _a; const char test[5] = { c.c[0], c.c[4], c.c[8], c.c[12], '\0' }; DBG("%s %s", _str, test); @@ -200,11 +314,21 @@ TEST(simd_load) , 0.0f, 1.0f, 2.0f, 3.0f ); + simd_check_float("ld" + , simd_ld(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f) + , 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f + ); + simd_check_int32("ild" , simd_ild(uint32_t(-1), 0, 1, 2) , uint32_t(-1), 0, 1, 2 ); + simd_check_int32("ild" + , simd_ild(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6) + , uint32_t(-1), 0, 1, 2, 3, 4, 5, 6 + ); + simd_check_int32("ild" , simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) ) , uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4)