From 30559bbd476ce4ab84fe6441b1bdfb055d0b8abf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?= Date: Sat, 24 Dec 2016 15:42:59 -0800 Subject: [PATCH] Cleanup. --- include/bx/simd256_avx.inl | 33 +++++++++ include/bx/simd256_ref.inl | 138 +++++++++++++++++++++++-------------- tests/simd_test.cpp | 30 +++++--- 3 files changed, 139 insertions(+), 62 deletions(-) diff --git a/include/bx/simd256_avx.inl b/include/bx/simd256_avx.inl index 5abaa65..c8af735 100644 --- a/include/bx/simd256_avx.inl +++ b/include/bx/simd256_avx.inl @@ -37,6 +37,39 @@ namespace bx return result; } + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_splat(float _a) + { + return _mm256_set1_ps(_a); + } + + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_isplat(uint32_t _a) + { + const __m256i splat = _mm256_set1_epi32(_a); + const simd256_avx_t_t result = _mm256_castsi256_ps(splat); + + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_itof(simd256_avx_t_t _a) + { + const __m256i itof = _mm256_castps_si256(_a); + const simd256_avx_t_t result = _mm256_cvtepi32_ps(itof); + + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_ftoi(simd256_avx_t_t _a) + { + const __m256i ftoi = _mm256_cvtps_epi32(_a); + const simd256_avx_t_t result = _mm256_castsi256_ps(ftoi); + + return result; + } + typedef simd256_avx_t simd256_t; } // namespace bx diff --git a/include/bx/simd256_ref.inl b/include/bx/simd256_ref.inl index c3847bd..6d9a5a3 100644 --- a/include/bx/simd256_ref.inl +++ b/include/bx/simd256_ref.inl @@ -1,51 +1,87 @@ -/* - * Copyright 2010-2016 Branimir Karadzic. All rights reserved. - * License: https://github.com/bkaradzic/bx#license-bsd-2-clause - */ - -#ifndef BX_SIMD256_REF_H_HEADER_GUARD -#define BX_SIMD256_REF_H_HEADER_GUARD - -#include "simd_ni.inl" - -namespace bx -{ - template<> - BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr) - { - const simd256_ref_t::type* ptr = reinterpret_cast(_ptr); - simd256_ref_t result; - result.simd128_0 = simd_ld(&ptr[0]); - result.simd128_1 = simd_ld(&ptr[1]); - return result; - } - - template<> - BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t& _a) - { - simd256_ref_t* result = reinterpret_cast(_ptr); - simd_st(&result[0], _a.simd128_0); - simd_st(&result[1], _a.simd128_1); - } - - template<> - BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d) - { - simd256_ref_t result; - result.simd128_0 = simd_ld(_x, _y, _z, _w); - result.simd128_1 = simd_ld(_a, _b, _c, _d); - return result; - } - - template<> - BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d) - { - simd256_ref_t result; - result.simd128_0 = simd_ild(_x, _y, _z, _w); - result.simd128_1 = simd_ild(_a, _b, _c, _d); - return result; - } - -} // namespace bx - -#endif // BX_SIMD256_REF_H_HEADER_GUARD +/* + * Copyright 2010-2016 Branimir Karadzic. All rights reserved. + * License: https://github.com/bkaradzic/bx#license-bsd-2-clause + */ + +#ifndef BX_SIMD256_REF_H_HEADER_GUARD +#define BX_SIMD256_REF_H_HEADER_GUARD + +#include "simd_ni.inl" + +namespace bx +{ + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr) + { + const simd256_ref_t::type* ptr = reinterpret_cast(_ptr); + simd256_ref_t result; + result.simd128_0 = simd_ld(&ptr[0]); + result.simd128_1 = simd_ld(&ptr[1]); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t& _a) + { + simd256_ref_t* result = reinterpret_cast(_ptr); + simd_st(&result[0], _a.simd128_0); + simd_st(&result[1], _a.simd128_1); + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d) + { + simd256_ref_t result; + result.simd128_0 = simd_ld(_x, _y, _z, _w); + result.simd128_1 = simd_ld(_a, _b, _c, _d); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d) + { + simd256_ref_t result; + result.simd128_0 = simd_ild(_x, _y, _z, _w); + result.simd128_1 = simd_ild(_a, _b, _c, _d); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_splat(float _a) + { + simd256_ref_t result; + result.simd128_0 = simd_splat(_a); + result.simd128_1 = simd_splat(_a); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_isplat(uint32_t _a) + { + simd256_ref_t result; + result.simd128_0 = simd_isplat(_a); + result.simd128_1 = simd_isplat(_a); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_itof(simd256_ref_t _a) + { + simd256_ref_t result; + result.simd128_0 = simd_itof(_a.simd128_0); + result.simd128_1 = simd_itof(_a.simd128_1); + return result; + } + + template<> + BX_SIMD_FORCE_INLINE simd256_ref_t simd_ftoi(simd256_ref_t _a) + { + simd256_ref_t result; + result.simd128_0 = simd_ftoi(_a.simd128_0); + result.simd128_1 = simd_ftoi(_a.simd128_1); + return result; + } + +} // namespace bx + +#endif // BX_SIMD256_REF_H_HEADER_GUARD diff --git a/tests/simd_test.cpp b/tests/simd_test.cpp index a845a50..66e6498 100644 --- a/tests/simd_test.cpp +++ b/tests/simd_test.cpp @@ -322,20 +322,20 @@ TEST_CASE("simd_load", "") , 0.0f, 1.0f, 2.0f, 3.0f ); -// simd_check_float("ld" -// , simd_ld(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f) -// , 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f -// ); + simd_check_float("ld" + , simd_ld(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f) + , 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f + ); simd_check_int32("ild" , simd_ild(uint32_t(-1), 0, 1, 2) , uint32_t(-1), 0, 1, 2 ); -// simd_check_int32("ild" -// , simd_ild(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6) -// , uint32_t(-1), 0, 1, 2, 3, 4, 5, 6 -// ); + simd_check_int32("ild" + , simd_ild(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6) + , uint32_t(-1), 0, 1, 2, 3, 4, 5, 6 + ); simd_check_int32("ild" , simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) ) @@ -346,13 +346,21 @@ TEST_CASE("simd_load", "") , 0, 0, 0, 0 ); - simd_check_uint32("isplat", simd_isplat(0x80000001) + simd_check_uint32("isplat", simd_isplat(0x80000001) , 0x80000001, 0x80000001, 0x80000001, 0x80000001 ); - simd_check_float("isplat", simd_splat(1.0f) + simd_check_float("splat", simd_splat(1.0f) , 1.0f, 1.0f, 1.0f, 1.0f ); + + simd_check_uint32("isplat", simd_isplat(0x80000001) + , 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001 + ); + + simd_check_float("splat", simd_splat(1.0f) + , 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f + ); } TEST_CASE("simd_arithmetic", "") @@ -386,7 +394,7 @@ TEST_CASE("simd_sqrt", "") ); } -TEST_CASE("float4", "") +TEST_CASE("simd", "") { const simd128_t isplat = simd_isplat(0x80000001); simd_check_uint32("sll"