This commit is contained in:
Branimir Karadžić
2016-12-24 15:42:59 -08:00
parent 4924141ca5
commit 30559bbd47
3 changed files with 139 additions and 62 deletions

View File

@@ -37,6 +37,39 @@ namespace bx
return result;
}
// Broadcast one float into every lane of the returned vector.
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_splat(float _a)
{
	const simd256_avx_t_t broadcast = _mm256_set1_ps(_a);
	return broadcast;
}
// Broadcast one 32-bit integer into every lane, then hand the bits back
// through the float vector type (a cast, not a numeric conversion).
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_isplat(uint32_t _a)
{
	return _mm256_castsi256_ps(_mm256_set1_epi32(_a) );
}
// Treat the lanes of _a as 32-bit integers and convert each one to float.
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_itof(simd256_avx_t_t _a)
{
	// The cast only relabels the register as integer; the conversion happens in cvtepi32.
	return _mm256_cvtepi32_ps(_mm256_castps_si256(_a) );
}
// Convert each float lane of _a to a 32-bit integer; the integer bits are
// returned through the float vector type.
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t_t simd_ftoi(simd256_avx_t_t _a)
{
	// cvtps performs the numeric conversion; the outer cast only relabels the register.
	return _mm256_castsi256_ps(_mm256_cvtps_epi32(_a) );
}
// Publish simd256_avx_t under the implementation-neutral name simd256_t.
// NOTE(review): the specializations above are declared with simd256_avx_t_t,
// not simd256_avx_t - confirm which spelling is intended.
typedef simd256_avx_t simd256_t;
} // namespace bx

View File

@@ -1,51 +1,87 @@
/*
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_SIMD256_REF_H_HEADER_GUARD
#define BX_SIMD256_REF_H_HEADER_GUARD
#include "simd_ni.inl"
namespace bx
{
// Load a simd256_ref_t from memory by loading its two halves from
// consecutive simd256_ref_t::type slots starting at _ptr.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr)
{
	typedef simd256_ref_t::type half_t;
	const half_t* halves = reinterpret_cast<const half_t*>(_ptr);

	simd256_ref_t loaded;
	loaded.simd128_0 = simd_ld<half_t>(halves);
	loaded.simd128_1 = simd_ld<half_t>(halves + 1);
	return loaded;
}
// Store a simd256_ref_t to memory as two consecutive halves.
//
// _ptr - destination; receives simd128_0 followed immediately by simd128_1.
// _a   - vector to store.
template<>
BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t& _a)
{
	// Index in units of the half type so the second half lands directly after
	// the first, matching the layout simd_ld(const void*) reads back. Indexing
	// a simd256_ref_t* here would step over a whole vector and write simd128_1
	// past the end of the destination.
	simd256_ref_t::type* ptr = reinterpret_cast<simd256_ref_t::type*>(_ptr);
	simd_st<simd256_ref_t::type>(&ptr[0], _a.simd128_0);
	simd_st<simd256_ref_t::type>(&ptr[1], _a.simd128_1);
}
// Build a simd256_ref_t from eight floats: _x.._w fill simd128_0 and
// _a.._d fill simd128_1.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d)
{
	typedef simd256_ref_t::type half_t;

	simd256_ref_t packed;
	packed.simd128_0 = simd_ld<half_t>(_x, _y, _z, _w);
	packed.simd128_1 = simd_ld<half_t>(_a, _b, _c, _d);
	return packed;
}
// Build a simd256_ref_t from eight 32-bit integers: _x.._w fill simd128_0
// and _a.._d fill simd128_1.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d)
{
	typedef simd256_ref_t::type half_t;

	simd256_ref_t packed;
	packed.simd128_0 = simd_ild<half_t>(_x, _y, _z, _w);
	packed.simd128_1 = simd_ild<half_t>(_a, _b, _c, _d);
	return packed;
}
} // namespace bx
#endif // BX_SIMD256_REF_H_HEADER_GUARD
/*
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_SIMD256_REF_H_HEADER_GUARD
#define BX_SIMD256_REF_H_HEADER_GUARD
#include "simd_ni.inl"
namespace bx
{
// Load a simd256_ref_t from memory by loading its two halves from
// consecutive simd256_ref_t::type slots starting at _ptr.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr)
{
	typedef simd256_ref_t::type half_t;
	const half_t* halves = reinterpret_cast<const half_t*>(_ptr);

	simd256_ref_t loaded;
	loaded.simd128_0 = simd_ld<half_t>(halves);
	loaded.simd128_1 = simd_ld<half_t>(halves + 1);
	return loaded;
}
// Store a simd256_ref_t to memory as two consecutive halves.
//
// _ptr - destination; receives simd128_0 followed immediately by simd128_1.
// _a   - vector to store.
template<>
BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t& _a)
{
	// Index in units of the half type so the second half lands directly after
	// the first, matching the layout simd_ld(const void*) reads back. Indexing
	// a simd256_ref_t* here would step over a whole vector and write simd128_1
	// past the end of the destination.
	simd256_ref_t::type* ptr = reinterpret_cast<simd256_ref_t::type*>(_ptr);
	simd_st<simd256_ref_t::type>(&ptr[0], _a.simd128_0);
	simd_st<simd256_ref_t::type>(&ptr[1], _a.simd128_1);
}
// Build a simd256_ref_t from eight floats: _x.._w fill simd128_0 and
// _a.._d fill simd128_1.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d)
{
	typedef simd256_ref_t::type half_t;

	simd256_ref_t packed;
	packed.simd128_0 = simd_ld<half_t>(_x, _y, _z, _w);
	packed.simd128_1 = simd_ld<half_t>(_a, _b, _c, _d);
	return packed;
}
// Build a simd256_ref_t from eight 32-bit integers: _x.._w fill simd128_0
// and _a.._d fill simd128_1.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d)
{
	typedef simd256_ref_t::type half_t;

	simd256_ref_t packed;
	packed.simd128_0 = simd_ild<half_t>(_x, _y, _z, _w);
	packed.simd128_1 = simd_ild<half_t>(_a, _b, _c, _d);
	return packed;
}
// Fill both halves of a simd256_ref_t with the half-width splat of _a.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_splat(float _a)
{
	typedef simd256_ref_t::type half_t;

	simd256_ref_t filled;
	filled.simd128_0 = simd_splat<half_t>(_a);
	filled.simd128_1 = simd_splat<half_t>(_a);
	return filled;
}
// Fill both halves of a simd256_ref_t with the half-width integer splat of _a.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_isplat(uint32_t _a)
{
	typedef simd256_ref_t::type half_t;

	simd256_ref_t filled;
	filled.simd128_0 = simd_isplat<half_t>(_a);
	filled.simd128_1 = simd_isplat<half_t>(_a);
	return filled;
}
// Apply the half-width simd_itof to each half of _a and return the pair.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_itof(simd256_ref_t _a)
{
	// _a is a by-value copy, so it can be converted in place and returned.
	_a.simd128_0 = simd_itof(_a.simd128_0);
	_a.simd128_1 = simd_itof(_a.simd128_1);
	return _a;
}
// Apply the half-width simd_ftoi to each half of _a and return the pair.
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ftoi(simd256_ref_t _a)
{
	// _a is a by-value copy, so it can be converted in place and returned.
	_a.simd128_0 = simd_ftoi(_a.simd128_0);
	_a.simd128_1 = simd_ftoi(_a.simd128_1);
	return _a;
}
} // namespace bx
#endif // BX_SIMD256_REF_H_HEADER_GUARD

View File

@@ -322,20 +322,20 @@ TEST_CASE("simd_load", "")
, 0.0f, 1.0f, 2.0f, 3.0f
);
// simd_check_float("ld"
// , simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
// , 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
// );
simd_check_float("ld"
, simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
);
simd_check_int32("ild"
, simd_ild(uint32_t(-1), 0, 1, 2)
, uint32_t(-1), 0, 1, 2
);
// simd_check_int32("ild"
// , simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
// , uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
// );
simd_check_int32("ild"
, simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
, uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
);
simd_check_int32("ild"
, simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) )
@@ -346,13 +346,21 @@ TEST_CASE("simd_load", "")
, 0, 0, 0, 0
);
simd_check_uint32("isplat", simd_isplat(0x80000001)
simd_check_uint32("isplat", simd_isplat<simd128_t>(0x80000001)
, 0x80000001, 0x80000001, 0x80000001, 0x80000001
);
simd_check_float("isplat", simd_splat(1.0f)
simd_check_float("splat", simd_splat<simd128_t>(1.0f)
, 1.0f, 1.0f, 1.0f, 1.0f
);
simd_check_uint32("isplat", simd_isplat<simd256_t>(0x80000001)
, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001
);
simd_check_float("splat", simd_splat<simd256_t>(1.0f)
, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
);
}
TEST_CASE("simd_arithmetic", "")
@@ -386,7 +394,7 @@ TEST_CASE("simd_sqrt", "")
);
}
TEST_CASE("float4", "")
TEST_CASE("simd", "")
{
const simd128_t isplat = simd_isplat(0x80000001);
simd_check_uint32("sll"