Force SSE2 for all builds. Fixed SIMD load/store reference implementation.

This commit is contained in:
bkaradzic
2013-08-21 22:44:05 -07:00
parent 947742a5d0
commit 7d02e14aba
4 changed files with 32 additions and 13 deletions

View File

@@ -194,16 +194,11 @@ namespace bx
namespace float4_logexp_detail
{
// Returns float4_splat(_b). The _a parameter is accepted but never referenced
// in the body.
// NOTE(review): judging by the name only, this looks like the constant
// (degree-0) step of a polynomial helper chain -- confirm against callers.
BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b)
{
return float4_splat(_b);
}
// Feeds a splat of _c, the vector _a and a splat of _b to float4_madd and
// returns the outcome.
// NOTE(review): float4_madd is not defined in this view; presumably it is a
// multiply-add (first * second + third), which would make this _c * _a + _b
// per lane -- TODO confirm.
// NOTE(review): `result` is declared twice in the same scope below, so the
// body does not compile as shown. The two pairs of lines look like the
// before/after variants of one change (this text comes from a commit view
// whose +/- markers are missing); confirm which pair is the live one.
BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c)
{
const float4_t bbbb = float4_splat(_b);
// Variant 1: obtain the splat of _c indirectly via float4_poly0, whose body
// just returns float4_splat of its second argument.
const float4_t poly0 = float4_poly0(_a, _c);
const float4_t result = float4_madd(poly0, _a, bbbb);
// Variant 2: splat _c directly; passes the same operands to float4_madd.
const float4_t cccc = float4_splat(_c);
const float4_t result = float4_madd(cccc, _a, bbbb);
return result;
}

View File

@@ -182,22 +182,37 @@ IMPLEMENT_TEST(xyzw , 0xf);
// Loads a float4_t from the memory at _ptr and returns it by value.
// NOTE(review): the first statement returns unconditionally, so the lane-by-lane
// copy after it is unreachable as shown. The two forms look like the
// before/after variants of one change (commit view without +/- markers);
// confirm which one is live.
BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
{
// Variant 1: reinterpret the memory as a float4_t and copy it in one go.
// NOTE(review): assumes _ptr is suitably aligned for float4_t -- TODO confirm.
return *reinterpret_cast<const float4_t*>(_ptr);
// Variant 2: read four consecutive 32-bit words and place them in lanes 0..3
// through the uxyzw member, copying bit patterns without any float conversion.
const uint32_t* input = reinterpret_cast<const uint32_t*>(_ptr);
float4_t result;
result.uxyzw[0] = input[0];
result.uxyzw[1] = input[1];
result.uxyzw[2] = input[2];
result.uxyzw[3] = input[3];
return result;
}
// Stores the four lanes of _a to the memory at _ptr.
// NOTE(review): as shown, the data is written twice -- once as a whole float4_t
// and then again lane by lane. These look like the before/after variants of
// one change (commit view without +/- markers); confirm which one is live.
BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
{
// Variant 1: single whole-vector assignment through a float4_t pointer.
// NOTE(review): assumes _ptr is suitably aligned for float4_t -- TODO confirm.
*reinterpret_cast<float4_t*>(_ptr) = _a;
// Variant 2: write lanes 0..3 as four consecutive 32-bit words.
uint32_t* result = reinterpret_cast<uint32_t*>(_ptr);
result[0] = _a.uxyzw[0];
result[1] = _a.uxyzw[1];
result[2] = _a.uxyzw[2];
result[3] = _a.uxyzw[3];
}
// Stores only lane 0 of _a (read through uxyzw[0]) as one 32-bit word at _ptr;
// the other lanes are not written.
// NOTE(review): as shown, the same word is written twice, once through a direct
// dereference and once through a named pointer. These look like the
// before/after variants of one change (commit view without +/- markers);
// confirm which one is live.
BX_FLOAT4_INLINE void float4_stx(void* _ptr, float4_t _a)
{
// Variant 1: cast and store in a single expression.
*reinterpret_cast<uint32_t*>(_ptr) = _a.uxyzw[0];
// Variant 2: same store via an intermediate uint32_t pointer.
uint32_t* result = reinterpret_cast<uint32_t*>(_ptr);
result[0] = _a.uxyzw[0];
}
// Stores the four lanes of _a to the memory at _ptr. The statements shown here
// are the same sequence as in float4_st; nothing in this body performs a
// special (e.g. non-temporal) store.
// NOTE(review): as shown, the data is written twice -- once as a whole float4_t
// and then again lane by lane. These look like the before/after variants of
// one change (commit view without +/- markers); confirm which one is live.
BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
{
// Variant 1: single whole-vector assignment through a float4_t pointer.
// NOTE(review): assumes _ptr is suitably aligned for float4_t -- TODO confirm.
*reinterpret_cast<float4_t*>(_ptr) = _a;
// Variant 2: write lanes 0..3 as four consecutive 32-bit words.
uint32_t* result = reinterpret_cast<uint32_t*>(_ptr);
result[0] = _a.uxyzw[0];
result[1] = _a.uxyzw[1];
result[2] = _a.uxyzw[2];
result[3] = _a.uxyzw[3];
}
BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
@@ -222,8 +237,13 @@ IMPLEMENT_TEST(xyzw , 0xf);
// Reads one 32-bit value from _ptr and returns a float4_t with that value in
// all four lanes.
// NOTE(review): as shown, the body returns after the first two statements, so
// everything below that return is unreachable; in addition `val` is declared
// twice in the same scope (float, then const uint32_t), which does not compile.
// The two forms look like the before/after variants of one change (commit view
// without +/- markers); confirm which one is live.
BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
{
// Variant 1: read the value as a float and build the vector through the
// four-float float4_ld overload.
float val = *reinterpret_cast<const float*>(_ptr);
return float4_ld(val, val, val, val);
// Variant 2: read the raw 32-bit pattern and copy it into every lane through
// the uxyzw member, without going through a float.
const uint32_t val = *reinterpret_cast<const uint32_t*>(_ptr);
float4_t result;
result.uxyzw[0] = val;
result.uxyzw[1] = val;
result.uxyzw[2] = val;
result.uxyzw[3] = val;
return result;
}
BX_FLOAT4_INLINE float4_t float4_splat(float _a)

View File

@@ -15,6 +15,7 @@
#elif 0 // __ARM_NEON__
# include "float4_neon.h"
#else
# pragma message("************************************\nUsing SIMD reference implementation!\n************************************")
# include "float4_ref.h"
#endif //

View File

@@ -183,6 +183,9 @@ function toolchain(_buildDir, _libDir)
targetsuffix "Release"
configuration { "vs*" }
flags {
"EnableSSE2",
}
includedirs { bxDir .. "include/compat/msvc" }
defines {
"WIN32",