This commit is contained in:
Branimir Karadžić
2016-03-12 12:07:41 -08:00
parent 15a36340ef
commit 096a70b7ff
2 changed files with 40 additions and 7 deletions

View File

@@ -177,16 +177,30 @@ namespace bx
{
const float4_t half = float4_splat(0.5f);
const float4_t one = float4_splat(1.0f);
const float4_t zero = float4_zero();
const float4_t tmp0 = float4_rsqrt_est(_a);
const float4_t tmp1 = float4_madd(tmp0, _a, zero);
const float4_t tmp2 = float4_madd(tmp1, half, zero);
const float4_t tmp1 = float4_mul(tmp0, _a);
const float4_t tmp2 = float4_mul(tmp1, half);
const float4_t tmp3 = float4_nmsub(tmp0, tmp1, one);
const float4_t result = float4_madd(tmp3, tmp2, tmp1);
return result;
}
// Square-root approximation built only from float4_div / float4_add /
// float4_mul (no sqrt or rsqrt estimate is used here).
//
// Starting from the input itself, the estimate is refined a fixed 11 times
// with the step
//
//     estimate = (_a / estimate + estimate) * 0.5
//
// The step count is a hard-coded constant, so the accuracy reached depends on
// how far the input is from its own square root.
//
// NOTE(review): the seed equals _a, so a zero input makes the first step
// divide _a by itself (0/0) - presumably NaN if float4_div follows IEEE
// semantics; confirm whether callers can pass zero.
BX_FLOAT4_INLINE float4_t float4_sqrt_nr1_ni(float4_t _a)
{
	const float4_t half = float4_splat(0.5f);

	float4_t estimate = _a;

	for (uint32_t step = 11; step > 0; --step)
	{
		// Same operation order as a textbook Heron step: divide, add, halve.
		estimate = float4_mul(
			  float4_add(float4_div(_a, estimate), estimate)
			, half
			);
	}

	return estimate;
}
BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)
{
const float4_t one = float4_splat(1.0f);

View File

@@ -5,6 +5,7 @@
#include "test.h"
#include <bx/float4_t.h>
#include <bx/fpumath.h>
#include <string.h>
using namespace bx;
@@ -70,10 +71,10 @@ void float4_check_float(const char* _str, bx::float4_t _a, float _0, float _1, f
, _0, _1, _2, _3
);
CHECK_EQUAL(c.f[0], _0);
CHECK_EQUAL(c.f[1], _1);
CHECK_EQUAL(c.f[2], _2);
CHECK_EQUAL(c.f[3], _3);
CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
}
void float4_check_string(const char* _str, bx::float4_t _a)
@@ -235,6 +236,24 @@ TEST(float4_arithmetic)
);
}
// Runs the three square-root variants on the same four inputs and checks each
// result against the same four expected values via float4_check_float.
TEST(float4_sqrt)
{
	// Shared inputs and their expected square roots.
	const float input[4]    = { 1.0f, 16.0f, 65536.0f, 123456.0f };
	const float expected[4] = { 1.0f, 4.0f, 256.0f, 351.363060096f };

	float4_check_float(
		  "float4_sqrt"
		, float4_sqrt(float4_ld(input[0], input[1], input[2], input[3]) )
		, expected[0], expected[1], expected[2], expected[3]
		);

	float4_check_float(
		  "float4_sqrt_nr_ni"
		, float4_sqrt_nr_ni(float4_ld(input[0], input[1], input[2], input[3]) )
		, expected[0], expected[1], expected[2], expected[3]
		);

	float4_check_float(
		  "float4_sqrt_nr1_ni"
		, float4_sqrt_nr1_ni(float4_ld(input[0], input[1], input[2], input[3]) )
		, expected[0], expected[1], expected[2], expected[3]
		);
}
TEST(float4)
{
const float4_t isplat = float4_isplat(0x80000001);