From 096a70b7fff44c97576ff48b3440a96d82076fb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?=
Date: Sat, 12 Mar 2016 12:07:41 -0800
Subject: [PATCH] Cleanup.

---
Reviewer notes (commentary area: text between the "---" line and the first
"diff --git" line is not part of the commit):

What this patch does:
  * include/bx/float4_ni.h: in the routine at the top of the first hunk, the
    unused `zero` constant is removed and the two float4_madd(x, y, zero)
    calls become float4_mul(x, y).
  * include/bx/float4_ni.h: adds float4_sqrt_nr1_ni(), which starts from
    result = _a and runs exactly 11 iterations of
    result = (_a / result + result) * 0.5.
  * tests/float4_t.cpp: float4_check_float() now checks each lane with
    bx::fequal(..., 0.0001f) instead of CHECK_EQUAL.
  * tests/float4_t.cpp: adds TEST(float4_sqrt), which feeds
    (1, 16, 65536, 123456) to float4_sqrt, float4_sqrt_nr_ni and
    float4_sqrt_nr1_ni and expects (1, 4, 256, 351.363060096).

NOTE(review): the first iteration of float4_sqrt_nr1_ni() computes
float4_div(_a, _a); presumably a lane holding 0.0f yields 0/0 there -
confirm against float4_div semantics.
NOTE(review): the iteration count is hard-coded to 11 and the new test only
covers inputs up to 123456.0f; convergence for much larger inputs is not
demonstrated by this patch - confirm.
NOTE(review): this copy of the patch had its line breaks and indentation
collapsed and the <...> operands of the #include lines stripped. Line
structure below matches the hunk header counts; tab indentation, column
alignment and the three include targets are reconstructed - confirm against
the upstream commit, or apply with --ignore-whitespace.

 include/bx/float4_ni.h | 20 +++++++++++++++++---
 tests/float4_t.cpp     | 27 +++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/include/bx/float4_ni.h b/include/bx/float4_ni.h
index e85c1cd..644fa6e 100644
--- a/include/bx/float4_ni.h
+++ b/include/bx/float4_ni.h
@@ -177,16 +177,30 @@ namespace bx
 	{
 		const float4_t half = float4_splat(0.5f);
 		const float4_t one = float4_splat(1.0f);
-		const float4_t zero = float4_zero();
 		const float4_t tmp0 = float4_rsqrt_est(_a);
-		const float4_t tmp1 = float4_madd(tmp0, _a, zero);
-		const float4_t tmp2 = float4_madd(tmp1, half, zero);
+		const float4_t tmp1 = float4_mul(tmp0, _a);
+		const float4_t tmp2 = float4_mul(tmp1, half);
 		const float4_t tmp3 = float4_nmsub(tmp0, tmp1, one);
 		const float4_t result = float4_madd(tmp3, tmp2, tmp1);
 
 		return result;
 	}
 
+	BX_FLOAT4_INLINE float4_t float4_sqrt_nr1_ni(float4_t _a)
+	{
+		const float4_t half = float4_splat(0.5f);
+
+		float4_t result = _a;
+		for (uint32_t ii = 0; ii < 11; ++ii)
+		{
+			const float4_t tmp1 = float4_div(_a, result);
+			const float4_t tmp2 = float4_add(tmp1, result);
+			result = float4_mul(tmp2, half);
+		}
+
+		return result;
+	}
+
 	BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)
 	{
 		const float4_t one = float4_splat(1.0f);
diff --git a/tests/float4_t.cpp b/tests/float4_t.cpp
index 9f39fcc..3bdfb19 100644
--- a/tests/float4_t.cpp
+++ b/tests/float4_t.cpp
@@ -5,6 +5,7 @@
 
 #include "test.h"
 #include <bx/float4_t.h>
+#include <bx/fpumath.h>
 #include <string.h>
 
 using namespace bx;
@@ -70,10 +71,10 @@ void float4_check_float(const char* _str, bx::float4_t _a, float _0, float _1, f
 		, _0, _1, _2, _3
 		);
 
-	CHECK_EQUAL(c.f[0], _0);
-	CHECK_EQUAL(c.f[1], _1);
-	CHECK_EQUAL(c.f[2], _2);
-	CHECK_EQUAL(c.f[3], _3);
+	CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
+	CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
+	CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
+	CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
 }
 
 void float4_check_string(const char* _str, bx::float4_t _a)
@@ -235,6 +236,24 @@ TEST(float4_arithmetic)
 		);
 }
 
+TEST(float4_sqrt)
+{
+	float4_check_float("float4_sqrt"
+		, float4_sqrt(float4_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
+		, 1.0f, 4.0f, 256.0f, 351.363060096f
+		);
+
+	float4_check_float("float4_sqrt_nr_ni"
+		, float4_sqrt_nr_ni(float4_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
+		, 1.0f, 4.0f, 256.0f, 351.363060096f
+		);
+
+	float4_check_float("float4_sqrt_nr1_ni"
+		, float4_sqrt_nr1_ni(float4_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
+		, 1.0f, 4.0f, 256.0f, 351.363060096f
+		);
+}
+
 TEST(float4)
 {
 	const float4_t isplat = float4_isplat(0x80000001);