From 03c169e5c49c0b96bc5a20fe8a8d2baf87ee4680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?= Date: Sat, 9 May 2015 10:11:43 -0700 Subject: [PATCH] cross3 with one less swizzle. --- include/bx/float4_ni.h | 21 +++++++++++++++------ tests/float4_t.cpp | 5 +++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/bx/float4_ni.h b/include/bx/float4_ni.h index f8a2a77..c709151 100644 --- a/include/bx/float4_ni.h +++ b/include/bx/float4_ni.h @@ -15,7 +15,7 @@ namespace bx const float4_t xAyB = float4_shuf_xAyB(_a, _b); const float4_t zCwD = float4_shuf_zCwD(_a, _b); const float4_t result = float4_shuf_xyAB(xAyB, zCwD); - + return result; } @@ -24,7 +24,7 @@ namespace bx const float4_t xAyB = float4_shuf_xAyB(_a, _b); const float4_t zCwD = float4_shuf_zCwD(_a, _b); const float4_t result = float4_shuf_zwCD(xAyB, zCwD); - + return result; } @@ -192,7 +192,7 @@ namespace bx const float4_t one = float4_splat(1.0f); const float4_t sqrt = float4_sqrt(_a); const float4_t result = float4_div(one, sqrt); - + return result; } @@ -206,7 +206,7 @@ namespace bx const float4_t three = float4_splat(3.0f); const float4_t three_sub_iter1 = float4_sub(three, iter1); const float4_t result = float4_mul(half_rsqrt, three_sub_iter1); - + return result; } @@ -375,7 +375,7 @@ namespace bx const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart); const float4_t result = float4_mul(expipart, expfpart); - + return result; } @@ -401,12 +401,21 @@ namespace bx BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b) { + // a.yzx * b.zxy - a.zxy * b.yzx == (a * b.yzx - a.yzx * b).yzx +#if 0 const float4_t a_yzxw = float4_swiz_yzxw(_a); const float4_t a_zxyw = float4_swiz_zxyw(_a); const float4_t b_zxyw = float4_swiz_zxyw(_b); const float4_t b_yzxw = float4_swiz_yzxw(_b); const float4_t tmp = float4_mul(a_yzxw, b_zxyw); const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp); +#else + const float4_t a_yzxw = float4_swiz_yzxw(_a); + const float4_t b_yzxw = float4_swiz_yzxw(_b); + const float4_t tmp0 = float4_mul(_a, b_yzxw); + const float4_t tmp1 = float4_nmsub(a_yzxw, _b, tmp0); + const float4_t result = float4_swiz_yzxw(tmp1); +#endif return result; } @@ -416,7 +425,7 @@ namespace bx const float4_t dot3 = float4_dot3(_a, _a); const float4_t invSqrt = float4_rsqrt(dot3); const float4_t result = float4_mul(_a, invSqrt); - + return result; } diff --git a/tests/float4_t.cpp b/tests/float4_t.cpp index a9a15af..04a0604 100644 --- a/tests/float4_t.cpp +++ b/tests/float4_t.cpp @@ -228,6 +228,11 @@ TEST(float4_arithmetic) , float4_madd(float4_ld(0.0f, 1.0f, 2.0f, 3.0f), float4_ld(4.0f, 5.0f, 6.0f, 7.0f), float4_ld(8.0f, 9.0f, 10.0f, 11.0f) ) , 8.0f, 14.0f, 22.0f, 32.0f ); + + float4_check_float("cross3" + , float4_cross3(float4_ld(1.0f, 0.0f, 0.0f, 0.0f), float4_ld(0.0f, 1.0f, 0.0f, 0.0f) ) + , 0.0f, 0.0f, 1.0f, 0.0f + ); } TEST(float4)