From 539076a75e18f6828847419fa29629e0cd1939b2 Mon Sep 17 00:00:00 2001
From: bkaradzic
Date: Sun, 29 Dec 2013 11:54:49 -0800
Subject: [PATCH] Added missing functions to float4 reference implementation.

The reference implementation gains float4_icmpeq, float4_icmplt,
float4_icmpgt, float4_imin and float4_imax. Each one works lane by lane
on the ixyzw members: the compare functions store 0xffffffff in a lane
when the comparison holds and 0x0 otherwise, while imin/imax copy the
smaller/larger of the two input lanes.

The NEON float4_nmsub now calls __builtin_neon_vmlsv4sf instead of
__builtin_neon_vmlav4sf, and a float4_arithmetic test checks float4_madd
against hand-computed lane values.
---
Review note: float4_imax is declared BX_FLOAT4_INLINE here so that it
matches the other four functions added by this hunk; the submitted
version used BX_NO_INLINE for that one function only.

 include/bx/float4_neon.h |  2 +-
 include/bx/float4_ref.h  | 50 ++++++++++++++++++++++++++++++++++++++++
 tests/float4_t.cpp       |  8 +++++++
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/include/bx/float4_neon.h b/include/bx/float4_neon.h
index 945f42b..f4195c1 100644
--- a/include/bx/float4_neon.h
+++ b/include/bx/float4_neon.h
@@ -349,7 +349,7 @@ namespace bx
 
 	BX_FLOAT4_INLINE float4_t float4_nmsub(float4_t _a, float4_t _b, float4_t _c)
 	{
-		return __builtin_neon_vmlav4sf(_c, _a, _b, 3);
+		return __builtin_neon_vmlsv4sf(_c, _a, _b, 3);
 	}
 
 	BX_FLOAT4_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b)
diff --git a/include/bx/float4_ref.h b/include/bx/float4_ref.h
index 7d2ab95..e8667d6 100644
--- a/include/bx/float4_ref.h
+++ b/include/bx/float4_ref.h
@@ -499,6 +499,56 @@ IMPLEMENT_TEST(xyzw , 0xf);
 		return result;
 	}
 
+	BX_FLOAT4_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b)
+	{
+		float4_t result;
+		result.ixyzw[0] = _a.ixyzw[0] == _b.ixyzw[0] ? 0xffffffff : 0x0;
+		result.ixyzw[1] = _a.ixyzw[1] == _b.ixyzw[1] ? 0xffffffff : 0x0;
+		result.ixyzw[2] = _a.ixyzw[2] == _b.ixyzw[2] ? 0xffffffff : 0x0;
+		result.ixyzw[3] = _a.ixyzw[3] == _b.ixyzw[3] ? 0xffffffff : 0x0;
+		return result;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_icmplt(float4_t _a, float4_t _b)
+	{
+		float4_t result;
+		result.ixyzw[0] = _a.ixyzw[0] < _b.ixyzw[0] ? 0xffffffff : 0x0;
+		result.ixyzw[1] = _a.ixyzw[1] < _b.ixyzw[1] ? 0xffffffff : 0x0;
+		result.ixyzw[2] = _a.ixyzw[2] < _b.ixyzw[2] ? 0xffffffff : 0x0;
+		result.ixyzw[3] = _a.ixyzw[3] < _b.ixyzw[3] ? 0xffffffff : 0x0;
+		return result;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_icmpgt(float4_t _a, float4_t _b)
+	{
+		float4_t result;
+		result.ixyzw[0] = _a.ixyzw[0] > _b.ixyzw[0] ? 0xffffffff : 0x0;
+		result.ixyzw[1] = _a.ixyzw[1] > _b.ixyzw[1] ? 0xffffffff : 0x0;
+		result.ixyzw[2] = _a.ixyzw[2] > _b.ixyzw[2] ? 0xffffffff : 0x0;
+		result.ixyzw[3] = _a.ixyzw[3] > _b.ixyzw[3] ? 0xffffffff : 0x0;
+		return result;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_imin(float4_t _a, float4_t _b)
+	{
+		float4_t result;
+		result.ixyzw[0] = _a.ixyzw[0] < _b.ixyzw[0] ? _a.ixyzw[0] : _b.ixyzw[0];
+		result.ixyzw[1] = _a.ixyzw[1] < _b.ixyzw[1] ? _a.ixyzw[1] : _b.ixyzw[1];
+		result.ixyzw[2] = _a.ixyzw[2] < _b.ixyzw[2] ? _a.ixyzw[2] : _b.ixyzw[2];
+		result.ixyzw[3] = _a.ixyzw[3] < _b.ixyzw[3] ? _a.ixyzw[3] : _b.ixyzw[3];
+		return result;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_imax(float4_t _a, float4_t _b)
+	{
+		float4_t result;
+		result.ixyzw[0] = _a.ixyzw[0] > _b.ixyzw[0] ? _a.ixyzw[0] : _b.ixyzw[0];
+		result.ixyzw[1] = _a.ixyzw[1] > _b.ixyzw[1] ? _a.ixyzw[1] : _b.ixyzw[1];
+		result.ixyzw[2] = _a.ixyzw[2] > _b.ixyzw[2] ? _a.ixyzw[2] : _b.ixyzw[2];
+		result.ixyzw[3] = _a.ixyzw[3] > _b.ixyzw[3] ? _a.ixyzw[3] : _b.ixyzw[3];
+		return result;
+	}
+
 	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
 	{
 		float4_t result;
diff --git a/tests/float4_t.cpp b/tests/float4_t.cpp
index 8638c5a..80916c8 100644
--- a/tests/float4_t.cpp
+++ b/tests/float4_t.cpp
@@ -183,6 +183,14 @@ TEST(float4_load)
 		);
 }
 
+TEST(float4_arithmetic)
+{
+	float4_check_float("madd"
+		, float4_madd(float4_ld(0.0f, 1.0f, 2.0f, 3.0f), float4_ld(4.0f, 5.0f, 6.0f, 7.0f), float4_ld(8.0f, 9.0f, 10.0f, 11.0f) )
+		, 8.0f, 14.0f, 22.0f, 32.0f
+		);
+}
+
 TEST(float4)
 {
 	const float4_t isplat = float4_isplat(0x80000001);