diff --git a/include/bx/float4_neon.h b/include/bx/float4_neon.h
index f4195c1..8fbac3d 100644
--- a/include/bx/float4_neon.h
+++ b/include/bx/float4_neon.h
@@ -32,6 +32,31 @@ namespace bx
 #undef ELEMy
 #undef ELEMx
 
+	// Forward declarations for the float4_test_any_* / float4_test_all_*
+	// family. The definitions come after the float4_ni.h include at the end of
+	// this header, since they call float4_test_any_ni / float4_test_all_ni.
+#define IMPLEMENT_TEST(_xyzw, _swizzle) \
+	BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test); \
+	BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test);
+
+IMPLEMENT_TEST(x    , xxxx);
+IMPLEMENT_TEST(y    , yyyy);
+IMPLEMENT_TEST(xy   , xyyy);
+IMPLEMENT_TEST(z    , zzzz);
+IMPLEMENT_TEST(xz   , xzzz);
+IMPLEMENT_TEST(yz   , yzzz);
+IMPLEMENT_TEST(xyz  , xyzz);
+IMPLEMENT_TEST(w    , wwww);
+IMPLEMENT_TEST(xw   , xwww);
+IMPLEMENT_TEST(yw   , ywww);
+IMPLEMENT_TEST(xyw  , xyww);
+IMPLEMENT_TEST(zw   , zwww);
+IMPLEMENT_TEST(xzw  , xzww);
+IMPLEMENT_TEST(yzw  , yzww);
+IMPLEMENT_TEST(xyzw , xyzw);
+
+#undef IMPLEMENT_TEST
+
 	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
 	{
 		return __builtin_shuffle(_a, _b, (_u32x4_t){ 0, 1, 4, 5 });
@@ -457,4 +482,49 @@ namespace bx
 
 #include "float4_ni.h"
 
+namespace bx
+{
+	// Each float4_test_{any,all}_<lanes> swizzles _test with a pattern that
+	// names only the selected lanes (the last one is repeated to fill all four
+	// slots) and forwards the result to float4_test_{any,all}_ni.
+#define IMPLEMENT_TEST(_xyzw, _swizzle) \
+	BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
+	{ \
+		return float4_test_any_ni(float4_swiz_##_swizzle(_test) ); \
+	} \
+	\
+	BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
+	{ \
+		return float4_test_all_ni(float4_swiz_##_swizzle(_test) ); \
+	}
+
+IMPLEMENT_TEST(x    , xxxx);
+IMPLEMENT_TEST(y    , yyyy);
+IMPLEMENT_TEST(xy   , xyyy);
+IMPLEMENT_TEST(z    , zzzz);
+IMPLEMENT_TEST(xz   , xzzz);
+IMPLEMENT_TEST(yz   , yzzz);
+IMPLEMENT_TEST(xyz  , xyzz);
+IMPLEMENT_TEST(w    , wwww);
+IMPLEMENT_TEST(xw   , xwww);
+IMPLEMENT_TEST(yw   , ywww);
+IMPLEMENT_TEST(xyw  , xyww);
+IMPLEMENT_TEST(zw   , zwww);
+IMPLEMENT_TEST(xzw  , xzww);
+IMPLEMENT_TEST(yzw  , yzww);
+
+	// All four lanes take part, so _test is passed through without a swizzle.
+	BX_FLOAT4_INLINE bool float4_test_any_xyzw(float4_t _test)
+	{
+		return float4_test_any_ni(_test);
+	}
+
+	BX_FLOAT4_INLINE bool float4_test_all_xyzw(float4_t _test)
+	{
+		return float4_test_all_ni(_test);
+	}
+
+#undef IMPLEMENT_TEST
+} // namespace bx
+
 #endif // BX_FLOAT4_NEON_H_HEADER_GUARD
diff --git a/include/bx/float4_ni.h b/include/bx/float4_ni.h
index 7aa8757..ad3026e 100644
--- a/include/bx/float4_ni.h
+++ b/include/bx/float4_ni.h
@@ -437,6 +437,37 @@ namespace bx
 		return result;
 	}
 
+	// True when float4_sra(_a, 31) leaves a non-zero value in any lane: the
+	// lanes are OR-ed together and float4_stx writes the result to an int.
+	BX_FLOAT4_INLINE bool float4_test_any_ni(float4_t _a)
+	{
+		const float4_t sign    = float4_sra(_a, 31);
+		const float4_t swapped = float4_swiz_zwxy(sign);
+		const float4_t halves  = float4_or(sign, swapped);
+		const float4_t upper   = float4_swiz_yyyy(halves);
+		const float4_t folded  = float4_or(halves, upper);
+		int anyBits;
+		float4_stx(&anyBits, folded);
+		return anyBits != 0;
+	}
+
+	// True when every lane keeps its bit after float4_sra(_a, 31): lanes are
+	// masked with the distinct bits 1, 2, 4 and 8, OR-ed together, and the
+	// stored value must equal 0xf.
+	BX_FLOAT4_INLINE bool float4_test_all_ni(float4_t _a)
+	{
+		const float4_t sign    = float4_sra(_a, 31);
+		const float4_t laneBit = float4_ild(1, 2, 4, 8);
+		const float4_t tagged  = float4_and(sign, laneBit);
+		const float4_t swapped = float4_swiz_zwxy(tagged);
+		const float4_t halves  = float4_or(tagged, swapped);
+		const float4_t upper   = float4_swiz_yyyy(halves);
+		const float4_t folded  = float4_or(halves, upper);
+		int allBits;
+		float4_stx(&allBits, folded);
+		return allBits == 0xf;
+	}
+
 } // namespace bx
 
 #endif // BX_FLOAT4_NI_H_HEADER_GUARD
diff --git a/tests/float4_t.cpp b/tests/float4_t.cpp
index 80916c8..245d18e 100644
--- a/tests/float4_t.cpp
+++ b/tests/float4_t.cpp
@@ -18,6 +18,14 @@ union float4_cast
 	char c[16];
 };
 
+// Passes the computed (_a) and expected (_0) flags to DBG, then to CHECK_EQUAL.
+void float4_check_bool(const char* _str, bool _a, bool _0)
+{
+	DBG("%s %d == %d", _str, _a, _0);
+
+	CHECK_EQUAL(_a, _0);
+}
+
 void float4_check_int32(const char* _str, bx::float4_t _a, int32_t _0, int32_t _1, int32_t _2, int32_t _3)
 {
 	float4_cast c; c.f4 = _a;
@@ -151,6 +159,24 @@ TEST(float4_compare)
 		, float4_icmpgt(float4_ild(0, 1, 2, 3), float4_ild(0, -2, 1, 3) )
 		, 0, -1, -1, 0
 		);
+
+	// Only x is set; one lane is enough for "any".
+	float4_check_bool("test_any_xyzw"
+		, float4_test_any_xyzw(float4_ild(-1, 0, 0, 0) )
+		, true
+		);
+
+	// x and w are set; the xw variant's swizzle does not name y or z.
+	float4_check_bool("test_all_xw"
+		, float4_test_all_xw(float4_ild(-1, 0, 0, -1) )
+		, true
+		);
+
+	// z is clear, so "all" over x, z and w is expected to be false.
+	float4_check_bool("test_all_xzw"
+		, float4_test_all_xzw(float4_ild(-1, 0, 0, -1) )
+		, false
+		);
 }
 
 TEST(float4_load)