Added cmpneq.

This commit is contained in:
Бранимир Караџић
2020-02-06 08:31:17 -08:00
parent f48bd19bf8
commit a3fd8d384f
6 changed files with 56 additions and 21 deletions

View File

@@ -346,6 +346,14 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
return result;
}
// Inequality compare for the language-extension backend.
// Applies `!=` to the float views (`vf`) of both operands and stores the
// outcome in the integer view (`vi`) of the returned value.
// NOTE(review): presumably each lane ends up all-ones when the inputs
// differ and zero otherwise (compiler vector-extension semantics) - confirm.
template<>
BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmpneq(simd128_langext_t _a, simd128_langext_t _b)
{
	simd128_langext_t mask;
	mask.vi = (_a.vf != _b.vf);
	return mask;
}
template<>
BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmplt(simd128_langext_t _a, simd128_langext_t _b)
{

View File

@@ -9,23 +9,15 @@
namespace bx
{
// SHUFFLE_A picks four lanes out of a single vector, SHUFFLE_AB out of a pair
// of vectors. Each macro is defined exactly once per compiler branch.
#if BX_COMPILER_CLANG
// Clang branch: lane indices are passed as separate arguments to
// __builtin_shufflevector.
# define SHUFFLE_A(_a, _i0, _i1, _i2, _i3) __builtin_shufflevector(_a, _a, _i0, _i1, _i2, _i3 )
# define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shufflevector(_a, _b, _i0, _i1, _i2, _i3 )
#else
// Other compilers: __builtin_shuffle receives the lane indices packed into a
// uint32x4_t mask.
# define SHUFFLE_A(_a, _i0, _i1, _i2, _i3) __builtin_shuffle(_a, (uint32x4_t){ _i0, _i1, _i2, _i3 })
# define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shuffle(_a, _b, (uint32x4_t){ _i0, _i1, _i2, _i3 })
#endif
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
@@ -291,11 +283,17 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
return result;
}
// NEON specialization of simd_cmpneq.
// Forwards both operands to the generic simd_cmpneq_ni helper and returns
// whatever that helper produces.
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpneq(simd128_neon_t _a, simd128_neon_t _b)
{
	const simd128_neon_t result = simd_cmpneq_ni(_a, _b);
	return result;
}
// NEON specialization of simd_cmplt (`_a < _b`).
// vcltq_f32 yields the comparison as a uint32x4_t; vreinterpretq_f32_u32
// converts that value to this backend's vector type for the return.
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmplt(simd128_neon_t _a, simd128_neon_t _b)
{
	// Each local is declared once; a second declaration in the same scope
	// would be a redefinition error.
	const uint32x4_t     tmp    = vcltq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);
	return result;
}
@@ -303,8 +301,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
// NEON specialization of simd_cmple (`_a <= _b`).
// vcleq_f32 yields the comparison as a uint32x4_t; vreinterpretq_f32_u32
// converts that value to this backend's vector type for the return.
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmple(simd128_neon_t _a, simd128_neon_t _b)
{
	// Each local is declared once; a second declaration in the same scope
	// would be a redefinition error.
	const uint32x4_t     tmp    = vcleq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);
	return result;
}
@@ -312,8 +310,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
// NEON specialization of simd_cmpgt (`_a > _b`).
// vcgtq_f32 yields the comparison as a uint32x4_t; vreinterpretq_f32_u32
// converts that value to this backend's vector type for the return.
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpgt(simd128_neon_t _a, simd128_neon_t _b)
{
	// Each local is declared once; a second declaration in the same scope
	// would be a redefinition error.
	const uint32x4_t     tmp    = vcgtq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);
	return result;
}
@@ -321,8 +319,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
// NEON specialization of simd_cmpge (`_a >= _b`).
// vcgeq_f32 yields the comparison as a uint32x4_t; vreinterpretq_f32_u32
// converts that value to this backend's vector type for the return.
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpge(simd128_neon_t _a, simd128_neon_t _b)
{
	// Each local is declared once; a second declaration in the same scope
	// would be a redefinition error.
	const uint32x4_t     tmp    = vcgeq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);
	return result;
}

View File

@@ -396,6 +396,17 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
return result;
}
// Reference (scalar) specialization of simd_cmpneq.
// For each of the four lanes, compares the float components with `!=` and
// writes 0xffffffff to the integer component when they differ, 0x0 otherwise.
template<>
BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpneq(simd128_ref_t _a, simd128_ref_t _b)
{
	simd128_ref_t mask;

	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] != _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}

	return mask;
}
template<>
BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmplt(simd128_ref_t _a, simd128_ref_t _b)
{

View File

@@ -308,6 +308,12 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
return _mm_cmpeq_ps(_a, _b);
}
// SSE specialization of simd_cmpneq.
// Hands both operands to the _mm_cmpneq_ps intrinsic and returns its result
// unchanged.
template<>
BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmpneq(simd128_sse_t _a, simd128_sse_t _b)
{
	const simd128_sse_t result = _mm_cmpneq_ps(_a, _b);
	return result;
}
template<>
BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmplt(simd128_sse_t _a, simd128_sse_t _b)
{

View File

@@ -124,6 +124,15 @@ namespace bx
return result;
}
// Generic "not equal" built from other primitives: runs simd_cmpeq on the
// operands and passes that result through simd_not.
template<typename Ty>
BX_SIMD_INLINE Ty simd_cmpneq_ni(Ty _a, Ty _b)
{
	return simd_not(simd_cmpeq(_a, _b) );
}
template<typename Ty>
BX_SIMD_INLINE Ty simd_min_ni(Ty _a, Ty _b)
{

View File

@@ -196,6 +196,9 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
// Comparison primitives, declared here without a body.
// NOTE(review): presumably each SIMD backend supplies explicit
// specializations for its own vector type - confirm.
// simd_cmpeq: "equal" compare of _a and _b.
template<typename Ty>
Ty simd_cmpeq(Ty _a, Ty _b);
// simd_cmpneq: "not equal" compare of _a and _b.
template<typename Ty>
Ty simd_cmpneq(Ty _a, Ty _b);
// simd_cmplt: "less than" compare of _a and _b.
template<typename Ty>
Ty simd_cmplt(Ty _a, Ty _b);