# Patch summary: add float4_ceil / float4_floor to the bx float4 headers.
#
# include/bx/float4_ni.h
#   Adds float4_ceil_ni(_a) and float4_floor_ni(_a), both built from other
#   float4_* operations rather than a dedicated rounding instruction:
#     - _a is passed through float4_ftoi and then float4_itof (tmp1).
#     - float4_ceil_ni:  mask = float4_cmplt(tmp1, _a);
#                        result = tmp1 + (float4_one() & mask).
#     - float4_floor_ni: mask = float4_cmpgt(tmp1, _a);
#                        result = tmp1 - (float4_one() & mask).
#   NOTE(review): presumably float4_ftoi truncates toward zero and the compare
#   functions return an all-bits-set mask per matching lane, which is what makes
#   the +1 / -1 correction work; neither is defined in this patch - confirm.
#   NOTE(review): results for NaN lanes and for magnitudes the integer
#   conversion cannot represent are not established by this patch - verify
#   before relying on them.
#
# include/bx/float4_neon.h, include/bx/float4_ref.h, include/bx/float4_sse.h
#   Each defines float4_ceil as float4_ceil_ni and float4_floor as
#   float4_floor_ni immediately before its #include "float4_ni.h".
#   In float4_sse.h the two defines are added after the
#   "#endif // defined(__SSE4_1__)" line.
#
# include/bx/float4_t.h
#   Changes "#if 0 // defined(__SSE2__)" to "#if defined(__SSE2__)", so
#   float4_sse.h is included when __SSE2__ is defined. The following
#   "#elif 0 // __ARM_NEON__" branch is left as it was.
#
# NOTE(review): in this copy the patch's line breaks have been collapsed (file
# headers, hunk headers and +/- lines share physical lines); the original line
# structure must be restored before the patch can be applied.
diff --git a/include/bx/float4_neon.h b/include/bx/float4_neon.h index 479bcba..2331309 100644 --- a/include/bx/float4_neon.h +++ b/include/bx/float4_neon.h @@ -222,6 +222,8 @@ namespace bx #define float4_div_nr float4_div_nr_ni #define float4_div float4_div_nr_ni +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni #include "float4_ni.h" #endif // __BX_FLOAT4_NEON_H__ diff --git a/include/bx/float4_ni.h b/include/bx/float4_ni.h index 328c51c..08e4c55 100644 --- a/include/bx/float4_ni.h +++ b/include/bx/float4_ni.h @@ -402,6 +402,30 @@ namespace bx return result; } + BX_FLOAT4_INLINE float4_t float4_ceil_ni(float4_t _a) + { + const float4_t tmp0 = float4_ftoi(_a); + const float4_t tmp1 = float4_itof(tmp0); + const float4_t mask = float4_cmplt(tmp1, _a); + const float4_t one = float4_one(); + const float4_t tmp2 = float4_and(one, mask); + const float4_t result = float4_add(tmp1, tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_floor_ni(float4_t _a) + { + const float4_t tmp0 = float4_ftoi(_a); + const float4_t tmp1 = float4_itof(tmp0); + const float4_t mask = float4_cmpgt(tmp1, _a); + const float4_t one = float4_one(); + const float4_t tmp2 = float4_and(one, mask); + const float4_t result = float4_sub(tmp1, tmp2); + + return result; + } + } // namespace bx #endif // __BX_FLOAT4_NI_H__ diff --git a/include/bx/float4_ref.h b/include/bx/float4_ref.h index bf74b40..4359119 100644 --- a/include/bx/float4_ref.h +++ b/include/bx/float4_ref.h @@ -517,6 +517,8 @@ IMPLEMENT_TEST(xyzw , 0xf); #define float4_normalize3 float4_normalize3_ni #define float4_dot3 float4_dot3_ni #define float4_dot float4_dot_ni +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni #include "float4_ni.h" #endif // __BX_FLOAT4_REF_H__ diff --git a/include/bx/float4_sse.h b/include/bx/float4_sse.h index ff14710..239fdaa 100644 --- a/include/bx/float4_sse.h +++ b/include/bx/float4_sse.h @@ -395,6 +395,8 @@ IMPLEMENT_TEST(xyzw , 0xf); #define 
float4_dot3 float4_dot3_ni #define float4_dot float4_dot_ni #endif // defined(__SSE4_1__) +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni #include "float4_ni.h" #endif // __FLOAT4_SSE_H__ diff --git a/include/bx/float4_t.h b/include/bx/float4_t.h index bdeeb91..9b251f8 100644 --- a/include/bx/float4_t.h +++ b/include/bx/float4_t.h @@ -11,7 +11,7 @@ #define BX_FLOAT4_INLINE BX_FORCE_INLINE -#if 0 // defined(__SSE2__) +#if defined(__SSE2__) # include "float4_sse.h" #elif 0 // __ARM_NEON__ # include "float4_neon.h"