diff --git a/3rdparty/UnitTest++/src/TestReporterStdout.cpp b/3rdparty/UnitTest++/src/TestReporterStdout.cpp index 563113c..e63d3e2 100644 --- a/3rdparty/UnitTest++/src/TestReporterStdout.cpp +++ b/3rdparty/UnitTest++/src/TestReporterStdout.cpp @@ -8,6 +8,13 @@ namespace std {} #endif +#if defined(__ANDROID__) +# include <android/log.h> /* declares __android_log_print and ANDROID_LOG_DEBUG, both used by outf below */ +# define outf(format, ...) __android_log_print(ANDROID_LOG_DEBUG, "", format, ##__VA_ARGS__) +#else +# define outf(format, ...) printf(format, ##__VA_ARGS__) +#endif // defined(__ANDROID__) + namespace UnitTest { void TestReporterStdout::ReportFailure(TestDetails const& details, char const* failure) @@ -18,8 +25,8 @@ void TestReporterStdout::ReportFailure(TestDetails const& details, char const* f char const* const errorFormat = "%s(%d): error: Failure in %s: %s\n"; #endif - using namespace std; - printf(errorFormat, details.filename, details.lineNumber, details.testName, failure); + using namespace std; + outf(errorFormat, details.filename, details.lineNumber, details.testName, failure); } void TestReporterStdout::ReportTestStart(TestDetails const& /*test*/) @@ -36,11 +43,15 @@ void TestReporterStdout::ReportSummary(int const totalTestCount, int const faile using namespace std; if (failureCount > 0) - printf("FAILURE: %d out of %d tests failed (%d failures).\n", failedTestCount, totalTestCount, failureCount); + { + outf("FAILURE: %d out of %d tests failed (%d failures).\n", failedTestCount, totalTestCount, failureCount); + } else - printf("Success: %d tests passed.\n", totalTestCount); + { + outf("Success: %d tests passed.\n", totalTestCount); + } - printf("Test time: %.2f seconds.\n", secondsElapsed); + outf("Test time: %.2f seconds.\n", secondsElapsed); } } diff --git a/include/bx/float4_neon.h b/include/bx/float4_neon.h index c07d57d..4f36133 100644 --- a/include/bx/float4_neon.h +++ b/include/bx/float4_neon.h @@ -6,20 +6,13 @@ #ifndef BX_FLOAT4_NEON_H_HEADER_GUARD #define BX_FLOAT4_NEON_H_HEADER_GUARD -#include <arm_neon.h> - namespace bx { + typedef 
__builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) ); -// Reference: -// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html -// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/ -// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ -// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/ -// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/ -// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/ - - typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) ); + typedef __builtin_neon_sf _f32x2_t __attribute__( (__vector_size__( 8) ) ); + typedef __builtin_neon_si _i32x4_t __attribute__( (__vector_size__(16) ) ); + typedef __builtin_neon_usi _u32x4_t __attribute__( (__vector_size__(16) ) ); #define ELEMx 0 #define ELEMy 1 @@ -28,12 +21,7 @@ namespace bx #define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \ BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \ { \ - float4_t result; \ - result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \ - result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \ - result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \ - result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \ - return result; \ + return __builtin_shuffle(_a, (_u32x4_t){ ELEM##_x, ELEM##_y, ELEM##_z, ELEM##_w }); \ } #include "float4_swizzle.inl" @@ -46,89 +34,106 @@ namespace bx BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b) { - return _a; //_mm_movelh_ps(_a, _b); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 0, 1, 4, 5 }); } BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b) { - return _a; //_mm_movelh_ps(_b, _a); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 4, 5, 0, 1 }); } BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b) { - return _a; //_mm_movehl_ps(_a, _b); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 6, 7, 2, 3 }); } 
BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b) { - return _a; //_mm_movehl_ps(_b, _a); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 2, 3, 6, 7 }); } BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b) { - return _a; //_mm_unpacklo_ps(_a, _b); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 0, 4, 1, 5 }); } BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b) { - return _a; //_mm_unpacklo_ps(_b, _a); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 1, 5, 0, 4 }); } BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b) { - return _a; //_mm_unpackhi_ps(_a, _b); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 2, 6, 3, 7 }); } BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b) { - return _a; //_mm_unpackhi_ps(_b, _a); + return __builtin_shuffle(_a, _b, (_u32x4_t){ 6, 2, 7, 3 }); } BX_FLOAT4_INLINE float float4_x(float4_t _a) { - return _a.fxyzw[0]; + return __builtin_neon_vget_lanev4sf(_a, 0, 3); } BX_FLOAT4_INLINE float float4_y(float4_t _a) { - return _a.fxyzw[1]; + return __builtin_neon_vget_lanev4sf(_a, 1, 3); } BX_FLOAT4_INLINE float float4_z(float4_t _a) { - return _a.fxyzw[2]; + return __builtin_neon_vget_lanev4sf(_a, 2, 3); } BX_FLOAT4_INLINE float float4_w(float4_t _a) { - return _a.fxyzw[3]; + return __builtin_neon_vget_lanev4sf(_a, 3, 3); } -// BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) -// { -// return _mm_load_ps(reinterpret_cast(_ptr) ); -// } + BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) + { + return __builtin_neon_vld1v4sf( (const __builtin_neon_sf*)_ptr); + } -// BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) -// { -// _mm_store_ps(reinterpret_cast(_ptr), _a); -// } + BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) + { + __builtin_neon_vst1v4sf( (__builtin_neon_sf*)_ptr, _a); + } -// BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) -// { -// _mm_stream_ps(reinterpret_cast(_ptr), _a); -// } + BX_FLOAT4_INLINE void 
float4_stx(void* _ptr, float4_t _a) + { + __builtin_neon_vst1_lanev4sf( (__builtin_neon_sf*)_ptr, _a, 0); + } + + BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) + { + __builtin_neon_vst1v4sf( (__builtin_neon_sf*)_ptr, _a); + } BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) { - const float32_t val[4] = {_x, _y, _z, _w}; - return __builtin_neon_vld1v4sf(val); + const float val[4] = {_x, _y, _z, _w}; /* four scalars (16 bytes) for the pointer overload to load, not an array of four float4_t vectors */ + return float4_ld(val); } BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) { const uint32_t val[4] = {_x, _y, _z, _w}; - return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val); + const _i32x4_t tmp = __builtin_neon_vld1v4si( (const __builtin_neon_si*)val); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr) + { + const float4_t tmp0 = __builtin_neon_vld1v4sf( (const __builtin_neon_sf *)_ptr); + const _f32x2_t tmp1 = __builtin_neon_vget_lowv4sf(tmp0); + const float4_t result = __builtin_neon_vdup_lanev4sf(tmp1, 0); + + return result; } BX_FLOAT4_INLINE float4_t float4_splat(float _a) @@ -138,107 +143,318 @@ namespace bx BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a) { - return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a); - } - - BX_FLOAT4_INLINE float4_t float4_zero() - { - return vdupq_n_f32(0.0f); - } - - BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) - { - return vaddq_f32(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) - { - return vsubq_f32(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) - { - return vmulq_f32(_a, _b); - } - - BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) - { - return vrecpeq_f32(_a); - } - - BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) - { - return vrsqrteq_f32(_a); - } - - BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, 
float4_t _b) - { - return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0); - } - - //BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) - //{ - // return _mm_andnot_ps(_b, _a); - //} - - BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) - { - return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0); - } - - BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) - { - const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a); - const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b); - const uint32x4_t add = vaddq_u32(tmp0, tmp1); - const float4_t result = vreinterpretq_f32_u32(add); + const _i32x4_t tmp = __builtin_neon_vdup_nv4si( (__builtin_neon_si)_a); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); return result; } - BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) + BX_FLOAT4_INLINE float4_t float4_zero() { - const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a); - const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b); - const uint32x4_t sub = vsubq_u32(tmp0, tmp1); - const float4_t result = vreinterpretq_f32_u32(sub); + return float4_isplat(0); + } + + BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a) + { + const _i32x4_t itof = __builtin_neon_vreinterpretv4siv4sf(_a); + const float4_t result = __builtin_neon_vcvtv4si(itof, 1); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a) + { + const _i32x4_t ftoi = __builtin_neon_vcvtv4sf(_a, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(ftoi); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b) + { + return __builtin_neon_vaddv4sf(_a, _b, 3); + } + + BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b) + { + return __builtin_neon_vsubv4sf(_a, _b, 3); + } + + BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b) + { + return __builtin_neon_vmulv4sf(_a, _b, 3); + } + + BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a) + { + 
return __builtin_neon_vrecpev4sf(_a, 3); + } + + BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a) + { + return __builtin_neon_vrsqrtev4sf(_a, 3); + } + + BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b) + { + const _i32x4_t tmp = __builtin_neon_vceqv4sf(_a, _b, 3); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b) + { + const _i32x4_t tmp = __builtin_neon_vcgtv4sf(_b, _a, 3); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b) + { + const _i32x4_t tmp = __builtin_neon_vcgev4sf(_b, _a, 3); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b) + { + const _i32x4_t tmp = __builtin_neon_vcgtv4sf(_a, _b, 3); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b) + { + const _i32x4_t tmp = __builtin_neon_vcgev4sf(_a, _b, 3); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b) + { + return __builtin_neon_vminv4sf(_a, _b, 3); + } + + BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b) + { + return __builtin_neon_vmaxv4sf(_a, _b, 3); + } + + BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vandv4si(tmp0, tmp1, 0); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = 
__builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vbicv4si(tmp0, tmp1, 0); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vorrv4si(tmp0, tmp1, 0); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_veorv4si(tmp0, tmp1, 0); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); return result; } BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count) { - const uint32x4_t tmp = vreinterpretq_u32_f32(_a); - const uint32x4_t shift = vshlq_n_u32(tmp, _count); - const float4_t result = vreinterpretq_f32_u32(shift); + if (__builtin_constant_p(_count) ) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vshl_nv4si(tmp0, _count, 0); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1); + + return result; + } + + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t shift = __builtin_neon_vdup_nv4si( (__builtin_neon_si)_count); + const _i32x4_t tmp1 = __builtin_neon_vshlv4si(tmp0, shift, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1); return result; } BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count) { - const uint32x4_t tmp = vreinterpretq_i32_f32(_a); - const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0); - const float4_t result = 
vreinterpretq_f32_u32(shift); + if (__builtin_constant_p(_count) ) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vshr_nv4si(tmp0, _count, 0); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1); + + return result; + } + + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t shift = __builtin_neon_vdup_nv4si( (__builtin_neon_si)-_count); + const _i32x4_t tmp1 = __builtin_neon_vshlv4si(tmp0, shift, 0); /* last argument 0, as in the constant-count branch above; 1 is what float4_sra passes and would make this an arithmetic shift */ + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1); return result; } BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count) { - const int32x4_t a = vreinterpretq_s32_f32(_a); - const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1); - const float4_t result = vreinterpretq_f32_s32(shift); + if (__builtin_constant_p(_count) ) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vshr_nv4si(tmp0, _count, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1); + + return result; + } + + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t shift = __builtin_neon_vdup_nv4si( (__builtin_neon_si)-_count); + const _i32x4_t tmp1 = __builtin_neon_vshlv4si(tmp0, shift, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_madd(float4_t _a, float4_t _b, float4_t _c) + { + return __builtin_neon_vmlav4sf(_c, _a, _b, 3); + } + + BX_FLOAT4_INLINE float4_t float4_nmsub(float4_t _a, float4_t _b, float4_t _c) + { + return __builtin_neon_vmlsv4sf(_c, _a, _b, 3); /* multiply-subtract, _c - _a*_b; the previous vmla call made this an exact copy of float4_madd */ + } + + BX_FLOAT4_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vceqv4si(tmp0, tmp1, 1); + const float4_t result = 
__builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_icmplt(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vcgtv4si(tmp1, tmp0, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_icmpgt(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vcgtv4si(tmp0, tmp1, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_imin(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vminv4si(tmp0, tmp1, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_imax(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vmaxv4si(tmp0, tmp1, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vaddv4si(tmp0, tmp1, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b) + { + const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a); + 
const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b); + const _i32x4_t tmp2 = __builtin_neon_vsubv4si(tmp0, tmp1, 1); + const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2); return result; } } // namespace bx -#define float4_div_nr float4_div_nr_ni -#define float4_div float4_div_nr_ni -#define float4_ceil float4_ceil_ni -#define float4_floor float4_floor_ni +#define float4_shuf_xAzC float4_shuf_xAzC_ni +#define float4_shuf_yBwD float4_shuf_yBwD_ni +#define float4_rcp float4_rcp_ni +#define float4_orx float4_orx_ni +#define float4_orc float4_orc_ni +#define float4_neg float4_neg_ni +#define float4_madd float4_madd_ni +#define float4_nmsub float4_nmsub_ni +#define float4_div_nr float4_div_nr_ni +#define float4_div float4_div_nr_ni +#define float4_selb float4_selb_ni +#define float4_sels float4_sels_ni +#define float4_not float4_not_ni +#define float4_abs float4_abs_ni +#define float4_clamp float4_clamp_ni +#define float4_lerp float4_lerp_ni +#define float4_rsqrt float4_rsqrt_ni +#define float4_rsqrt_nr float4_rsqrt_nr_ni +#define float4_rsqrt_carmack float4_rsqrt_carmack_ni +#define float4_sqrt_nr float4_sqrt_nr_ni +#define float4_sqrt float4_sqrt_nr_ni +#define float4_log2 float4_log2_ni +#define float4_exp2 float4_exp2_ni +#define float4_pow float4_pow_ni +#define float4_cross3 float4_cross3_ni +#define float4_normalize3 float4_normalize3_ni +#define float4_dot3 float4_dot3_ni +#define float4_dot float4_dot_ni +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni + #include "float4_ni.h" #endif // BX_FLOAT4_NEON_H_HEADER_GUARD diff --git a/include/bx/float4_ni.h b/include/bx/float4_ni.h index 7d84721..7aa8757 100644 --- a/include/bx/float4_ni.h +++ b/include/bx/float4_ni.h @@ -123,6 +123,22 @@ namespace bx return result; } + BX_FLOAT4_INLINE float4_t float4_imin_ni(float4_t _a, float4_t _b) + { + const float4_t mask = float4_icmplt(_a, _b); + const float4_t result = float4_selb(mask, _a, _b); + + return result; + } + + 
BX_FLOAT4_INLINE float4_t float4_imax_ni(float4_t _a, float4_t _b) + { + const float4_t mask = float4_icmpgt(_a, _b); + const float4_t result = float4_selb(mask, _a, _b); + + return result; + } + BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max) { const float4_t tmp = float4_min(_a, _max); diff --git a/include/bx/float4_ref.h b/include/bx/float4_ref.h index 6341d7e..7d2ab95 100644 --- a/include/bx/float4_ref.h +++ b/include/bx/float4_ref.h @@ -419,7 +419,7 @@ IMPLEMENT_TEST(xyzw , 0xf); return result; } - BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b) + BX_NO_INLINE float4_t float4_max(float4_t _a, float4_t _b) { float4_t result; result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; diff --git a/include/bx/float4_sse.h b/include/bx/float4_sse.h index f75a1d8..e2ab61e 100644 --- a/include/bx/float4_sse.h +++ b/include/bx/float4_sse.h @@ -14,7 +14,6 @@ namespace bx { - typedef __m128 float4_t; #define ELEMx 0 @@ -349,6 +348,58 @@ IMPLEMENT_TEST(xyzw , 0xf); return result; } + BX_FLOAT4_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b) + { + const __m128i tmp0 = _mm_castps_si128(_a); + const __m128i tmp1 = _mm_castps_si128(_b); + const __m128i tmp2 = _mm_cmpeq_epi32(tmp0, tmp1); + const float4_t result = _mm_castsi128_ps(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_icmplt(float4_t _a, float4_t _b) + { + const __m128i tmp0 = _mm_castps_si128(_a); + const __m128i tmp1 = _mm_castps_si128(_b); + const __m128i tmp2 = _mm_cmplt_epi32(tmp0, tmp1); + const float4_t result = _mm_castsi128_ps(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_icmpgt(float4_t _a, float4_t _b) + { + const __m128i tmp0 = _mm_castps_si128(_a); + const __m128i tmp1 = _mm_castps_si128(_b); + const __m128i tmp2 = _mm_cmpgt_epi32(tmp0, tmp1); + const float4_t result = _mm_castsi128_ps(tmp2); + + return result; + } + +#if defined(__SSE4_1__) + BX_FLOAT4_INLINE float4_t float4_imin(float4_t 
_a, float4_t _b) + { + const __m128i tmp0 = _mm_castps_si128(_a); + const __m128i tmp1 = _mm_castps_si128(_b); + const __m128i tmp2 = _mm_min_epi32(tmp0, tmp1); + const float4_t result = _mm_castsi128_ps(tmp2); + + return result; + } + + BX_FLOAT4_INLINE float4_t float4_imax(float4_t _a, float4_t _b) + { + const __m128i tmp0 = _mm_castps_si128(_a); + const __m128i tmp1 = _mm_castps_si128(_b); + const __m128i tmp2 = _mm_max_epi32(tmp0, tmp1); + const float4_t result = _mm_castsi128_ps(tmp2); + + return result; + } +#endif // defined(__SSE4_1__) + BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b) { const __m128i a = _mm_castps_si128(_a); @@ -371,36 +422,40 @@ IMPLEMENT_TEST(xyzw , 0xf); } // namespace bx -#define float4_shuf_xAzC float4_shuf_xAzC_ni -#define float4_shuf_yBwD float4_shuf_yBwD_ni -#define float4_rcp float4_rcp_ni -#define float4_orx float4_orx_ni -#define float4_orc float4_orc_ni -#define float4_neg float4_neg_ni -#define float4_madd float4_madd_ni -#define float4_nmsub float4_nmsub_ni -#define float4_div_nr float4_div_nr_ni -#define float4_selb float4_selb_ni -#define float4_sels float4_sels_ni -#define float4_not float4_not_ni -#define float4_abs float4_abs_ni -#define float4_clamp float4_clamp_ni -#define float4_lerp float4_lerp_ni -#define float4_rsqrt float4_rsqrt_ni -#define float4_rsqrt_nr float4_rsqrt_nr_ni +#define float4_shuf_xAzC float4_shuf_xAzC_ni +#define float4_shuf_yBwD float4_shuf_yBwD_ni +#define float4_rcp float4_rcp_ni +#define float4_orx float4_orx_ni +#define float4_orc float4_orc_ni +#define float4_neg float4_neg_ni +#define float4_madd float4_madd_ni +#define float4_nmsub float4_nmsub_ni +#define float4_div_nr float4_div_nr_ni +#define float4_selb float4_selb_ni +#define float4_sels float4_sels_ni +#define float4_not float4_not_ni +#define float4_abs float4_abs_ni +#define float4_clamp float4_clamp_ni +#define float4_lerp float4_lerp_ni +#define float4_rsqrt float4_rsqrt_ni +#define float4_rsqrt_nr 
float4_rsqrt_nr_ni #define float4_rsqrt_carmack float4_rsqrt_carmack_ni -#define float4_sqrt_nr float4_sqrt_nr_ni -#define float4_log2 float4_log2_ni -#define float4_exp2 float4_exp2_ni -#define float4_pow float4_pow_ni -#define float4_cross3 float4_cross3_ni -#define float4_normalize3 float4_normalize3_ni +#define float4_sqrt_nr float4_sqrt_nr_ni +#define float4_log2 float4_log2_ni +#define float4_exp2 float4_exp2_ni +#define float4_pow float4_pow_ni +#define float4_cross3 float4_cross3_ni +#define float4_normalize3 float4_normalize3_ni +#define float4_ceil float4_ceil_ni +#define float4_floor float4_floor_ni + #if !defined(__SSE4_1__) -#define float4_dot3 float4_dot3_ni -#define float4_dot float4_dot_ni +# define float4_dot3 float4_dot3_ni +# define float4_dot float4_dot_ni +# define float4_imin float4_imin_ni +# define float4_imax float4_imax_ni #endif // defined(__SSE4_1__) -#define float4_ceil float4_ceil_ni -#define float4_floor float4_floor_ni + #include "float4_ni.h" #endif // BX_FLOAT4_SSE_H_HEADER_GUARD diff --git a/include/bx/float4_swizzle.inl b/include/bx/float4_swizzle.inl index 52b3d81..cd9ddbd 100644 --- a/include/bx/float4_swizzle.inl +++ b/include/bx/float4_swizzle.inl @@ -1,5 +1,5 @@ /* - * Copyright 2010-2012 Branimir Karadzic. All rights reserved. + * Copyright 2010-2013 Branimir Karadzic. All rights reserved. 
* License: http://www.opensource.org/licenses/BSD-2-Clause */ diff --git a/include/bx/float4_t.h b/include/bx/float4_t.h index 8fc5620..504400c 100644 --- a/include/bx/float4_t.h +++ b/include/bx/float4_t.h @@ -12,7 +12,7 @@ #if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) ) # include "float4_sse.h" -#elif 0 // __ARM_NEON__ +#elif __ARM_NEON__ # include "float4_neon.h" #else # pragma message("************************************\nUsing SIMD reference implementation!\n************************************") diff --git a/makefile b/makefile index 65a70ae..5bf6050 100644 --- a/makefile +++ b/makefile @@ -1,57 +1,158 @@ -# -# Copyright 2011-2013 Branimir Karadzic. All rights reserved. -# License: http://www.opensource.org/licenses/BSD-2-Clause -# - -all: - premake4 --file=premake/premake4.lua vs2008 - premake4 --file=premake/premake4.lua vs2010 - premake4 --file=premake/premake4.lua --gcc=mingw gmake - premake4 --file=premake/premake4.lua --gcc=linux-gcc gmake - premake4 --file=premake/premake4.lua --gcc=osx gmake - premake4 --file=premake/premake4.lua xcode4 - -linux-debug32: - make -R -C .build/projects/gmake-linux config=debug32 -linux-release32: - make -R -C .build/projects/gmake-linux config=release32 -linux-debug64: - make -R -C .build/projects/gmake-linux config=debug64 -linux-release64: - make -R -C .build/projects/gmake-linux config=release64 -linux: linux-debug32 linux-release32 linux-debug64 linux-release64 - -mingw-debug32: - make -R -C .build/projects/gmake-mingw config=debug32 -mingw-release32: - make -R -C .build/projects/gmake-mingw config=release32 -mingw-debug64: - make -R -C .build/projects/gmake-mingw config=debug64 -mingw-release64: - make -R -C .build/projects/gmake-mingw config=release64 -mingw: mingw-debug32 mingw-release32 mingw-debug64 mingw-release64 - -vs2008-debug32: - devenv .build/projects/vs2008/bgfx.sln /Build "Debug|Win32" -vs2008-release32: - devenv .build/projects/vs2008/bgfx.sln /Build "Release|Win32" 
-vs2008-debug64: - devenv .build/projects/vs2008/bgfx.sln /Build "Debug|x64" -vs2008-release64: - devenv .build/projects/vs2008/bgfx.sln /Build "Release|x64" -vs2008: vs2008-debug32 vs2008-release32 vs2008-debug64 vs2008-release64 - -osx-debug32: - make -C .build/projects/gmake-osx config=debug32 -osx-release32: - make -C .build/projects/gmake-osx config=release32 -osx-debug64: - make -C .build/projects/gmake-osx config=debug64 -osx-release64: - make -C .build/projects/gmake-osx config=release64 -osx: osx-debug32 osx-release32 osx-debug64 osx-release64 - -clean: - @echo Cleaning... - -rm -r .build - -rm -r .debug +# +# Copyright 2011-2013 Branimir Karadzic. All rights reserved. +# License: http://www.opensource.org/licenses/BSD-2-Clause +# + +all: + premake4 --file=premake/premake4.lua vs2008 + premake4 --file=premake/premake4.lua vs2010 + premake4 --file=premake/premake4.lua vs2012 + premake4 --file=premake/premake4.lua --gcc=android-arm gmake + premake4 --file=premake/premake4.lua --gcc=android-mips gmake + premake4 --file=premake/premake4.lua --gcc=android-x86 gmake + premake4 --file=premake/premake4.lua --gcc=nacl gmake + premake4 --file=premake/premake4.lua --gcc=nacl-arm gmake + premake4 --file=premake/premake4.lua --gcc=pnacl gmake + premake4 --file=premake/premake4.lua --gcc=mingw gmake + premake4 --file=premake/premake4.lua --gcc=linux-gcc gmake + premake4 --file=premake/premake4.lua --gcc=osx gmake + premake4 --file=premake/premake4.lua --gcc=ios-arm gmake + premake4 --file=premake/premake4.lua --gcc=ios-simulator gmake + premake4 --file=premake/premake4.lua xcode4 + +.build/projects/gmake-android-arm: + premake4 --file=premake/premake4.lua --gcc=android-arm gmake +android-arm-debug: .build/projects/gmake-android-arm + make -R -C .build/projects/gmake-android-arm config=debug +android-arm-release: .build/projects/gmake-android-arm + make -R -C .build/projects/gmake-android-arm config=release +android-arm: android-arm-debug android-arm-release + 
+.build/projects/gmake-android-mips: + premake4 --file=premake/premake4.lua --gcc=android-mips gmake +android-mips-debug: .build/projects/gmake-android-mips + make -R -C .build/projects/gmake-android-mips config=debug +android-mips-release: .build/projects/gmake-android-mips + make -R -C .build/projects/gmake-android-mips config=release +android-mips: android-mips-debug android-mips-release + +.build/projects/gmake-android-x86: + premake4 --file=premake/premake4.lua --gcc=android-x86 gmake +android-x86-debug: .build/projects/gmake-android-x86 + make -R -C .build/projects/gmake-android-x86 config=debug +android-x86-release: .build/projects/gmake-android-x86 + make -R -C .build/projects/gmake-android-x86 config=release +android-x86: android-x86-debug android-x86-release + +.build/projects/gmake-linux: + premake4 --file=premake/premake4.lua --gcc=linux-gcc gmake +linux-debug32: .build/projects/gmake-linux + make -R -C .build/projects/gmake-linux config=debug32 +linux-release32: .build/projects/gmake-linux + make -R -C .build/projects/gmake-linux config=release32 +linux-debug64: .build/projects/gmake-linux + make -R -C .build/projects/gmake-linux config=debug64 +linux-release64: .build/projects/gmake-linux + make -R -C .build/projects/gmake-linux config=release64 +linux: linux-debug32 linux-release32 linux-debug64 linux-release64 + +.build/projects/gmake-mingw: + premake4 --file=premake/premake4.lua --gcc=mingw gmake +mingw-debug32: .build/projects/gmake-mingw + make -R -C .build/projects/gmake-mingw config=debug32 +mingw-release32: .build/projects/gmake-mingw + make -R -C .build/projects/gmake-mingw config=release32 +mingw-debug64: .build/projects/gmake-mingw + make -R -C .build/projects/gmake-mingw config=debug64 +mingw-release64: .build/projects/gmake-mingw + make -R -C .build/projects/gmake-mingw config=release64 +mingw: mingw-debug32 mingw-release32 mingw-debug64 mingw-release64 + +.build/projects/vs2008: + premake4 --file=premake/premake4.lua vs2008 
+vs2008-debug32: .build/projects/vs2008 # generate the solution first, like the gmake target families do + devenv .build/projects/vs2008/bgfx.sln /Build "Debug|Win32" +vs2008-release32: .build/projects/vs2008 + devenv .build/projects/vs2008/bgfx.sln /Build "Release|Win32" +vs2008-debug64: .build/projects/vs2008 + devenv .build/projects/vs2008/bgfx.sln /Build "Debug|x64" +vs2008-release64: .build/projects/vs2008 + devenv .build/projects/vs2008/bgfx.sln /Build "Release|x64" +vs2008: vs2008-debug32 vs2008-release32 vs2008-debug64 vs2008-release64 + +.build/projects/vs2010: + premake4 --file=premake/premake4.lua vs2010 + +.build/projects/vs2012: + premake4 --file=premake/premake4.lua vs2012 + +.build/projects/gmake-nacl: + premake4 --file=premake/premake4.lua --gcc=nacl gmake +nacl-debug32: .build/projects/gmake-nacl + make -R -C .build/projects/gmake-nacl config=debug32 +nacl-release32: .build/projects/gmake-nacl + make -R -C .build/projects/gmake-nacl config=release32 +nacl-debug64: .build/projects/gmake-nacl + make -R -C .build/projects/gmake-nacl config=debug64 +nacl-release64: .build/projects/gmake-nacl + make -R -C .build/projects/gmake-nacl config=release64 +nacl: nacl-debug32 nacl-release32 nacl-debug64 nacl-release64 + +.build/projects/gmake-nacl-arm: + premake4 --file=premake/premake4.lua --gcc=nacl-arm gmake +nacl-arm-debug: .build/projects/gmake-nacl-arm + make -R -C .build/projects/gmake-nacl-arm config=debug +nacl-arm-release: .build/projects/gmake-nacl-arm + make -R -C .build/projects/gmake-nacl-arm config=release +nacl-arm: nacl-arm-debug nacl-arm-release # must name the two targets defined just above; no *32 variants exist for nacl-arm + +.build/projects/gmake-pnacl: + premake4 --file=premake/premake4.lua --gcc=pnacl gmake +pnacl-debug: .build/projects/gmake-pnacl + make -R -C .build/projects/gmake-pnacl config=debug +pnacl-release: .build/projects/gmake-pnacl + make -R -C .build/projects/gmake-pnacl config=release +pnacl: pnacl-debug pnacl-release + +.build/projects/gmake-osx: + premake4 --file=premake/premake4.lua --gcc=osx gmake +osx-debug32: .build/projects/gmake-osx + make -C .build/projects/gmake-osx config=debug32 +osx-release32: .build/projects/gmake-osx + make -C 
.build/projects/gmake-osx config=release32 +osx-debug64: .build/projects/gmake-osx + make -C .build/projects/gmake-osx config=debug64 +osx-release64: .build/projects/gmake-osx + make -C .build/projects/gmake-osx config=release64 +osx: osx-debug32 osx-release32 osx-debug64 osx-release64 + +.build/projects/gmake-ios-arm: + premake4 --file=premake/premake4.lua --gcc=ios-arm gmake +ios-arm-debug: .build/projects/gmake-ios-arm + make -R -C .build/projects/gmake-ios-arm config=debug +ios-arm-release: .build/projects/gmake-ios-arm + make -R -C .build/projects/gmake-ios-arm config=release +ios-arm: ios-arm-debug ios-arm-release + +.build/projects/gmake-ios-simulator: + premake4 --file=premake/premake4.lua --gcc=ios-simulator gmake +ios-simulator-debug: .build/projects/gmake-ios-simulator + make -R -C .build/projects/gmake-ios-simulator config=debug +ios-simulator-release: .build/projects/gmake-ios-simulator + make -R -C .build/projects/gmake-ios-simulator config=release +ios-simulator: ios-simulator-debug ios-simulator-release + +rebuild-shaders: + make -R -C examples rebuild + +analyze: + cppcheck src/ + cppcheck examples/ + +docs: + doxygen premake/bgfx.doxygen + markdown README.md > .build/docs/readme.html + +clean: + @echo Cleaning... + -@rm -rf .build diff --git a/premake/premake4.lua b/premake/premake4.lua index 14c8bee..d7b530c 100644 --- a/premake/premake4.lua +++ b/premake/premake4.lua @@ -12,6 +12,7 @@ solution "bx" platforms { "x32", "x64", + "Native", -- for targets where bitness is not specified } language "C++" @@ -52,3 +53,35 @@ project "bx.test" BX_DIR .. "tests/**.cpp", BX_DIR .. 
"tests/**.H", } + + configuration { "vs*" } + + configuration { "android*" } + kind "ConsoleApp" + targetextension ".so" + linkoptions { + "-shared", + } + + configuration { "nacl or nacl-arm" } + kind "ConsoleApp" + targetextension ".nexe" + links { + "ppapi", + "pthread", + } + + configuration { "pnacl" } + kind "ConsoleApp" + targetextension ".pexe" + links { + "ppapi", + "pthread", + } + + configuration { "linux-*" } + links { + "pthread", + } + + configuration {} diff --git a/premake/toolchain.lua b/premake/toolchain.lua index 395a596..eae2aa6 100755 --- a/premake/toolchain.lua +++ b/premake/toolchain.lua @@ -419,7 +419,7 @@ function toolchain(_buildDir, _libDir) "-mthumb", "-march=armv7-a", "-mfloat-abi=softfp", - "-mfpu=vfpv3-d16", + "-mfpu=neon", } configuration { "android-mips" } diff --git a/premake/unittest++.lua b/premake/unittest++.lua index f895a0c..f7fa4e3 100644 --- a/premake/unittest++.lua +++ b/premake/unittest++.lua @@ -29,13 +29,13 @@ project "UnitTest++" "../3rdparty/UnitTest++/src/*.h", } - configuration { "linux or osx" } + configuration { "linux or osx or android-*" } files { "../3rdparty/UnitTest++/src/Posix/**.cpp", "../3rdparty/UnitTest++/src/Posix/**.h", } - configuration { "windows" } + configuration { "mingw or vs*" } files { "../3rdparty/UnitTest++/src/Win32/**.cpp", "../3rdparty/UnitTest++/src/Win32/**.h", diff --git a/tests/dbg.cpp b/tests/dbg.cpp new file mode 100644 index 0000000..9c210aa --- /dev/null +++ b/tests/dbg.cpp @@ -0,0 +1,87 @@ +/* + * Copyright 2011-2013 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <ctype.h> // isprint
+
+#include "dbg.h"
+#include <bx/string.h>
+#include <bx/debug.h>
+
+// Toolchains without C99 va_copy (e.g. the older Visual Studio targets): plain assignment.
+#ifndef va_copy
+#	define va_copy(_dst, _src) ( (_dst) = (_src) )
+#endif // va_copy
+
+// Formats _format/_argList and passes the resulting string to bx::debugOutput.
+// Output that fits in the 8 KiB stack buffer is formatted once; longer output is
+// formatted a second time into an alloca'd buffer of the exact size.
+// Assumes bx::vsnprintf returns the untruncated length like C99 vsnprintf - TODO confirm.
+void dbgPrintfVargs(const char* _format, va_list _argList)
+{
+	char temp[8192];
+	char* out = temp;
+
+	// The first pass works on a copy: a va_list must not be reused after it was consumed.
+	va_list argListCopy;
+	va_copy(argListCopy, _argList);
+	const int32_t len = bx::vsnprintf(out, sizeof(temp), _format, argListCopy);
+	va_end(argListCopy);
+
+	if (len < 0)
+	{
+		// Formatting failed; there is no valid length to terminate the string at.
+		return;
+	}
+
+	// len == sizeof(temp) is already too long: the terminator needs one more byte.
+	if ( (int32_t)sizeof(temp) <= len)
+	{
+		out = (char*)alloca(len+1);
+		bx::vsnprintf(out, len+1, _format, _argList);
+	}
+	out[len] = '\0';
+	bx::debugOutput(out);
+}
+
+// printf-style front end for dbgPrintfVargs.
+void dbgPrintf(const char* _format, ...)
+{
+	va_list argList;
+	va_start(argList, _format);
+	dbgPrintfVargs(_format, argList);
+	va_end(argList);
+}
+
+#define DBG_ADDRESS "%" PRIxPTR
+
+// Prints the formatted message, then "data: <address>, size: <n>", then a hex dump of
+// _size bytes starting at _data: 16 bytes per row, each row showing the row address,
+// the bytes in hex and the bytes as characters (non-printable bytes shown as '.').
+// A NULL _data prints only the message and the address/size line.
+void dbgPrintfData(const void* _data, uint32_t _size, const char* _format, ...)
+{
+#define HEX_DUMP_WIDTH 16
+#define HEX_DUMP_SPACE_WIDTH 48
+#define HEX_DUMP_FORMAT "%-" DBG_STRINGIZE(HEX_DUMP_SPACE_WIDTH) "." DBG_STRINGIZE(HEX_DUMP_SPACE_WIDTH) "s"
+
+	va_list argList;
+	va_start(argList, _format);
+	dbgPrintfVargs(_format, argList);
+	va_end(argList);
+
+	// PRIxPTR expects uintptr_t and _size is unsigned, so match the arguments to the format.
+	dbgPrintf("\ndata: " DBG_ADDRESS ", size: %u\n", (uintptr_t)_data, _size);
+
+	if (NULL != _data)
+	{
+		const uint8_t* data = reinterpret_cast<const uint8_t*>(_data);
+		char hex[HEX_DUMP_WIDTH*3+1];
+		char ascii[HEX_DUMP_WIDTH+1];
+		uint32_t hexPos = 0;
+		uint32_t asciiPos = 0;
+		for (uint32_t ii = 0; ii < _size; ++ii)
+		{
+			// data points at the start of the current row; asciiPos is the offset within it.
+			bx::snprintf(&hex[hexPos], sizeof(hex)-hexPos, "%02x ", data[asciiPos]);
+			hexPos += 3;
+
+			ascii[asciiPos] = isprint(data[asciiPos]) ? data[asciiPos] : '.';
+			asciiPos++;
+
+			if (HEX_DUMP_WIDTH == asciiPos)
+			{
+				// Row is full: print it and advance to the next row.
+				ascii[asciiPos] = '\0';
+				dbgPrintf("\t" DBG_ADDRESS "\t" HEX_DUMP_FORMAT "\t%s\n", (uintptr_t)data, hex, ascii);
+				data += asciiPos;
+				hexPos = 0;
+				asciiPos = 0;
+			}
+		}
+
+		// Print the final, partially filled row.
+		if (0 != asciiPos)
+		{
+			ascii[asciiPos] = '\0';
+			dbgPrintf("\t" DBG_ADDRESS "\t" HEX_DUMP_FORMAT "\t%s\n", (uintptr_t)data, hex, ascii);
+		}
+	}
+
+#undef HEX_DUMP_WIDTH
+#undef HEX_DUMP_SPACE_WIDTH
+#undef HEX_DUMP_FORMAT
+}
diff --git a/tests/dbg.h b/tests/dbg.h
new file mode 100644
index 0000000..ff4bc1b
--- /dev/null
+++ b/tests/dbg.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011-2013 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef DBG_H_HEADER_GUARD
+#define DBG_H_HEADER_GUARD
+
+#include <stdarg.h> // va_list
+#include <stdint.h>
+
+#define DBG_STRINGIZE(_x) DBG_STRINGIZE_(_x)
+#define DBG_STRINGIZE_(_x) #_x
+#define DBG_FILE_LINE_LITERAL "" __FILE__ "(" DBG_STRINGIZE(__LINE__) "): "
+#define DBG(_format, ...) dbgPrintf(DBG_FILE_LINE_LITERAL "" _format "\n", ##__VA_ARGS__)
+
+extern void dbgPrintfVargs(const char* _format, va_list _argList);
+extern void dbgPrintf(const char* _format, ...);
+extern void dbgPrintfData(const void* _data, uint32_t _size, const char* _format, ...);
+
+#endif // DBG_H_HEADER_GUARD
diff --git a/tests/float4_t.cpp b/tests/float4_t.cpp
new file mode 100644
index 0000000..8638c5a
--- /dev/null
+++ b/tests/float4_t.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright 2010-2013 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include "test.h"
+#include <bx/float4_t.h>
+#include <string.h>
+
+using namespace bx;
+
+// Views the 16 bytes of a float4_t as floats, integers or raw characters.
+union float4_cast
+{
+	bx::float4_t f4;
+	float f[4];
+	uint32_t ui[4];
+	int32_t i[4];
+	char c[16];
+};
+
+// Logs the four lanes of _a as signed integers and checks each against _0.._3.
+void float4_check_int32(const char* _str, bx::float4_t _a, int32_t _0, int32_t _1, int32_t _2, int32_t _3)
+{
+	float4_cast c; c.f4 = _a;
+	DBG("%s (%d, %d, %d, %d) == (%d, %d, %d, %d)"
+		, _str
+		, c.i[0], c.i[1], c.i[2], c.i[3]
+		, _0, _1, _2, _3
+		);
+
+	CHECK_EQUAL(c.i[0], _0);
+	CHECK_EQUAL(c.i[1], _1);
+	CHECK_EQUAL(c.i[2], _2);
+	CHECK_EQUAL(c.i[3], _3);
+}
+
+// Logs the four lanes of _a as unsigned hex and checks each against _0.._3.
+void float4_check_uint32(const char* _str, bx::float4_t _a, uint32_t _0, uint32_t _1, uint32_t _2, uint32_t _3)
+{
+	float4_cast c; c.f4 = _a;
+
+	DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x)"
+		, _str
+		, c.ui[0], c.ui[1], c.ui[2], c.ui[3]
+		, _0, _1, _2, _3
+		);
+
+	CHECK_EQUAL(c.ui[0], _0);
+	CHECK_EQUAL(c.ui[1], _1);
+	CHECK_EQUAL(c.ui[2], _2);
+	CHECK_EQUAL(c.ui[3], _3);
+}
+
+// Logs the four lanes of _a as floats and checks each against _0.._3 (exact comparison).
+void float4_check_float(const char* _str, bx::float4_t _a, float _0, float _1, float _2, float _3)
+{
+	float4_cast c; c.f4 = _a;
+
+	DBG("%s (%f, %f, %f, %f) == (%f, %f, %f, %f)"
+		, _str
+		, c.f[0], c.f[1], c.f[2], c.f[3]
+		, _0, _1, _2, _3
+		);
+
+	CHECK_EQUAL(c.f[0], _0);
+	CHECK_EQUAL(c.f[1], _1);
+	CHECK_EQUAL(c.f[2], _2);
+	CHECK_EQUAL(c.f[3], _3);
+}
+
+// Takes the first byte of each lane of _a as a character and checks that the resulting
+// four-character string equals _str. The tests fill each lane with one repeated ASCII
+// byte, so the string spells out the lane order.
+void float4_check_string(const char* _str, bx::float4_t _a)
+{
+	float4_cast c; c.f4 = _a;
+	const char test[5] = { c.c[0], c.c[4], c.c[8], c.c[12], '\0' };
+
+	DBG("%s %s", _str, test);
+
+	CHECK(0 == strcmp(_str, test) );
+}
+
+// Checks every swizzle listed in float4_swizzle.inl: lanes hold 'x', 'y', 'z', 'w'.
+TEST(float4_swizzle)
+{
+	const float4_t xyzw = float4_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
+
+#define ELEMx 0
+#define ELEMy 1
+#define ELEMz 2
+#define ELEMw 3
+#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
+			float4_check_string("" #_x #_y #_z #_w "", float4_swiz_##_x##_y##_z##_w(xyzw) ); \
+
+#include <bx/float4_swizzle.inl>
+
+#undef IMPLEMENT_SWIZZLE
+#undef ELEMw
+#undef ELEMz
+#undef ELEMy
+#undef ELEMx
+}
+
+// Checks the two-operand shuffles: first operand lanes are "xyzw", second are "ABCD".
+TEST(float4_shuffle)
+{
+	const float4_t xyzw = float4_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
+	const float4_t ABCD = float4_ild(0x41414141, 0x42424242, 0x43434343, 0x44444444);
+	float4_check_string("xyAB", float4_shuf_xyAB(xyzw, ABCD) );
+	float4_check_string("ABxy", float4_shuf_ABxy(xyzw, ABCD) );
+	float4_check_string("zwCD", float4_shuf_zwCD(xyzw, ABCD) );
+	float4_check_string("CDzw", float4_shuf_CDzw(xyzw, ABCD) );
+	float4_check_string("xAyB", float4_shuf_xAyB(xyzw, ABCD) );
+	float4_check_string("zCwD", float4_shuf_zCwD(xyzw, ABCD) );
+	float4_check_string("xAzC", float4_shuf_xAzC(xyzw, ABCD) );
+	float4_check_string("yBwD", float4_shuf_yBwD(xyzw, ABCD) );
+	float4_check_string("CzDw", float4_shuf_CzDw(xyzw, ABCD) );
+}
+
+// Comparisons are expected to produce an all-ones lane (-1) for true and 0 for false.
+TEST(float4_compare)
+{
+	float4_check_uint32("cmpeq"
+		, float4_cmpeq(float4_ld(1.0f, 2.0f, 3.0f, 4.0f), float4_ld(0.0f, 2.0f, 0.0f, 3.0f) )
+		, 0, -1, 0, 0
+		);
+
+	float4_check_uint32("cmplt"
+		, float4_cmplt(float4_ld(1.0f, 2.0f, 3.0f, 4.0f), float4_ld(0.0f, 2.0f, 0.0f, 3.0f) )
+		, 0, 0, 0, 0
+		);
+
+	float4_check_uint32("cmple"
+		, float4_cmple(float4_ld(1.0f, 2.0f, 3.0f, 4.0f), float4_ld(0.0f, 2.0f, 0.0f, 3.0f) )
+		, 0, -1, 0, 0
+		);
+
+	float4_check_uint32("cmpgt"
+		, float4_cmpgt(float4_ld(1.0f, 2.0f, 3.0f, 4.0f), float4_ld(0.0f, 2.0f, 0.0f, 3.0f) )
+		, -1, 0, -1, -1
+		);
+
+	float4_check_uint32("cmpge"
+		, float4_cmpge(float4_ld(1.0f, 2.0f, 3.0f, 4.0f), float4_ld(0.0f, 2.0f, 0.0f, 3.0f) )
+		, -1, -1, -1, -1
+		);
+
+	float4_check_uint32("icmpeq"
+		, float4_icmpeq(float4_ild(0, 1, 2, 3), float4_ild(0, -2, 1, 3) )
+		, -1, 0, 0, -1
+		);
+
+	float4_check_uint32("icmplt"
+		, float4_icmplt(float4_ild(0, 1, 2, 3), float4_ild(0, -2, 1, 3) )
+		, 0, 0, 0, 0
+		);
+
+	float4_check_uint32("icmpgt"
+		, float4_icmpgt(float4_ild(0, 1, 2, 3), float4_ild(0, -2, 1, 3) )
+		, 0, -1, -1, 0
+		);
+}
+
+// Checks the load, zero and splat constructors.
+TEST(float4_load)
+{
+	float4_check_float("ld"
+		, float4_ld(0.0f, 1.0f, 2.0f, 3.0f)
+		, 0.0f, 1.0f, 2.0f, 3.0f
+		);
+
+	float4_check_int32("ild"
+		, float4_ild(-1, 0, 1, 2)
+		, -1, 0, 1, 2
+		);
+
+	float4_check_int32("ild"
+		, float4_ild(-1, -2, -3, -4)
+		, -1, -2, -3, -4
+		);
+
+	float4_check_uint32("zero", float4_zero()
+		, 0, 0, 0, 0
+		);
+
+	float4_check_uint32("isplat", float4_isplat(0x80000001)
+		, 0x80000001, 0x80000001, 0x80000001, 0x80000001
+		);
+
+	float4_check_float("splat", float4_splat(1.0f)
+		, 1.0f, 1.0f, 1.0f, 1.0f
+		);
+}
+
+// Checks shifts, bitwise logic and min/max.
+TEST(float4)
+{
+	const float4_t isplat = float4_isplat(0x80000001);
+	float4_check_uint32("sll"
+		, float4_sll(isplat, 1)
+		, 0x00000002, 0x00000002, 0x00000002, 0x00000002
+		);
+
+	float4_check_uint32("srl"
+		, float4_srl(isplat, 1)
+		, 0x40000000, 0x40000000, 0x40000000, 0x40000000
+		);
+
+	float4_check_uint32("sra"
+		, float4_sra(isplat, 1)
+		, 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000
+		);
+
+	float4_check_uint32("and"
+		, float4_and(float4_isplat(0x55555555), float4_isplat(0xaaaaaaaa) )
+		, 0, 0, 0, 0
+		);
+
+	float4_check_uint32("or "
+		, float4_or(float4_isplat(0x55555555), float4_isplat(0xaaaaaaaa) )
+		, -1, -1, -1, -1
+		);
+
+	// 0x55555555 ^ 0xaaaaaaaa == 0xffffffff; this case must exercise xor, not or.
+	float4_check_uint32("xor"
+		, float4_xor(float4_isplat(0x55555555), float4_isplat(0xaaaaaaaa) )
+		, -1, -1, -1, -1
+		);
+
+	float4_check_int32("imin"
+		, float4_imin(float4_ild(0, 1, 2, 3), float4_ild(-1, 2, -2, 1) )
+		, -1, 1, -2, 1
+		);
+
+	float4_check_float("min"
+		, float4_min(float4_ld(0.0f, 1.0f, 2.0f, 3.0f), float4_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
+		, -1.0f, 1.0f, -2.0f, 1.0f
+		);
+
+	float4_check_int32("imax"
+		, float4_imax(float4_ild(0, 1, 2, 3), float4_ild(-1, 2, -2, 1) )
+		, 0, 2, 2, 3
+		);
+
+	float4_check_float("max"
+		, float4_max(float4_ld(0.0f, 1.0f, 2.0f, 3.0f), float4_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
+		, 0.0f, 2.0f, 2.0f, 3.0f
+		);
+}
diff --git a/tests/main.cpp b/tests/main.cpp
index 5186dc3..fa98f9a 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,4 +1,9 @@
-/*-
+/*
+ * Copyright 2010-2013 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+/*
  * Copyright 2012 Matthew Endsley
  * All rights reserved
  *
@@ -26,6 +31,16 @@
 
 #include "test.h"
 
-int main() {
+#if BX_PLATFORM_ANDROID
+#include <android/native_activity.h>
+
+// Android builds have no main(): the tests run from the native activity entry point
+// and the process exits with the value returned by UnitTest::RunAllTests().
+void ANativeActivity_onCreate(ANativeActivity*, void*, size_t)
+{
+	exit(UnitTest::RunAllTests() );
+}
+#else
+int main()
+{
 	return UnitTest::RunAllTests();
 }
+#endif // BX_PLATFORM_ANDROID
diff --git a/tests/test.h b/tests/test.h
index 934d033..bfebccb 100644
--- a/tests/test.h
+++ b/tests/test.h
@@ -8,6 +8,7 @@
 
 #include <bx/bx.h>
 #include <UnitTest++.h>
+#include "dbg.h"
 
 #if !BX_COMPILER_MSVC
 #	define _strdup strdup