diff --git a/include/bx/inline/math.inl b/include/bx/inline/math.inl index 710ff0b..1664e85 100644 --- a/include/bx/inline/math.inl +++ b/include/bx/inline/math.inl @@ -127,6 +127,41 @@ namespace bx return cos(_a - kPiHalf); } + inline float sinh(float _a) + { + return 0.5f*(exp(_a) - exp(-_a) ); + } + + inline float asin(float _a) + { + return kPiHalf - acos(_a); + } + + inline float cosh(float _a) + { + return 0.5f*(exp(_a) + exp(-_a) ); + } + + inline float tan(float _a) + { + return sin(_a) / cos(_a); + } + + inline float tanh(float _a) + { + const float tmp0 = exp(2.0f*_a); + const float tmp1 = tmp0 - 1.0f; + const float tmp2 = tmp0 + 1.0f; + const float result = tmp1 / tmp2; + + return result; + } + + inline float atan(float _a) + { + return atan2(_a, 1.0f); + } + inline float pow(float _a, float _b) { return exp(_b * log(_a) ); @@ -142,9 +177,19 @@ namespace bx return log(_a) * kInvLogNat2; } + inline float sqrt(float _a) + { + if (_a < kNearZero) + { + return 0.0f; + } + + return 1.0f/rsqrt(_a); + } + inline float rsqrt(float _a) { - return 1.0f/sqrt(_a); + return pow(_a, -0.5f); } inline float trunc(float _a) @@ -157,6 +202,11 @@ namespace bx return _a - trunc(_a); } + inline float mad(float _a, float _b, float _c) + { + return _a * _b + _c; + } + inline float mod(float _a, float _b) { return _a - _b * floor(_a / _b); diff --git a/include/bx/inline/simd128_langext.inl b/include/bx/inline/simd128_langext.inl index 4aa051f..0c4479e 100644 --- a/include/bx/inline/simd128_langext.inl +++ b/include/bx/inline/simd128_langext.inl @@ -318,10 +318,10 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf); BX_SIMD_FORCE_INLINE simd128_langext_t simd_sqrt(simd128_langext_t _a) { simd128_langext_t result; - result.vf[0] = sqrtf(_a.vf[0]); - result.vf[1] = sqrtf(_a.vf[1]); - result.vf[2] = sqrtf(_a.vf[2]); - result.vf[3] = sqrtf(_a.vf[3]); + result.vf[0] = sqrt(_a.vf[0]); + result.vf[1] = sqrt(_a.vf[1]); + result.vf[2] = sqrt(_a.vf[2]); + result.vf[3] = sqrt(_a.vf[3]); return result; } @@ -329,10 +329,10 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf); BX_SIMD_FORCE_INLINE simd128_langext_t simd_rsqrt_est(simd128_langext_t _a) { simd128_langext_t result; - result.vf[0] = 1.0f / sqrtf(_a.vf[0]); - result.vf[1] = 1.0f / sqrtf(_a.vf[1]); - result.vf[2] = 1.0f / sqrtf(_a.vf[2]); - result.vf[3] = 1.0f / sqrtf(_a.vf[3]); + result.vf[0] = 1.0f / sqrt(_a.vf[0]); + result.vf[1] = 1.0f / sqrt(_a.vf[1]); + result.vf[2] = 1.0f / sqrt(_a.vf[2]); + result.vf[3] = 1.0f / sqrt(_a.vf[3]); return result; } diff --git a/include/bx/inline/uint32_t.inl b/include/bx/inline/uint32_t.inl index 4f97f65..8b85bbe 100644 --- a/include/bx/inline/uint32_t.inl +++ b/include/bx/inline/uint32_t.inl @@ -119,11 +119,21 @@ namespace bx return _a + _b; } + inline uint32_t uint32_iadd(uint32_t _a, uint32_t _b) + { + return int32_t(_a) + int32_t(_b); + } + inline uint32_t uint32_sub(uint32_t _a, uint32_t _b) { return _a - _b; } + inline uint32_t uint32_isub(uint32_t _a, uint32_t _b) + { + return int32_t(_a) - int32_t(_b); + } + inline uint32_t uint32_mul(uint32_t _a, uint32_t _b) { return _a * _b; diff --git a/include/bx/math.h b/include/bx/math.h index 834fbe9..b4707d5 100644 --- a/include/bx/math.h +++ b/include/bx/math.h @@ -17,9 +17,15 @@ namespace bx extern const float kPi2; extern const float kInvPi; extern const float kPiHalf; + extern const float kPiQuarter; extern const float kSqrt2; + extern const float kLogNat10; extern const float kInvLogNat2; - extern const float kHuge; + extern const float kLogNat2Hi; + extern const float kLogNat2Lo; + extern const float kE; + extern const float kNearZero; + extern const float kInfinity; /// typedef float (*LerpFn)(float _a, float _b, float _t); @@ -65,22 +71,22 @@ namespace bx /// uint32_t floatFlip(uint32_t _value); - /// + /// Returns true if _f is a number that is NaN. bool isNan(float _f); - /// + /// Returns true if _f is a number that is NaN. bool isNan(double _f); - /// + /// Returns true if _f is not infinite and is not a NaN. bool isFinite(float _f); - /// + /// Returns true if _f is not infinite and is not a NaN. bool isFinite(double _f); - /// + /// Returns true if _f is infinite and is not a NaN. bool isInfinite(float _f); - /// + /// Returns true if _f is infinite and is not a NaN. bool isInfinite(double _f); /// @@ -98,49 +104,64 @@ namespace bx /// float sign(float _a); - /// + /// Returns the absolute of _a. float abs(float _a); - /// + /// Returns the square of _a. float square(float _a); - /// + /// Returns the cosine of the argument _a. float sin(float _a); - /// + /// Returns hyperbolic sine of the argument _a. + float sinh(float _a); + + /// Returns radian angle between -pi/2 and +pi/2 whose sine is _a. float asin(float _a); - /// + /// Returns the cosine of the argument _a. float cos(float _a); - /// - float tan(float _a); + /// Returns hyperbolic cosine of the argument _a. + float cosh(float _a); - /// + /// Returns radian angle between 0 and pi whose cosine is _a. float acos(float _a); - /// + /// Returns the circular tangent of the radian argument _a. + float tan(float _a); + + /// Returns hyperbolic tangent of the argument _a. + float tanh(float _a); + + /// Returns radian angle between -pi/2 and +pi/2 whose tangent is _a. + float atan(float _a); + + /// Retruns the inverse tangent of _y/_x. float atan2(float _y, float _x); - /// + /// Computes _a raised to the _b power. float pow(float _a, float _b); /// + float ldexp(float _a, int32_t _b); + + /// Returns e (2.71828...) raised to the _a power. float exp(float _a); - /// + /// Returns 2 raised to the _a power. float exp2(float _a); - /// + /// Returns the base e (2.71828...) logarithm of _a. float log(float _a); - /// + /// Returns the base 2 logarithm of _a. float log2(float _a); - /// + /// Returns the square root of _a. float sqrt(float _a); - /// + /// Returns reciprocal square root of _a. float rsqrt(float _a); /// @@ -149,6 +170,9 @@ namespace bx /// float fract(float _a); + /// Returns result of multipla and add (_a * _b + _c). + float mad(float _a, float _b, float _c); + /// float mod(float _a, float _b); diff --git a/include/bx/simd_t.h b/include/bx/simd_t.h index 8676a93..2757194 100644 --- a/include/bx/simd_t.h +++ b/include/bx/simd_t.h @@ -39,7 +39,6 @@ && !BX_PLATFORM_EMSCRIPTEN \ && !BX_PLATFORM_IOS \ && BX_CLANG_HAS_EXTENSION(attribute_ext_vector_type) -# include # undef BX_SIMD_LANGEXT # define BX_SIMD_LANGEXT 1 #endif // diff --git a/src/math.cpp b/src/math.cpp index 7fe5a79..53f36cc 100644 --- a/src/math.cpp +++ b/src/math.cpp @@ -5,8 +5,7 @@ #include "bx_p.h" #include - -#include +#include namespace bx { @@ -14,18 +13,17 @@ namespace bx const float kPi2 = 6.2831853071795864769252867665590f; const float kInvPi = 1.0f/kPi; const float kPiHalf = 1.5707963267948966192313216916398f; + const float kPiQuarter = 0.7853981633974483096156608458199f; const float kSqrt2 = 1.4142135623730950488016887242097f; + const float kLogNat10 = 2.3025850929940456840179914546844f; const float kInvLogNat2 = 1.4426950408889634073599246810019f; -#if BX_COMPILER_MSVC - const float kHuge = float(HUGE_VAL); -#else - const float kHuge = HUGE_VALF; -#endif // BX_COMPILER_MSVC - - float asin(float _a) - { - return ::asinf(_a); - } + const float kLogNat2Hi = 0.6931471805599453094172321214582f; + const float kLogNat2Lo = 1.90821492927058770002e-10f; + const float kE = 2.7182818284590452353602874713527f; + const float kNearZero = 1.0f/float(1 << 28); + const float kFloatMin = 1.175494e-38f; + const float kFloatMax = 3.402823e+38f; + const float kInfinity = bitsToFloat(UINT32_C(0x7f800000) ); namespace { @@ -72,14 +70,13 @@ namespace bx c10 = kSinC10; } - const float xsq = square(xx); - float result; - result = xsq * c10 + c8; - result = xsq * result + c6; - result = xsq * result + c4; - result = xsq * result + c2; - result = xsq * result + 1.0f; - result *= c0; + const float xsq = square(xx); + const float tmp0 = mad(c10, xsq, c8 ); + const float tmp1 = mad(tmp0, xsq, c6 ); + const float tmp2 = mad(tmp1, xsq, c4 ); + const float tmp3 = mad(tmp2, xsq, c2 ); + const float tmp4 = mad(tmp3, xsq, 1.0); + const float result = tmp4 * c0; return bits == 1 || bits == 2 ? -result @@ -87,38 +84,168 @@ namespace bx ; } - float tan(float _a) + namespace { -#if 0 - return sin(_a) / cos(_a); -#else - return ::tanf(_a); -#endif - } + static const float kAcosC0 = 1.5707288f; + static const float kAcosC1 = -0.2121144f; + static const float kAcosC2 = 0.0742610f; + static const float kAcosC3 = -0.0187293f; + + } // namespace float acos(float _a) { - return ::acosf(_a); + const float absa = abs(_a); + const float tmp0 = mad(kAcosC3, absa, kAcosC2); + const float tmp1 = mad(tmp0, absa, kAcosC1); + const float tmp2 = mad(tmp1, absa, kAcosC0); + const float tmp3 = tmp2 * sqrt(1.0 - absa); + const float negate = float(_a < 0.0f); + const float tmp4 = tmp3 - 2.0f*negate*tmp3; + const float result = negate*kPi + tmp4; + + return result; } + namespace + { + static const float kAtan2C0 = -0.013480470f; + static const float kAtan2C1 = 0.057477314f; + static const float kAtan2C2 = -0.121239071f; + static const float kAtan2C3 = 0.195635925f; + static const float kAtan2C4 = -0.332994597f; + static const float kAtan2C5 = 0.999995630f; + + } // namespace + float atan2(float _y, float _x) { - return ::atan2f(_y, _x); + const float ax = abs(_x); + const float ay = abs(_y); + const float maxaxy = max(ax, ay); + const float minaxy = min(ax, ay); + const float mxy = minaxy / maxaxy; + const float mxysq = square(mxy); + const float tmp0 = mad(kAtan2C0, mxysq, kAtan2C1); + const float tmp1 = mad(tmp0, mxysq, kAtan2C2); + const float tmp2 = mad(tmp1, mxysq, kAtan2C3); + const float tmp3 = mad(tmp2, mxysq, kAtan2C4); + const float tmp4 = mad(tmp3, mxysq, kAtan2C5); + const float tmp5 = tmp4 * mxy; + const float tmp6 = ay > ax ? kPiHalf - tmp5 : tmp5; + const float tmp7 = _x < 0.0f ? kPi - tmp6 : tmp6; + const float result = sign(_y)*tmp7; + + return result; } + float ldexp(float _a, int32_t _b) + { + const uint32_t ftob = floatToBits(_a); + const uint32_t masked = uint32_and(ftob, UINT32_C(0xff800000) ); + const uint32_t expsign0 = uint32_sra(masked, 23); + const uint32_t tmp = uint32_iadd(expsign0, _b); + const uint32_t expsign1 = uint32_sll(tmp, 23); + const uint32_t mantissa = uint32_and(ftob, UINT32_C(0x007fffff) ); + const uint32_t bits = uint32_or(mantissa, expsign1); + const float result = bitsToFloat(bits); + + return result; + } + + float frexp(float _a, int32_t* _exp) + { + const uint32_t ftob = floatToBits(_a); + const uint32_t masked0 = uint32_and(ftob, UINT32_C(0x7f800000) ); + const uint32_t exp0 = uint32_srl(masked0, 23); + const uint32_t masked1 = uint32_and(ftob, UINT32_C(0x807fffff) ); + const uint32_t bits = uint32_or(masked1, UINT32_C(0x3f000000) ); + const float result = bitsToFloat(bits); + + *_exp = int32_t(exp0 - 0x7e); + + return result; + } + + namespace + { + static const float kExpC0 = 1.66666666666666019037e-01f; + static const float kExpC1 = -2.77777777770155933842e-03f; + static const float kExpC2 = 6.61375632143793436117e-05f; + static const float kExpC3 = -1.65339022054652515390e-06f; + static const float kExpC4 = 4.13813679705723846039e-08f; + static const float kExpMax = 7.09782712893383973096e+02f; + + } // namespace + float exp(float _a) { - return ::expf(_a); + if (abs(_a) <= kNearZero) + { + return _a + 1.0f; + } + + const float kk = round(_a*kInvLogNat2); + const float hi = _a - kk*kLogNat2Hi; + const float lo = kk*kLogNat2Lo; + const float hml = hi - lo; + const float hmlsq = square(hml); + const float tmp0 = mad(kExpC4, hmlsq, kExpC3); + const float tmp1 = mad(tmp0, hmlsq, kExpC2); + const float tmp2 = mad(tmp1, hmlsq, kExpC1); + const float tmp3 = mad(tmp2, hmlsq, kExpC0); + const float tmp4 = hml - hmlsq * tmp3; + const float tmp5 = hml*tmp4/(2.0f-tmp4); + const float tmp6 = 1.0f - ( (lo - tmp5) - hi); + const float result = ldexp(tmp6, int32_t(kk) ); + + return result; } + namespace + { + static const float kLogC0 = 6.666666666666735130e-01f; + static const float kLogC1 = 3.999999999940941908e-01f; + static const float kLogC2 = 2.857142874366239149e-01f; + static const float kLogC3 = 2.222219843214978396e-01f; + static const float kLogC4 = 1.818357216161805012e-01f; + static const float kLogC5 = 1.531383769920937332e-01f; + static const float kLogC6 = 1.479819860511658591e-01f; + + } // namespace + float log(float _a) { - return ::logf(_a); - } + int32_t exp; + float ff = frexp(_a, &exp); + if (ff < kSqrt2*0.5f) + { + ff *= 2.0f; + --exp; + } - float sqrt(float _a) - { - return ::sqrtf(_a); + ff -= 1.0f; + const float kk = float(exp); + const float hi = kk*kLogNat2Hi; + const float lo = kk*kLogNat2Lo; + const float ss = ff / (2.0f + ff); + const float s2 = square(ss); + const float s4 = square(s2); + + const float tmp0 = mad(kLogC6, s4, kLogC4); + const float tmp1 = mad(tmp0, s4, kLogC2); + const float tmp2 = mad(tmp1, s4, kLogC0); + const float t1 = s2*tmp2; + + const float tmp3 = mad(kLogC5, s4, kLogC3); + const float tmp4 = mad(tmp3, s4, kLogC1); + const float t2 = s4*tmp4; + + const float t12 = t1 + t2; + const float hfsq = 0.5*square(ff); + const float result = hi - ( (hfsq - (ss*(hfsq+t12) + lo) ) - ff); + + return result; } float floor(float _a) @@ -126,7 +253,7 @@ namespace bx if (_a < 0.0f) { const float fr = fract(-_a); - float result = -_a - fr; + const float result = -_a - fr; return -(0.0f != fr ? result + 1.0f diff --git a/tests/math_test.cpp b/tests/math_test.cpp index 13a75b4..2114674 100644 --- a/tests/math_test.cpp +++ b/tests/math_test.cpp @@ -5,8 +5,9 @@ #include "test.h" #include +#include -#include +#include #if !BX_COMPILER_MSVC || BX_COMPILER_MSVC >= 1800 TEST_CASE("isFinite, isInfinite, isNan", "") @@ -34,6 +35,8 @@ TEST_CASE("log2", "") TEST_CASE("libm", "") { + bx::WriterI* writer = bx::getNullOut(); + REQUIRE(1389.0f == bx::abs(-1389.0f) ); REQUIRE(1389.0f == bx::abs( 1389.0f) ); REQUIRE( 0.0f == bx::abs(-0.0f) ); @@ -52,24 +55,88 @@ TEST_CASE("libm", "") REQUIRE(bx::equal( 0.89f, bx::fract( 13.89f), 0.000001f) ); REQUIRE(bx::equal(-0.89f, bx::fract(-13.89f), 0.000001f) ); - for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) + for (int32_t yy = -10; yy < 10; ++yy) { - REQUIRE(bx::equal(bx::pow(1.389f, xx), ::pow(1.389f, xx), 0.00001f) ); + for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) + { + bx::writePrintf(writer, "ldexp(%f, %d) == %f (expected: %f)\n", xx, yy, bx::ldexp(xx, yy), ::ldexpf(xx, yy) ); + REQUIRE(bx::equal(bx::ldexp(xx, yy), ::ldexpf(xx, yy), 0.00001f) ); + } + } + + for (float xx = -80.0f; xx < 80.0f; xx += 0.1f) + { + bx::writePrintf(writer, "exp(%f) == %f (expected: %f)\n", xx, bx::exp(xx), ::expf(xx) ); + REQUIRE(bx::equal(bx::exp(xx), ::expf(xx), 0.00001f) ); + } + + for (float xx = 0.0f; xx < 100.0f; xx += 0.1f) + { + bx::writePrintf(writer, "sqrt(%f) == %f (expected: %f)\n", xx, bx::sqrt(xx), ::sqrtf(xx) ); + REQUIRE(bx::equal(bx::sqrt(xx), ::sqrtf(xx), 0.00001f) ); } for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) { - REQUIRE(bx::equal(bx::sin(xx), ::sin(xx), 0.00001f) ); + bx::writePrintf(writer, "pow(1.389f, %f) == %f (expected: %f)\n", xx, bx::pow(1.389f, xx), ::powf(1.389f, xx) ); + REQUIRE(bx::equal(bx::pow(1.389f, xx), ::powf(1.389f, xx), 0.00001f) ); + } + + for (float xx = -1.0f; xx < 1.0f; xx += 0.001f) + { + bx::writePrintf(writer, "asin(%f) == %f (expected: %f)\n", xx, bx::asin(xx), ::asinf(xx) ); + REQUIRE(bx::equal(bx::asin(xx), ::asinf(xx), 0.0001f) ); } for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) { - REQUIRE(bx::equal(bx::cos(xx), ::cos(xx), 0.00001f) ); + bx::writePrintf(writer, "sin(%f) == %f (expected: %f)\n", xx, bx::sin(xx), ::sinf(xx) ); + REQUIRE(bx::equal(bx::sin(xx), ::sinf(xx), 0.00001f) ); + } + + for (float xx = -1.0f; xx < 1.0f; xx += 0.1f) + { + bx::writePrintf(writer, "sinh(%f) == %f (expected: %f)\n", xx, bx::sinh(xx), ::sinhf(xx) ); + REQUIRE(bx::equal(bx::sinh(xx), ::sinhf(xx), 0.00001f) ); + } + + for (float xx = -1.0f; xx < 1.0f; xx += 0.001f) + { + bx::writePrintf(writer, "acos(%f) == %f (expected: %f\n)", xx, bx::acos(xx), ::acosf(xx) ); + REQUIRE(bx::equal(bx::acos(xx), ::acosf(xx), 0.0001f) ); } for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) { - REQUIRE(bx::equal(bx::tan(xx), ::tan(xx), 0.00001f) ); + bx::writePrintf(writer, "cos(%f) == %f (expected: %f)\n", xx, bx::cos(xx), ::cosf(xx) ); + REQUIRE(bx::equal(bx::cos(xx), ::cosf(xx), 0.00001f) ); + } + + for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) + { + bx::writePrintf(writer, "tan(%f) == %f (expected: %f)\n", xx, bx::tan(xx), ::tanf(xx) ); + REQUIRE(bx::equal(bx::tan(xx), ::tanf(xx), 0.001f) ); + } + + for (float xx = -1.0f; xx < 1.0f; xx += 0.1f) + { + bx::writePrintf(writer, "tanh(%f) == %f (expected: %f\n", xx, bx::tanh(xx), ::tanhf(xx) ); + REQUIRE(bx::equal(bx::tanh(xx), ::tanhf(xx), 0.00001f) ); + } + + for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) + { + bx::writePrintf(writer, "atan(%f) == %f (expected: %f)\n", xx, bx::atan(xx), ::atanf(xx) ); + REQUIRE(bx::equal(bx::atan(xx), ::atanf(xx), 0.00001f) ); + } + + for (float yy = -100.0f; yy < 100.0f; yy += 0.1f) + { + for (float xx = -100.0f; xx < 100.0f; xx += 0.1f) + { + bx::writePrintf(writer, "atan2(%f, %f) == %f (expected: %f)\n", yy, xx, bx::atan2(yy, xx), ::atan2f(yy, xx) ); + REQUIRE(bx::equal(bx::atan2(yy, xx), ::atan2f(yy, xx), 0.00001f) ); + } } }