Moved math functions to be constexpr.

This commit is contained in:
Бранимир Караџић
2024-11-27 19:55:42 -08:00
parent c8ff296934
commit 7eace058a2
8 changed files with 335 additions and 317 deletions

View File

@@ -216,6 +216,9 @@ namespace bx
template<typename Ty>
constexpr bool isPowerOf2(Ty _a);
/// Returns true when the call is evaluated as part of a constant expression (i.e. at compile time).
constexpr bool isConstantEvaluated();
/// Returns a value of type `Ty` by reinterpreting the object representation of `FromT`.
template <typename Ty, typename FromT>
constexpr Ty bitCast(const FromT& _from);

View File

@@ -42,10 +42,7 @@ namespace bx
constexpr float kInvLogNat2 = 1.4426950408889634073599246810019f;
/// The natural logarithm of 2. ln(2)
constexpr float kLogNat2Hi = 0.6931471805599453094172321214582f;
///
constexpr float kLogNat2Lo = 1.90821492927058770002e-10f;
constexpr float kLogNat2 = 0.6931471805599453094172321214582f;
/// The base of natural logarithms. e(1)
constexpr float kE = 2.7182818284590452353602874713527f;
@@ -83,7 +80,7 @@ namespace bx
/// Maximum representable floating-point number.
constexpr float kFloatLargest = 3.402823466e+38f;
///
/// Floating-point infinity.
// constexpr float kFloatInfinity;
///
@@ -104,7 +101,7 @@ namespace bx
/// Largest representable double-precision floating-point number.
constexpr double kDoubleLargest = 1.7976931348623158e+308;
//
// Double-precision floating-point infinity.
// constexpr double kDoubleInfinity;
} // namespace bx

View File

@@ -147,6 +147,11 @@ namespace bx
return _a && !(_a & (_a - 1) );
}
/// Reports whether the current call is being evaluated as part of a constant
/// expression. Thin wrapper over the compiler builtin.
///
/// @returns The value of `__builtin_is_constant_evaluated()` at the call site.
constexpr bool isConstantEvaluated()
{
// The builtin is returned directly on purpose: storing it in a `const` or
// `constexpr` local first would make that initializer itself
// constant-evaluated, and the function would then always report true.
return __builtin_is_constant_evaluated();
}
template <typename Ty, typename FromT>
inline constexpr Ty bitCast(const FromT& _from)
{

View File

@@ -158,6 +158,76 @@ namespace bx
return _a * _a;
}
/// Returns the cosine of the argument _a.
///
/// The argument is split into a whole number of `kPiHalf` steps (`real`) and a
/// remainder `xx` measured from the start of that step. Steps 0 and 2 (mod 4)
/// evaluate the cosine polynomial in `xx`; steps 1 and 3 evaluate the sine
/// polynomial (the polynomial scaled by `xx`). The sign is flipped for steps
/// 1 and 2.
///
/// @param[in] _a Angle. NOTE(review): presumably radians, given the use of
///   kInvPi / kPiHalf - confirm against the constants header.
/// @returns Polynomial approximation of cos(_a).
inline BX_CONSTEXPR_FUNC float cos(float _a)
{
    const float scaled = _a * 2.0f*kInvPi;
    const float real   = floor(scaled);
    const float xx     = _a - real * kPiHalf;

    // Only the two low bits matter: they select one of four polynomial/sign
    // combinations, and the mask keeps the value in [0, 3] for negative input.
    const int32_t bits = int32_t(real) & 3;

    constexpr float kSinC2  = -0.16666667163372039794921875f;
    constexpr float kSinC4  =  8.333347737789154052734375e-3f;
    constexpr float kSinC6  = -1.9842604524455964565277099609375e-4f;
    constexpr float kSinC8  =  2.760012648650445044040679931640625e-6f;
    constexpr float kSinC10 = -2.50293279435709337121807038784027099609375e-8f;

    constexpr float kCosC2  = -0.5f;
    constexpr float kCosC4  =  4.166664183139801025390625e-2f;
    constexpr float kCosC6  = -1.388833043165504932403564453125e-3f;
    constexpr float kCosC8  =  2.47562347794882953166961669921875e-5f;
    constexpr float kCosC10 = -2.59630184018533327616751194000244140625e-7f;

    // Default to the sine polynomial; every local is initialised at its
    // declaration so the function stays usable in constant expressions.
    float c0  = xx;
    float c2  = kSinC2;
    float c4  = kSinC4;
    float c6  = kSinC6;
    float c8  = kSinC8;
    float c10 = kSinC10;

    if (bits == 0
    ||  bits == 2)
    {
        c0  = 1.0f;
        c2  = kCosC2;
        c4  = kCosC4;
        c6  = kCosC6;
        c8  = kCosC8;
        c10 = kCosC10;
    }

    // Horner evaluation in xx^2.
    const float xsq  = square(xx);
    const float tmp0 = mad(c10,  xsq, c8 );
    const float tmp1 = mad(tmp0, xsq, c6 );
    const float tmp2 = mad(tmp1, xsq, c4 );
    const float tmp3 = mad(tmp2, xsq, c2 );
    // Float literal (was the double literal `1.0`): keeps the whole chain in
    // single precision without an implicit double -> float conversion.
    const float tmp4 = mad(tmp3, xsq, 1.0f);
    const float result = tmp4 * c0;

    return bits == 1 || bits == 2
        ? -result
        :  result
        ;
}
/// Returns the arc cosine of _a.
///
/// Evaluates a cubic polynomial in |_a| multiplied by sqrt(1 - |_a|), then
/// mirrors the result around kPi for negative input.
///
/// @param[in] _a Cosine value. The input is not range-checked.
/// @returns Polynomial approximation of acos(_a).
inline BX_CONSTEXPR_FUNC float acos(float _a)
{
    constexpr float kAcosC0 =  1.5707288f;
    constexpr float kAcosC1 = -0.2121144f;
    constexpr float kAcosC2 =  0.0742610f;
    constexpr float kAcosC3 = -0.0187293f;

    const float magnitude = abs(_a);

    // Horner form of C3*m^3 + C2*m^2 + C1*m + C0.
    const float poly = mad(
          mad(mad(kAcosC3, magnitude, kAcosC2), magnitude, kAcosC1)
        , magnitude
        , kAcosC0
        );
    const float positive = poly * sqrt(1.0f - magnitude);

    // 1.0f for negative input, 0.0f otherwise; used as a branch-free selector.
    const float isNegative = float(_a < 0.0f);

    // Negative input: kPi - positive. Non-negative input: positive.
    const float mirrored = positive - 2.0f*isNegative*positive;
    return isNegative*kPi + mirrored;
}
inline void sinCosApprox(float& _outSinApprox, float& _outCos, float _a)
{
const float aa = _a - floor(_a*kInvPi2)*kPi2;
@@ -172,32 +242,32 @@ namespace bx
_outCos = cosA;
}
/// Returns the sine of _a, computed as the cosine of _a shifted back by kPiHalf.
inline BX_CONST_FUNC float sin(float _a)
inline BX_CONSTEXPR_FUNC float sin(float _a)
{
return cos(_a - kPiHalf);
}
/// Returns the hyperbolic sine of _a, computed as half the difference of exp(_a) and exp(-_a).
inline BX_CONST_FUNC float sinh(float _a)
inline BX_CONSTEXPR_FUNC float sinh(float _a)
{
return 0.5f*(exp(_a) - exp(-_a) );
}
/// Returns the arc sine of _a, computed as kPiHalf minus acos(_a).
inline BX_CONST_FUNC float asin(float _a)
inline BX_CONSTEXPR_FUNC float asin(float _a)
{
return kPiHalf - acos(_a);
}
/// Returns the hyperbolic cosine of _a, computed as half the sum of exp(_a) and exp(-_a).
inline BX_CONST_FUNC float cosh(float _a)
inline BX_CONSTEXPR_FUNC float cosh(float _a)
{
return 0.5f*(exp(_a) + exp(-_a) );
}
/// Returns the tangent of _a, computed as sin(_a) divided by cos(_a).
/// The divisor is not checked for zero.
inline BX_CONST_FUNC float tan(float _a)
inline BX_CONSTEXPR_FUNC float tan(float _a)
{
return sin(_a) / cos(_a);
}
inline BX_CONST_FUNC float tanh(float _a)
inline BX_CONSTEXPR_FUNC float tanh(float _a)
{
const float tmp0 = exp(2.0f*_a);
const float tmp1 = tmp0 - 1.0f;
@@ -207,34 +277,57 @@ namespace bx
return result;
}
/// Returns the arc tangent of _a by forwarding to atan2 with a denominator of 1.
inline BX_CONST_FUNC float atan(float _a)
inline BX_CONSTEXPR_FUNC float atan(float _a)
{
return atan2(_a, 1.0f);
}
inline BX_CONST_FUNC float pow(float _a, float _b)
inline BX_CONSTEXPR_FUNC float atan2(float _y, float _x)
{
if (abs(_b) < kFloatSmallest)
const float ax = abs(_x);
const float ay = abs(_y);
const float maxaxy = max(ax, ay);
const float minaxy = min(ax, ay);
if (maxaxy == 0.0f)
{
return 1.0f;
return _y < 0.0f ? -0.0f : 0.0f;
}
if (abs(_a) < kFloatSmallest)
{
return 0.0f;
}
constexpr float kAtan2C0 = -0.013480470f;
constexpr float kAtan2C1 = 0.057477314f;
constexpr float kAtan2C2 = -0.121239071f;
constexpr float kAtan2C3 = 0.195635925f;
constexpr float kAtan2C4 = -0.332994597f;
constexpr float kAtan2C5 = 0.999995630f;
return copySign(exp(_b * log(abs(_a) ) ), _a);
const float mxy = minaxy / maxaxy;
const float mxysq = square(mxy);
const float tmp0 = mad(kAtan2C0, mxysq, kAtan2C1);
const float tmp1 = mad(tmp0, mxysq, kAtan2C2);
const float tmp2 = mad(tmp1, mxysq, kAtan2C3);
const float tmp3 = mad(tmp2, mxysq, kAtan2C4);
const float tmp4 = mad(tmp3, mxysq, kAtan2C5);
const float tmp5 = tmp4 * mxy;
const float tmp6 = ay > ax ? kPiHalf - tmp5 : tmp5;
const float tmp7 = _x < 0.0f ? kPi - tmp6 : tmp6;
const float result = _y < 0.0f ? -tmp7 : tmp7;
return result;
}
inline BX_CONST_FUNC float exp2(float _a)
inline BX_CONSTEXPR_FUNC float frexp(float _a, int32_t* _outExp)
{
return pow(2.0f, _a);
}
const uint32_t ftob = floatToBits(_a);
const uint32_t masked0 = uint32_and(ftob, kFloatExponentMask);
const uint32_t exp0 = uint32_srl(masked0, kFloatExponentBitShift);
const uint32_t masked1 = uint32_and(ftob, kFloatSignMask | kFloatMantissaMask);
const uint32_t bits = uint32_or(masked1, UINT32_C(0x3f000000) );
const float result = bitsToFloat(bits);
inline BX_CONST_FUNC float log2(float _a)
{
return log(_a) * kInvLogNat2;
*_outExp = int32_t(exp0 - 0x7e);
return result;
}
inline BX_CONSTEXPR_FUNC float ldexp(float _a, int32_t _b)
@@ -251,6 +344,106 @@ namespace bx
return result;
}
/// Returns e (2.71828...) raised to the _a power.
///
/// The argument is reduced to `kk` whole multiples of ln(2) plus a small
/// remainder; a polynomial-based approximation handles the remainder and
/// ldexp applies the power-of-two scale.
///
/// @param[in] _a Exponent.
/// @returns Approximation of e^_a.
inline BX_CONSTEXPR_FUNC float exp(float _a)
{
    // Close enough to zero that the first-order term is all that is used.
    if (abs(_a) <= kNearZero)
    {
        return _a + 1.0f;
    }

    constexpr float kExpC0     =  1.66666666666666019037e-01f;
    constexpr float kExpC1     = -2.77777777770155933842e-03f;
    constexpr float kExpC2     =  6.61375632143793436117e-05f;
    constexpr float kExpC3     = -1.65339022054652515390e-06f;
    constexpr float kExpC4     =  4.13813679705723846039e-08f;
    constexpr float kLogNat2Lo =  1.90821492927058770002e-10f;

    // Range reduction: _a = kk*ln(2) + (hi - lo).
    const float kk = round(_a*kInvLogNat2);
    const float hi = _a - kk*kLogNat2;
    const float lo = kk*kLogNat2Lo;
    const float reduced   = hi - lo;
    const float reducedSq = square(reduced);

    // Horner evaluation of the correction polynomial in reduced^2.
    const float poly = mad(
          mad(mad(mad(kExpC4, reducedSq, kExpC3), reducedSq, kExpC2), reducedSq, kExpC1)
        , reducedSq
        , kExpC0
        );

    const float cc       = reduced - reducedSq * poly;
    const float quotient = reduced*cc/(2.0f-cc);
    const float mantissa = 1.0f - ( (lo - quotient) - hi);

    // Re-apply the 2^kk factor removed by the range reduction.
    return ldexp(mantissa, int32_t(kk) );
}
/// Returns the base e (2.71828...) logarithm of _a.
///
/// frexp splits _a into a fraction and a power-of-two exponent. The fraction
/// is doubled (and the exponent decremented) when it is below sqrt(2)/2, and
/// the logarithm is assembled from `exponent * ln(2)` plus a polynomial
/// approximation for the fraction.
///
/// @param[in] _a Input value. The input is not range-checked.
/// @returns Approximation of ln(_a).
inline BX_CONSTEXPR_FUNC float log(float _a)
{
    constexpr float kLogC0     = 6.666666666666735130e-01f;
    constexpr float kLogC1     = 3.999999999940941908e-01f;
    constexpr float kLogC2     = 2.857142874366239149e-01f;
    constexpr float kLogC3     = 2.222219843214978396e-01f;
    constexpr float kLogC4     = 1.818357216161805012e-01f;
    constexpr float kLogC5     = 1.531383769920937332e-01f;
    constexpr float kLogC6     = 1.479819860511658591e-01f;
    constexpr float kLogNat2Lo = 1.90821492927058770002e-10f;

    int32_t exponent = 0;
    float fraction = frexp(_a, &exponent);

    // Renormalise so the fraction is not below sqrt(2)/2.
    if (fraction < kSqrt2*0.5f)
    {
        fraction *= 2.0f;
        --exponent;
    }

    fraction -= 1.0f;

    // Contribution of the power-of-two exponent, split into high and low parts.
    const float kk = float(exponent);
    const float hi = kk*kLogNat2;
    const float lo = kk*kLogNat2Lo;

    const float ss = fraction / (2.0f + fraction);
    const float s2 = square(ss);
    const float s4 = square(s2);

    // Even-indexed coefficients, Horner form in s4, scaled by s2.
    const float evenPoly = mad(mad(mad(kLogC6, s4, kLogC4), s4, kLogC2), s4, kLogC0);
    const float t1 = s2*evenPoly;

    // Odd-indexed coefficients, Horner form in s4, scaled by s4.
    const float oddPoly = mad(mad(kLogC5, s4, kLogC3), s4, kLogC1);
    const float t2 = s4*oddPoly;

    const float t12 = t1 + t2;

    const float hfsq = 0.5f*square(fraction);
    return hi - ( (hfsq - (ss*(hfsq+t12) + lo) ) - fraction);
}
/// Computes _a raised to the _b power.
///
/// Evaluated as exp(_b * log(|_a|)) with the sign of _a copied onto the
/// result, so a negative base always yields a negative result.
///
/// @param[in] _a Base.
/// @param[in] _b Exponent.
/// @returns 1.0f when |_b| is below kFloatSmallest (checked first, so this also
///   covers a zero base); 0.0f when |_a| is below kFloatSmallest; otherwise the
///   signed result described above.
inline BX_CONSTEXPR_FUNC float pow(float _a, float _b)
{
    // Exponent check comes first: anything to the power of ~0 is 1.
    if (abs(_b) < kFloatSmallest)
    {
        return 1.0f;
    }

    const float magnitude = abs(_a);

    if (magnitude < kFloatSmallest)
    {
        return 0.0f;
    }

    const float lnMagnitude = log(magnitude);
    const float unsignedResult = exp(_b * lnMagnitude);
    return copySign(unsignedResult, _a);
}
/// Returns 2 raised to the _a power by forwarding to pow() with a base of 2.
inline BX_CONSTEXPR_FUNC float exp2(float _a)
{
    constexpr float kBase = 2.0f;
    return pow(kBase, _a);
}
/// Returns the base 2 logarithm of _a: the natural logarithm scaled by kInvLogNat2.
inline BX_CONSTEXPR_FUNC float log2(float _a)
{
    const float lnA = log(_a);
    return lnA * kInvLogNat2;
}
template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(uint32_t _val)
{
@@ -437,7 +630,7 @@ namespace bx
return Ty(1)<<log2;
}
inline BX_CONST_FUNC float rsqrtRef(float _a)
inline BX_CONSTEXPR_FUNC float rsqrtRef(float _a)
{
if (_a < kFloatSmallest)
{
@@ -461,13 +654,13 @@ namespace bx
const simd128_t rsqrta = simd_rsqrt_ni(aa);
#endif // BX_SIMD_NEON
float result;
float result = 0.0f;
simd_stx(&result, rsqrta);
return result;
}
inline BX_CONST_FUNC float sqrtRef(float _a)
inline BX_CONSTEXPR_FUNC float sqrtRef(float _a)
{
if (_a < 0.0f)
{
@@ -491,24 +684,34 @@ namespace bx
const simd128_t aa = simd_splat(_a);
const simd128_t sqrt = simd_sqrt(aa);
float result;
float result = 0.0f;
simd_stx(&result, sqrt);
return result;
}
/// Returns the reciprocal square root of _a.
///
/// With BX_SIMD_SUPPORTED the result comes from rsqrtSimd, except during
/// constant evaluation where rsqrtRef is used instead. Without SIMD support
/// rsqrtRef is always used.
inline BX_CONST_FUNC float rsqrt(float _a)
inline BX_CONSTEXPR_FUNC float rsqrt(float _a)
{
#if BX_SIMD_SUPPORTED
// NOTE(review): presumably the SIMD path cannot run in a constant
// expression, hence the reference fallback - confirm against rsqrtSimd.
if (isConstantEvaluated() )
{
return rsqrtRef(_a);
}
return rsqrtSimd(_a);
#else
return rsqrtRef(_a);
#endif // BX_SIMD_SUPPORTED
}
inline BX_CONST_FUNC float sqrt(float _a)
inline BX_CONSTEXPR_FUNC float sqrt(float _a)
{
#if BX_SIMD_SUPPORTED
if (isConstantEvaluated() )
{
return sqrtRef(_a);
}
return sqrtSimd(_a);
#else
return sqrtRef(_a);
@@ -628,7 +831,7 @@ namespace bx
return square(_a)*(3.0f - 2.0f*_a);
}
/// Evaluates 0.5 - sin(asin(1 - 2*_a) / 3).
/// NOTE(review): presumably the closed-form inverse of smoothStep - confirm.
inline BX_CONST_FUNC float invSmoothStep(float _a)
inline BX_CONSTEXPR_FUNC float invSmoothStep(float _a)
{
return 0.5f - sin(asin(1.0f - 2.0f * _a) / 3.0f);
}
@@ -906,18 +1109,18 @@ namespace bx
};
}
/// Returns the length of _a: the square root of its dot product with itself.
inline BX_CONST_FUNC float length(const Vec3 _a)
inline BX_CONSTEXPR_FUNC float length(const Vec3 _a)
{
return sqrt(dot(_a, _a) );
}
/// Returns the squared distance between _a and _b: the dot product of (_b - _a) with itself.
inline BX_CONST_FUNC float distanceSq(const Vec3 _a, const Vec3 _b)
inline BX_CONSTEXPR_FUNC float distanceSq(const Vec3 _a, const Vec3 _b)
{
const Vec3 ba = sub(_b, _a);
return dot(ba, ba);
}
/// Returns the distance between _a and _b: the length of (_b - _a).
inline BX_CONST_FUNC float distance(const Vec3 _a, const Vec3 _b)
inline BX_CONSTEXPR_FUNC float distance(const Vec3 _a, const Vec3 _b)
{
return length(sub(_b, _a) );
}
@@ -942,7 +1145,7 @@ namespace bx
};
}
inline BX_CONST_FUNC Vec3 normalize(const Vec3 _a)
inline BX_CONSTEXPR_FUNC Vec3 normalize(const Vec3 _a)
{
const float len = length(_a);
const Vec3 result = divSafe(_a, len);
@@ -1035,9 +1238,8 @@ namespace bx
_outB = cross(_n, _outT);
}
inline BX_CONST_FUNC Vec3 fromLatLong(float _u, float _v)
inline BX_CONSTEXPR_FUNC Vec3 fromLatLong(float _u, float _v)
{
Vec3 result(InitNone);
const float phi = _u * kPi2;
const float theta = _v * kPi;
@@ -1046,10 +1248,12 @@ namespace bx
const float ct = cos(theta);
const float cp = cos(phi);
result.x = -st*sp;
result.y = ct;
result.z = -st*cp;
return result;
return
{
-st*sp,
ct,
-st*cp,
};
}
inline void toLatLong(float* _outU, float* _outV, const Vec3 _dir)
@@ -1583,7 +1787,7 @@ namespace bx
;
}
inline BX_CONST_FUNC float toLinear(float _a)
inline BX_CONSTEXPR_FUNC float toLinear(float _a)
{
const float lo = _a / 12.92f;
const float hi = pow( (_a + 0.055f) / 1.055f, 2.4f);
@@ -1591,7 +1795,7 @@ namespace bx
return result;
}
inline BX_CONST_FUNC float toGamma(float _a)
inline BX_CONSTEXPR_FUNC float toGamma(float _a)
{
const float lo = _a * 12.92f;
const float hi = pow(abs(_a), 1.0f/2.4f) * 1.055f - 0.055f;

View File

@@ -9,8 +9,8 @@
namespace bx
{
BX_CONST_FUNC float sqrt(float);
BX_CONST_FUNC float rsqrt(float);
BX_CONSTEXPR_FUNC float sqrt(float);
BX_CONSTEXPR_FUNC float rsqrt(float);
#define ELEMx 0
#define ELEMy 1

View File

@@ -212,47 +212,47 @@ namespace bx
/// Returns the sine of the argument _a.
///
BX_CONST_FUNC float sin(float _a);
BX_CONSTEXPR_FUNC float sin(float _a);
/// Returns hyperbolic sine of the argument _a.
///
BX_CONST_FUNC float sinh(float _a);
BX_CONSTEXPR_FUNC float sinh(float _a);
/// Returns radian angle between -pi/2 and +pi/2 whose sine is _a.
///
BX_CONST_FUNC float asin(float _a);
BX_CONSTEXPR_FUNC float asin(float _a);
/// Returns the cosine of the argument _a.
///
BX_CONST_FUNC float cos(float _a);
BX_CONSTEXPR_FUNC float cos(float _a);
/// Returns hyperbolic cosine of the argument _a.
///
BX_CONST_FUNC float cosh(float _a);
BX_CONSTEXPR_FUNC float cosh(float _a);
/// Returns radian angle between 0 and pi whose cosine is _a.
///
BX_CONST_FUNC float acos(float _a);
BX_CONSTEXPR_FUNC float acos(float _a);
/// Returns the circular tangent of the radian argument _a.
///
BX_CONST_FUNC float tan(float _a);
BX_CONSTEXPR_FUNC float tan(float _a);
/// Returns hyperbolic tangent of the argument _a.
///
BX_CONST_FUNC float tanh(float _a);
BX_CONSTEXPR_FUNC float tanh(float _a);
/// Returns radian angle between -pi/2 and +pi/2 whose tangent is _a.
///
BX_CONST_FUNC float atan(float _a);
BX_CONSTEXPR_FUNC float atan(float _a);
/// Returns the inverse tangent of _y/_x.
///
BX_CONST_FUNC float atan2(float _y, float _x);
BX_CONSTEXPR_FUNC float atan2(float _y, float _x);
/// Computes _a raised to the _b power.
///
BX_CONST_FUNC float pow(float _a, float _b);
BX_CONSTEXPR_FUNC float pow(float _a, float _b);
/// Returns the result of multiplying _a by 2 raised to the power of the exponent `_a * (2^_b)`.
///
@@ -261,23 +261,23 @@ namespace bx
/// Decomposes the given floating-point value _a into a normalized fraction and
/// an integral power of two.
///
float frexp(float _a, int32_t* _outExp);
BX_CONSTEXPR_FUNC float frexp(float _a, int32_t* _outExp);
/// Returns e (2.71828...) raised to the _a power.
///
BX_CONST_FUNC float exp(float _a);
BX_CONSTEXPR_FUNC float exp(float _a);
/// Returns 2 raised to the _a power.
///
BX_CONST_FUNC float exp2(float _a);
BX_CONSTEXPR_FUNC float exp2(float _a);
/// Returns the base e (2.71828...) logarithm of _a.
///
BX_CONST_FUNC float log(float _a);
BX_CONSTEXPR_FUNC float log(float _a);
/// Returns the base 2 logarithm of _a.
///
BX_CONST_FUNC float log2(float _a);
BX_CONSTEXPR_FUNC float log2(float _a);
/// Count number of bits set.
///
@@ -321,11 +321,11 @@ namespace bx
/// Returns the square root of _a.
///
BX_CONST_FUNC float sqrt(float _a);
BX_CONSTEXPR_FUNC float sqrt(float _a);
/// Returns reciprocal square root of _a.
///
BX_CONST_FUNC float rsqrt(float _a);
BX_CONSTEXPR_FUNC float rsqrt(float _a);
/// Returns the nearest integer not greater in magnitude than _a.
///
@@ -403,7 +403,7 @@ namespace bx
BX_CONSTEXPR_FUNC float smoothStep(float _a);
///
BX_CONST_FUNC float invSmoothStep(float _a);
BX_CONSTEXPR_FUNC float invSmoothStep(float _a);
///
BX_CONSTEXPR_FUNC float bias(float _time, float _bias);
@@ -486,13 +486,13 @@ namespace bx
BX_CONSTEXPR_FUNC Vec3 cross(const Vec3 _a, const Vec3 _b);
///
BX_CONST_FUNC float length(const Vec3 _a);
BX_CONSTEXPR_FUNC float length(const Vec3 _a);
///
BX_CONST_FUNC float distanceSq(const Vec3 _a, const Vec3 _b);
BX_CONSTEXPR_FUNC float distanceSq(const Vec3 _a, const Vec3 _b);
///
BX_CONST_FUNC float distance(const Vec3 _a, const Vec3 _b);
BX_CONSTEXPR_FUNC float distance(const Vec3 _a, const Vec3 _b);
///
BX_CONSTEXPR_FUNC Vec3 lerp(const Vec3 _a, const Vec3 _b, float _t);
@@ -501,7 +501,7 @@ namespace bx
BX_CONSTEXPR_FUNC Vec3 lerp(const Vec3 _a, const Vec3 _b, const Vec3 _t);
///
BX_CONST_FUNC Vec3 normalize(const Vec3 _a);
BX_CONSTEXPR_FUNC Vec3 normalize(const Vec3 _a);
///
BX_CONSTEXPR_FUNC Vec3 min(const Vec3 _a, const Vec3 _b);
@@ -527,7 +527,7 @@ namespace bx
void calcTangentFrame(Vec3& _outT, Vec3& _outB, const Vec3 _n, float _angle);
///
BX_CONST_FUNC Vec3 fromLatLong(float _u, float _v);
BX_CONSTEXPR_FUNC Vec3 fromLatLong(float _u, float _v);
///
void toLatLong(float* _outU, float* _outV, const Vec3 _dir);
@@ -811,10 +811,10 @@ namespace bx
void hsvToRgb(float _rgb[3], const float _hsv[3]);
///
BX_CONST_FUNC float toLinear(float _a);
BX_CONSTEXPR_FUNC float toLinear(float _a);
///
BX_CONST_FUNC float toGamma(float _a);
BX_CONSTEXPR_FUNC float toGamma(float _a);
} // namespace bx

View File

@@ -10,221 +10,6 @@
namespace bx
{
namespace
{
    // Coefficients of the sine polynomial evaluated by cos() below.
    constexpr float kSinC2  = -0.16666667163372039794921875f;
    constexpr float kSinC4  =  8.333347737789154052734375e-3f;
    constexpr float kSinC6  = -1.9842604524455964565277099609375e-4f;
    constexpr float kSinC8  =  2.760012648650445044040679931640625e-6f;
    constexpr float kSinC10 = -2.50293279435709337121807038784027099609375e-8f;

    // Coefficients of the cosine polynomial evaluated by cos() below.
    constexpr float kCosC2  = -0.5f;
    constexpr float kCosC4  =  4.166664183139801025390625e-2f;
    constexpr float kCosC6  = -1.388833043165504932403564453125e-3f;
    constexpr float kCosC8  =  2.47562347794882953166961669921875e-5f;
    constexpr float kCosC10 = -2.59630184018533327616751194000244140625e-7f;

} // namespace

/// Returns the cosine of the argument _a.
///
/// The argument is split into a whole number of `kPiHalf` steps (`real`) and a
/// remainder `xx`. Steps 0 and 2 (mod 4) use the cosine polynomial, steps 1 and
/// 3 use the sine polynomial scaled by `xx`, and the sign is flipped for steps
/// 1 and 2.
///
/// @param[in] _a Angle. NOTE(review): presumably radians - confirm.
/// @returns Polynomial approximation of cos(_a).
BX_CONST_FUNC float cos(float _a)
{
    const float scaled = _a * 2.0f*kInvPi;
    const float real   = floor(scaled);
    const float xx     = _a - real * kPiHalf;

    // The mask keeps the selector in [0, 3], also for negative input.
    const int32_t bits = int32_t(real) & 3;

    // Start from the sine set so no coefficient is ever left uninitialised,
    // then switch to the cosine set for steps 0 and 2.
    float c0  = xx;
    float c2  = kSinC2;
    float c4  = kSinC4;
    float c6  = kSinC6;
    float c8  = kSinC8;
    float c10 = kSinC10;

    if (bits == 0
    ||  bits == 2)
    {
        c0  = 1.0f;
        c2  = kCosC2;
        c4  = kCosC4;
        c6  = kCosC6;
        c8  = kCosC8;
        c10 = kCosC10;
    }

    // Horner evaluation in xx^2.
    const float xsq  = square(xx);
    const float tmp0 = mad(c10,  xsq, c8 );
    const float tmp1 = mad(tmp0, xsq, c6 );
    const float tmp2 = mad(tmp1, xsq, c4 );
    const float tmp3 = mad(tmp2, xsq, c2 );
    // Float literal (was the double literal `1.0`): avoids an implicit
    // double -> float conversion in otherwise single-precision code.
    const float tmp4 = mad(tmp3, xsq, 1.0f);
    const float result = tmp4 * c0;

    return bits == 1 || bits == 2
        ? -result
        :  result
        ;
}
namespace
{
    // Cubic polynomial coefficients evaluated by acos() below.
    constexpr float kAcosC0 =  1.5707288f;
    constexpr float kAcosC1 = -0.2121144f;
    constexpr float kAcosC2 =  0.0742610f;
    constexpr float kAcosC3 = -0.0187293f;

} // namespace

/// Returns the arc cosine of _a.
///
/// A cubic polynomial in |_a| is multiplied by sqrt(1 - |_a|); negative input
/// is mapped to kPi minus that value. The input is not range-checked.
BX_CONST_FUNC float acos(float _a)
{
    const float magnitude = abs(_a);

    // Horner accumulation of C3*m^3 + C2*m^2 + C1*m + C0.
    float poly = kAcosC3;
    poly = mad(poly, magnitude, kAcosC2);
    poly = mad(poly, magnitude, kAcosC1);
    poly = mad(poly, magnitude, kAcosC0);

    const float positive = poly * sqrt(1.0f - magnitude);

    // 1.0f when _a is negative, 0.0f otherwise.
    const float isNegative = float(_a < 0.0f);

    const float mirrored = positive - 2.0f*isNegative*positive;
    return isNegative*kPi + mirrored;
}
namespace
{
    // Polynomial coefficients evaluated by atan2() below.
    constexpr float kAtan2C0 = -0.013480470f;
    constexpr float kAtan2C1 =  0.057477314f;
    constexpr float kAtan2C2 = -0.121239071f;
    constexpr float kAtan2C3 =  0.195635925f;
    constexpr float kAtan2C4 = -0.332994597f;
    constexpr float kAtan2C5 =  0.999995630f;

} // namespace

/// Returns the inverse tangent of _y/_x.
///
/// The polynomial is evaluated on min(|x|,|y|) / max(|x|,|y|), and the result
/// is then adjusted from the relative size and signs of the two inputs.
///
/// @param[in] _y Numerator.
/// @param[in] _x Denominator.
/// @returns Zero, carrying a negative sign when _y is negative, if both inputs
///   are zero; otherwise the approximated angle.
BX_CONST_FUNC float atan2(float _y, float _x)
{
    const float ax = abs(_x);
    const float ay = abs(_y);
    const float larger  = max(ax, ay);
    const float smaller = min(ax, ay);

    // Both inputs are zero: avoid the 0/0 division below.
    if (larger == 0.0f)
    {
        return _y < 0.0f ? -0.0f : 0.0f;
    }

    const float ratio   = smaller / larger;
    const float ratioSq = square(ratio);

    // Horner accumulation in ratio^2.
    float poly = kAtan2C0;
    poly = mad(poly, ratioSq, kAtan2C1);
    poly = mad(poly, ratioSq, kAtan2C2);
    poly = mad(poly, ratioSq, kAtan2C3);
    poly = mad(poly, ratioSq, kAtan2C4);
    poly = mad(poly, ratioSq, kAtan2C5);

    float angle = poly * ratio;

    // The ratio was inverted when |y| > |x|; take the complementary angle.
    if (ay > ax)
    {
        angle = kPiHalf - angle;
    }

    // Negative x: reflect around the vertical axis.
    if (_x < 0.0f)
    {
        angle = kPi - angle;
    }

    // Negative y: reflect around the horizontal axis.
    if (_y < 0.0f)
    {
        angle = -angle;
    }

    return angle;
}
float frexp(float _a, int32_t* _outExp)
{
const uint32_t ftob = floatToBits(_a);
const uint32_t masked0 = uint32_and(ftob, kFloatExponentMask);
const uint32_t exp0 = uint32_srl(masked0, kFloatExponentBitShift);
const uint32_t masked1 = uint32_and(ftob, kFloatSignMask | kFloatMantissaMask);
const uint32_t bits = uint32_or(masked1, UINT32_C(0x3f000000) );
const float result = bitsToFloat(bits);
*_outExp = int32_t(exp0 - 0x7e);
return result;
}
namespace
{
    // Polynomial coefficients evaluated by exp() below.
    constexpr float kExpC0 =  1.66666666666666019037e-01f;
    constexpr float kExpC1 = -2.77777777770155933842e-03f;
    constexpr float kExpC2 =  6.61375632143793436117e-05f;
    constexpr float kExpC3 = -1.65339022054652515390e-06f;
    constexpr float kExpC4 =  4.13813679705723846039e-08f;

} // namespace

/// Returns e (2.71828...) raised to the _a power.
///
/// The argument is reduced to `kk` whole multiples of ln(2) plus a remainder,
/// the remainder goes through a polynomial-based approximation, and ldexp
/// re-applies the power-of-two scale.
BX_CONST_FUNC float exp(float _a)
{
    // Near zero only the first-order term is used.
    if (abs(_a) <= kNearZero)
    {
        return _a + 1.0f;
    }

    // Range reduction: _a = kk*ln(2) + (hi - lo).
    const float kk = round(_a*kInvLogNat2);
    const float hi = _a - kk*kLogNat2Hi;
    const float lo = kk*kLogNat2Lo;
    const float reduced   = hi - lo;
    const float reducedSq = square(reduced);

    // Horner accumulation in reduced^2.
    float poly = kExpC4;
    poly = mad(poly, reducedSq, kExpC3);
    poly = mad(poly, reducedSq, kExpC2);
    poly = mad(poly, reducedSq, kExpC1);
    poly = mad(poly, reducedSq, kExpC0);

    const float cc       = reduced - reducedSq * poly;
    const float quotient = reduced*cc/(2.0f-cc);
    const float mantissa = 1.0f - ( (lo - quotient) - hi);

    return ldexp(mantissa, int32_t(kk) );
}
namespace
{
    // Polynomial coefficients evaluated by log() below.
    constexpr float kLogC0 = 6.666666666666735130e-01f;
    constexpr float kLogC1 = 3.999999999940941908e-01f;
    constexpr float kLogC2 = 2.857142874366239149e-01f;
    constexpr float kLogC3 = 2.222219843214978396e-01f;
    constexpr float kLogC4 = 1.818357216161805012e-01f;
    constexpr float kLogC5 = 1.531383769920937332e-01f;
    constexpr float kLogC6 = 1.479819860511658591e-01f;

} // namespace

/// Returns the base e (2.71828...) logarithm of _a.
///
/// frexp splits _a into a fraction and a power-of-two exponent; the fraction
/// is doubled (and the exponent decremented) when below sqrt(2)/2, and the
/// result combines `exponent * ln(2)` with a polynomial approximation for the
/// fraction. The input is not range-checked.
BX_CONST_FUNC float log(float _a)
{
    int32_t exponent;
    float fraction = frexp(_a, &exponent);

    // Renormalise so the fraction is not below sqrt(2)/2.
    if (fraction < kSqrt2*0.5f)
    {
        fraction *= 2.0f;
        --exponent;
    }

    fraction -= 1.0f;

    // Exponent contribution, split into high and low parts of ln(2).
    const float kk = float(exponent);
    const float hi = kk*kLogNat2Hi;
    const float lo = kk*kLogNat2Lo;

    const float ss = fraction / (2.0f + fraction);
    const float s2 = square(ss);
    const float s4 = square(s2);

    // Even-indexed coefficients, accumulated in s4 and scaled by s2.
    float evenPoly = kLogC6;
    evenPoly = mad(evenPoly, s4, kLogC4);
    evenPoly = mad(evenPoly, s4, kLogC2);
    evenPoly = mad(evenPoly, s4, kLogC0);
    const float t1 = s2*evenPoly;

    // Odd-indexed coefficients, accumulated in s4 and scaled by s4.
    float oddPoly = kLogC5;
    oddPoly = mad(oddPoly, s4, kLogC3);
    oddPoly = mad(oddPoly, s4, kLogC1);
    const float t2 = s4*oddPoly;

    const float t12 = t1 + t2;

    const float hfsq = 0.5f*square(fraction);
    return hi - ( (hfsq - (ss*(hfsq+t12) + lo) ) - fraction);
}
void mtxLookAt(float* _result, const Vec3& _eye, const Vec3& _at, const Vec3& _up, Handedness::Enum _handedness)
{
const Vec3 view = normalize(

View File

@@ -33,9 +33,16 @@ TEST_CASE("isFinite, isInfinite, isNan", "[math]")
}
}
static bool testLog2(float _a)
TEST_CASE("log", "[math][libm]")
{
return bx::log2(_a) == bx::log(_a) * (1.0f / bx::log(2.0f) );
STATIC_REQUIRE(0.0f == bx::log(1.0f) );
STATIC_REQUIRE(1.0f == bx::log(bx::kE) );
STATIC_REQUIRE(bx::kLogNat2 == bx::log(2.0f) );
}
/// Test helper: requires bx::log2(_a) to compare exactly equal to bx::log(_a)
/// multiplied by the reciprocal of bx::log(2.0f).
static void testLog2(float _a)
{
REQUIRE(bx::log2(_a) == bx::log(_a) * (1.0f / bx::log(2.0f) ) );
}
TEST_CASE("log2", "[math][libm]")
@@ -43,15 +50,15 @@ TEST_CASE("log2", "[math][libm]")
testLog2(0.0f);
testLog2(256.0f);
REQUIRE(0.0f == bx::log2(1.0f) );
REQUIRE(1.0f == bx::log2(2.0f) );
REQUIRE(2.0f == bx::log2(4.0f) );
REQUIRE(3.0f == bx::log2(8.0f) );
REQUIRE(4.0f == bx::log2(16.0f) );
REQUIRE(5.0f == bx::log2(32.0f) );
REQUIRE(6.0f == bx::log2(64.0f) );
REQUIRE(7.0f == bx::log2(128.0f) );
REQUIRE(8.0f == bx::log2(256.0f) );
STATIC_REQUIRE(0.0f == bx::log2(1.0f) );
STATIC_REQUIRE(1.0f == bx::log2(2.0f) );
STATIC_REQUIRE(2.0f == bx::log2(4.0f) );
STATIC_REQUIRE(3.0f == bx::log2(8.0f) );
STATIC_REQUIRE(4.0f == bx::log2(16.0f) );
STATIC_REQUIRE(5.0f == bx::log2(32.0f) );
STATIC_REQUIRE(6.0f == bx::log2(64.0f) );
STATIC_REQUIRE(7.0f == bx::log2(128.0f) );
STATIC_REQUIRE(8.0f == bx::log2(256.0f) );
}
TEST_CASE("ceilLog2", "[math]")
@@ -324,7 +331,7 @@ TEST_CASE("rsqrt", "[math][libm]")
bx::Error err;
// rsqrtRef
REQUIRE(bx::isInfinite(bx::rsqrtRef(0.0f) ) );
STATIC_REQUIRE(bx::isInfinite(bx::rsqrtRef(0.0f) ) );
for (float xx = bx::kNearZero; xx < 100.0f; xx += 0.1f)
{
@@ -361,7 +368,8 @@ TEST_CASE("sqrt", "[math][libm]")
bx::Error err;
// sqrtRef
REQUIRE(bx::isNan(bx::sqrtRef(-1.0f) ) );
STATIC_REQUIRE(bx::isNan(bx::sqrtRef(-1.0f) ) );
REQUIRE(bx::isEqual(bx::sqrtRef(0.0f), ::sqrtf(0.0f), 0.0f) );
REQUIRE(bx::isEqual(bx::sqrtRef(1.0f), ::sqrtf(1.0f), 0.0f) );
@@ -434,7 +442,7 @@ TEST_CASE("mod", "[math][libm]")
typedef float (*MathFloatFn)(float);
/// Test helper: requires the two single-argument float functions supplied as
/// template arguments (BxT and CrtT) to return exactly equal results for _value.
template<MathFloatFn BxT, MathFloatFn CrtT>
void testMathFunc1Float(float _value)
static void testMathFunc1Float(float _value)
{
REQUIRE(CrtT(_value) == BxT(_value) );
}
@@ -519,21 +527,21 @@ TEST_CASE("exp", "[math][libm]")
TEST_CASE("pow", "[math][libm]")
{
REQUIRE(1.0f == bx::pow(0.0f, 0.0f) );
REQUIRE(1.0f == bx::pow(1.0f, 0.0f) );
REQUIRE(1.0f == bx::pow(3.0f, 0.0f) );
REQUIRE(1.0f == bx::pow(8.0f, 0.0f) );
REQUIRE(1.0f == bx::pow(9.0f, 0.0f) );
REQUIRE(0.0f == bx::pow(0.0f, 2.0f) );
STATIC_REQUIRE(1.0f == bx::pow(0.0f, 0.0f) );
STATIC_REQUIRE(1.0f == bx::pow(1.0f, 0.0f) );
STATIC_REQUIRE(1.0f == bx::pow(3.0f, 0.0f) );
STATIC_REQUIRE(1.0f == bx::pow(8.0f, 0.0f) );
STATIC_REQUIRE(1.0f == bx::pow(9.0f, 0.0f) );
STATIC_REQUIRE(0.0f == bx::pow(0.0f, 2.0f) );
REQUIRE( 4.0f == bx::pow( 2.0f, 2.0f) );
REQUIRE( -4.0f == bx::pow(-2.0f, 2.0f) );
REQUIRE( 0.25f == bx::pow( 2.0f, -2.0f) );
REQUIRE( -0.25f == bx::pow(-2.0f, -2.0f) );
REQUIRE( 8.0f == bx::pow( 2.0f, 3.0f) );
REQUIRE( -8.0f == bx::pow(-2.0f, 3.0f) );
REQUIRE( 0.125f == bx::pow( 2.0f, -3.0f) );
REQUIRE(-0.125f == bx::pow(-2.0f, -3.0f) );
STATIC_REQUIRE( 4.0f == bx::pow( 2.0f, 2.0f) );
STATIC_REQUIRE( -4.0f == bx::pow(-2.0f, 2.0f) );
STATIC_REQUIRE( 0.25f == bx::pow( 2.0f, -2.0f) );
STATIC_REQUIRE( -0.25f == bx::pow(-2.0f, -2.0f) );
STATIC_REQUIRE( 8.0f == bx::pow( 2.0f, 3.0f) );
STATIC_REQUIRE( -8.0f == bx::pow(-2.0f, 3.0f) );
STATIC_REQUIRE( 0.125f == bx::pow( 2.0f, -3.0f) );
STATIC_REQUIRE(-0.125f == bx::pow(-2.0f, -3.0f) );
bx::WriterI* writer = bx::getNullOut();
bx::Error err;
@@ -548,6 +556,9 @@ TEST_CASE("pow", "[math][libm]")
TEST_CASE("asin", "[math][libm]")
{
STATIC_REQUIRE(bx::isEqual( 0.0f, bx::asin(0.0f), 0.0001f) );
STATIC_REQUIRE(bx::isEqual(bx::kPiHalf, bx::asin(1.0f), 0.0001f) );
bx::WriterI* writer = bx::getNullOut();
bx::Error err;
@@ -561,6 +572,11 @@ TEST_CASE("asin", "[math][libm]")
TEST_CASE("sin", "[math][libm]")
{
STATIC_REQUIRE(bx::isEqual( 0.0f, bx::sin(0.0f ), 0.0000001f) );
STATIC_REQUIRE(bx::isEqual( 1.0f, bx::sin(bx::kPiHalf ), 0.0000001f) );
STATIC_REQUIRE(bx::isEqual( 0.0f, bx::sin(bx::kPi ), 0.0000001f) );
STATIC_REQUIRE(bx::isEqual(-1.0f, bx::sin(bx::kPiHalf*3.0f), 0.0000001f) );
bx::WriterI* writer = bx::getNullOut();
bx::Error err;
@@ -624,6 +640,9 @@ TEST_CASE("sinh", "[math][libm]")
TEST_CASE("acos", "[math][libm]")
{
STATIC_REQUIRE(bx::isEqual(bx::kPiHalf, bx::acos(0.0f), 0.0001f) );
STATIC_REQUIRE(bx::isEqual( 0.0f, bx::acos(1.0f), 0.0001f) );
bx::WriterI* writer = bx::getNullOut();
bx::Error err;
@@ -637,6 +656,11 @@ TEST_CASE("acos", "[math][libm]")
TEST_CASE("cos", "[math][libm]")
{
STATIC_REQUIRE(bx::isEqual( 1.0f, bx::cos(0.0f ), 0.0000001f) );
STATIC_REQUIRE(bx::isEqual( 0.0f, bx::cos(bx::kPiHalf ), 0.0000001f) );
STATIC_REQUIRE(bx::isEqual(-1.0f, bx::cos(bx::kPi ), 0.0000001f) );
STATIC_REQUIRE(bx::isEqual( 0.0f, bx::cos(bx::kPiHalf*3.0f), 0.0000001f) );
bx::WriterI* writer = bx::getNullOut();
bx::Error err;