This commit is contained in:
Бранимир Караџић
2023-04-21 19:04:07 -07:00
parent 5a9987eeba
commit dd4c9427a4
5 changed files with 70 additions and 30 deletions

View File

@@ -50,12 +50,51 @@ namespace bx
/// Small positive threshold equal to 1 / 2^28 (about 3.7e-9), used as a near-zero cutoff.
constexpr float kNearZero = 1.0f/float(1 << 28);
/// Bit layout of a 16-bit half-precision value:
/// 1 sign bit (bit 15), 5 exponent bits (bits 10..14, bias 15), 10 mantissa bits (bits 0..9).
/// For each field: NumBits is its width, BitShift is the index of its lowest bit,
/// and Mask selects the field in place.
constexpr uint8_t kHalfSignNumBits = 1;
constexpr uint8_t kHalfSignBitShift = 15;
constexpr uint16_t kHalfSignMask = UINT16_C(0x8000);
constexpr uint8_t kHalfExponentNumBits = 5;
constexpr uint8_t kHalfExponentBitShift = 10;
constexpr uint16_t kHalfExponentMask = UINT16_C(0x7c00);
constexpr uint32_t kHalfExponentBias = 15;
constexpr uint8_t kHalfMantissaNumBits = 10;
constexpr uint8_t kHalfMantissaBitShift = 0;
constexpr uint16_t kHalfMantissaMask = UINT16_C(0x03ff);
/// Bit layout of a 32-bit single-precision value:
/// 1 sign bit (bit 31), 8 exponent bits (bits 23..30, bias 127), 23 mantissa bits (bits 0..22).
constexpr uint8_t kFloatSignNumBits = 1;
constexpr uint8_t kFloatSignBitShift = 31;
constexpr uint32_t kFloatSignMask = UINT32_C(0x80000000);
constexpr uint8_t kFloatExponentNumBits = 8;
constexpr uint8_t kFloatExponentBitShift = 23;
constexpr uint32_t kFloatExponentMask = UINT32_C(0x7f800000);
constexpr uint32_t kFloatExponentBias = 127;
constexpr uint8_t kFloatMantissaNumBits = 23;
constexpr uint8_t kFloatMantissaBitShift = 0;
constexpr uint32_t kFloatMantissaMask = UINT32_C(0x007fffff);
/// Smallest normalized positive floating-point number.
constexpr float kFloatSmallest = 1.175494351e-38f;
/// Maximum representable floating-point number.
constexpr float kFloatLargest = 3.402823466e+38f;
/// Single-precision infinity. Only declared here; the value is defined out of line.
extern const float kFloatInfinity;
/// Bit layout of a 64-bit double-precision value:
/// 1 sign bit (bit 63), 11 exponent bits (bits 52..62, bias 1023), 52 mantissa bits (bits 0..51).
/// NOTE(review): the exponent/mantissa shift constants below are spelled `...Shift`,
/// whereas the half and float equivalents are spelled `...BitShift`.
constexpr uint8_t kDoubleSignNumBits = 1;
constexpr uint8_t kDoubleSignBitShift = 63;
constexpr uint64_t kDoubleSignMask = UINT64_C(0x8000000000000000);
constexpr uint8_t kDoubleExponentNumBits = 11;
constexpr uint8_t kDoubleExponentShift = 52;
constexpr uint64_t kDoubleExponentMask = UINT64_C(0x7ff0000000000000);
constexpr uint32_t kDoubleExponentBias = 1023;
constexpr uint8_t kDoubleMantissaNumBits = 52;
constexpr uint8_t kDoubleMantissaShift = 0;
constexpr uint64_t kDoubleMantissaMask = UINT64_C(0x000fffffffffffff);
/// Smallest normalized positive double-precision floating-point number.
constexpr double kDoubleSmallest = 2.2250738585072014e-308;
@@ -63,7 +102,7 @@ namespace bx
/// Maximum representable double-precision floating-point number.
constexpr double kDoubleLargest = 1.7976931348623158e+308;
/// Infinity constants. Only declared here; their values are defined out of line.
extern const float kInfinity;
extern const double kDoubleInfinity;
} // namespace bx

View File

@@ -55,7 +55,7 @@ namespace bx
//
const uint32_t tmp0 = uint32_sra(_value, 31);
const uint32_t tmp1 = uint32_neg(tmp0);
const uint32_t mask = uint32_or(tmp1, 0x80000000);
const uint32_t mask = uint32_or(tmp1, kFloatSignMask);
const uint32_t result = uint32_xor(_value, mask);
return result;
}
@@ -63,37 +63,37 @@ namespace bx
/// Returns true when `_f` is a NaN.
///
/// The sign bit is masked off the raw bits of `_f`. The remainder is greater
/// than kFloatExponentMask only when every exponent bit is set and at least
/// one mantissa bit is set.
inline BX_CONST_FUNC bool isNan(float _f)
{
	// INT32_MAX (0x7fffffff) keeps everything except the sign bit.
	const uint32_t tmp = floatToBits(_f) & INT32_MAX;
	return tmp > kFloatExponentMask;
}
/// Returns true when `_f` is a NaN.
///
/// The sign bit is masked off the raw bits of `_f`. The remainder is greater
/// than kDoubleExponentMask only when every exponent bit is set and at least
/// one mantissa bit is set.
inline BX_CONST_FUNC bool isNan(double _f)
{
	// INT64_MAX (0x7fffffffffffffff) keeps everything except the sign bit.
	const uint64_t tmp = doubleToBits(_f) & INT64_MAX;
	return tmp > kDoubleExponentMask;
}
/// Returns true when `_f` is neither infinite nor NaN.
///
/// With the sign bit masked off, the raw bits are below kFloatExponentMask
/// exactly when the exponent field is not all ones.
inline BX_CONST_FUNC bool isFinite(float _f)
{
	// INT32_MAX (0x7fffffff) keeps everything except the sign bit.
	const uint32_t tmp = floatToBits(_f) & INT32_MAX;
	return tmp < kFloatExponentMask;
}
/// Returns true when `_f` is neither infinite nor NaN.
///
/// With the sign bit masked off, the raw bits are below kDoubleExponentMask
/// exactly when the exponent field is not all ones.
inline BX_CONST_FUNC bool isFinite(double _f)
{
	// INT64_MAX (0x7fffffffffffffff) keeps everything except the sign bit.
	const uint64_t tmp = doubleToBits(_f) & INT64_MAX;
	return tmp < kDoubleExponentMask;
}
/// Returns true when `_f` is positive or negative infinity.
///
/// With the sign bit masked off, the raw bits equal kFloatExponentMask
/// exactly when the exponent field is all ones and the mantissa is zero.
inline BX_CONST_FUNC bool isInfinite(float _f)
{
	// INT32_MAX (0x7fffffff) keeps everything except the sign bit.
	const uint32_t tmp = floatToBits(_f) & INT32_MAX;
	return tmp == kFloatExponentMask;
}
/// Returns true when `_f` is positive or negative infinity.
///
/// With the sign bit masked off, the raw bits equal kDoubleExponentMask
/// exactly when the exponent field is all ones and the mantissa is zero.
inline BX_CONST_FUNC bool isInfinite(double _f)
{
	// INT64_MAX (0x7fffffffffffffff) keeps everything except the sign bit.
	const uint64_t tmp = doubleToBits(_f) & INT64_MAX;
	return tmp == kDoubleExponentMask;
}
inline BX_CONSTEXPR_FUNC float floor(float _a)
@@ -214,7 +214,7 @@ namespace bx
{
if (_a < kNearZero)
{
return kInfinity;
return kFloatInfinity;
}
return pow(_a, -0.5f);

View File

@@ -384,14 +384,14 @@ namespace bx
template<typename Ty>
BX_SIMD_INLINE Ty simd_log2_ni(Ty _a)
{
const Ty expmask = simd_isplat<Ty>(0x7f800000);
const Ty mantmask = simd_isplat<Ty>(0x007fffff);
const Ty expmask = simd_isplat<Ty>(kFloatExponentMask);
const Ty mantmask = simd_isplat<Ty>(kFloatMantissaMask);
const Ty one = simd_splat<Ty>(1.0f);
const Ty c127 = simd_isplat<Ty>(127);
const Ty expbias = simd_isplat<Ty>(kFloatExponentBias);
const Ty aexp = simd_and(_a, expmask);
const Ty aexpsr = simd_srl(aexp, 23);
const Ty tmp0 = simd_isub(aexpsr, c127);
const Ty aexpsr = simd_srl(aexp, kFloatExponentBitShift);
const Ty tmp0 = simd_isub(aexpsr, expbias);
const Ty exp = simd_itof(tmp0);
const Ty amask = simd_and(_a, mantmask);
@@ -419,8 +419,8 @@ namespace bx
const Ty iround = simd_itof(ipart);
const Ty fpart = simd_sub(aaaa, iround);
const Ty c127 = simd_isplat<Ty>(127);
const Ty tmp5 = simd_iadd(ipart, c127);
const Ty expbias = simd_isplat<Ty>(kFloatExponentBias);
const Ty tmp5 = simd_iadd(ipart, expbias);
const Ty expipart = simd_sll(tmp5, 23);
const Ty expfpart = simd_logexp_detail::simd_exppoly(fpart);

View File

@@ -778,15 +778,15 @@ namespace bx
ftou.flt = _a;
const uint32_t one = uint32_li(0x00000001);
const uint32_t f_s_mask = uint32_li(0x80000000);
const uint32_t f_e_mask = uint32_li(0x7f800000);
const uint32_t f_m_mask = uint32_li(0x007fffff);
const uint32_t f_s_mask = uint32_li(kFloatSignMask);
const uint32_t f_e_mask = uint32_li(kFloatExponentMask);
const uint32_t f_m_mask = uint32_li(kFloatMantissaMask);
const uint32_t f_m_hidden_bit = uint32_li(0x00800000);
const uint32_t f_m_round_bit = uint32_li(0x00001000);
const uint32_t f_snan_mask = uint32_li(0x7fc00000);
const uint32_t f_e_pos = uint32_li(0x00000017);
const uint32_t h_e_pos = uint32_li(0x0000000a);
const uint32_t h_e_mask = uint32_li(0x00007c00);
const uint32_t h_e_mask = uint32_li(kHalfExponentMask);
const uint32_t h_snan_mask = uint32_li(0x00007e00);
const uint32_t h_e_mask_value = uint32_li(0x0000001f);
const uint32_t f_h_s_pos_offset = uint32_li(0x00000010);
@@ -841,14 +841,14 @@ namespace bx
inline BX_CONST_FUNC float halfToFloat(uint16_t _a)
{
const uint32_t h_e_mask = uint32_li(0x00007c00);
const uint32_t h_m_mask = uint32_li(0x000003ff);
const uint32_t h_s_mask = uint32_li(0x00008000);
const uint32_t h_e_mask = uint32_li(kHalfExponentMask);
const uint32_t h_m_mask = uint32_li(kHalfMantissaMask);
const uint32_t h_s_mask = uint32_li(kHalfSignMask);
const uint32_t h_f_s_pos_offset = uint32_li(0x00000010);
const uint32_t h_f_e_pos_offset = uint32_li(0x0000000d);
const uint32_t h_f_bias_offset = uint32_li(0x0001c000);
const uint32_t f_e_mask = uint32_li(0x7f800000);
const uint32_t f_m_mask = uint32_li(0x007fffff);
const uint32_t f_e_mask = uint32_li(kFloatExponentMask);
const uint32_t f_m_mask = uint32_li(kFloatMantissaMask);
const uint32_t h_f_e_denorm_bias = uint32_li(0x0000007e);
const uint32_t h_f_m_denorm_sa_bias = uint32_li(0x00000008);
const uint32_t f_e_pos = uint32_li(0x00000017);

View File

@@ -10,7 +10,8 @@
namespace bx
{
// Infinity constants built from bit patterns in which every exponent bit is set
// and the sign and mantissa bits are clear. kInfinity and kFloatInfinity use the
// same 32-bit pattern (0x7f800000).
// NOTE(review): assumes bitsToFloat/bitsToDouble reinterpret the integer as the
// raw bits of the floating-point value; confirm against their definitions.
const float kInfinity = bitsToFloat(UINT32_C(0x7f800000) );
const float kFloatInfinity = bitsToFloat(kFloatExponentMask);
const double kDoubleInfinity = bitsToDouble(kDoubleExponentMask);
namespace
{
@@ -135,11 +136,11 @@ namespace bx
BX_CONST_FUNC float ldexp(float _a, int32_t _b)
{
const uint32_t ftob = floatToBits(_a);
const uint32_t masked = uint32_and(ftob, UINT32_C(0xff800000) );
const uint32_t masked = uint32_and(ftob, kFloatSignMask | kFloatExponentMask);
const uint32_t expsign0 = uint32_sra(masked, 23);
const uint32_t tmp = uint32_iadd(expsign0, _b);
const uint32_t expsign1 = uint32_sll(tmp, 23);
const uint32_t mantissa = uint32_and(ftob, UINT32_C(0x007fffff) );
const uint32_t mantissa = uint32_and(ftob, kFloatMantissaMask);
const uint32_t bits = uint32_or(mantissa, expsign1);
const float result = bitsToFloat(bits);
@@ -149,9 +150,9 @@ namespace bx
float frexp(float _a, int32_t* _outExp)
{
const uint32_t ftob = floatToBits(_a);
const uint32_t masked0 = uint32_and(ftob, UINT32_C(0x7f800000) );
const uint32_t masked0 = uint32_and(ftob, kFloatExponentMask);
const uint32_t exp0 = uint32_srl(masked0, 23);
const uint32_t masked1 = uint32_and(ftob, UINT32_C(0x807fffff) );
const uint32_t masked1 = uint32_and(ftob, kFloatSignMask | kFloatMantissaMask);
const uint32_t bits = uint32_or(masked1, UINT32_C(0x3f000000) );
const float result = bitsToFloat(bits);