diff --git a/include/bx/inline/uint32_t.inl b/include/bx/inline/uint32_t.inl index 4796f8a..3a63ffb 100644 --- a/include/bx/inline/uint32_t.inl +++ b/include/bx/inline/uint32_t.inl @@ -29,162 +29,162 @@ namespace bx { - inline uint32_t uint32_li(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_li(uint32_t _a) { return _a; } - inline uint32_t uint32_dec(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_dec(uint32_t _a) { return _a - 1; } - inline uint32_t uint32_inc(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_inc(uint32_t _a) { return _a + 1; } - inline uint32_t uint32_not(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_not(uint32_t _a) { return ~_a; } - inline uint32_t uint32_neg(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_neg(uint32_t _a) { return -(int32_t)_a; } - inline uint32_t uint32_ext(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_ext(uint32_t _a) { return ( (int32_t)_a)>>31; } - inline uint32_t uint32_and(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_and(uint32_t _a, uint32_t _b) { return _a & _b; } - inline uint32_t uint32_andc(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_andc(uint32_t _a, uint32_t _b) { return _a & ~_b; } - inline uint32_t uint32_xor(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_xor(uint32_t _a, uint32_t _b) { return _a ^ _b; } - inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_xorl(uint32_t _a, uint32_t _b) { return !_a != !_b; } - inline uint32_t uint32_or(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_or(uint32_t _a, uint32_t _b) { return _a | _b; } - inline uint32_t uint32_orc(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_orc(uint32_t _a, uint32_t _b) { return _a | ~_b; } - inline uint32_t uint32_sll(uint32_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint32_t uint32_sll(uint32_t _a, int32_t _sa) { return _a << _sa; } - inline uint32_t uint32_srl(uint32_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint32_t uint32_srl(uint32_t _a, int32_t _sa) { return _a >> _sa; } - inline uint32_t uint32_sra(uint32_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint32_t uint32_sra(uint32_t _a, int32_t _sa) { return ( (int32_t)_a) >> _sa; } - inline uint32_t uint32_rol(uint32_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint32_t uint32_rol(uint32_t _a, int32_t _sa) { return ( _a << _sa) | (_a >> (32-_sa) ); } - inline uint32_t uint32_ror(uint32_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint32_t uint32_ror(uint32_t _a, int32_t _sa) { return ( _a >> _sa) | (_a << (32-_sa) ); } - inline uint32_t uint32_add(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_add(uint32_t _a, uint32_t _b) { return _a + _b; } - inline uint32_t uint32_iadd(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_iadd(uint32_t _a, uint32_t _b) { return int32_t(_a) + int32_t(_b); } - inline uint32_t uint32_sub(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_sub(uint32_t _a, uint32_t _b) { return _a - _b; } - inline uint32_t uint32_isub(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_isub(uint32_t _a, uint32_t _b) { return int32_t(_a) - int32_t(_b); } - inline uint32_t uint32_mul(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_mul(uint32_t _a, uint32_t _b) { return _a * _b; } - inline uint32_t uint32_div(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_div(uint32_t _a, uint32_t _b) { - return (_a / _b); + return _a / _b; } - inline uint32_t uint32_mod(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_mod(uint32_t _a, uint32_t _b) { - return (_a % _b); + return _a % _b; } - inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b) { return -(_a == _b); } - inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b) { return -(_a != _b); } - inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_cmplt(uint32_t _a, uint32_t _b) { return -(_a < _b); } - inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_cmple(uint32_t _a, uint32_t _b) { return -(_a <= _b); } - inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b) { return -(_a > _b); } - inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_cmpge(uint32_t _a, uint32_t _b) { return -(_a >= _b); } - inline uint32_t uint32_setnz(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_setnz(uint32_t _a) { return -!!_a; } - inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b) { const uint32_t add = uint32_add(_a, _b); const uint32_t lt = uint32_cmplt(add, _a); @@ -193,7 +193,7 @@ namespace bx return result; } - inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_satsub(uint32_t _a, uint32_t _b) { const uint32_t sub = uint32_sub(_a, _b); const uint32_t le = uint32_cmple(sub, _a); @@ -202,7 +202,7 @@ namespace bx return result; } - inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_satmul(uint32_t _a, uint32_t _b) { const uint64_t mul = (uint64_t)_a * (uint64_t)_b; const uint32_t hi = mul >> 32; @@ -212,7 +212,7 @@ namespace bx return result; } - inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b) { const uint32_t mask = uint32_ext(test); const uint32_t sel_a = uint32_and(_a, mask); @@ -222,7 +222,7 @@ namespace bx return (result); } - inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b) { const uint32_t sel_a = uint32_and(_a, _mask); const uint32_t sel_b = uint32_andc(_b, _mask); @@ -231,7 +231,7 @@ namespace bx return (result); } - inline uint32_t uint32_imin(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_imin(uint32_t _a, uint32_t _b) { const uint32_t a_sub_b = uint32_sub(_a, _b); const uint32_t result = uint32_sels(a_sub_b, _a, _b); @@ -239,7 +239,7 @@ namespace bx return result; } - inline uint32_t uint32_imax(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_imax(uint32_t _a, uint32_t _b) { const uint32_t b_sub_a = uint32_sub(_b, _a); const uint32_t result = uint32_sels(b_sub_a, _a, _b); @@ -247,27 +247,27 @@ namespace bx return result; } - inline uint32_t uint32_min(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_min(uint32_t _a, uint32_t _b) { return _a > _b ? _b : _a; } - inline uint32_t uint32_min(uint32_t _a, uint32_t _b, uint32_t _c) + inline constexpr BX_CONST_FUNC uint32_t uint32_min(uint32_t _a, uint32_t _b, uint32_t _c) { return uint32_min(_a, uint32_min(_b, _c) ); } - inline uint32_t uint32_max(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_max(uint32_t _a, uint32_t _b) { return _a > _b ? _a : _b; } - inline uint32_t uint32_max(uint32_t _a, uint32_t _b, uint32_t _c) + inline constexpr BX_CONST_FUNC uint32_t uint32_max(uint32_t _a, uint32_t _b, uint32_t _c) { return uint32_max(_a, uint32_max(_b, _c) ); } - inline uint32_t uint32_clamp(uint32_t _a, uint32_t _min, uint32_t _max) + inline constexpr BX_CONST_FUNC uint32_t uint32_clamp(uint32_t _a, uint32_t _min, uint32_t _max) { const uint32_t tmp = uint32_max(_a, _min); const uint32_t result = uint32_min(tmp, _max); @@ -275,7 +275,7 @@ namespace bx return result; } - inline uint32_t uint32_iclamp(uint32_t _a, uint32_t _min, uint32_t _max) + inline constexpr BX_CONST_FUNC uint32_t uint32_iclamp(uint32_t _a, uint32_t _min, uint32_t _max) { const uint32_t tmp = uint32_imax(_a, _min); const uint32_t result = uint32_imin(tmp, _max); @@ -283,7 +283,7 @@ namespace bx return result; } - inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max) + inline constexpr BX_CONST_FUNC uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max) { const uint32_t inc = uint32_inc(_val); const uint32_t max_diff = uint32_sub(_max, _val); @@ -295,7 +295,7 @@ namespace bx return result; } - inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max) + inline constexpr BX_CONST_FUNC uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max) { const uint32_t dec = uint32_dec(_val); const uint32_t min_diff = uint32_sub(_min, _val); @@ -307,7 +307,7 @@ namespace bx return result; } - inline uint32_t uint32_cntbits_ref(uint32_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint32_cntbits(uint32_t _val) { const uint32_t tmp0 = uint32_srl(_val, 1); const uint32_t tmp1 = uint32_and(tmp0, 0x55555555); @@ -330,19 +330,7 @@ namespace bx return result; } - /// Count number of bits set. - inline uint32_t uint32_cntbits(uint32_t _val) - { -#if BX_COMPILER_GCC || BX_COMPILER_CLANG - return __builtin_popcount(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS - return __popcnt(_val); -#else - return uint32_cntbits_ref(_val); -#endif // BX_COMPILER_ - } - - inline uint32_t uint32_cntlz_ref(uint32_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint32_cntlz(uint32_t _val) { const uint32_t tmp0 = uint32_srl(_val, 1); const uint32_t tmp1 = uint32_or(tmp0, _val); @@ -360,21 +348,7 @@ namespace bx return result; } - /// Count number of leading zeros. - inline uint32_t uint32_cntlz(uint32_t _val) - { -#if BX_COMPILER_GCC || BX_COMPILER_CLANG - return __builtin_clz(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS - unsigned long index; - _BitScanReverse(&index, _val); - return 31 - index; -#else - return uint32_cntlz_ref(_val); -#endif // BX_COMPILER_ - } - - inline uint32_t uint32_cnttz_ref(uint32_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint32_cnttz(uint32_t _val) { const uint32_t tmp0 = uint32_not(_val); const uint32_t tmp1 = uint32_dec(_val); @@ -384,25 +358,13 @@ namespace bx return result; } - inline uint32_t uint32_cnttz(uint32_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint32_part1by1(uint32_t _a) { -#if BX_COMPILER_GCC || BX_COMPILER_CLANG - return __builtin_ctz(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS - unsigned long index; - _BitScanForward(&index, _val); - return index; -#else - return uint32_cnttz_ref(_val); -#endif // BX_COMPILER_ - } + // shuffle: + // ---- ---- ---- ---- fedc ba98 7654 3210 + // to: + // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 - // shuffle: - // ---- ---- ---- ---- fedc ba98 7654 3210 - // to: - // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 - inline uint32_t uint32_part1by1(uint32_t _a) - { const uint32_t val = uint32_and(_a, 0xffff); const uint32_t tmp0 = uint32_sll(val, 8); @@ -424,12 +386,13 @@ namespace bx return result; } - // shuffle: - // ---- ---- ---- ---- ---- --98 7654 3210 - // to: - // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 - inline uint32_t uint32_part1by2(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_part1by2(uint32_t _a) { + // shuffle: + // ---- ---- ---- ---- ---- --98 7654 3210 + // to: + // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + const uint32_t val = uint32_and(_a, 0x3ff); const uint32_t tmp0 = uint32_sll(val, 16); @@ -451,7 +414,7 @@ namespace bx return result; } - inline uint32_t uint32_testpow2(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_testpow2(uint32_t _a) { const uint32_t tmp0 = uint32_dec(_a); const uint32_t tmp1 = uint32_xor(_a, tmp0); @@ -461,7 +424,7 @@ namespace bx return result; } - inline uint32_t uint32_nextpow2(uint32_t _a) + inline constexpr BX_CONST_FUNC uint32_t uint32_nextpow2(uint32_t _a) { const uint32_t tmp0 = uint32_dec(_a); const uint32_t tmp1 = uint32_srl(tmp0, 1); @@ -479,123 +442,7 @@ namespace bx return result; } - inline uint16_t halfFromFloat(float _a) - { - union { uint32_t ui; float flt; } ftou; - ftou.flt = _a; - - const uint32_t one = uint32_li(0x00000001); - const uint32_t f_s_mask = uint32_li(0x80000000); - const uint32_t f_e_mask = uint32_li(0x7f800000); - const uint32_t f_m_mask = uint32_li(0x007fffff); - const uint32_t f_m_hidden_bit = uint32_li(0x00800000); - const uint32_t f_m_round_bit = uint32_li(0x00001000); - const uint32_t f_snan_mask = uint32_li(0x7fc00000); - const uint32_t f_e_pos = uint32_li(0x00000017); - const uint32_t h_e_pos = uint32_li(0x0000000a); - const uint32_t h_e_mask = uint32_li(0x00007c00); - const uint32_t h_snan_mask = uint32_li(0x00007e00); - const uint32_t h_e_mask_value = uint32_li(0x0000001f); - const uint32_t f_h_s_pos_offset = uint32_li(0x00000010); - const uint32_t f_h_bias_offset = uint32_li(0x00000070); - const uint32_t f_h_m_pos_offset = uint32_li(0x0000000d); - const uint32_t h_nan_min = uint32_li(0x00007c01); - const uint32_t f_h_e_biased_flag = uint32_li(0x0000008f); - const uint32_t f_s = uint32_and(ftou.ui, f_s_mask); - const uint32_t f_e = uint32_and(ftou.ui, f_e_mask); - const uint16_t h_s = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset); - const uint32_t f_m = uint32_and(ftou.ui, f_m_mask); - const uint16_t f_e_amount = (uint16_t)uint32_srl(f_e, f_e_pos); - const uint32_t f_e_half_bias = uint32_sub(f_e_amount, f_h_bias_offset); - const uint32_t f_snan = uint32_and(ftou.ui, f_snan_mask); - const uint32_t f_m_round_mask = uint32_and(f_m, f_m_round_bit); - const uint32_t f_m_round_offset = uint32_sll(f_m_round_mask, one); - const uint32_t f_m_rounded = uint32_add(f_m, f_m_round_offset); - const uint32_t f_m_denorm_sa = uint32_sub(one, f_e_half_bias); - const uint32_t f_m_with_hidden = uint32_or(f_m_rounded, f_m_hidden_bit); - const uint32_t f_m_denorm = uint32_srl(f_m_with_hidden, f_m_denorm_sa); - const uint32_t h_m_denorm = uint32_srl(f_m_denorm, f_h_m_pos_offset); - const uint32_t f_m_rounded_overflow = uint32_and(f_m_rounded, f_m_hidden_bit); - const uint32_t m_nan = uint32_srl(f_m, f_h_m_pos_offset); - const uint32_t h_em_nan = uint32_or(h_e_mask, m_nan); - const uint32_t h_e_norm_overflow_offset = uint32_inc(f_e_half_bias); - const uint32_t h_e_norm_overflow = uint32_sll(h_e_norm_overflow_offset, h_e_pos); - const uint32_t h_e_norm = uint32_sll(f_e_half_bias, h_e_pos); - const uint32_t h_m_norm = uint32_srl(f_m_rounded, f_h_m_pos_offset); - const uint32_t h_em_norm = uint32_or(h_e_norm, h_m_norm); - const uint32_t is_h_ndenorm_msb = uint32_sub(f_h_bias_offset, f_e_amount); - const uint32_t is_f_e_flagged_msb = uint32_sub(f_h_e_biased_flag, f_e_half_bias); - const uint32_t is_h_denorm_msb = uint32_not(is_h_ndenorm_msb); - const uint32_t is_f_m_eqz_msb = uint32_dec(f_m); - const uint32_t is_h_nan_eqz_msb = uint32_dec(m_nan); - const uint32_t is_f_inf_msb = uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb); - const uint32_t is_f_nan_underflow_msb = uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb); - const uint32_t is_e_overflow_msb = uint32_sub(h_e_mask_value, f_e_half_bias); - const uint32_t is_h_inf_msb = uint32_or(is_e_overflow_msb, is_f_inf_msb); - const uint32_t is_f_nsnan_msb = uint32_sub(f_snan, f_snan_mask); - const uint32_t is_m_norm_overflow_msb = uint32_neg(f_m_rounded_overflow); - const uint32_t is_f_snan_msb = uint32_not(is_f_nsnan_msb); - const uint32_t h_em_overflow_result = uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm); - const uint32_t h_em_nan_result = uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result); - const uint32_t h_em_nan_underflow_result = uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result); - const uint32_t h_em_inf_result = uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result); - const uint32_t h_em_denorm_result = uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result); - const uint32_t h_em_snan_result = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result); - const uint32_t h_result = uint32_or(h_s, h_em_snan_result); - - return (uint16_t)(h_result); - } - - inline float halfToFloat(uint16_t _a) - { - const uint32_t h_e_mask = uint32_li(0x00007c00); - const uint32_t h_m_mask = uint32_li(0x000003ff); - const uint32_t h_s_mask = uint32_li(0x00008000); - const uint32_t h_f_s_pos_offset = uint32_li(0x00000010); - const uint32_t h_f_e_pos_offset = uint32_li(0x0000000d); - const uint32_t h_f_bias_offset = uint32_li(0x0001c000); - const uint32_t f_e_mask = uint32_li(0x7f800000); - const uint32_t f_m_mask = uint32_li(0x007fffff); - const uint32_t h_f_e_denorm_bias = uint32_li(0x0000007e); - const uint32_t h_f_m_denorm_sa_bias = uint32_li(0x00000008); - const uint32_t f_e_pos = uint32_li(0x00000017); - const uint32_t h_e_mask_minus_one = uint32_li(0x00007bff); - const uint32_t h_e = uint32_and(_a, h_e_mask); - const uint32_t h_m = uint32_and(_a, h_m_mask); - const uint32_t h_s = uint32_and(_a, h_s_mask); - const uint32_t h_e_f_bias = uint32_add(h_e, h_f_bias_offset); - const uint32_t h_m_nlz = uint32_cntlz(h_m); - const uint32_t f_s = uint32_sll(h_s, h_f_s_pos_offset); - const uint32_t f_e = uint32_sll(h_e_f_bias, h_f_e_pos_offset); - const uint32_t f_m = uint32_sll(h_m, h_f_e_pos_offset); - const uint32_t f_em = uint32_or(f_e, f_m); - const uint32_t h_f_m_sa = uint32_sub(h_m_nlz, h_f_m_denorm_sa_bias); - const uint32_t f_e_denorm_unpacked = uint32_sub(h_f_e_denorm_bias, h_f_m_sa); - const uint32_t h_f_m = uint32_sll(h_m, h_f_m_sa); - const uint32_t f_m_denorm = uint32_and(h_f_m, f_m_mask); - const uint32_t f_e_denorm = uint32_sll(f_e_denorm_unpacked, f_e_pos); - const uint32_t f_em_denorm = uint32_or(f_e_denorm, f_m_denorm); - const uint32_t f_em_nan = uint32_or(f_e_mask, f_m); - const uint32_t is_e_eqz_msb = uint32_dec(h_e); - const uint32_t is_m_nez_msb = uint32_neg(h_m); - const uint32_t is_e_flagged_msb = uint32_sub(h_e_mask_minus_one, h_e); - const uint32_t is_zero_msb = uint32_andc(is_e_eqz_msb, is_m_nez_msb); - const uint32_t is_inf_msb = uint32_andc(is_e_flagged_msb, is_m_nez_msb); - const uint32_t is_denorm_msb = uint32_and(is_m_nez_msb, is_e_eqz_msb); - const uint32_t is_nan_msb = uint32_and(is_e_flagged_msb, is_m_nez_msb); - const uint32_t is_zero = uint32_ext(is_zero_msb); - const uint32_t f_zero_result = uint32_andc(f_em, is_zero); - const uint32_t f_denorm_result = uint32_sels(is_denorm_msb, f_em_denorm, f_zero_result); - const uint32_t f_inf_result = uint32_sels(is_inf_msb, f_e_mask, f_denorm_result); - const uint32_t f_nan_result = uint32_sels(is_nan_msb, f_em_nan, f_inf_result); - const uint32_t f_result = uint32_or(f_s, f_nan_result); - - union { uint32_t ui; float flt; } utof; - utof.ui = f_result; - return utof.flt; - } - - inline uint32_t uint64_cntbits_ref(uint64_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint64_cntbits(uint64_t _val) { const uint32_t lo = uint32_t(_val&UINT32_MAX); const uint32_t hi = uint32_t(_val>>32); @@ -605,19 +452,7 @@ namespace bx return total; } - /// Count number of bits set. - inline uint32_t uint64_cntbits(uint64_t _val) - { -#if BX_COMPILER_GCC || BX_COMPILER_CLANG - return __builtin_popcountll(_val); -#elif BX_COMPILER_MSVC && BX_ARCH_64BIT - return uint32_t(__popcnt64(_val) ); -#else - return uint64_cntbits_ref(_val); -#endif // BX_COMPILER_ - } - - inline uint32_t uint64_cntlz_ref(uint64_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint64_cntlz(uint64_t _val) { return _val & UINT64_C(0xffffffff00000000) ? uint32_cntlz(uint32_t(_val>>32) ) @@ -625,21 +460,7 @@ namespace bx ; } - /// Count number of leading zeros. - inline uint32_t uint64_cntlz(uint64_t _val) - { -#if BX_COMPILER_GCC || BX_COMPILER_CLANG - return __builtin_clzll(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS && BX_ARCH_64BIT - unsigned long index; - _BitScanReverse64(&index, _val); - return uint32_t(63 - index); -#else - return uint64_cntlz_ref(_val); -#endif // BX_COMPILER_ - } - - inline uint32_t uint64_cnttz_ref(uint64_t _val) + inline constexpr BX_CONST_FUNC uint32_t uint64_cnttz(uint64_t _val) { return _val & UINT64_C(0xffffffff) ? uint32_cnttz(uint32_t(_val) ) @@ -647,65 +468,51 @@ namespace bx ; } - inline uint32_t uint64_cnttz(uint64_t _val) - { -#if BX_COMPILER_GCC || BX_COMPILER_CLANG - return __builtin_ctzll(_val); -#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS && BX_ARCH_64BIT - unsigned long index; - _BitScanForward64(&index, _val); - return uint32_t(index); -#else - return uint64_cnttz_ref(_val); -#endif // BX_COMPILER_ - } - - inline uint64_t uint64_sll(uint64_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint64_t uint64_sll(uint64_t _a, int32_t _sa) { return _a << _sa; } - inline uint64_t uint64_srl(uint64_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint64_t uint64_srl(uint64_t _a, int32_t _sa) { return _a >> _sa; } - inline uint64_t uint64_sra(uint64_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint64_t uint64_sra(uint64_t _a, int32_t _sa) { return ( (int64_t)_a) >> _sa; } - inline uint64_t uint64_rol(uint64_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint64_t uint64_rol(uint64_t _a, int32_t _sa) { return ( _a << _sa) | (_a >> (64-_sa) ); } - inline uint64_t uint64_ror(uint64_t _a, int _sa) + inline constexpr BX_CONST_FUNC uint64_t uint64_ror(uint64_t _a, int32_t _sa) { return ( _a >> _sa) | (_a << (64-_sa) ); } - inline uint64_t uint64_add(uint64_t _a, uint64_t _b) + inline constexpr BX_CONST_FUNC uint64_t uint64_add(uint64_t _a, uint64_t _b) { return _a + _b; } - inline uint64_t uint64_sub(uint64_t _a, uint64_t _b) + inline constexpr BX_CONST_FUNC uint64_t uint64_sub(uint64_t _a, uint64_t _b) { return _a - _b; } - inline uint64_t uint64_mul(uint64_t _a, uint64_t _b) + inline constexpr BX_CONST_FUNC uint64_t uint64_mul(uint64_t _a, uint64_t _b) { return _a * _b; } - /// Greatest common divisor. - inline uint32_t uint32_gcd(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_gcd(uint32_t _a, uint32_t _b) { do { - uint32_t tmp = _a % _b; + const uint32_t tmp = uint32_mod(_a, _b); _a = _b; _b = tmp; } @@ -714,14 +521,12 @@ namespace bx return _a; } - /// Least common multiple. - inline uint32_t uint32_lcm(uint32_t _a, uint32_t _b) + inline constexpr BX_CONST_FUNC uint32_t uint32_lcm(uint32_t _a, uint32_t _b) { return _a * (_b / uint32_gcd(_a, _b) ); } - /// Align to arbitrary stride. - inline uint32_t strideAlign(uint32_t _offset, uint32_t _stride) + inline constexpr BX_CONST_FUNC uint32_t strideAlign(uint32_t _offset, uint32_t _stride) { const uint32_t mod = uint32_mod(_offset, _stride); const uint32_t add = uint32_sub(_stride, mod); @@ -732,8 +537,7 @@ namespace bx return result; } - /// Align to arbitrary stride and 16-bytes. - inline uint32_t strideAlign16(uint32_t _offset, uint32_t _stride) + inline constexpr BX_CONST_FUNC uint32_t strideAlign16(uint32_t _offset, uint32_t _stride) { const uint32_t align = uint32_lcm(16, _stride); const uint32_t mod = uint32_mod(_offset, align); @@ -745,8 +549,7 @@ namespace bx return result; } - /// Align to arbitrary stride and 256-bytes. - inline uint32_t strideAlign256(uint32_t _offset, uint32_t _stride) + inline constexpr BX_CONST_FUNC uint32_t strideAlign256(uint32_t _offset, uint32_t _stride) { const uint32_t align = uint32_lcm(256, _stride); const uint32_t mod = uint32_mod(_offset, align); @@ -758,4 +561,120 @@ namespace bx return result; } + inline BX_CONST_FUNC uint16_t halfFromFloat(float _a) + { + union { uint32_t ui; float flt; } ftou; + ftou.flt = _a; + + const uint32_t one = uint32_li(0x00000001); + const uint32_t f_s_mask = uint32_li(0x80000000); + const uint32_t f_e_mask = uint32_li(0x7f800000); + const uint32_t f_m_mask = uint32_li(0x007fffff); + const uint32_t f_m_hidden_bit = uint32_li(0x00800000); + const uint32_t f_m_round_bit = uint32_li(0x00001000); + const uint32_t f_snan_mask = uint32_li(0x7fc00000); + const uint32_t f_e_pos = uint32_li(0x00000017); + const uint32_t h_e_pos = uint32_li(0x0000000a); + const uint32_t h_e_mask = uint32_li(0x00007c00); + const uint32_t h_snan_mask = uint32_li(0x00007e00); + const uint32_t h_e_mask_value = uint32_li(0x0000001f); + const uint32_t f_h_s_pos_offset = uint32_li(0x00000010); + const uint32_t f_h_bias_offset = uint32_li(0x00000070); + const uint32_t f_h_m_pos_offset = uint32_li(0x0000000d); + const uint32_t h_nan_min = uint32_li(0x00007c01); + const uint32_t f_h_e_biased_flag = uint32_li(0x0000008f); + const uint32_t f_s = uint32_and(ftou.ui, f_s_mask); + const uint32_t f_e = uint32_and(ftou.ui, f_e_mask); + const uint16_t h_s = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset); + const uint32_t f_m = uint32_and(ftou.ui, f_m_mask); + const uint16_t f_e_amount = (uint16_t)uint32_srl(f_e, f_e_pos); + const uint32_t f_e_half_bias = uint32_sub(f_e_amount, f_h_bias_offset); + const uint32_t f_snan = uint32_and(ftou.ui, f_snan_mask); + const uint32_t f_m_round_mask = uint32_and(f_m, f_m_round_bit); + const uint32_t f_m_round_offset = uint32_sll(f_m_round_mask, one); + const uint32_t f_m_rounded = uint32_add(f_m, f_m_round_offset); + const uint32_t f_m_denorm_sa = uint32_sub(one, f_e_half_bias); + const uint32_t f_m_with_hidden = uint32_or(f_m_rounded, f_m_hidden_bit); + const uint32_t f_m_denorm = uint32_srl(f_m_with_hidden, f_m_denorm_sa); + const uint32_t h_m_denorm = uint32_srl(f_m_denorm, f_h_m_pos_offset); + const uint32_t f_m_rounded_overflow = uint32_and(f_m_rounded, f_m_hidden_bit); + const uint32_t m_nan = uint32_srl(f_m, f_h_m_pos_offset); + const uint32_t h_em_nan = uint32_or(h_e_mask, m_nan); + const uint32_t h_e_norm_overflow_offset = uint32_inc(f_e_half_bias); + const uint32_t h_e_norm_overflow = uint32_sll(h_e_norm_overflow_offset, h_e_pos); + const uint32_t h_e_norm = uint32_sll(f_e_half_bias, h_e_pos); + const uint32_t h_m_norm = uint32_srl(f_m_rounded, f_h_m_pos_offset); + const uint32_t h_em_norm = uint32_or(h_e_norm, h_m_norm); + const uint32_t is_h_ndenorm_msb = uint32_sub(f_h_bias_offset, f_e_amount); + const uint32_t is_f_e_flagged_msb = uint32_sub(f_h_e_biased_flag, f_e_half_bias); + const uint32_t is_h_denorm_msb = uint32_not(is_h_ndenorm_msb); + const uint32_t is_f_m_eqz_msb = uint32_dec(f_m); + const uint32_t is_h_nan_eqz_msb = uint32_dec(m_nan); + const uint32_t is_f_inf_msb = uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb); + const uint32_t is_f_nan_underflow_msb = uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb); + const uint32_t is_e_overflow_msb = uint32_sub(h_e_mask_value, f_e_half_bias); + const uint32_t is_h_inf_msb = uint32_or(is_e_overflow_msb, is_f_inf_msb); + const uint32_t is_f_nsnan_msb = uint32_sub(f_snan, f_snan_mask); + const uint32_t is_m_norm_overflow_msb = uint32_neg(f_m_rounded_overflow); + const uint32_t is_f_snan_msb = uint32_not(is_f_nsnan_msb); + const uint32_t h_em_overflow_result = uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm); + const uint32_t h_em_nan_result = uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result); + const uint32_t h_em_nan_underflow_result = uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result); + const uint32_t h_em_inf_result = uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result); + const uint32_t h_em_denorm_result = uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result); + const uint32_t h_em_snan_result = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result); + const uint32_t h_result = uint32_or(h_s, h_em_snan_result); + + return (uint16_t)(h_result); + } + + inline BX_CONST_FUNC float halfToFloat(uint16_t _a) + { + const uint32_t h_e_mask = uint32_li(0x00007c00); + const uint32_t h_m_mask = uint32_li(0x000003ff); + const uint32_t h_s_mask = uint32_li(0x00008000); + const uint32_t h_f_s_pos_offset = uint32_li(0x00000010); + const uint32_t h_f_e_pos_offset = uint32_li(0x0000000d); + const uint32_t h_f_bias_offset = uint32_li(0x0001c000); + const uint32_t f_e_mask = uint32_li(0x7f800000); + const uint32_t f_m_mask = uint32_li(0x007fffff); + const uint32_t h_f_e_denorm_bias = uint32_li(0x0000007e); + const uint32_t h_f_m_denorm_sa_bias = uint32_li(0x00000008); + const uint32_t f_e_pos = uint32_li(0x00000017); + const uint32_t h_e_mask_minus_one = uint32_li(0x00007bff); + const uint32_t h_e = uint32_and(_a, h_e_mask); + const uint32_t h_m = uint32_and(_a, h_m_mask); + const uint32_t h_s = uint32_and(_a, h_s_mask); + const uint32_t h_e_f_bias = uint32_add(h_e, h_f_bias_offset); + const uint32_t h_m_nlz = uint32_cntlz(h_m); + const uint32_t f_s = uint32_sll(h_s, h_f_s_pos_offset); + const uint32_t f_e = uint32_sll(h_e_f_bias, h_f_e_pos_offset); + const uint32_t f_m = uint32_sll(h_m, h_f_e_pos_offset); + const uint32_t f_em = uint32_or(f_e, f_m); + const uint32_t h_f_m_sa = uint32_sub(h_m_nlz, h_f_m_denorm_sa_bias); + const uint32_t f_e_denorm_unpacked = uint32_sub(h_f_e_denorm_bias, h_f_m_sa); + const uint32_t h_f_m = uint32_sll(h_m, h_f_m_sa); + const uint32_t f_m_denorm = uint32_and(h_f_m, f_m_mask); + const uint32_t f_e_denorm = uint32_sll(f_e_denorm_unpacked, f_e_pos); + const uint32_t f_em_denorm = uint32_or(f_e_denorm, f_m_denorm); + const uint32_t f_em_nan = uint32_or(f_e_mask, f_m); + const uint32_t is_e_eqz_msb = uint32_dec(h_e); + const uint32_t is_m_nez_msb = uint32_neg(h_m); + const uint32_t is_e_flagged_msb = uint32_sub(h_e_mask_minus_one, h_e); + const uint32_t is_zero_msb = uint32_andc(is_e_eqz_msb, is_m_nez_msb); + const uint32_t is_inf_msb = uint32_andc(is_e_flagged_msb, is_m_nez_msb); + const uint32_t is_denorm_msb = uint32_and(is_m_nez_msb, is_e_eqz_msb); + const uint32_t is_nan_msb = uint32_and(is_e_flagged_msb, is_m_nez_msb); + const uint32_t is_zero = uint32_ext(is_zero_msb); + const uint32_t f_zero_result = uint32_andc(f_em, is_zero); + const uint32_t f_denorm_result = uint32_sels(is_denorm_msb, f_em_denorm, f_zero_result); + const uint32_t f_inf_result = uint32_sels(is_inf_msb, f_e_mask, f_denorm_result); + const uint32_t f_nan_result = uint32_sels(is_nan_msb, f_em_nan, f_inf_result); + const uint32_t f_result = uint32_or(f_s, f_nan_result); + + union { uint32_t ui; float flt; } utof; + utof.ui = f_result; + return utof.flt; + } + } // namespace bx diff --git a/include/bx/uint32_t.h b/include/bx/uint32_t.h index deb82e3..186792a 100644 --- a/include/bx/uint32_t.h +++ b/include/bx/uint32_t.h @@ -8,265 +8,230 @@ #include "bx.h" -#if BX_COMPILER_MSVC -# if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOXONE || BX_PLATFORM_WINRT -# include -# pragma intrinsic(_BitScanForward) -# pragma intrinsic(_BitScanReverse) -# if BX_ARCH_64BIT -# pragma intrinsic(_BitScanForward64) -# pragma intrinsic(_BitScanReverse64) -# endif // BX_ARCH_64BIT -# endif // BX_PLATFORM_WINDOWS -#endif // BX_COMPILER_MSVC - -#define BX_HALF_FLOAT_ZERO UINT16_C(0) -#define BX_HALF_FLOAT_HALF UINT16_C(0x3800) -#define BX_HALF_FLOAT_ONE UINT16_C(0x3c00) -#define BX_HALF_FLOAT_TWO UINT16_C(0x4000) - namespace bx { - /// - uint32_t uint32_li(uint32_t _a); + constexpr uint16_t kHalfFloatZero = UINT16_C(0); + constexpr uint16_t kHalfFloatHalf = UINT16_C(0x3800); + constexpr uint16_t kHalfFloatOne = UINT16_C(0x3c00); + constexpr uint16_t kHalfFloatTwo = UINT16_C(0x4000); /// - uint32_t uint32_dec(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_li(uint32_t _a); /// - uint32_t uint32_inc(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_dec(uint32_t _a); /// - uint32_t uint32_not(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_inc(uint32_t _a); /// - uint32_t uint32_neg(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_not(uint32_t _a); /// - uint32_t uint32_ext(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_neg(uint32_t _a); /// - uint32_t uint32_and(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_ext(uint32_t _a); /// - uint32_t uint32_andc(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_and(uint32_t _a, uint32_t _b); /// - uint32_t uint32_xor(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_andc(uint32_t _a, uint32_t _b); /// - uint32_t uint32_xorl(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_xor(uint32_t _a, uint32_t _b); /// - uint32_t uint32_or(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_xorl(uint32_t _a, uint32_t _b); /// - uint32_t uint32_orc(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_or(uint32_t _a, uint32_t _b); /// - uint32_t uint32_sll(uint32_t _a, int _sa); + constexpr BX_CONST_FUNC uint32_t uint32_orc(uint32_t _a, uint32_t _b); /// - uint32_t uint32_srl(uint32_t _a, int _sa); + constexpr BX_CONST_FUNC uint32_t uint32_sll(uint32_t _a, int32_t _sa); /// - uint32_t uint32_sra(uint32_t _a, int _sa); + constexpr BX_CONST_FUNC uint32_t uint32_srl(uint32_t _a, int32_t _sa); /// - uint32_t uint32_rol(uint32_t _a, int _sa); + constexpr BX_CONST_FUNC uint32_t uint32_sra(uint32_t _a, int32_t _sa); /// - uint32_t uint32_ror(uint32_t _a, int _sa); + constexpr BX_CONST_FUNC uint32_t uint32_rol(uint32_t _a, int32_t _sa); /// - uint32_t uint32_add(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_ror(uint32_t _a, int32_t _sa); /// - uint32_t uint32_sub(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_add(uint32_t _a, uint32_t _b); /// - uint32_t uint32_mul(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_sub(uint32_t _a, uint32_t _b); /// - uint32_t uint32_div(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_mul(uint32_t _a, uint32_t _b); /// - uint32_t uint32_mod(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_div(uint32_t _a, uint32_t _b); /// - uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_mod(uint32_t _a, uint32_t _b); /// - uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b); /// - uint32_t uint32_cmplt(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b); /// - uint32_t uint32_cmple(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_cmplt(uint32_t _a, uint32_t _b); /// - uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_cmple(uint32_t _a, uint32_t _b); /// - uint32_t uint32_cmpge(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b); /// - uint32_t uint32_setnz(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_cmpge(uint32_t _a, uint32_t _b); /// - uint32_t uint32_satadd(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_setnz(uint32_t _a); /// - uint32_t uint32_satsub(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b); /// - uint32_t uint32_satmul(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_satsub(uint32_t _a, uint32_t _b); /// - uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_satmul(uint32_t _a, uint32_t _b); /// - uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b); /// - uint32_t uint32_imin(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b); /// - uint32_t uint32_imax(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_imin(uint32_t _a, uint32_t _b); /// - uint32_t uint32_min(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_imax(uint32_t _a, uint32_t _b); /// - uint32_t uint32_min(uint32_t _a, uint32_t _b, uint32_t _c); + constexpr BX_CONST_FUNC uint32_t uint32_min(uint32_t _a, uint32_t _b); /// - uint32_t uint32_max(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_min(uint32_t _a, uint32_t _b, uint32_t _c); /// - uint32_t uint32_max(uint32_t _a, uint32_t _b, uint32_t _c); + constexpr BX_CONST_FUNC uint32_t uint32_max(uint32_t _a, uint32_t _b); /// - uint32_t uint32_clamp(uint32_t _a, uint32_t _min, uint32_t _max); + constexpr BX_CONST_FUNC uint32_t uint32_max(uint32_t _a, uint32_t _b, uint32_t _c); /// - uint32_t uint32_iclamp(uint32_t _a, uint32_t _min, uint32_t _max); + constexpr BX_CONST_FUNC uint32_t uint32_clamp(uint32_t _a, uint32_t _min, uint32_t _max); /// - uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max); + constexpr BX_CONST_FUNC uint32_t uint32_iclamp(uint32_t _a, uint32_t _min, uint32_t _max); /// - uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max); + constexpr BX_CONST_FUNC uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max); /// - uint32_t uint32_cntbits_ref(uint32_t _val); + constexpr BX_CONST_FUNC uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max); /// Count number of bits set. /// - uint32_t uint32_cntbits(uint32_t _val); - - /// - uint32_t uint32_cntlz_ref(uint32_t _val); + constexpr BX_CONST_FUNC uint32_t uint32_cntbits(uint32_t _val); /// Count number of leading zeros. /// - uint32_t uint32_cntlz(uint32_t _val); + constexpr BX_CONST_FUNC uint32_t uint32_cntlz(uint32_t _val); /// - uint32_t uint32_cnttz_ref(uint32_t _val); + constexpr BX_CONST_FUNC uint32_t uint32_cnttz(uint32_t _val); /// - uint32_t uint32_cnttz(uint32_t _val); - - // shuffle: - // ---- ---- ---- ---- fedc ba98 7654 3210 - // to: - // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 - uint32_t uint32_part1by1(uint32_t _a); - - // shuffle: - // ---- ---- ---- ---- ---- --98 7654 3210 - // to: - // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 - /// - uint32_t uint32_part1by2(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_part1by1(uint32_t _a); /// - uint32_t uint32_testpow2(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_part1by2(uint32_t _a); /// - uint32_t uint32_nextpow2(uint32_t _a); + constexpr BX_CONST_FUNC uint32_t uint32_testpow2(uint32_t _a); /// - uint16_t halfFromFloat(float _a); - - /// - float halfToFloat(uint16_t _a); - - /// - uint32_t uint64_cntbits_ref(uint64_t _val); + constexpr BX_CONST_FUNC uint32_t uint32_nextpow2(uint32_t _a); /// Count number of bits set. /// - uint32_t uint64_cntbits(uint64_t _val); - - /// - uint32_t uint64_cntlz_ref(uint64_t _val); + constexpr BX_CONST_FUNC uint32_t uint64_cntbits(uint64_t _val); /// Count number of leading zeros. /// - uint32_t uint64_cntlz(uint64_t _val); + constexpr BX_CONST_FUNC uint32_t uint64_cntlz(uint64_t _val); /// - uint32_t uint64_cnttz_ref(uint64_t _val); + constexpr BX_CONST_FUNC uint32_t uint64_cnttz(uint64_t _val); /// - uint32_t uint64_cnttz(uint64_t _val); + constexpr BX_CONST_FUNC uint64_t uint64_sll(uint64_t _a, int32_t _sa); /// - uint64_t uint64_sll(uint64_t _a, int _sa); + constexpr BX_CONST_FUNC uint64_t uint64_srl(uint64_t _a, int32_t _sa); /// - uint64_t uint64_srl(uint64_t _a, int _sa); + constexpr BX_CONST_FUNC uint64_t uint64_sra(uint64_t _a, int32_t _sa); /// - uint64_t uint64_sra(uint64_t _a, int _sa); + constexpr BX_CONST_FUNC uint64_t uint64_rol(uint64_t _a, int32_t _sa); /// - uint64_t uint64_rol(uint64_t _a, int _sa); + constexpr BX_CONST_FUNC uint64_t uint64_ror(uint64_t _a, int32_t _sa); /// - uint64_t uint64_ror(uint64_t _a, int _sa); + constexpr BX_CONST_FUNC uint64_t uint64_add(uint64_t _a, uint64_t _b); /// - uint64_t uint64_add(uint64_t _a, uint64_t _b); + constexpr BX_CONST_FUNC uint64_t uint64_sub(uint64_t _a, uint64_t _b); /// - uint64_t uint64_sub(uint64_t _a, uint64_t _b); - - /// - uint64_t uint64_mul(uint64_t _a, uint64_t _b); + constexpr BX_CONST_FUNC uint64_t uint64_mul(uint64_t _a, uint64_t _b); /// Greatest common divisor. /// - uint32_t uint32_gcd(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_gcd(uint32_t _a, uint32_t _b); /// Least common multiple. /// - uint32_t uint32_lcm(uint32_t _a, uint32_t _b); + constexpr BX_CONST_FUNC uint32_t uint32_lcm(uint32_t _a, uint32_t _b); /// Align to arbitrary stride. /// - uint32_t strideAlign(uint32_t _offset, uint32_t _stride); + constexpr BX_CONST_FUNC uint32_t strideAlign(uint32_t _offset, uint32_t _stride); /// Align to arbitrary stride and 16-bytes. /// - uint32_t strideAlign16(uint32_t _offset, uint32_t _stride); + constexpr BX_CONST_FUNC uint32_t strideAlign16(uint32_t _offset, uint32_t _stride); /// Align to arbitrary stride and 256-bytes. /// - uint32_t strideAlign256(uint32_t _offset, uint32_t _stride); + constexpr BX_CONST_FUNC uint32_t strideAlign256(uint32_t _offset, uint32_t _stride); + + /// Convert float to half-float. + /// + BX_CONST_FUNC uint16_t halfFromFloat(float _a); + + /// Convert half-float to float. + /// + BX_CONST_FUNC float halfToFloat(uint16_t _a); } // namespace bx diff --git a/tests/uint32_test.cpp b/tests/uint32_test.cpp index 8fa05ed..bb7e0dc 100644 --- a/tests/uint32_test.cpp +++ b/tests/uint32_test.cpp @@ -24,25 +24,18 @@ TEST_CASE("StrideAlign") TEST_CASE("uint32_cnt") { REQUIRE( 0 == bx::uint32_cnttz(UINT32_C(1) ) ); - REQUIRE( 0 == bx::uint32_cnttz_ref(UINT32_C(1) ) ); REQUIRE(31 == bx::uint32_cntlz(UINT32_C(1) ) ); - REQUIRE(31 == bx::uint32_cntlz_ref(UINT32_C(1) ) ); REQUIRE( 0 == bx::uint64_cnttz(UINT64_C(1) ) ); - REQUIRE( 0 == bx::uint64_cnttz_ref(UINT64_C(1) ) ); REQUIRE(63 == bx::uint64_cntlz(UINT64_C(1) ) ); - REQUIRE(63 == bx::uint64_cntlz_ref(UINT64_C(1) ) ); REQUIRE( 1 == bx::uint32_cntbits(1) ); - REQUIRE( 1 == bx::uint32_cntbits_ref(1) ); REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) ); - REQUIRE(16 == bx::uint32_cntbits_ref(UINT16_MAX) ); REQUIRE(32 == bx::uint32_cntbits(UINT32_MAX) ); - REQUIRE(32 == bx::uint32_cntbits_ref(UINT32_MAX) ); } TEST_CASE("uint32_part")