Cleanup bitwise ops.

This commit is contained in:
Бранимир Караџић
2023-11-23 09:41:57 -08:00
parent 837de0582e
commit 08baf8fa64
6 changed files with 346 additions and 183 deletions

View File

@@ -212,16 +212,180 @@ namespace bx
return pow(2.0f, _a);
}
template<>
inline BX_CONST_FUNC float log2(float _a)
{
	// Scale the natural logarithm by kInvLogNat2 (defined elsewhere;
	// presumably 1/ln(2) -- confirm against its definition).
	const auto naturalLog = log(_a);
	return naturalLog * kInvLogNat2;
}
template<>
inline BX_CONST_FUNC int32_t log2(int32_t _a)
inline BX_CONSTEXPR_FUNC uint8_t countBits(uint32_t _val)
{
return 31 - uint32_cntlz(_a);
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return __builtin_popcount(_val);
#else
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
const uint32_t tmp2 = uint32_sub(_val, tmp1);
const uint32_t tmp3 = uint32_and(tmp2, 0xc30c30c3);
const uint32_t tmp4 = uint32_srl(tmp2, 2);
const uint32_t tmp5 = uint32_and(tmp4, 0xc30c30c3);
const uint32_t tmp6 = uint32_srl(tmp2, 4);
const uint32_t tmp7 = uint32_and(tmp6, 0xc30c30c3);
const uint32_t tmp8 = uint32_add(tmp3, tmp5);
const uint32_t tmp9 = uint32_add(tmp7, tmp8);
const uint32_t tmpA = uint32_srl(tmp9, 6);
const uint32_t tmpB = uint32_add(tmp9, tmpA);
const uint32_t tmpC = uint32_srl(tmpB, 12);
const uint32_t tmpD = uint32_srl(tmpB, 24);
const uint32_t tmpE = uint32_add(tmpB, tmpC);
const uint32_t tmpF = uint32_add(tmpD, tmpE);
const uint32_t result = uint32_and(tmpF, 0x3f);
return result;
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	return __builtin_popcountll(_val);
#else
	// No builtin available: count each 32-bit half and add the two counts.
	const uint32_t lowerHalf = uint32_t(_val&UINT32_MAX);
	const uint32_t upperHalf = uint32_t(_val>>32);

	return uint32_cntbits(lowerHalf) + uint32_cntbits(upperHalf);
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(unsigned long _val)
{
	// Widening never adds set bits, so the unsigned long long
	// specialization gives the answer directly.
	using WideType = unsigned long long;
	return countBits<WideType>(_val);
}
// Narrow and signed specializations of countBits:
//  - uint8_t / uint16_t are forwarded to the uint32_t specialization,
//  - every signed type is forwarded to the unsigned type of the same width.
template<> inline BX_CONSTEXPR_FUNC uint8_t countBits(uint8_t _val) { return countBits<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countBits(int8_t _val) { return countBits<uint8_t >(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countBits(uint16_t _val) { return countBits<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countBits(int16_t _val) { return countBits<uint16_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countBits(int32_t _val) { return countBits<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countBits(int64_t _val) { return countBits<uint64_t>(_val); }
// Leading-zero count for a 32-bit value.
template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
// Zero is answered explicitly (32) instead of being handed to __builtin_clz,
// whose result is undefined for a zero argument.
return 0 == _val ? 32 : __builtin_clz(_val);
#else
// Portable path. Assuming the uint32_* helpers (defined elsewhere) behave as
// their names suggest: OR the value with copies of itself shifted right by
// 1, 2, 4, 8 and 16, invert the result, and count the bits that remain set.
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_or(tmp0, _val);
const uint32_t tmp2 = uint32_srl(tmp1, 2);
const uint32_t tmp3 = uint32_or(tmp2, tmp1);
const uint32_t tmp4 = uint32_srl(tmp3, 4);
const uint32_t tmp5 = uint32_or(tmp4, tmp3);
const uint32_t tmp6 = uint32_srl(tmp5, 8);
const uint32_t tmp7 = uint32_or(tmp6, tmp5);
const uint32_t tmp8 = uint32_srl(tmp7, 16);
const uint32_t tmp9 = uint32_or(tmp8, tmp7);
const uint32_t tmpA = uint32_not(tmp9);
const uint32_t result = uint32_cntbits(tmpA);
return result;
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered here rather than being passed to the builtin.
	if (0 == _val)
	{
		return 64;
	}

	return __builtin_clzll(_val);
#else
	const uint32_t upperHalf = uint32_t(_val>>32);

	// Any bit set in the upper half decides the count on its own.
	if (0 != upperHalf)
	{
		return uint32_cntlz(upperHalf);
	}

	// Upper half is all zeros: 32 plus the count of the lower half.
	return uint32_cntlz(uint32_t(_val) ) + 32;
#endif // BX_COMPILER_*
}
// Leading-zero count for unsigned long.
//
// The value is counted as unsigned long long, then the leading zeros that the
// widening itself introduced are subtracted. The correction is 0 where
// unsigned long is as wide as unsigned long long and 32 where it is a 32-bit
// type, mirroring the -24 / -16 correction applied to uint8_t / uint16_t.
// Returns the bit width of unsigned long for a zero input.
template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(unsigned long _val)
{
	return countLeadingZeros<unsigned long long>(_val)
		- uint8_t( (sizeof(unsigned long long) - sizeof(unsigned long) )*8)
		;
}
// Narrow and signed specializations of countLeadingZeros:
//  - uint8_t / uint16_t are counted as uint32_t, then the 24 / 16 leading
//    zeros added by the widening are subtracted,
//  - every signed type is forwarded to the unsigned type of the same width.
template<> inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(uint8_t _val) { return countLeadingZeros<uint32_t>(_val)-24; }
template<> inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int8_t _val) { return countLeadingZeros<uint8_t >(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(uint16_t _val) { return countLeadingZeros<uint32_t>(_val)-16; }
template<> inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int16_t _val) { return countLeadingZeros<uint16_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int32_t _val) { return countLeadingZeros<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int64_t _val) { return countLeadingZeros<uint64_t>(_val); }
template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered here rather than being passed to the builtin.
	if (0 == _val)
	{
		return 32;
	}

	return __builtin_ctz(_val);
#else
	// Combine the inverted value with (value - 1) and count the set bits
	// of the result.
	const uint32_t inverted    = uint32_not(_val);
	const uint32_t decremented = uint32_dec(_val);
	const uint32_t trailing    = uint32_and(inverted, decremented);
	const uint32_t numBits     = uint32_cntbits(trailing);

	return numBits;
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered here rather than being passed to the builtin.
	if (0 == _val)
	{
		return 64;
	}

	return __builtin_ctzll(_val);
#else
	// Any bit set in the lower half decides the count on its own.
	if (0 != (_val & UINT64_C(0xffffffff) ) )
	{
		return uint32_cnttz(uint32_t(_val) );
	}

	// Lower half is all zeros: 32 plus the count of the upper half.
	return uint32_cnttz(uint32_t(_val>>32) ) + 32;
#endif // BX_COMPILER_*
}
// Trailing-zero count for unsigned long.
//
// Widening to unsigned long long does not change the count for non-zero
// values, but a zero input would report 64 even where unsigned long is a
// 32-bit type. Zero is therefore answered with the bit width of unsigned long,
// matching the clamping applied to uint8_t / uint16_t.
template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(unsigned long _val)
{
	return 0 == _val
		? uint8_t(sizeof(unsigned long)*8)
		: countTrailingZeros<unsigned long long>(_val)
		;
}
// Narrow and signed specializations of countTrailingZeros:
//  - uint8_t / uint16_t are counted as uint32_t and the result is clamped to
//    8 / 16, so a zero input reports the width of the narrow type,
//  - every signed type is forwarded to the unsigned type of the same width.
template<> inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(uint8_t _val) { return bx::min(8u, countTrailingZeros<uint32_t>(_val) ); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int8_t _val) { return countTrailingZeros<uint8_t >(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(uint16_t _val) { return bx::min(16u, countTrailingZeros<uint32_t>(_val) ); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int16_t _val) { return countTrailingZeros<uint16_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int32_t _val) { return countTrailingZeros<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int64_t _val) { return countTrailingZeros<uint64_t>(_val); }
template<typename Ty>
inline BX_CONSTEXPR_FUNC uint8_t findFirstSet(Ty _x)
{
	// No bit set: report 0.
	if (Ty(0) == _x)
	{
		return uint8_t(0);
	}

	// Otherwise the answer is the trailing-zero count plus one.
	return countTrailingZeros<Ty>(_x) + 1;
}
template<typename Ty>
inline BX_CONSTEXPR_FUNC uint8_t ceilLog2(Ty _a)
{
	BX_STATIC_ASSERT(isInteger<Ty>(), "Type Ty must be of integer type!");

	// Values below 1 yield 0.
	if (Ty(_a) < Ty(1) )
	{
		return Ty(0);
	}

	// Bit width of Ty minus the leading zeros of (_a - 1).
	return sizeof(Ty)*8 - countLeadingZeros<Ty>(_a - 1);
}
// Returns Ty(1) shifted left by ceilLog2(_a).
//
// Asserts that the shift amount is smaller than the bit width of Ty, i.e.
// that the result is representable. The values handed to the assert message
// are cast to uint32_t so they match the %u conversions (sizeof yields a
// size_t, which is wider than unsigned int on 64-bit targets).
template<typename Ty>
inline BX_CONSTEXPR_FUNC Ty nextPow2(Ty _a)
{
	// Named `shift` rather than `log2` so the local does not shadow the
	// log2 function.
	const uint8_t shift = ceilLog2(_a);

	BX_ASSERT(shift < sizeof(Ty)*8
		, "Type Ty cannot represent the next power-of-two value (1<<%u is larger than %u-bit type)."
		, uint32_t(shift)
		, uint32_t(sizeof(Ty)*8)
		);

	return Ty(1)<<shift;
}
inline BX_CONST_FUNC float rsqrtRef(float _a)

View File

@@ -307,7 +307,6 @@ namespace bx
return result;
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
@@ -335,35 +334,6 @@ namespace bx
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	return __builtin_popcountll(_val);
#else
	// No builtin available: count each 32-bit half and add the two counts.
	const uint32_t lowerHalf = uint32_t(_val&UINT32_MAX);
	const uint32_t upperHalf = uint32_t(_val>>32);

	return uint32_cntbits(lowerHalf) + uint32_cntbits(upperHalf);
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(unsigned long _val)
{
	// Widening never adds set bits, so the unsigned long long
	// specialization gives the answer directly.
	using WideType = unsigned long long;
	return uint32_cntbits<WideType>(_val);
}
// Narrow and signed specializations of uint32_cntbits:
//  - uint8_t / uint16_t are forwarded to the uint32_t specialization,
//  - every signed type is forwarded to the unsigned type of the same width.
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint8_t _val) { return uint32_cntbits<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int8_t _val) { return uint32_cntbits<uint8_t >(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint16_t _val) { return uint32_cntbits<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int16_t _val) { return uint32_cntbits<uint16_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int32_t _val) { return uint32_cntbits<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int64_t _val) { return uint32_cntbits<uint64_t>(_val); }
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
@@ -386,33 +356,6 @@ namespace bx
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered here rather than being passed to the builtin.
	if (0 == _val)
	{
		return 64;
	}

	return __builtin_clzll(_val);
#else
	const uint32_t upperHalf = uint32_t(_val>>32);

	// Any bit set in the upper half decides the count on its own.
	if (0 != upperHalf)
	{
		return uint32_cntlz(upperHalf);
	}

	// Upper half is all zeros: 32 plus the count of the lower half.
	return uint32_cntlz(uint32_t(_val) ) + 32;
#endif // BX_COMPILER_*
}
// Leading-zero count for unsigned long.
//
// The value is counted as unsigned long long, then the leading zeros that the
// widening itself introduced are subtracted. The correction is 0 where
// unsigned long is as wide as unsigned long long and 32 where it is a 32-bit
// type, mirroring the -24 / -16 correction applied to uint8_t / uint16_t.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(unsigned long _val)
{
	return uint32_cntlz<unsigned long long>(_val)
		- uint32_t( (sizeof(unsigned long long) - sizeof(unsigned long) )*8)
		;
}
// Narrow and signed specializations of uint32_cntlz:
//  - uint8_t / uint16_t are counted as uint32_t, then the 24 / 16 leading
//    zeros added by the widening are subtracted,
//  - every signed type is forwarded to the unsigned type of the same width.
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint8_t _val) { return uint32_cntlz<uint32_t>(_val)-24; }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int8_t _val) { return uint32_cntlz<uint8_t >(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint16_t _val) { return uint32_cntlz<uint32_t>(_val)-16; }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int16_t _val) { return uint32_cntlz<uint16_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int32_t _val) { return uint32_cntlz<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int64_t _val) { return uint32_cntlz<uint64_t>(_val); }
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
@@ -427,39 +370,11 @@ namespace bx
#endif // BX_COMPILER_*
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(unsigned long long _val)
inline BX_CONSTEXPR_FUNC uint32_t uint32_ffs(uint32_t _x)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return 0 == _val ? 64 : __builtin_ctzll(_val);
#else
return _val & UINT64_C(0xffffffff)
? uint32_cnttz(uint32_t(_val) )
: uint32_cnttz(uint32_t(_val>>32) ) + 32
;
#endif // BX_COMPILER_*
return 0 == _x ? 0 : uint32_cnttz(_x) + 1;
}
// Trailing-zero count for unsigned long.
//
// Widening to unsigned long long does not change the count for non-zero
// values, but a zero input would report 64 even where unsigned long is a
// 32-bit type. Zero is therefore answered with the bit width of unsigned long,
// matching the clamping applied to uint8_t / uint16_t.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(unsigned long _val)
{
	return 0 == _val
		? uint32_t(sizeof(unsigned long)*8)
		: uint32_cnttz<unsigned long long>(_val)
		;
}
// Narrow and signed specializations of uint32_cnttz:
//  - uint8_t / uint16_t are counted as uint32_t and the result is clamped to
//    8 / 16, so a zero input reports the width of the narrow type,
//  - every signed type is forwarded to the unsigned type of the same width.
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint8_t _val) { return bx::min(8u, uint32_cnttz<uint32_t>(_val) ); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int8_t _val) { return uint32_cnttz<uint8_t >(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint16_t _val) { return bx::min(16u, uint32_cnttz<uint32_t>(_val) ); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int16_t _val) { return uint32_cnttz<uint16_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int32_t _val) { return uint32_cnttz<uint32_t>(_val); }
template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int64_t _val) { return uint32_cnttz<uint64_t>(_val); }
template<typename Ty>
inline BX_CONSTEXPR_FUNC uint32_t uint32_ffs(Ty _x)
{
	// No bit set: report 0.
	if (Ty(0) == _x)
	{
		return uint32_t(0);
	}

	// Otherwise the answer is the trailing-zero count plus one.
	return uint32_cnttz<Ty>(_x) + 1;
}
inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
{
// shuffle:
@@ -644,19 +559,42 @@ namespace bx
return _a * _b;
}
inline BX_CONSTEXPR_FUNC uint32_t uint64_cntbits(uint64_t _val)
inline BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val)
{
return uint32_cntbits(_val);
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return __builtin_popcountll(_val);
#else
const uint32_t lo = uint32_t(_val&UINT32_MAX);
const uint32_t hi = uint32_t(_val>>32);
return uint32_cntbits(lo)
+ uint32_cntbits(hi)
;
#endif // BX_COMPILER_*
}
inline BX_CONSTEXPR_FUNC uint32_t uint64_cntlz(uint64_t _val)
inline BX_CONSTEXPR_FUNC uint64_t uint64_cntlz(uint64_t _val)
{
return uint32_cntlz(_val);
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return 0 == _val ? 64 : __builtin_clzll(_val);
#else
return _val & UINT64_C(0xffffffff00000000)
? uint32_cntlz(uint32_t(_val>>32) )
: uint32_cntlz(uint32_t(_val) ) + 32
;
#endif // BX_COMPILER_*
}
inline BX_CONSTEXPR_FUNC uint32_t uint64_cnttz(uint64_t _val)
inline BX_CONSTEXPR_FUNC uint64_t uint64_cnttz(uint64_t _val)
{
return uint32_cnttz(_val);
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return 0 == _val ? 64 : __builtin_ctzll(_val);
#else
return _val & UINT64_C(0xffffffff)
? uint32_cnttz(uint32_t(_val) )
: uint32_cnttz(uint32_t(_val>>32) ) + 32
;
#endif // BX_COMPILER_*
}
inline BX_CONSTEXPR_FUNC uint32_t uint32_gcd(uint32_t _a, uint32_t _b)

View File

@@ -270,8 +270,37 @@ namespace bx
/// Returns the base 2 logarithm of _a.
///
BX_CONST_FUNC float log2(float _a);
/// Count number of bits set.
///
template<typename Ty>
BX_CONST_FUNC Ty log2(Ty _a);
BX_CONSTEXPR_FUNC uint8_t countBits(Ty _val);
/// Count number of leading zero bits in _val, starting at the most
/// significant bit. For the fixed-width integer specializations a zero
/// input yields the bit width of Ty.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(Ty _val);
/// Count number of trailing zero bits in _val, starting at the least
/// significant bit. For the fixed-width integer specializations a zero
/// input yields the bit width of Ty.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(Ty _val);
/// Find first set bit. Returns the 1-based position of the least
/// significant set bit in _val, or 0 when no bit is set.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t findFirstSet(Ty _val);
/// Returns the base 2 logarithm of _a rounded up to the next integer, i.e.
/// the smallest n for which (1<<n) is greater than or equal to _a.
/// Returns 0 when _a is less than 1. Ty must be an integer type.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t ceilLog2(Ty _a);
/// Returns the smallest power of two that is greater than or equal to _a
/// (1 when _a is less than 1). Asserts when the result cannot be
/// represented by Ty.
///
template<typename Ty>
BX_CONSTEXPR_FUNC Ty nextPow2(Ty _a);
/// Returns the square root of _a.
///

View File

@@ -149,23 +149,19 @@ namespace bx
/// Count number of bits set.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(Ty _val);
BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val);
/// Count number of leading zeros.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(Ty _val);
BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val);
/// Count number of trailing zeros.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(Ty _val);
BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val);
/// Find first set.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_ffs(Ty _val);
BX_CONSTEXPR_FUNC uint32_t uint32_ffs(uint32_t _val);
///
BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a);
@@ -240,13 +236,13 @@ namespace bx
BX_CONSTEXPR_FUNC uint64_t uint64_mul(uint64_t _a, uint64_t _b);
///
BX_CONSTEXPR_FUNC uint32_t uint64_cntbits(uint64_t _val);
BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val);
///
BX_CONSTEXPR_FUNC uint32_t uint64_cntlz(uint64_t _val);
BX_CONSTEXPR_FUNC uint64_t uint64_cntlz(uint64_t _val);
///
BX_CONSTEXPR_FUNC uint32_t uint64_cnttz(uint64_t _val);
BX_CONSTEXPR_FUNC uint64_t uint64_cnttz(uint64_t _val);
/// Greatest common divisor.
///