Updated count bits, count leading/trailing zeros.

This commit is contained in:
Бранимир Караџић
2019-09-09 20:36:48 -07:00
parent da9248697e
commit 421eaf58a9
3 changed files with 111 additions and 21 deletions

View File

@@ -307,8 +307,12 @@ namespace bx
return result;
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return __builtin_popcount(_val);
#else
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
const uint32_t tmp2 = uint32_sub(_val, tmp1);
@@ -328,10 +332,37 @@ namespace bx
const uint32_t result = uint32_and(tmpF, 0x3f);
return result;
#endif // BX_COMPILER_*
}
// Population count for 64-bit values.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint64_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	return __builtin_popcountll(_val);
#else
	// No compiler intrinsic on this path: count each 32-bit half separately
	// and add the two results.
	const uint32_t upperHalf = uint32_t(_val>>32);
	const uint32_t lowerHalf = uint32_t(_val&UINT32_MAX);
	return uint32_cntbits(lowerHalf) + uint32_cntbits(upperHalf);
#endif // BX_COMPILER_*
}
// Narrower and signed integer types forward to the 32-bit / 64-bit
// implementations. Signed values are first converted to the unsigned type of
// the same width, so widening never sign-extends extra bits into the count.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint8_t _val)
{
	return uint32_cntbits(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int8_t _val)
{
	return uint32_cntbits(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint16_t _val)
{
	return uint32_cntbits(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int16_t _val)
{
	return uint32_cntbits(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int32_t _val)
{
	return uint32_cntbits(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int64_t _val)
{
	return uint32_cntbits(uint64_t(_val) );
}
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return 0 == _val ? 32 : __builtin_clz(_val);
#else
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_or(tmp0, _val);
const uint32_t tmp2 = uint32_srl(tmp1, 2);
@@ -346,18 +377,64 @@ namespace bx
const uint32_t result = uint32_cntbits(tmpA);
return result;
#endif // BX_COMPILER_*
}
// Leading-zero count for 64-bit values.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint64_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (all 64 bits are leading zeros) rather than
	// being handed to the builtin.
	if (0 == _val)
	{
		return 64;
	}

	return __builtin_clzll(_val);
#else
	const uint32_t upperHalf = uint32_t(_val>>32);

	if (0 != upperHalf)
	{
		// The highest set bit lives in the upper 32 bits.
		return uint32_cntlz(upperHalf);
	}

	// Upper half is empty: its 32 zeros plus the leading zeros of the lower half.
	return uint32_cntlz(uint32_t(_val) ) + 32;
#endif // BX_COMPILER_*
}
// 8-bit and 16-bit values are counted as 32-bit values, then the zeros that
// belong only to the widened representation (24 or 16) are subtracted.
// Signed types forward to the unsigned type of the same width.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint8_t _val)
{
	const uint32_t wideCount = uint32_cntlz(uint32_t(_val) );
	return wideCount - 24;
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int8_t _val)
{
	return uint32_cntlz(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint16_t _val)
{
	const uint32_t wideCount = uint32_cntlz(uint32_t(_val) );
	return wideCount - 16;
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int16_t _val)
{
	return uint32_cntlz(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int32_t _val)
{
	return uint32_cntlz(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int64_t _val)
{
	return uint32_cntlz(uint64_t(_val) );
}
// Trailing-zero count for 32-bit values.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (32) rather than being handed to the builtin.
	if (0 == _val)
	{
		return 32;
	}

	return __builtin_ctz(_val);
#else
	// Assuming uint32_not / uint32_dec / uint32_and are bitwise NOT, decrement
	// and bitwise AND: (~_val & (_val - 1) ) keeps only the bits below the
	// lowest set bit, so counting them gives the number of trailing zeros.
	const uint32_t belowLowestSet = uint32_and(uint32_not(_val), uint32_dec(_val) );
	return uint32_cntbits(belowLowestSet);
#endif // BX_COMPILER_*
}
// Trailing-zero count for 64-bit values.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint64_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (all 64 bits are trailing zeros) rather than
	// being handed to the builtin.
	if (0 == _val)
	{
		return 64;
	}

	return __builtin_ctzll(_val);
#else
	const uint32_t lowerHalf = uint32_t(_val);

	if (0 != lowerHalf)
	{
		// The lowest set bit lives in the lower 32 bits.
		return uint32_cnttz(lowerHalf);
	}

	// Lower half is empty: its 32 zeros plus the trailing zeros of the upper half.
	return uint32_cnttz(uint32_t(_val>>32) ) + 32;
#endif // BX_COMPILER_*
}
// 8-bit and 16-bit values are counted as 32-bit values; the result is capped
// at the bit width of the narrow type, since a 32-bit count can exceed it
// (a zero input counts as 32). Signed types forward to the unsigned type of
// the same width.
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint8_t _val)
{
	return bx::min(8u, uint32_cnttz(uint32_t(_val) ) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int8_t _val)
{
	return uint32_cnttz(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint16_t _val)
{
	return bx::min(16u, uint32_cnttz(uint32_t(_val) ) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int16_t _val)
{
	return uint32_cnttz(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int32_t _val)
{
	return uint32_cnttz(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int64_t _val)
{
	return uint32_cnttz(uint64_t(_val) );
}
inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
{
// shuffle:

View File

@@ -149,14 +149,18 @@ namespace bx
/// Count number of bits set.
///
/// Returns the number of bits in `_val` that are 1.
///
BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val);
/// Generic form of the bit count; `Ty` is the integer type of `_val`.
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(Ty _val);
/// Count number of leading zeros.
///
/// Returns the number of zero bits above the highest set bit of `_val`.
/// When `_val` is 0 the result is the bit width of its type (32 for uint32_t).
///
BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val);
/// Generic form of the leading-zero count; `Ty` is the integer type of `_val`.
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(Ty _val);
/// Count number of trailing zeros.
///
/// Returns the number of zero bits below the lowest set bit of `_val`.
/// When `_val` is 0 the result is the bit width of its type (32 for uint32_t).
///
BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val);
/// Generic form of the trailing-zero count; `Ty` is the integer type of `_val`.
template<typename Ty>
BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(Ty _val);
///
BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a);
@@ -170,17 +174,6 @@ namespace bx
///
BX_CONSTEXPR_FUNC uint32_t uint32_nextpow2(uint32_t _a);
/// Count number of bits set.
///
BX_CONSTEXPR_FUNC uint32_t uint64_cntbits(uint64_t _val);
/// Count number of leading zeros.
///
BX_CONSTEXPR_FUNC uint32_t uint64_cntlz(uint64_t _val);
/// Count number of trailing zeros.
///
BX_CONSTEXPR_FUNC uint32_t uint64_cnttz(uint64_t _val);
///
BX_CONSTEXPR_FUNC uint64_t uint64_li(uint64_t _a);

View File

@@ -30,19 +30,39 @@ TEST_CASE("StrideAlign")
// Checks the count-trailing-zeros, count-leading-zeros and count-bits helpers
// across 8-, 16-, 32- and 64-bit argument types.
TEST_CASE("uint32_cnt")
{
// Trailing zeros: a single set bit reports its index; a zero input reports
// the bit width of the argument type.
REQUIRE( 0 == bx::uint32_cnttz(UINT32_C(1) ) );
REQUIRE( 0 == bx::uint32_cnttz<uint8_t >(1) );
REQUIRE( 7 == bx::uint32_cnttz<uint8_t >(1<<7) );
REQUIRE( 8 == bx::uint32_cnttz<uint8_t >(0) );
REQUIRE( 0 == bx::uint32_cnttz<uint16_t>(1) );
REQUIRE(15 == bx::uint32_cnttz<uint16_t>(1<<15) );
REQUIRE(16 == bx::uint32_cnttz<uint16_t>(0) );
REQUIRE( 0 == bx::uint32_cnttz<uint32_t>(1) );
REQUIRE(32 == bx::uint32_cnttz<uint32_t>(0) );
REQUIRE(31 == bx::uint32_cnttz<uint32_t>(1<<31) );
REQUIRE( 0 == bx::uint32_cnttz<uint64_t>(1) );
REQUIRE(64 == bx::uint32_cnttz<uint64_t>(0) );
// Leading zeros: the value 1 leaves (bit width - 1) leading zeros; a zero
// input reports the bit width of the argument type.
REQUIRE(31 == bx::uint32_cntlz(UINT32_C(1) ) );
REQUIRE( 0 == bx::uint64_cnttz(UINT64_C(1) ) );
REQUIRE(63 == bx::uint64_cntlz(UINT64_C(1) ) );
REQUIRE( 7 == bx::uint32_cntlz<uint8_t >(1) );
REQUIRE( 8 == bx::uint32_cntlz<uint8_t >(0) );
REQUIRE(15 == bx::uint32_cntlz<uint16_t>(1) );
REQUIRE(16 == bx::uint32_cntlz<uint16_t>(0) );
REQUIRE(31 == bx::uint32_cntlz<uint32_t>(1) );
REQUIRE(32 == bx::uint32_cntlz<uint32_t>(0) );
REQUIRE(63 == bx::uint32_cntlz<uint64_t>(1) );
REQUIRE(64 == bx::uint32_cntlz<uint64_t>(0) );
// Bit counts: zero, one, alternating-bit patterns (half of the bits set) and
// all-ones values for each width.
REQUIRE( 0 == bx::uint32_cntbits(0) );
REQUIRE( 1 == bx::uint32_cntbits(1) );
REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) );
REQUIRE( 4 == bx::uint32_cntbits<uint8_t>(0x55) );
REQUIRE( 8 == bx::uint32_cntbits<uint16_t>(0x5555) );
REQUIRE(16 == bx::uint32_cntbits<uint32_t>(0x55555555) );
REQUIRE(32 == bx::uint32_cntbits<uint64_t>(0x5555555555555555) );
REQUIRE( 8 == bx::uint32_cntbits(UINT8_MAX) );
REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) );
REQUIRE(32 == bx::uint32_cntbits(UINT32_MAX) );
REQUIRE(64 == bx::uint32_cntbits(UINT64_MAX) );
}
TEST_CASE("uint32_part")