From 421eaf58a9237ca60af96448e7227adafbab2d16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?=
 =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?=
Date: Mon, 9 Sep 2019 20:36:48 -0700
Subject: [PATCH] Updated count bits, count leading/trailing zeros.

---
Reviewer notes (this section is not part of the commit message):

 * uint32_cntbits, uint32_cntlz and uint32_cnttz become function templates
   on the argument type; the former uint64_* declarations are removed from
   uint32_t.h in favour of the uint64_t specializations.
 * The 8/16-bit and signed specializations forward with an EXPLICIT template
   argument (for example uint32_cntlz<uint32_t>(_val)-24). Without it,
   deduction on _val selects the same specialization again and the function
   calls itself.
 * The GCC/Clang paths test for zero before calling __builtin_clz*/ctz*,
   so a zero input yields the full bit width (32 or 64) in those branches.

 include/bx/inline/uint32_t.inl | 77 ++++++++++++++++++++++++++++++++++
 include/bx/uint32_t.h          | 21 ++++------
 tests/uint32_test.cpp          | 34 +++++++++++----
 3 files changed, 111 insertions(+), 21 deletions(-)

diff --git a/include/bx/inline/uint32_t.inl b/include/bx/inline/uint32_t.inl
index 6e047c3..8fc1af6 100644
--- a/include/bx/inline/uint32_t.inl
+++ b/include/bx/inline/uint32_t.inl
@@ -307,8 +307,12 @@ namespace bx
 		return result;
 	}
 
+	template<>
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
 	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return __builtin_popcount(_val);
+#else
 		const uint32_t tmp0 = uint32_srl(_val, 1);
 		const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
 		const uint32_t tmp2 = uint32_sub(_val, tmp1);
@@ -328,10 +332,37 @@ namespace bx
 		const uint32_t result = uint32_and(tmpF, 0x3f);
 
 		return result;
+#endif // BX_COMPILER_*
 	}
 
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint64_t _val)
+	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return __builtin_popcountll(_val);
+#else
+		const uint32_t lo = uint32_t(_val&UINT32_MAX);
+		const uint32_t hi = uint32_t(_val>>32);
+
+		const uint32_t total = uint32_cntbits(lo)
+		                     + uint32_cntbits(hi);
+		return total;
+#endif // BX_COMPILER_*
+	}
+
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint8_t _val) { return uint32_cntbits<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int8_t _val) { return uint32_cntbits<uint8_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint16_t _val) { return uint32_cntbits<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int16_t _val) { return uint32_cntbits<uint16_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int32_t _val) { return uint32_cntbits<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int64_t _val) { return uint32_cntbits<uint64_t>(_val); }
+
+	template<>
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
 	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 32 : __builtin_clz(_val);
+#else
 		const uint32_t tmp0 = uint32_srl(_val, 1);
 		const uint32_t tmp1 = uint32_or(tmp0, _val);
 		const uint32_t tmp2 = uint32_srl(tmp1, 2);
@@ -346,18 +377,64 @@ namespace bx
 		const uint32_t result = uint32_cntbits(tmpA);
 
 		return result;
+#endif // BX_COMPILER_*
 	}
 
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint64_t _val)
+	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 64 : __builtin_clzll(_val);
+#else
+		return _val & UINT64_C(0xffffffff00000000)
+			? uint32_cntlz(uint32_t(_val>>32) )
+			: uint32_cntlz(uint32_t(_val) ) + 32
+			;
+#endif // BX_COMPILER_*
+	}
+
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint8_t _val) { return uint32_cntlz<uint32_t>(_val)-24; }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int8_t _val) { return uint32_cntlz<uint8_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint16_t _val) { return uint32_cntlz<uint32_t>(_val)-16; }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int16_t _val) { return uint32_cntlz<uint16_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int32_t _val) { return uint32_cntlz<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int64_t _val) { return uint32_cntlz<uint64_t>(_val); }
+
+	template<>
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
 	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 32 : __builtin_ctz(_val);
+#else
 		const uint32_t tmp0 = uint32_not(_val);
 		const uint32_t tmp1 = uint32_dec(_val);
 		const uint32_t tmp2 = uint32_and(tmp0, tmp1);
 		const uint32_t result = uint32_cntbits(tmp2);
 
 		return result;
+#endif // BX_COMPILER_*
 	}
 
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint64_t _val)
+	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 64 : __builtin_ctzll(_val);
+#else
+		return _val & UINT64_C(0xffffffff)
+			? uint32_cnttz(uint32_t(_val) )
+			: uint32_cnttz(uint32_t(_val>>32) ) + 32
+			;
+#endif // BX_COMPILER_*
+	}
+
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint8_t _val) { return bx::min(8u, uint32_cnttz<uint32_t>(_val) ); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int8_t _val) { return uint32_cnttz<uint8_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint16_t _val) { return bx::min(16u, uint32_cnttz<uint32_t>(_val) ); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int16_t _val) { return uint32_cnttz<uint16_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int32_t _val) { return uint32_cnttz<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int64_t _val) { return uint32_cnttz<uint64_t>(_val); }
+
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
 	{
 		// shuffle:
diff --git a/include/bx/uint32_t.h b/include/bx/uint32_t.h
index d78e2ac..3eb8bdf 100644
--- a/include/bx/uint32_t.h
+++ b/include/bx/uint32_t.h
@@ -149,14 +149,18 @@ namespace bx
 
 	/// Count number of bits set.
 	///
-	BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val);
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(Ty _val);
 
 	/// Count number of leading zeros.
 	///
-	BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val);
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(Ty _val);
 
+	/// Count number of trailing zeros.
 	///
-	BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val);
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(Ty _val);
 
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a);
@@ -170,17 +174,6 @@ namespace bx
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_nextpow2(uint32_t _a);
 
-	/// Count number of bits set.
-	///
-	BX_CONSTEXPR_FUNC uint32_t uint64_cntbits(uint64_t _val);
-
-	/// Count number of leading zeros.
-	///
-	BX_CONSTEXPR_FUNC uint32_t uint64_cntlz(uint64_t _val);
-
-	///
-	BX_CONSTEXPR_FUNC uint32_t uint64_cnttz(uint64_t _val);
-
 	///
 	BX_CONSTEXPR_FUNC uint64_t uint64_li(uint64_t _a);
 
diff --git a/tests/uint32_test.cpp b/tests/uint32_test.cpp
index 69fd2d7..a3e3d07 100644
--- a/tests/uint32_test.cpp
+++ b/tests/uint32_test.cpp
@@ -30,19 +30,39 @@ TEST_CASE("StrideAlign")
 
 TEST_CASE("uint32_cnt")
 {
-	REQUIRE( 0 == bx::uint32_cnttz(UINT32_C(1) ) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint8_t>(1) );
+	REQUIRE( 7 == bx::uint32_cnttz<uint8_t>(1<<7) );
+	REQUIRE( 8 == bx::uint32_cnttz<uint8_t>(0) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint16_t>(1) );
+	REQUIRE(15 == bx::uint32_cnttz<uint16_t>(1<<15) );
+	REQUIRE(16 == bx::uint32_cnttz<uint16_t>(0) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint32_t>(1) );
+	REQUIRE(32 == bx::uint32_cnttz<uint32_t>(0) );
+	REQUIRE(31 == bx::uint32_cnttz<uint32_t>(1<<31) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint64_t>(1) );
+	REQUIRE(64 == bx::uint32_cnttz<uint64_t>(0) );
 
-	REQUIRE(31 == bx::uint32_cntlz(UINT32_C(1) ) );
-
-	REQUIRE( 0 == bx::uint64_cnttz(UINT64_C(1) ) );
-
-	REQUIRE(63 == bx::uint64_cntlz(UINT64_C(1) ) );
+	REQUIRE( 7 == bx::uint32_cntlz<uint8_t>(1) );
+	REQUIRE( 8 == bx::uint32_cntlz<uint8_t>(0) );
+	REQUIRE(15 == bx::uint32_cntlz<uint16_t>(1) );
+	REQUIRE(16 == bx::uint32_cntlz<uint16_t>(0) );
+	REQUIRE(31 == bx::uint32_cntlz<uint32_t>(1) );
+	REQUIRE(32 == bx::uint32_cntlz<uint32_t>(0) );
+	REQUIRE(63 == bx::uint32_cntlz<uint64_t>(1) );
+	REQUIRE(64 == bx::uint32_cntlz<uint64_t>(0) );
 
+	REQUIRE( 0 == bx::uint32_cntbits(0) );
 	REQUIRE( 1 == bx::uint32_cntbits(1) );
 
-	REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) );
+	REQUIRE( 4 == bx::uint32_cntbits<uint8_t>(0x55) );
+	REQUIRE( 8 == bx::uint32_cntbits<uint16_t>(0x5555) );
+	REQUIRE(16 == bx::uint32_cntbits<uint32_t>(0x55555555) );
+	REQUIRE(32 == bx::uint32_cntbits<uint64_t>(0x5555555555555555) );
 
+	REQUIRE( 8 == bx::uint32_cntbits(UINT8_MAX) );
+	REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) );
 	REQUIRE(32 == bx::uint32_cntbits(UINT32_MAX) );
+	REQUIRE(64 == bx::uint32_cntbits(UINT64_MAX) );
 }
 
 TEST_CASE("uint32_part")