diff --git a/include/bx/inline/uint32_t.inl b/include/bx/inline/uint32_t.inl
index 881995d..ee4b025 100644
--- a/include/bx/inline/uint32_t.inl
+++ b/include/bx/inline/uint32_t.inl
@@ -184,6 +184,28 @@ namespace bx
 		return -!!_a;
 	}
 
+	// Repeat the byte in all four bytes: widen 8 -> 16 bits, then 16 -> 32 bits.
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_splat(uint8_t _val)
+	{
+		const uint32_t tmp0   = uint32_sll(_val, 8);
+		const uint32_t tmp1   = uint32_or(tmp0, _val);
+		const uint32_t tmp2   = uint32_sll(tmp1, 16);
+		const uint32_t result = uint32_or(tmp2, tmp1);
+
+		return result;
+	}
+
+	// Repeat the 16-bit value in both halves of the 32-bit result.
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_splat(uint16_t _val)
+	{
+		const uint32_t tmp    = uint32_sll(_val, 16);
+		const uint32_t result = uint32_or(tmp, _val);
+
+		return result;
+	}
+
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
 	{
 		const uint32_t add    = uint32_add(_a, _b);
@@ -559,6 +581,42 @@ namespace bx
 		return _a * _b;
 	}
 
+	// Repeat the byte in all eight bytes: widen 8 -> 16 -> 32 -> 64 bits.
+	template<>
+	inline BX_CONSTEXPR_FUNC uint64_t uint64_splat(uint8_t _val)
+	{
+		const uint64_t tmp0   = uint64_sll(_val, 8);
+		const uint64_t tmp1   = uint64_or(tmp0, _val);
+		const uint64_t tmp2   = uint64_sll(tmp1, 16);
+		const uint64_t tmp3   = uint64_or(tmp2, tmp1);
+		const uint64_t tmp4   = uint64_sll(tmp3, 32);
+		const uint64_t result = uint64_or(tmp4, tmp3);
+
+		return result;
+	}
+
+	// Repeat the 16-bit value four times: widen 16 -> 32 -> 64 bits.
+	template<>
+	inline BX_CONSTEXPR_FUNC uint64_t uint64_splat(uint16_t _val)
+	{
+		const uint64_t tmp0   = uint64_sll(_val, 16);
+		const uint64_t tmp1   = uint64_or(tmp0, _val);
+		const uint64_t tmp2   = uint64_sll(tmp1, 32);
+		const uint64_t result = uint64_or(tmp2, tmp1);
+
+		return result;
+	}
+
+	// Repeat the 32-bit value in both halves of the 64-bit result.
+	template<>
+	inline BX_CONSTEXPR_FUNC uint64_t uint64_splat(uint32_t _val)
+	{
+		const uint64_t tmp    = uint64_sll(_val, 32);
+		const uint64_t result = uint64_or(tmp, _val);
+
+		return result;
+	}
+
 	inline BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val)
 	{
 #if BX_COMPILER_GCC || BX_COMPILER_CLANG
diff --git a/include/bx/uint32_t.h b/include/bx/uint32_t.h
index 83f52a1..81735f8 100644
--- a/include/bx/uint32_t.h
+++ b/include/bx/uint32_t.h
@@ -102,6 +102,15 @@ namespace bx
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_setnz(uint32_t _a);
 
+	/// Replicates `_val` across all 32 bits of the result.
+	///
+	/// Specializations are provided for `uint8_t` (0x55 -> 0x55555555) and
+	/// `uint16_t` (0x1389 -> 0x13891389). Pass `Ty` explicitly: an integer
+	/// literal argument would deduce `Ty` as `int`.
+	///
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_splat(Ty _val);
+
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b);
 
@@ -235,6 +244,15 @@ namespace bx
 	///
 	BX_CONSTEXPR_FUNC uint64_t uint64_mul(uint64_t _a, uint64_t _b);
 
+	/// Replicates `_val` across all 64 bits of the result.
+	///
+	/// Specializations are provided for `uint8_t`, `uint16_t`
+	/// (0x1389 -> 0x1389138913891389) and `uint32_t`. Pass `Ty` explicitly: an
+	/// integer literal argument would deduce `Ty` as `int`.
+	///
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint64_t uint64_splat(Ty _val);
+
 	///
 	BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val);
 
diff --git a/tests/uint32_test.cpp b/tests/uint32_test.cpp
index 3693c86..85549e6 100644
--- a/tests/uint32_test.cpp
+++ b/tests/uint32_test.cpp
@@ -34,6 +34,21 @@ TEST_CASE("uint32_part", "[uint32_t]")
 	REQUIRE(UINT32_C(0x09249249) == bx::uint32_part1by2(0x3ff) );
 }
 
+TEST_CASE("uint32_splat", "[uint32_t]")
+{
+	REQUIRE(UINT32_C(0x01010101) == bx::uint32_splat<uint8_t>(0x01) );
+	REQUIRE(UINT32_C(0x55555555) == bx::uint32_splat<uint8_t>(0x55) );
+	REQUIRE(UINT32_C(0x13891389) == bx::uint32_splat<uint16_t>(0x1389) );
+}
+
+TEST_CASE("uint64_splat", "[uint32_t]")
+{
+	REQUIRE(UINT64_C(0x0101010101010101) == bx::uint64_splat<uint8_t>(0x01) );
+	REQUIRE(UINT64_C(0x5555555555555555) == bx::uint64_splat<uint8_t>(0x55) );
+	REQUIRE(UINT64_C(0x1389138913891389) == bx::uint64_splat<uint16_t>(0x1389) );
+	REQUIRE(UINT64_C(0x1506138915061389) == bx::uint64_splat<uint32_t>(0x15061389) );
+}
+
 TEST_CASE("uint32_gcd", "[uint32_t]")
 {
 	REQUIRE(1 == bx::uint32_gcd(13, 89) );