From 3ed36d14b098c84d7f0561bbc797cf805ea4a58c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?=
Date: Mon, 26 Jan 2026 18:00:52 -0800
Subject: [PATCH] Added load/store aligned/unaligned. (#366)

---
 include/bx/bx.h          | 32 ++++++++++++++++
 include/bx/inline/bx.inl | 75 +++++++++++++++++++++++++++++++++++++
 src/hash.cpp             | 80 ++++++++++++----------------------------
 3 files changed, 130 insertions(+), 57 deletions(-)

diff --git a/include/bx/bx.h b/include/bx/bx.h
index 76f9351..3a167b7 100644
--- a/include/bx/bx.h
+++ b/include/bx/bx.h
@@ -174,6 +174,38 @@ namespace bx
 	template<typename Ty>
 	const Ty* addressOf(const void* _ptr, ptrdiff_t _offsetInBytes = 0);
 
+	/// Loads a value of type Ty from a naturally aligned memory location.
+	///
+	/// @param[in] _ptr Pointer to the memory location.
+	/// @returns The loaded value of type Ty.
+	///
+	template<typename Ty>
+	inline Ty loadAligned(const void* _ptr);
+
+	/// Loads a value of type Ty from a potentially unaligned memory location.
+	///
+	/// @param[in] _ptr Pointer to the memory location.
+	/// @returns The loaded value of type Ty.
+	///
+	template<typename Ty>
+	inline Ty loadUnaligned(const void* _ptr);
+
+	/// Stores a value of type Ty to a naturally aligned memory location.
+	///
+	/// @param[out] _outPtr Pointer to the destination memory.
+	/// @param[in] _value The value to store.
+	///
+	template<typename Ty>
+	inline void storeAligned(void* _outPtr, const Ty& _value);
+
+	/// Stores a value of type Ty to a potentially unaligned memory location.
+	///
+	/// @param[out] _outPtr Pointer to the destination memory.
+	/// @param[in] _value The value to store.
+	///
+	template<typename Ty>
+	inline void storeUnaligned(void* _outPtr, const Ty& _value);
+
 	/// Swap two values.
 	template<typename Ty>
 	void swap(Ty& _a, Ty& _b);
diff --git a/include/bx/inline/bx.inl b/include/bx/inline/bx.inl
index 5fcb269..afadbb4 100644
--- a/include/bx/inline/bx.inl
+++ b/include/bx/inline/bx.inl
@@ -59,6 +59,81 @@ namespace bx
 		return (const Ty*)( (const uint8_t*)_ptr + _offsetInBytes);
 	}
 
+	template<typename Ty>
+	inline Ty loadAligned(const void* _ptr)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
+
+		return *(const Ty*)_ptr;
+	}
+
+	template<typename Ty>
+	inline Ty loadUnaligned(const void* _ptr)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
+
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		typedef Ty BX_ATTRIBUTE(aligned(1) ) UnalignedTy;
+		return *(UnalignedTy*)_ptr;
+#else
+		Ty value;
+		memCopy(&value, _ptr, sizeof(Ty) );
+
+		return value;
+#endif // BX_COMPILER_*
+	}
+
+	template<>
+	inline uint32_t loadUnaligned<uint32_t>(const void* _ptr)
+	{
+		const uint8_t* data = (const uint8_t*)_ptr;
+
+		return 0
+			| uint32_t(data[3])<<24
+			| uint32_t(data[2])<<16
+			| uint32_t(data[1])<<8
+			| uint32_t(data[0])
+			;
+	}
+
+	template<>
+	inline uint64_t loadUnaligned<uint64_t>(const void* _ptr)
+	{
+		const uint8_t* data = (const uint8_t*)_ptr;
+
+		return 0
+			| uint64_t(data[7])<<56
+			| uint64_t(data[6])<<48
+			| uint64_t(data[5])<<40
+			| uint64_t(data[4])<<32
+			| uint64_t(data[3])<<24
+			| uint64_t(data[2])<<16
+			| uint64_t(data[1])<<8
+			| uint64_t(data[0])
+			;
+	}
+
+	template<typename Ty>
+	inline void storeAligned(void* _ptr, const Ty& _value)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
+
+		*(Ty*)_ptr = _value;
+	}
+
+	template<typename Ty>
+	inline void storeUnaligned(void* _ptr, const Ty& _value)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
+
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		typedef Ty BX_ATTRIBUTE(aligned(1) ) UnalignedTy;
+		*(UnalignedTy*)_ptr = _value;
+#else
+		memCopy(_ptr, &_value, sizeof(Ty) );
+#endif // BX_COMPILER_*
+	}
+
 	template<typename Ty>
 	inline void swap(Ty& _a, Ty& _b)
 	{
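The bx.inl hunk above is the entire implementation: on GCC and Clang the unaligned variants go through a BX_ATTRIBUTE(aligned(1)) typedef so the compiler emits whatever unaligned access the target permits, and everywhere else they fall back to memCopy. A minimal usage sketch, assuming this patch is applied; readLength and writeLength are hypothetical callers, not part of the patch:

    #include <bx/bx.h>

    // A byte stream offers no alignment guarantee, so dereferencing a
    // casted uint32_t pointer would be undefined behavior on
    // alignment-strict targets; loadUnaligned is safe either way.
    uint32_t readLength(const uint8_t* _stream, ptrdiff_t _offset)
    {
        return bx::loadUnaligned<uint32_t>(_stream + _offset);
    }

    // Writes go through the mirror image of the same mechanism.
    void writeLength(uint8_t* _stream, ptrdiff_t _offset, uint32_t _len)
    {
        bx::storeUnaligned<uint32_t>(_stream + _offset, _len);
    }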
diff --git a/src/hash.cpp b/src/hash.cpp
index 69971c7..901ad29 100644
--- a/src/hash.cpp
+++ b/src/hash.cpp
@@ -147,40 +147,6 @@ void HashCrc32::add(const void* _data, int32_t _len)
 	m_hash = hash;
 }
 
-BX_FORCE_INLINE uint32_t readAligned32(const uint8_t* _data)
-{
-	return *(uint32_t*)_data;
-}
-
-BX_FORCE_INLINE uint32_t readUnaligned32(const uint8_t* _data)
-{
-	return 0
-		| uint32_t(_data[3])<<24
-		| uint32_t(_data[2])<<16
-		| uint32_t(_data[1])<<8
-		| uint32_t(_data[0])
-		;
-}
-
-BX_FORCE_INLINE uint64_t readAligned64(const uint8_t* _data)
-{
-	return *(uint64_t*)_data;
-}
-
-BX_FORCE_INLINE uint64_t readUnaligned64(const uint8_t* _data)
-{
-	return 0
-		| uint64_t(_data[7])<<56
-		| uint64_t(_data[6])<<48
-		| uint64_t(_data[5])<<40
-		| uint64_t(_data[4])<<32
-		| uint64_t(_data[3])<<24
-		| uint64_t(_data[2])<<16
-		| uint64_t(_data[1])<<8
-		| uint64_t(_data[0])
-		;
-}
-
 namespace
 {
 
@@ -202,9 +168,9 @@ void mixTail32(Ty& _self, const uint8_t*& _data, int32_t& _len)
 	}
 }
 
-typedef uint32_t (*ReadData32Fn)(const uint8_t* _data);
+typedef uint32_t (*LoadData32Fn)(const void* _data);
 
-template<typename Ty, ReadData32Fn ReadFn>
+template<typename Ty, LoadData32Fn LoadFn>
 void addData32(Ty& _self, const uint8_t* _data, int32_t _len)
 {
 	while (_len >= 4)
@@ -232,11 +198,11 @@ void addData32(ThisT* _this, const void* _data, int32_t _len)
 
 	if (BX_UNLIKELY(!isAligned(data, 4) ) )
 	{
-		addData32<Ty, readUnaligned32>(self, data, _len);
+		addData32<Ty, loadUnaligned<uint32_t> >(self, data, _len);
 		return;
 	}
 
-	addData32<Ty, readAligned32>(self, data, _len);
+	addData32<Ty, loadAligned<uint32_t> >(self, data, _len);
 }
 
 template<typename Ty>
@@ -258,9 +224,9 @@ void mixTail128(Ty& _self, const uint8_t*& _data, int32_t& _len)
 	}
 }
 
-typedef uint64_t (*ReadData64Fn)(const uint8_t* _data);
+typedef uint64_t (*LoadData64Fn)(const void* _data);
 
-template<typename Ty, ReadData64Fn ReadFn>
+template<typename Ty, LoadData64Fn LoadFn>
 void addData128(Ty& _self, const uint8_t* _data, int32_t _len)
 {
 	while (_len >= 16)
@@ -289,11 +255,11 @@ void addData128(ThisT* _this, const void* _data, int32_t _len)
 
 	if (BX_UNLIKELY(!isAligned(data, 8) ) )
 	{
-		addData128<Ty, readUnaligned64>(self, data, _len);
+		addData128<Ty, loadUnaligned<uint64_t> >(self, data, _len);
 		return;
 	}
 
-	addData128<Ty, readAligned64>(self, data, _len);
+	addData128<Ty, loadAligned<uint64_t> >(self, data, _len);
 }
 
 } // namespace
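In hash.cpp the change is mechanical: the four file-local read helpers are deleted in favor of the bx-wide ones (note that loadUnaligned<uint32_t> and loadUnaligned<uint64_t> assemble bytes in the same little-endian order as the readUnaligned32/64 they replace), and the alignment dispatch now passes loadAligned/loadUnaligned instantiations as the function-pointer template argument. Reduced to a stand-alone sketch of that pattern (the names and the checksum are illustrative, not bx code):

    #include <stdint.h>
    #include <string.h>

    typedef uint32_t (*Load32Fn)(const void* _data);

    uint32_t load32Copy(const void* _data)
    {
        uint32_t value;
        memcpy(&value, _data, sizeof(value) ); // no alignment requirement
        return value;
    }

    uint32_t load32Direct(const void* _data)
    {
        return *(const uint32_t*)_data; // valid only for 4-byte aligned input
    }

    template<Load32Fn LoadFn>
    uint32_t sum32(const uint8_t* _data, int32_t _len)
    {
        uint32_t sum = 0;
        for (; _len >= 4; _data += 4, _len -= 4)
        {
            // LoadFn is a template argument, so each instantiation
            // inlines its loader; no indirect call in the hot loop.
            sum += LoadFn(_data);
        }
        return sum;
    }

    uint32_t sum(const void* _data, int32_t _len)
    {
        const uint8_t* data = (const uint8_t*)_data;

        // Test alignment once, up front, exactly as addData32 above
        // does with bx::isAligned.
        if (0 != ( (uintptr_t)data & 3) )
        {
            return sum32<load32Copy>(data, _len);
        }

        return sum32<load32Direct>(data, _len);
    }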
@@ -488,23 +454,23 @@ struct HashMurmur3_64Pod
 
 		switch (m_count)
 		{
-			case 15: kk[1] ^= uint64_t(m_tail[14]) << 48; [[fallthrough]];
-			case 14: kk[1] ^= uint64_t(m_tail[13]) << 40; [[fallthrough]];
-			case 13: kk[1] ^= uint64_t(m_tail[12]) << 32; [[fallthrough]];
-			case 12: kk[1] ^= uint64_t(m_tail[11]) << 24; [[fallthrough]];
-			case 11: kk[1] ^= uint64_t(m_tail[10]) << 16; [[fallthrough]];
-			case 10: kk[1] ^= uint64_t(m_tail[ 9]) <<  8; [[fallthrough]];
-			case  9: kk[1] ^= uint64_t(m_tail[ 8]);       mix2(kk[1]);
+			case 15: kk[1] |= uint64_t(m_tail[14]) << 48; [[fallthrough]];
+			case 14: kk[1] |= uint64_t(m_tail[13]) << 40; [[fallthrough]];
+			case 13: kk[1] |= uint64_t(m_tail[12]) << 32; [[fallthrough]];
+			case 12: kk[1] |= uint64_t(m_tail[11]) << 24; [[fallthrough]];
+			case 11: kk[1] |= uint64_t(m_tail[10]) << 16; [[fallthrough]];
+			case 10: kk[1] |= uint64_t(m_tail[ 9]) <<  8; [[fallthrough]];
+			case  9: kk[1] |= uint64_t(m_tail[ 8]);       mix2(kk[1]);
 				[[fallthrough]];
-			case  8: kk[0] ^= uint64_t(m_tail[ 7]) << 56; [[fallthrough]];
-			case  7: kk[0] ^= uint64_t(m_tail[ 6]) << 48; [[fallthrough]];
-			case  6: kk[0] ^= uint64_t(m_tail[ 5]) << 40; [[fallthrough]];
-			case  5: kk[0] ^= uint64_t(m_tail[ 4]) << 32; [[fallthrough]];
-			case  4: kk[0] ^= uint64_t(m_tail[ 3]) << 24; [[fallthrough]];
-			case  3: kk[0] ^= uint64_t(m_tail[ 2]) << 16; [[fallthrough]];
-			case  2: kk[0] ^= uint64_t(m_tail[ 1]) <<  8; [[fallthrough]];
-			case  1: kk[0] ^= uint64_t(m_tail[ 0]);       mix1(kk[0]);
+			case  8: kk[0] |= uint64_t(m_tail[ 7]) << 56; [[fallthrough]];
+			case  7: kk[0] |= uint64_t(m_tail[ 6]) << 48; [[fallthrough]];
+			case  6: kk[0] |= uint64_t(m_tail[ 5]) << 40; [[fallthrough]];
+			case  5: kk[0] |= uint64_t(m_tail[ 4]) << 32; [[fallthrough]];
+			case  4: kk[0] |= uint64_t(m_tail[ 3]) << 24; [[fallthrough]];
+			case  3: kk[0] |= uint64_t(m_tail[ 2]) << 16; [[fallthrough]];
+			case  2: kk[0] |= uint64_t(m_tail[ 1]) <<  8; [[fallthrough]];
+			case  1: kk[0] |= uint64_t(m_tail[ 0]);       mix1(kk[0]);
 				break;
 
 			case 0:
 				break;
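The swap of ^= for |= in the HashMurmur3_64Pod tail is behavior-preserving, assuming kk[0] and kk[1] are zero-initialized before the switch (their initialization sits outside this hunk): each tail byte is shifted into its own 8-bit lane, so OR and XOR into zero build the identical word, and |= states the intent, assembling an integer from bytes, more directly. A tiny self-contained check of that equivalence (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main()
    {
        const uint8_t tail[4] = { 0x78, 0x56, 0x34, 0x12 };

        uint64_t kkOr  = 0;
        uint64_t kkXor = 0;

        for (int ii = 0; ii < 4; ++ii)
        {
            // Disjoint 8-bit lanes, starting from zero: OR == XOR.
            kkOr  |= uint64_t(tail[ii]) << (8*ii);
            kkXor ^= uint64_t(tail[ii]) << (8*ii);
        }

        assert(kkOr == kkXor);
        assert(kkOr == UINT64_C(0x12345678) );
        return 0;
    }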