Added load/store aligned/unaligned. (#366)

Branimir Karadžić
2026-01-26 18:00:52 -08:00
committed by GitHub
parent 0e7d969a8a
commit 3ed36d14b0
3 changed files with 130 additions and 57 deletions


@@ -174,6 +174,38 @@ namespace bx
 	template<typename Ty>
 	const Ty* addressOf(const void* _ptr, ptrdiff_t _offsetInBytes = 0);
 
+	/// Loads a value of type Ty from a naturally aligned memory location.
+	///
+	/// @param[in] _ptr Pointer to the memory location.
+	/// @returns The loaded value of type Ty.
+	///
+	template<typename Ty>
+	inline Ty loadAligned(const void* _ptr);
+
+	/// Loads a value of type Ty from a potentially unaligned memory location.
+	///
+	/// @param[in] _ptr Pointer to the memory location.
+	/// @returns The loaded value of type Ty.
+	///
+	template<typename Ty>
+	inline Ty loadUnaligned(const void* _ptr);
+
+	/// Stores a value of type Ty to a naturally aligned memory location.
+	///
+	/// @param[out] _outPtr Pointer to the destination memory.
+	/// @param[in] _value The value to store.
+	///
+	template<typename Ty>
+	inline void storeAligned(void* _outPtr, const Ty& _value);
+
+	/// Stores a value of type Ty to a potentially unaligned memory location.
+	///
+	/// @param[out] _outPtr Pointer to the destination memory.
+	/// @param[in] _value The value to store.
+	///
+	template<typename Ty>
+	inline void storeUnaligned(void* _outPtr, const Ty& _value);
+
 	/// Swap two values.
 	template<typename Ty>
 	void swap(Ty& _a, Ty& _b);
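For orientation, a minimal usage sketch of the new API (illustrative only, not part of the commit; it assumes the declarations above ship in bx/bx.h):

// Hypothetical example: reading and writing a packed little-endian record
// in a byte stream. loadUnaligned/storeUnaligned are safe at any offset;
// the *Aligned variants require the pointer to be naturally aligned.
#include <bx/bx.h>

struct Record
{
	uint32_t id;
	uint64_t offset;
};

Record readRecord(const uint8_t* _stream)
{
	Record record;
	record.id     = bx::loadUnaligned<uint32_t>(_stream);
	record.offset = bx::loadUnaligned<uint64_t>(_stream + 4); // offset 4 is not 8-aligned
	return record;
}

void writeRecord(uint8_t* _stream, const Record& _record)
{
	bx::storeUnaligned<uint32_t>(_stream, _record.id);
	bx::storeUnaligned<uint64_t>(_stream + 4, _record.offset);
}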


@@ -59,6 +59,81 @@ namespace bx
 		return (const Ty*)( (const uint8_t*)_ptr + _offsetInBytes);
 	}
 
+	template<typename Ty>
+	inline Ty loadAligned(const void* _ptr)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be a trivially copyable type.");
+		return *(const Ty*)_ptr;
+	}
+
+	template<typename Ty>
+	inline Ty loadUnaligned(const void* _ptr)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be a trivially copyable type.");
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		// A 1-byte-aligned typedef lets the compiler emit an unaligned-safe load.
+		typedef Ty BX_ATTRIBUTE(aligned(1) ) UnalignedTy;
+		return *(UnalignedTy*)_ptr;
+#else
+		// Portable fallback: copy through a properly aligned temporary.
+		Ty value;
+		memCopy(&value, _ptr, sizeof(Ty) );
+		return value;
+#endif // BX_COMPILER_*
+	}
+
+	// Specializations for the common hash cases: assemble the value
+	// byte-by-byte (little-endian order), which is unaligned-safe everywhere.
+	template<>
+	inline uint32_t loadUnaligned(const void* _ptr)
+	{
+		const uint8_t* data = (const uint8_t*)_ptr;
+		return 0
+			| uint32_t(data[3])<<24
+			| uint32_t(data[2])<<16
+			| uint32_t(data[1])<<8
+			| uint32_t(data[0])
+			;
+	}
+
+	template<>
+	inline uint64_t loadUnaligned(const void* _ptr)
+	{
+		const uint8_t* data = (const uint8_t*)_ptr;
+		return 0
+			| uint64_t(data[7])<<56
+			| uint64_t(data[6])<<48
+			| uint64_t(data[5])<<40
+			| uint64_t(data[4])<<32
+			| uint64_t(data[3])<<24
+			| uint64_t(data[2])<<16
+			| uint64_t(data[1])<<8
+			| uint64_t(data[0])
+			;
+	}
+
+	template<typename Ty>
+	inline void storeAligned(void* _ptr, const Ty& _value)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be a trivially copyable type.");
+		*(Ty*)_ptr = _value;
+	}
+
+	template<typename Ty>
+	inline void storeUnaligned(void* _ptr, const Ty& _value)
+	{
+		static_assert(isTriviallyCopyable<Ty>(), "Ty must be a trivially copyable type.");
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		typedef Ty BX_ATTRIBUTE(aligned(1) ) UnalignedTy;
+		*(UnalignedTy*)_ptr = _value;
+#else
+		memCopy(_ptr, &_value, sizeof(Ty) );
+#endif // BX_COMPILER_*
+	}
+
 	template<typename Ty>
 	inline void swap(Ty& _a, Ty& _b)
 	{
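The GCC/Clang path above leans on a lesser-known property of the aligned attribute: applied to a typedef it can lower the assumed alignment, so dereferencing through the typedef makes the compiler emit an unaligned-safe load (a plain mov on x86, byte loads where the ISA requires them). A standalone sketch of the same idea without the bx macros (illustrative names, assuming a GNU-compatible compiler):

#include <cstdint>
#include <cstring>

template<typename Ty>
Ty load_unaligned(const void* _ptr)
{
#if defined(__GNUC__) || defined(__clang__)
	// On a typedef, aligned(1) lowers the assumed alignment, so the
	// dereference below is compiled as an unaligned-safe access.
	typedef Ty __attribute__((aligned(1))) UnalignedTy;
	return *(const UnalignedTy*)_ptr;
#else
	// Strictly portable route; compilers typically collapse this
	// fixed-size memcpy into a single load as well.
	Ty result;
	std::memcpy(&result, _ptr, sizeof(Ty));
	return result;
#endif
}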


@@ -147,40 +147,6 @@ void HashCrc32::add(const void* _data, int32_t _len)
 	m_hash = hash;
 }
 
-BX_FORCE_INLINE uint32_t readAligned32(const uint8_t* _data)
-{
-	return *(uint32_t*)_data;
-}
-
-BX_FORCE_INLINE uint32_t readUnaligned32(const uint8_t* _data)
-{
-	return 0
-		| uint32_t(_data[3])<<24
-		| uint32_t(_data[2])<<16
-		| uint32_t(_data[1])<<8
-		| uint32_t(_data[0])
-		;
-}
-
-BX_FORCE_INLINE uint64_t readAligned64(const uint8_t* _data)
-{
-	return *(uint64_t*)_data;
-}
-
-BX_FORCE_INLINE uint64_t readUnaligned64(const uint8_t* _data)
-{
-	return 0
-		| uint64_t(_data[7])<<56
-		| uint64_t(_data[6])<<48
-		| uint64_t(_data[5])<<40
-		| uint64_t(_data[4])<<32
-		| uint64_t(_data[3])<<24
-		| uint64_t(_data[2])<<16
-		| uint64_t(_data[1])<<8
-		| uint64_t(_data[0])
-		;
-}
-
 namespace
 {
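The removed local helpers map one-to-one onto the new templates, so callers migrate mechanically. A hypothetical updated caller (assuming the templates ship in bx/bx.h):

#include <bx/bx.h>

uint32_t hashWord(const uint8_t* _data, bool _aligned)
{
	return _aligned
		? bx::loadAligned<uint32_t>(_data)    // was: readAligned32(_data)
		: bx::loadUnaligned<uint32_t>(_data)  // was: readUnaligned32(_data)
		;
}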
@@ -202,9 +168,9 @@ void mixTail32(Ty& _self, const uint8_t*& _data, int32_t& _len)
 	}
 }
 
-typedef uint32_t (*ReadData32Fn)(const uint8_t* _data);
+typedef uint32_t (*LoadData32Fn)(const void* _data);
 
-template<typename Ty, ReadData32Fn FnT>
+template<typename Ty, LoadData32Fn FnT>
 void addData32(Ty& _self, const uint8_t* _data, int32_t _len)
 {
 	while (_len >= 4)
@@ -232,11 +198,11 @@ void addData32(ThisT* _this, const void* _data, int32_t _len)
 	if (BX_UNLIKELY(!isAligned(data, 4) ) )
 	{
-		addData32<SelfT, readUnaligned32>(self, data, _len);
+		addData32<SelfT, loadUnaligned<uint32_t>>(self, data, _len);
 		return;
 	}
 
-	addData32<SelfT, readAligned32>(self, data, _len);
+	addData32<SelfT, loadAligned<uint32_t>>(self, data, _len);
 }
 
 template<typename Ty>
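The dispatch pattern here keeps the branch out of the hot loop: alignment is tested once, and the matching load function is baked into the loop body as a non-type template parameter. A reduced, self-contained sketch of the pattern (illustrative names, not the commit's code):

#include <cstdint>
#include <cstring>

typedef uint32_t (*Load32Fn)(const void* _data);

static uint32_t load_aligned32(const void* _data)   { return *(const uint32_t*)_data; }
static uint32_t load_unaligned32(const void* _data) { uint32_t v; std::memcpy(&v, _data, 4); return v; }

// Each instantiation gets the load function as a compile-time constant,
// so the inner loop carries no per-iteration branch.
template<Load32Fn FnT>
uint32_t sumWords(const uint8_t* _data, int32_t _len)
{
	uint32_t sum = 0;
	while (_len >= 4)
	{
		sum   += FnT(_data);
		_data += 4;
		_len  -= 4;
	}
	return sum;
}

uint32_t sumWordsDispatch(const uint8_t* _data, int32_t _len)
{
	// One alignment test up front selects the specialized loop.
	if (0 != (std::uintptr_t(_data) & 3) )
	{
		return sumWords<load_unaligned32>(_data, _len);
	}

	return sumWords<load_aligned32>(_data, _len);
}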
@@ -258,9 +224,9 @@ void mixTail128(Ty& _self, const uint8_t*& _data, int32_t& _len)
 	}
 }
 
-typedef uint64_t (*ReadData64Fn)(const uint8_t* _data);
+typedef uint64_t (*LoadData64Fn)(const void* _data);
 
-template<typename Ty, ReadData64Fn FnT>
+template<typename Ty, LoadData64Fn FnT>
 void addData128(Ty& _self, const uint8_t* _data, int32_t _len)
 {
 	while (_len >= 16)
@@ -289,11 +255,11 @@ void addData128(ThisT* _this, const void* _data, int32_t _len)
 	if (BX_UNLIKELY(!isAligned(data, 8) ) )
 	{
-		addData128<SelfT, readUnaligned64>(self, data, _len);
+		addData128<SelfT, loadUnaligned<uint64_t>>(self, data, _len);
 		return;
 	}
 
-	addData128<SelfT, readAligned64>(self, data, _len);
+	addData128<SelfT, loadAligned<uint64_t>>(self, data, _len);
 }
 
 } // namespace
@@ -488,23 +454,23 @@ struct HashMurmur3_64Pod
 		switch (m_count)
 		{
-		case 15: kk[1] ^= uint64_t(m_tail[14]) << 48; [[fallthrough]];
-		case 14: kk[1] ^= uint64_t(m_tail[13]) << 40; [[fallthrough]];
-		case 13: kk[1] ^= uint64_t(m_tail[12]) << 32; [[fallthrough]];
-		case 12: kk[1] ^= uint64_t(m_tail[11]) << 24; [[fallthrough]];
-		case 11: kk[1] ^= uint64_t(m_tail[10]) << 16; [[fallthrough]];
-		case 10: kk[1] ^= uint64_t(m_tail[ 9]) << 8;  [[fallthrough]];
-		case 9:  kk[1] ^= uint64_t(m_tail[ 8]);       mix2(kk[1]);
+		case 15: kk[1] |= uint64_t(m_tail[14]) << 48; [[fallthrough]];
+		case 14: kk[1] |= uint64_t(m_tail[13]) << 40; [[fallthrough]];
+		case 13: kk[1] |= uint64_t(m_tail[12]) << 32; [[fallthrough]];
+		case 12: kk[1] |= uint64_t(m_tail[11]) << 24; [[fallthrough]];
+		case 11: kk[1] |= uint64_t(m_tail[10]) << 16; [[fallthrough]];
+		case 10: kk[1] |= uint64_t(m_tail[ 9]) << 8;  [[fallthrough]];
+		case 9:  kk[1] |= uint64_t(m_tail[ 8]);       mix2(kk[1]);
 			[[fallthrough]];
-		case 8:  kk[0] ^= uint64_t(m_tail[ 7]) << 56; [[fallthrough]];
-		case 7:  kk[0] ^= uint64_t(m_tail[ 6]) << 48; [[fallthrough]];
-		case 6:  kk[0] ^= uint64_t(m_tail[ 5]) << 40; [[fallthrough]];
-		case 5:  kk[0] ^= uint64_t(m_tail[ 4]) << 32; [[fallthrough]];
-		case 4:  kk[0] ^= uint64_t(m_tail[ 3]) << 24; [[fallthrough]];
-		case 3:  kk[0] ^= uint64_t(m_tail[ 2]) << 16; [[fallthrough]];
-		case 2:  kk[0] ^= uint64_t(m_tail[ 1]) << 8;  [[fallthrough]];
-		case 1:  kk[0] ^= uint64_t(m_tail[ 0]);       mix1(kk[0]);
+		case 8:  kk[0] |= uint64_t(m_tail[ 7]) << 56; [[fallthrough]];
+		case 7:  kk[0] |= uint64_t(m_tail[ 6]) << 48; [[fallthrough]];
+		case 6:  kk[0] |= uint64_t(m_tail[ 5]) << 40; [[fallthrough]];
+		case 5:  kk[0] |= uint64_t(m_tail[ 4]) << 32; [[fallthrough]];
+		case 4:  kk[0] |= uint64_t(m_tail[ 3]) << 24; [[fallthrough]];
+		case 3:  kk[0] |= uint64_t(m_tail[ 2]) << 16; [[fallthrough]];
+		case 2:  kk[0] |= uint64_t(m_tail[ 1]) << 8;  [[fallthrough]];
+		case 1:  kk[0] |= uint64_t(m_tail[ 0]);       mix1(kk[0]);
 			break;
 
 		case 0:  break;
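The switch from ^= to |= in the tail assembly should be behavior-preserving: assuming kk[] starts zeroed before the tail bytes are merged in (the initialization is outside this hunk), each shifted byte occupies disjoint bits, so OR and XOR build the same value; |= simply states the byte-packing intent more directly. A tiny illustrative check:

// Hypothetical demo, not part of the commit: on a zeroed accumulator,
// '|' and '^' pack disjoint byte lanes identically.
#include <cassert>
#include <cstdint>

int main()
{
	const uint8_t tail[2] = { 0xab, 0xcd };

	uint64_t viaOr = 0, viaXor = 0;
	viaOr  |= uint64_t(tail[1]) << 8; viaOr  |= uint64_t(tail[0]);
	viaXor ^= uint64_t(tail[1]) << 8; viaXor ^= uint64_t(tail[0]);
	assert(viaOr == viaXor); // both are 0xcdab
	return 0;
}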