mirror of
https://github.com/bkaradzic/bx.git
synced 2026-02-17 20:52:37 +01:00
Added load/store aligned/unaligned. (#366)
This commit is contained in:
committed by
GitHub
parent
0e7d969a8a
commit
3ed36d14b0
@@ -174,6 +174,38 @@ namespace bx
|
||||
template<typename Ty>
|
||||
const Ty* addressOf(const void* _ptr, ptrdiff_t _offsetInBytes = 0);
|
||||
|
||||
/// Loads a value of type Ty from a naturally aligned memory location.
|
||||
///
|
||||
/// @param[in] _ptr Pointer to the memory location.
|
||||
/// @returns The loaded value of type Ty.
|
||||
///
|
||||
template<typename Ty>
|
||||
inline Ty loadAligned(const void* _ptr);
|
||||
|
||||
/// Loads a value of type Ty from a potentially unaligned memory location.
|
||||
///
|
||||
/// @param[in] _ptr Pointer to the memory location.
|
||||
/// @returns The loaded value of type Ty.
|
||||
///
|
||||
template<typename Ty>
|
||||
inline Ty loadUnaligned(const void* _ptr);
|
||||
|
||||
/// Stores a value of type Ty to a naturally aligned memory location.
|
||||
///
|
||||
/// @param[out] _outPtr Pointer to the destination memory.
|
||||
/// @param[in] _value The value to store.
|
||||
///
|
||||
template<typename Ty>
|
||||
inline void storeAligned(void* _outPtr, const Ty& _value);
|
||||
|
||||
/// Stores a value of type Ty to a potentially unaligned memory location.
|
||||
///
|
||||
/// @param[out] _outPtr Pointer to the destination memory.
|
||||
/// @param[in] _value The value to store.
|
||||
///
|
||||
template<typename Ty>
|
||||
inline void storeUnaligned(void* _outPtr, const Ty& _value);
|
||||
|
||||
/// Swap two values.
|
||||
template<typename Ty>
|
||||
void swap(Ty& _a, Ty& _b);
|
||||
|
||||
@@ -59,6 +59,81 @@ namespace bx
|
||||
return (const Ty*)( (const uint8_t*)_ptr + _offsetInBytes);
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
inline Ty loadAligned(const void* _ptr)
|
||||
{
|
||||
static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
|
||||
|
||||
return *(const Ty*)_ptr;
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
inline Ty loadUnaligned(const void* _ptr)
|
||||
{
|
||||
static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
|
||||
|
||||
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
typedef Ty BX_ATTRIBUTE(aligned(1) ) UnalignedTy;
|
||||
return *(UnalignedTy*)_ptr;
|
||||
#else
|
||||
Ty value;
|
||||
memCopy(&value, _ptr, sizeof(Ty) );
|
||||
|
||||
return value;
|
||||
#endif // BX_COMPILER_*
|
||||
}
|
||||
|
||||
template<>
|
||||
inline uint32_t loadUnaligned(const void* _ptr)
|
||||
{
|
||||
const uint8_t* data = (const uint8_t*)_ptr;
|
||||
|
||||
return 0
|
||||
| uint32_t(data[3])<<24
|
||||
| uint32_t(data[2])<<16
|
||||
| uint32_t(data[1])<<8
|
||||
| uint32_t(data[0])
|
||||
;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline uint64_t loadUnaligned(const void* _ptr)
|
||||
{
|
||||
const uint8_t* data = (const uint8_t*)_ptr;
|
||||
|
||||
return 0
|
||||
| uint64_t(data[7])<<56
|
||||
| uint64_t(data[6])<<48
|
||||
| uint64_t(data[5])<<40
|
||||
| uint64_t(data[4])<<32
|
||||
| uint64_t(data[3])<<24
|
||||
| uint64_t(data[2])<<16
|
||||
| uint64_t(data[1])<<8
|
||||
| uint64_t(data[0])
|
||||
;
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
inline void storeAligned(void* _ptr, const Ty& _value)
|
||||
{
|
||||
static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
|
||||
|
||||
*(Ty*)_ptr = _value;
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
inline void storeUnaligned(void* _ptr, const Ty& _value)
|
||||
{
|
||||
static_assert(isTriviallyCopyable<Ty>(), "Ty must be trivially copyable type.");
|
||||
|
||||
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
||||
typedef Ty BX_ATTRIBUTE(aligned(1) ) UnalignedTy;
|
||||
*(UnalignedTy*)_ptr = _value;
|
||||
#else
|
||||
memCopy(_ptr, &_value, sizeof(Ty) );
|
||||
#endif // BX_COMPILER_*
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
inline void swap(Ty& _a, Ty& _b)
|
||||
{
|
||||
|
||||
80
src/hash.cpp
80
src/hash.cpp
@@ -147,40 +147,6 @@ void HashCrc32::add(const void* _data, int32_t _len)
|
||||
m_hash = hash;
|
||||
}
|
||||
|
||||
// Reads one 32-bit word by viewing _data as a uint32_t pointer and
// dereferencing it.
BX_FORCE_INLINE uint32_t readAligned32(const uint8_t* _data)
{
	const uint32_t* word = (const uint32_t*)_data;
	return *word;
}
|
||||
|
||||
// Builds a 32-bit word from four separate byte reads: _data[0] fills
// bits 0..7 and _data[3] fills bits 24..31.
BX_FORCE_INLINE uint32_t readUnaligned32(const uint8_t* _data)
{
	const uint32_t b0 = uint32_t(_data[0]);
	const uint32_t b1 = uint32_t(_data[1]) <<  8;
	const uint32_t b2 = uint32_t(_data[2]) << 16;
	const uint32_t b3 = uint32_t(_data[3]) << 24;

	return b0 | b1 | b2 | b3;
}
|
||||
|
||||
// Reads one 64-bit word by viewing _data as a uint64_t pointer and
// dereferencing it.
BX_FORCE_INLINE uint64_t readAligned64(const uint8_t* _data)
{
	const uint64_t* word = (const uint64_t*)_data;
	return *word;
}
|
||||
|
||||
// Builds a 64-bit word from eight separate byte reads: _data[0] fills
// bits 0..7 and _data[7] fills bits 56..63.
BX_FORCE_INLINE uint64_t readUnaligned64(const uint8_t* _data)
{
	const uint64_t lo = 0
		| (uint64_t(_data[0])      )
		| (uint64_t(_data[1]) <<  8)
		| (uint64_t(_data[2]) << 16)
		| (uint64_t(_data[3]) << 24)
		;

	const uint64_t hi = 0
		| (uint64_t(_data[4]) << 32)
		| (uint64_t(_data[5]) << 40)
		| (uint64_t(_data[6]) << 48)
		| (uint64_t(_data[7]) << 56)
		;

	return lo | hi;
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@@ -202,9 +168,9 @@ void mixTail32(Ty& _self, const uint8_t*& _data, int32_t& _len)
|
||||
}
|
||||
}
|
||||
|
||||
typedef uint32_t (*ReadData32Fn)(const uint8_t* _data);
|
||||
typedef uint32_t (*LoadData32Fn)(const void* _data);
|
||||
|
||||
template<typename Ty, ReadData32Fn FnT>
|
||||
template<typename Ty, LoadData32Fn FnT>
|
||||
void addData32(Ty& _self, const uint8_t* _data, int32_t _len)
|
||||
{
|
||||
while (_len >= 4)
|
||||
@@ -232,11 +198,11 @@ void addData32(ThisT* _this, const void* _data, int32_t _len)
|
||||
|
||||
if (BX_UNLIKELY(!isAligned(data, 4) ) )
|
||||
{
|
||||
addData32<SelfT, readUnaligned32>(self, data, _len);
|
||||
addData32<SelfT, loadUnaligned<uint32_t>>(self, data, _len);
|
||||
return;
|
||||
}
|
||||
|
||||
addData32<SelfT, readAligned32>(self, data, _len);
|
||||
addData32<SelfT, loadAligned<uint32_t>>(self, data, _len);
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
@@ -258,9 +224,9 @@ void mixTail128(Ty& _self, const uint8_t*& _data, int32_t& _len)
|
||||
}
|
||||
}
|
||||
|
||||
typedef uint64_t (*ReadData64Fn)(const uint8_t* _data);
|
||||
typedef uint64_t (*LoadData64Fn)(const void* _data);
|
||||
|
||||
template<typename Ty, ReadData64Fn FnT>
|
||||
template<typename Ty, LoadData64Fn FnT>
|
||||
void addData128(Ty& _self, const uint8_t* _data, int32_t _len)
|
||||
{
|
||||
while (_len >= 16)
|
||||
@@ -289,11 +255,11 @@ void addData128(ThisT* _this, const void* _data, int32_t _len)
|
||||
|
||||
if (BX_UNLIKELY(!isAligned(data, 8) ) )
|
||||
{
|
||||
addData128<SelfT, readUnaligned64>(self, data, _len);
|
||||
addData128<SelfT, loadUnaligned<uint64_t>>(self, data, _len);
|
||||
return;
|
||||
}
|
||||
|
||||
addData128<SelfT, readAligned64>(self, data, _len);
|
||||
addData128<SelfT, loadAligned<uint64_t>>(self, data, _len);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -488,23 +454,23 @@ struct HashMurmur3_64Pod
|
||||
|
||||
switch (m_count)
|
||||
{
|
||||
case 15: kk[1] ^= uint64_t(m_tail[14]) << 48; [[fallthrough]];
|
||||
case 14: kk[1] ^= uint64_t(m_tail[13]) << 40; [[fallthrough]];
|
||||
case 13: kk[1] ^= uint64_t(m_tail[12]) << 32; [[fallthrough]];
|
||||
case 12: kk[1] ^= uint64_t(m_tail[11]) << 24; [[fallthrough]];
|
||||
case 11: kk[1] ^= uint64_t(m_tail[10]) << 16; [[fallthrough]];
|
||||
case 10: kk[1] ^= uint64_t(m_tail[ 9]) << 8; [[fallthrough]];
|
||||
case 9: kk[1] ^= uint64_t(m_tail[ 8]); mix2(kk[1]);
|
||||
case 15: kk[1] |= uint64_t(m_tail[14]) << 48; [[fallthrough]];
|
||||
case 14: kk[1] |= uint64_t(m_tail[13]) << 40; [[fallthrough]];
|
||||
case 13: kk[1] |= uint64_t(m_tail[12]) << 32; [[fallthrough]];
|
||||
case 12: kk[1] |= uint64_t(m_tail[11]) << 24; [[fallthrough]];
|
||||
case 11: kk[1] |= uint64_t(m_tail[10]) << 16; [[fallthrough]];
|
||||
case 10: kk[1] |= uint64_t(m_tail[ 9]) << 8; [[fallthrough]];
|
||||
case 9: kk[1] |= uint64_t(m_tail[ 8]); mix2(kk[1]);
|
||||
[[fallthrough]];
|
||||
|
||||
case 8: kk[0] ^= uint64_t(m_tail[ 7]) << 56; [[fallthrough]];
|
||||
case 7: kk[0] ^= uint64_t(m_tail[ 6]) << 48; [[fallthrough]];
|
||||
case 6: kk[0] ^= uint64_t(m_tail[ 5]) << 40; [[fallthrough]];
|
||||
case 5: kk[0] ^= uint64_t(m_tail[ 4]) << 32; [[fallthrough]];
|
||||
case 4: kk[0] ^= uint64_t(m_tail[ 3]) << 24; [[fallthrough]];
|
||||
case 3: kk[0] ^= uint64_t(m_tail[ 2]) << 16; [[fallthrough]];
|
||||
case 2: kk[0] ^= uint64_t(m_tail[ 1]) << 8; [[fallthrough]];
|
||||
case 1: kk[0] ^= uint64_t(m_tail[ 0]); mix1(kk[0]);
|
||||
case 8: kk[0] |= uint64_t(m_tail[ 7]) << 56; [[fallthrough]];
|
||||
case 7: kk[0] |= uint64_t(m_tail[ 6]) << 48; [[fallthrough]];
|
||||
case 6: kk[0] |= uint64_t(m_tail[ 5]) << 40; [[fallthrough]];
|
||||
case 5: kk[0] |= uint64_t(m_tail[ 4]) << 32; [[fallthrough]];
|
||||
case 4: kk[0] |= uint64_t(m_tail[ 3]) << 24; [[fallthrough]];
|
||||
case 3: kk[0] |= uint64_t(m_tail[ 2]) << 16; [[fallthrough]];
|
||||
case 2: kk[0] |= uint64_t(m_tail[ 1]) << 8; [[fallthrough]];
|
||||
case 1: kk[0] |= uint64_t(m_tail[ 0]); mix1(kk[0]);
|
||||
break;
|
||||
|
||||
case 0: break;
|
||||
|
||||
Reference in New Issue
Block a user