Added MurmurHash3.

This commit is contained in:
Бранимир Караџић
2023-10-21 11:07:25 -07:00
parent 97332257ff
commit ac1401faad
4 changed files with 339 additions and 92 deletions

View File

@@ -76,7 +76,7 @@ namespace bx
uint32_t m_hash;
};
/// 32-bit non-cryptographic multiply and rotate hash.
class HashMurmur2A
{
public:
@@ -101,9 +101,39 @@ namespace bx
private:
uint32_t m_hash;
uint32_t m_tail;
uint32_t m_count;
uint32_t m_size;
uint8_t m_tail[4];
uint8_t m_count;
};
/// 32-bit non-cryptographic multiply and rotate hash.
///
/// Incremental hasher: call begin(), feed data with any of the add()
/// overloads, then call end() to obtain the 32-bit result.
class HashMurmur3
{
public:
/// Starts a new hash: stores `_seed` as the initial hash value and resets
/// the consumed-byte counter and the pending-tail counter to zero.
void begin(uint32_t _seed = 0);
/// Feeds `_len` bytes starting at `_data` into the hash.
void add(const void* _data, int32_t _len);
/// Wraps `_data` in a StringView and feeds it into the hash.
void add(const char* _data);
/// Feeds the bytes referenced by `_data` (getPtr() / getLength() ) into the hash.
void add(const StringView& _data);
/// Feeds the sizeof(Ty) bytes of `_data` into the hash.
template<typename Ty>
void add(const Ty& _data);
/// Finalizes the internal state in place and returns the resulting hash value.
uint32_t end();
private:
// Running hash value; seeded by begin().
uint32_t m_hash;
// Total number of bytes passed to add().
uint32_t m_size;
// Bytes buffered until a complete 4-byte block is available.
uint8_t m_tail[4];
// Number of valid bytes currently stored in m_tail.
uint8_t m_count;
};
///

View File

@@ -73,9 +73,8 @@ namespace bx
/// Starts a new hash: seeds the running hash value with `_seed` and resets
/// the consumed-byte counter and the pending-tail counter.
inline void HashMurmur2A::begin(uint32_t _seed)
{
	m_hash  = _seed;
	m_size  = 0;
	// m_tail is a byte buffer indexed by m_count, so resetting the count is
	// enough to discard any buffered bytes; the array itself is not assigned.
	m_count = 0;
}
inline void HashMurmur2A::add(const char* _data)
@@ -94,6 +93,29 @@ namespace bx
add(&_data, sizeof(Ty) );
}
/// Begins a new hash computation seeded with `_seed`.
inline void HashMurmur3::begin(uint32_t _seed)
{
	// Nothing consumed and nothing buffered yet.
	m_count = 0;
	m_size  = 0;

	// The seed is the starting point of the running hash.
	m_hash = _seed;
}
inline void HashMurmur3::add(const char* _data)
{
return add(StringView(_data) );
}
/// Hashes the bytes referenced by the string view.
inline void HashMurmur3::add(const StringView& _data)
{
	const auto ptr = _data.getPtr();
	const auto len = _data.getLength();
	add(ptr, len);
}
template<typename Ty>
inline void HashMurmur3::add(const Ty& _data)
{
add(&_data, sizeof(Ty) );
}
template<typename HashT>
inline uint32_t hash(const void* _data, uint32_t _size)
{

View File

@@ -147,47 +147,6 @@ void HashCrc32::add(const void* _data, int32_t _len)
m_hash = hash;
}
// Plain-data view of the HashMurmur2A state; the static assert right after
// this struct checks that both types have the same size.
// NOTE(review): this variant packs the pending tail bytes into one 32-bit
// word, while a second HashMurmur2APod definition further down uses a byte
// array. Confirm which one is meant to remain.
struct HashMurmur2APod
{
// Running hash value.
uint32_t m_hash;
// Pending tail bytes; mixTail() ORs each byte in at bit offset m_count * 8.
uint32_t m_tail;
// Number of bytes currently held in m_tail.
uint32_t m_count;
// Total number of bytes added.
uint32_t m_size;
};
BX_STATIC_ASSERT(sizeof(HashMurmur2A) == sizeof(HashMurmur2APod) );
/// MurmurHash2A mixing step: scrambles the block `_k` and folds it into the
/// running hash `_h`. Both arguments are modified in place.
BX_FORCE_INLINE void mmix(uint32_t& _h, uint32_t& _k)
{
	constexpr uint32_t kMul   = 0x5bd1e995;
	constexpr uint32_t kShift = 24;

	// Scramble the incoming block.
	_k *= kMul;
	_k ^= (_k >> kShift);
	_k *= kMul;

	// Fold the scrambled block into the running hash.
	_h *= kMul;
	_h ^= _k;
}
/// Consumes bytes one at a time into the pending tail word while either a
/// partial block is already buffered or fewer than four input bytes remain.
/// Every time four bytes have been collected they are mixed into the hash.
/// `_data` and `_len` are advanced past the consumed bytes.
static void mixTail(HashMurmur2APod& _self, const uint8_t*& _data, int32_t& _len)
{
	while (0 != _len)
	{
		// Nothing buffered and a whole block is available: the block-wise
		// path can take over from here.
		if (0 == _self.m_count
		&&  4 <= _len)
		{
			break;
		}

		const uint32_t shift = _self.m_count * 8;
		_self.m_tail |= (*_data++) << shift;
		++_self.m_count;
		--_len;

		if (4 == _self.m_count)
		{
			mmix(_self.m_hash, _self.m_tail);
			_self.m_count = 0;
			_self.m_tail  = 0;
		}
	}
}
BX_FORCE_INLINE uint32_t readAligned(const uint8_t* _data)
{
return *(uint32_t*)_data;
@@ -195,36 +154,45 @@ BX_FORCE_INLINE uint32_t readAligned(const uint8_t* _data)
BX_FORCE_INLINE uint32_t readUnaligned(const uint8_t* _data)
{
if (BX_ENABLED(BX_CPU_ENDIAN_BIG) )
return 0
| _data[3]<<24
| _data[2]<<16
| _data[1]<<8
| _data[0]
;
}
namespace
{
template<typename Ty>
void mixTail(Ty& _self, const uint8_t*& _data, int32_t& _len)
{
while (0 != _len
&& (0 < _self.m_count || 4 > _len) )
{
return 0
| _data[0]<<24
| _data[1]<<16
| _data[2]<<8
| _data[3]
;
}
else
{
return 0
| _data[0]
| _data[1]<<8
| _data[2]<<16
| _data[3]<<24
;
_self.m_tail[_self.m_count++] = *_data++;
_len--;
if (4 == _self.m_count)
{
uint32_t kk = *( (uint32_t*)&_self.m_tail[0]);
_self.mix(kk);
_self.m_count = 0;
}
}
}
typedef uint32_t (*ReadDataFn)(const uint8_t* _data);
template<ReadDataFn FnT>
static void addData(HashMurmur2APod& _self, const uint8_t* _data, int32_t _len)
template<typename Ty, ReadDataFn FnT>
void addData(Ty& _self, const uint8_t* _data, int32_t _len)
{
while (_len >= 4)
{
uint32_t kk = FnT(_data);
mmix(_self.m_hash, kk);
_self.mix(kk);
_data += 4;
_len -= 4;
@@ -233,36 +201,149 @@ static void addData(HashMurmur2APod& _self, const uint8_t* _data, int32_t _len)
mixTail(_self, _data, _len);
}
void HashMurmur2A::add(const void* _data, int32_t _len)
template<typename SelfT, typename ThisT>
void addData(ThisT* _this, const void* _data, int32_t _len)
{
HashMurmur2APod& self = *(HashMurmur2APod*)this;
SelfT& self = *(SelfT*)_this;
const uint8_t* data = (const uint8_t*)_data;
m_size += _len;
self.m_size += _len;
mixTail(self, data, _len);
if (BX_UNLIKELY(!isAligned(data, 4) ) )
{
addData<readUnaligned>(self, data, _len);
addData<SelfT, readUnaligned>(self, data, _len);
return;
}
addData<readAligned>(self, data, _len);
addData<SelfT, readAligned>(self, data, _len);
}
/// Reinterprets `_this` as its plain-data state type `SelfT`, runs that
/// type's finalize() step and returns the resulting hash value.
template<typename SelfT, typename ThisT>
uint32_t finalize(ThisT* _this)
{
	SelfT* state = (SelfT*)_this;
	state->finalize();
	return state->m_hash;
}
} // namespace
/// Plain-data state for HashMurmur2A together with the MurmurHash2A mixing
/// and finalization steps. The generic mixTail/addData/finalize helpers
/// operate on this type.
struct HashMurmur2APod
{
	uint32_t m_hash;    // Running hash value.
	uint32_t m_size;    // Total number of bytes added.
	uint8_t  m_tail[4]; // Bytes buffered until a full 4-byte block is available.
	uint8_t  m_count;   // Number of valid bytes in m_tail.

	static constexpr uint32_t kMurmur2AMul = 0x5bd1e995;

	/// Scrambles the block `_k` and folds it into m_hash.
	///
	/// The block is taken by value so that mixing a member (m_size in
	/// finalize() ) does not overwrite that member as a side effect.
	BX_FORCE_INLINE void mix(uint32_t _k)
	{
		_k *= kMurmur2AMul;
		_k ^= _k >> 24;
		_k *= kMurmur2AMul;

		m_hash *= kMurmur2AMul;
		m_hash ^= _k;
	}

	/// Mixes the remaining 0..3 tail bytes and the total size into m_hash,
	/// then applies the final avalanche. m_hash holds the result afterwards.
	void finalize()
	{
		uint32_t kk = 0;

		// Assemble the leftover bytes, lowest byte first; a zero block is
		// mixed when no bytes are pending.
		switch (m_count)
		{
			case 3: kk |= m_tail[2] << 16; BX_FALLTHROUGH;
			case 2: kk |= m_tail[1] << 8;  BX_FALLTHROUGH;
			case 1: kk |= m_tail[0];       BX_FALLTHROUGH;
			case 0: mix(kk); break;
			default: BX_ASSERT(false, "Bug, m_count can't be %d (expected < 4).", m_count); BX_UNREACHABLE;
		}

		mix(m_size);

		m_hash ^= m_hash >> 13;
		m_hash *= kMurmur2AMul;
		m_hash ^= m_hash >> 15;
	}
};
BX_STATIC_ASSERT(sizeof(HashMurmur2A) == sizeof(HashMurmur2APod) );
void HashMurmur2A::add(const void* _data, int32_t _len)
{
addData<HashMurmur2APod>(this, _data, _len);
}
/// Finalizes the hash state in place (tail bytes, total size and final
/// avalanche are handled by HashMurmur2APod::finalize) and returns the result.
uint32_t HashMurmur2A::end()
{
	return finalize<HashMurmur2APod>(this);
}
/// Plain-data state for HashMurmur3 together with the MurmurHash3 (x86, 32-bit)
/// mixing and finalization steps. The generic mixTail/addData/finalize helpers
/// operate on this type.
struct HashMurmur3Pod
{
	uint32_t m_hash;    // Running hash value.
	uint32_t m_size;    // Total number of bytes added.
	uint8_t  m_tail[4]; // Bytes buffered until a full 4-byte block is available.
	uint8_t  m_count;   // Number of valid bytes in m_tail.

	static constexpr uint32_t kMurmur3Mul1 = 0xcc9e2d51;
	static constexpr uint32_t kMurmur3Mul2 = 0x1b873593;
	static constexpr uint32_t kMurmur3Mul3 = 0x85ebca6b;
	static constexpr uint32_t kMurmur3Mul4 = 0xc2b2ae35;
	static constexpr uint32_t kMurmur3Add  = 0xe6546b64;

	/// Scrambles the block `_k` and XORs it into m_hash. Used on its own for
	/// the tail block, which skips the rotate/multiply-add step.
	BX_FORCE_INLINE void mix1(uint32_t _k)
	{
		_k *= kMurmur3Mul1;
		_k  = uint32_rol(_k, 15);
		_k *= kMurmur3Mul2;
		m_hash ^= _k;
	}

	/// Full per-block step: mix1() followed by rotate and multiply-add of m_hash.
	BX_FORCE_INLINE void mix(uint32_t _k)
	{
		mix1(_k);

		m_hash = uint32_rol(m_hash, 13);
		m_hash = m_hash*5 + kMurmur3Add;
	}

	/// Mixes the remaining 1..3 tail bytes (if any) and the total size into
	/// m_hash, then applies the final avalanche. m_hash holds the result
	/// afterwards.
	void finalize()
	{
		uint32_t kk = 0;

		// Assemble the leftover bytes, lowest byte first; nothing is mixed
		// when no bytes are pending.
		switch (m_count)
		{
			case 3: kk |= m_tail[2] << 16; BX_FALLTHROUGH;
			case 2: kk |= m_tail[1] << 8;  BX_FALLTHROUGH;
			case 1: kk |= m_tail[0]; mix1(kk); break;
			case 0: break;
			default: BX_ASSERT(false, "Bug, m_count can't be %d (expected < 4).", m_count); BX_UNREACHABLE;
		}

		m_hash ^= m_size;

		m_hash ^= m_hash >> 16;
		m_hash *= kMurmur3Mul3;
		m_hash ^= m_hash >> 13;
		m_hash *= kMurmur3Mul4;
		m_hash ^= m_hash >> 16;
	}
};
BX_STATIC_ASSERT(sizeof(HashMurmur3) == sizeof(HashMurmur3Pod) );
void HashMurmur3::add(const void* _data, int32_t _len)
{
addData<HashMurmur3Pod>(this, _data, _len);
}
/// Finalizes the hash state in place via HashMurmur3Pod::finalize and
/// returns the resulting value.
uint32_t HashMurmur3::end()
{
	const uint32_t result = finalize<HashMurmur3Pod>(this);
	return result;
}
} // namespace bx

View File

@@ -32,22 +32,23 @@ struct HashTest
uint32_t crc32[bx::HashCrc32::Count];
uint32_t adler32;
uint32_t murmur2a;
uint32_t murmur3;
const char* input;
};
// Expected digests for each test input, one row per input string. Every row
// must provide a value for each HashTest field, including `murmur3`.
const HashTest s_hashTest[] =
{
	//  Crc32                                | Adler32    | Murmur2A   | Murmur3    | Input
	//  Ieee        Castagnoli  Koopman      |            |            |            |
	{ { 0,          0,          0          }, 1,          0,          0,          ""       },
	{ { 0xe8b7be43, 0xc1d04330, 0x0da2aa8a }, 0x00620062, 0x0803888b, 0x3c2569b2, "a"      },
	{ { 0x9e83486d, 0xe2a22936, 0x31ec935a }, 0x012600c4, 0x618515af, 0x9bbfd75f, "ab"     },
	{ { 0xc340daab, 0x49e1b6e3, 0x945a1e78 }, 0x06060205, 0x94e3dc4d, 0x1e661875, "abvgd"  },
	{ { 0x07642fe2, 0x45a04162, 0x3d4bf72d }, 0x020a00d6, 0xe602fc07, 0x7af40d31, "1389"   },
	{ { 0x26d75737, 0xb73d7b80, 0xd524eb40 }, 0x04530139, 0x58d37863, 0x0c090160, "555333" },
};
TEST_CASE("HashCrc32", "")
TEST_CASE("HashCrc32", "[hash]")
{
#if 0
makeCrcTable(0xedb88320);
@@ -71,7 +72,7 @@ TEST_CASE("HashCrc32", "")
}
}
TEST_CASE("HashAdler32", "")
TEST_CASE("HashAdler32", "[hash]")
{
for (uint32_t ii = 0; ii < BX_COUNTOF(s_hashTest); ++ii)
{
@@ -84,6 +85,9 @@ TEST_CASE("HashAdler32", "")
}
}
namespace
{
/*-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby
//
@@ -96,8 +100,6 @@ TEST_CASE("HashAdler32", "")
// more amenable to incremental implementations.
*/
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
{
const uint32_t m = 0x5bd1e995;
@@ -108,6 +110,8 @@ uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
uint32_t h = seed;
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
@@ -130,6 +134,8 @@ uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
mmix(h,t);
mmix(h,l);
#undef mmix
h ^= h >> 13;
h *= m;
h ^= h >> 15;
@@ -137,7 +143,9 @@ uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
return h;
}
TEST_CASE("HashMurmur2A", "")
} // namespace
TEST_CASE("HashMurmur2A", "[hash]")
{
uint32_t seed = 0;
@@ -154,11 +162,117 @@ TEST_CASE("HashMurmur2A", "")
}
}
// Feeding the input in several add() calls must give the same digest as
// hashing the concatenated string in one go.
TEST_CASE("HashMurmur2A-Separate-Add", "[hash]")
{
	bx::HashMurmur2A hash;
	hash.begin();
	hash.add("0123456789");
	hash.add("abvgd012345");
	hash.add("1389");
	hash.add("555333");

	// end() finalizes the internal state in place, so it is called exactly
	// once, after all the pieces have been added.
	REQUIRE(MurmurHash2A("0123456789abvgd0123451389555333", 31) == hash.end() );
}
namespace
{
// MurmurHash3 32-bit finalization mix: alternating xor-shift and multiply
// steps applied to the hash value `h`.
BX_FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
	const uint32_t kMulA = 0x85ebca6b;
	const uint32_t kMulB = 0xc2b2ae35;

	uint32_t mixed = h;
	mixed = (mixed ^ (mixed >> 16) ) * kMulA;
	mixed = (mixed ^ (mixed >> 13) ) * kMulB;

	return mixed ^ (mixed >> 16);
}
// Rotates `x` left by `r` bits.
//
// The count is reduced modulo 32 so that r == 0 (or any multiple of 32) never
// produces a shift by the full width of the type, which is undefined behavior.
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
	const uint32_t shift = uint32_t(r) & 31u;
	return (x << shift) | (x >> ( (32u - shift) & 31u) );
}
// Reference MurmurHash3 (x86, 32-bit) used to cross-check bx::HashMurmur3.
// Hashes `len` bytes at `key` starting from `seed`.
uint32_t MurmurHash3_x86_32(const void * key, int len, uint32_t seed)
{
	const uint32_t c1 = 0xcc9e2d51;
	const uint32_t c2 = 0x1b873593;

	const uint8_t* bytes     = (const uint8_t*)key;
	const int      numBlocks = len / 4;
	const uint8_t* tail      = bytes + numBlocks*4;

	uint32_t hash = seed;

	// Body: whole 4-byte blocks, indexed backwards from the end of the
	// block region with a negative index that counts up to zero.
	const uint32_t* blocksEnd = (const uint32_t*)tail;
	int idx = -numBlocks;
	while (0 != idx)
	{
		uint32_t block = blocksEnd[idx];

		block *= c1;
		block  = rotl32(block, 15);
		block *= c2;

		hash ^= block;
		hash  = rotl32(hash, 13);
		hash  = hash*5+0xe6546b64;

		++idx;
	}

	// Tail: the remaining 0..3 bytes, lowest byte first.
	uint32_t rest = 0;
	switch (len & 3)
	{
	case 3: rest ^= tail[2] << 16; BX_FALLTHROUGH;
	case 2: rest ^= tail[1] << 8;  BX_FALLTHROUGH;
	case 1: rest ^= tail[0];
		rest *= c1;
		rest  = rotl32(rest, 15);
		rest *= c2;
		hash ^= rest;
		break;

	default:
		break;
	}

	// Finalization: fold in the length and avalanche.
	hash ^= len;
	return fmix32(hash);
}
} // namespace
// Checks bx::HashMurmur3 and the reference implementation against the
// expected digests in s_hashTest.
TEST_CASE("HashMurmur3", "[hash]")
{
	const uint32_t seed = 0;

	for (const HashTest& test : s_hashTest)
	{
		const auto length = bx::strLen(test.input);

		bx::HashMurmur3 hash;
		hash.begin(seed);
		hash.add(test.input, length);
		const uint32_t result = hash.end();

		const uint32_t sanity = MurmurHash3_x86_32(test.input, length, seed);

		REQUIRE(test.murmur3 == result);
		REQUIRE(test.murmur3 == sanity);
	}
}
// Feeding the input piece by piece must give the same digest as hashing the
// concatenated string with the reference implementation.
TEST_CASE("HashMurmur3-Separate-Add", "[hash]")
{
	const char* const pieces[] =
	{
		"0123456789",
		"abvgd012345",
		"1389",
		"555333",
	};

	bx::HashMurmur3 hash;
	hash.begin();

	for (const char* piece : pieces)
	{
		hash.add(piece);
	}

	REQUIRE(MurmurHash3_x86_32("0123456789abvgd0123451389555333", 31, 0) == hash.end() );
}