Files
space_game/3rdparty/emilib/hash_set.hpp
2026-01-29 17:06:34 +01:00

587 lines
13 KiB
C++

// By Emil Ernerfeldt 2014-2016
// LICENSE:
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
#pragma once
#include <assert.h>

#include <algorithm>  // std::fill_n
#include <cstddef>    // size_t
#include <cstdint>    // std::uint8_t
#include <cstdlib> // malloc
#include <functional> // std::hash
#include <iterator>
#include <new>        // placement new
#include <utility>
namespace emilib {

/// Default key-equality functor: compares two keys with `operator==`.
/// Same contract as std::equal_to<T>.
template<typename T>
struct HashSetEqualTo
{
    constexpr bool operator()(const T& lhs, const T& rhs) const
    {
        return lhs == rhs;
    }
};

/// A cache-friendly hash set with open addressing, linear probing and power-of-two capacity.
///
/// Keys are stored in one flat malloc'ed array (`_keys`) with a parallel array of
/// per-bucket states (`_states`). Erased buckets become tombstones (`State::ACTIVE`)
/// so that probe chains passing through them stay intact. Lookups never probe further
/// than `_max_probe_length` buckets past the key's home bucket.
///
/// Any operation that grows the table (`insert`, `emplace`, `insert_unique`, `reserve`)
/// moves the keys to a new array and therefore invalidates iterators and references.
template <typename KeyT, typename HashT = std::hash<KeyT>, typename EqT = HashSetEqualTo<KeyT>>
class HashSet
{
private:
    using MyType = HashSet<KeyT, HashT, EqT>;

public:
    using size_type       = size_t;
    using value_type      = KeyT;
    using reference       = KeyT&;
    using const_reference = const KeyT&;

    /// Forward iterator over the filled buckets of a mutable set.
    class iterator
    {
    public:
        using iterator_category = std::forward_iterator_tag;
        using difference_type   = size_t;
        using distance_type     = size_t;
        using value_type        = KeyT;
        using pointer           = value_type*;
        using reference         = value_type&;

        /// Creates a singular iterator (no set, bucket 0) with well-defined members.
        iterator() : _set(nullptr), _bucket(0) { }

        iterator(MyType* hash_set, size_t bucket) : _set(hash_set), _bucket(bucket)
        {
        }

        iterator& operator++()
        {
            this->goto_next_element();
            return *this;
        }

        iterator operator++(int)
        {
            size_t old_index = _bucket;
            this->goto_next_element();
            return iterator(_set, old_index);
        }

        reference operator*() const
        {
            return _set->_keys[_bucket];
        }

        pointer operator->() const
        {
            return _set->_keys + _bucket;
        }

        /// Only iterators into the same set may be compared.
        bool operator==(const iterator& rhs) const
        {
            assert(_set == rhs._set);
            return this->_bucket == rhs._bucket;
        }

        bool operator!=(const iterator& rhs) const
        {
            assert(_set == rhs._set);
            return this->_bucket != rhs._bucket;
        }

    private:
        /// Advances to the next FILLED bucket, or to `_num_buckets` (== end()).
        void goto_next_element()
        {
            assert(_bucket < _set->_num_buckets);
            do {
                _bucket++;
            } while (_bucket < _set->_num_buckets && _set->_states[_bucket] != State::FILLED);
        }

    public:
        MyType* _set;
        size_t  _bucket;
    };

    /// Forward iterator over the filled buckets of a const set.
    class const_iterator
    {
    public:
        using iterator_category = std::forward_iterator_tag;
        using difference_type   = size_t;
        using distance_type     = size_t;
        using value_type        = const KeyT;
        using pointer           = value_type*;
        using reference         = value_type&;

        /// Creates a singular iterator (no set, bucket 0) with well-defined members.
        const_iterator() : _set(nullptr), _bucket(0) { }

        /// Implicit conversion from a mutable iterator.
        const_iterator(iterator proto) : _set(proto._set), _bucket(proto._bucket)
        {
        }

        const_iterator(const MyType* hash_set, size_t bucket) : _set(hash_set), _bucket(bucket)
        {
        }

        const_iterator& operator++()
        {
            this->goto_next_element();
            return *this;
        }

        const_iterator operator++(int)
        {
            size_t old_index = _bucket;
            this->goto_next_element();
            return const_iterator(_set, old_index);
        }

        reference operator*() const
        {
            return _set->_keys[_bucket];
        }

        pointer operator->() const
        {
            return _set->_keys + _bucket;
        }

        /// Only iterators into the same set may be compared.
        bool operator==(const const_iterator& rhs) const
        {
            assert(_set == rhs._set);
            return this->_bucket == rhs._bucket;
        }

        bool operator!=(const const_iterator& rhs) const
        {
            assert(_set == rhs._set);
            return this->_bucket != rhs._bucket;
        }

    private:
        /// Advances to the next FILLED bucket, or to `_num_buckets` (== end()).
        void goto_next_element()
        {
            assert(_bucket < _set->_num_buckets);
            do {
                _bucket++;
            } while (_bucket < _set->_num_buckets && _set->_states[_bucket] != State::FILLED);
        }

    public:
        const MyType* _set;
        size_t        _bucket;
    };

    // ------------------------------------------------------------------------

    HashSet() = default;

    /// Copies every key of `other`. The hasher and equality functor of the new set
    /// are default-constructed, not copied from `other`.
    HashSet(const HashSet& other)
    {
        reserve(other.size());
        insert(other.cbegin(), other.cend());
    }

    /// Takes over the storage of `other`, leaving `other` empty.
    HashSet(HashSet&& other)
    {
        this->swap(other);
    }

    /// Replaces the contents with a copy of `other`, keeping the current capacity
    /// if it is large enough. Self-assignment is a no-op.
    HashSet& operator=(const HashSet& other)
    {
        // Without this guard, clear() would wipe the very elements we are about to copy.
        if (this != &other) {
            clear();
            reserve(other.size());
            insert(other.cbegin(), other.cend());
        }
        return *this;
    }

    /// Exchanges contents with `other`; the previous contents of `*this` end up in `other`.
    HashSet& operator=(HashSet&& other)
    {
        this->swap(other);
        return *this;
    }

    ~HashSet()
    {
        for (size_t bucket = 0; bucket < _num_buckets; ++bucket) {
            if (_states[bucket] == State::FILLED) {
                _keys[bucket].~KeyT();
            }
        }
        std::free(_states);
        std::free(_keys);
    }

    /// Exchanges all state (functors, storage, counters) with `other`.
    void swap(HashSet& other)
    {
        std::swap(_hasher,           other._hasher);
        std::swap(_eq,               other._eq);
        std::swap(_states,           other._states);
        std::swap(_keys,             other._keys);
        std::swap(_num_buckets,      other._num_buckets);
        std::swap(_num_filled,       other._num_filled);
        std::swap(_max_probe_length, other._max_probe_length);
        std::swap(_mask,             other._mask);
    }

    // -------------------------------------------------------------

    iterator begin()
    {
        return iterator(this, first_filled_bucket());
    }

    const_iterator cbegin() const
    {
        return const_iterator(this, first_filled_bucket());
    }

    const_iterator begin() const
    {
        return cbegin();
    }

    iterator end()
    {
        return iterator(this, _num_buckets);
    }

    const_iterator cend() const
    {
        return const_iterator(this, _num_buckets);
    }

    const_iterator end() const
    {
        return cend();
    }

    /// Number of keys currently stored.
    size_t size() const
    {
        return _num_filled;
    }

    bool empty() const
    {
        return _num_filled == 0;
    }

    /// Returns the number of buckets.
    size_t bucket_count() const
    {
        return _num_buckets;
    }

    /// Returns average number of elements per bucket (0 when no buckets are allocated).
    float load_factor() const
    {
        if (_num_buckets == 0) {
            return 0.0f; // avoid 0/0 on a set that never allocated
        }
        return static_cast<float>(_num_filled) / static_cast<float>(_num_buckets);
    }

    // ------------------------------------------------------------

    /// Returns an iterator to `key`, or end() if it is not in the set.
    iterator find(const KeyT& key)
    {
        const size_t bucket = this->find_filled_bucket(key);
        if (bucket == no_bucket()) {
            return this->end();
        }
        return iterator(this, bucket);
    }

    /// Returns an iterator to `key`, or end() if it is not in the set.
    const_iterator find(const KeyT& key) const
    {
        const size_t bucket = this->find_filled_bucket(key);
        if (bucket == no_bucket()) {
            return this->end();
        }
        return const_iterator(this, bucket);
    }

    bool contains(const KeyT& k) const
    {
        return find_filled_bucket(k) != no_bucket();
    }

    /// Returns 1 if `k` is in the set, 0 otherwise.
    size_t count(const KeyT& k) const
    {
        return find_filled_bucket(k) != no_bucket() ? 1 : 0;
    }

    // -----------------------------------------------------

    /// Insert an element, unless it already exists.
    /// Returns a pair consisting of an iterator to the inserted element
    /// (or to the element that prevented the insertion)
    /// and a bool denoting whether the insertion took place.
    /// Inserting a key that is already present never grows the table.
    std::pair<iterator, bool> insert(const KeyT& key)
    {
        return insert_impl(key);
    }

    /// Insert an element, unless it already exists.
    /// Returns a pair consisting of an iterator to the inserted element
    /// (or to the element that prevented the insertion)
    /// and a bool denoting whether the insertion took place.
    /// Inserting a key that is already present never grows the table.
    std::pair<iterator, bool> insert(KeyT&& key)
    {
        return insert_impl(std::move(key));
    }

    /// Constructs a key from `args` and inserts it (see insert()).
    template<class... Args>
    std::pair<iterator, bool> emplace(Args&&... args)
    {
        return insert(KeyT(std::forward<Args>(args)...));
    }

    /// Inserts every key in [begin, end).
    void insert(const_iterator begin, const_iterator end)
    {
        // TODO: reserve space exactly once.
        for (; begin != end; ++begin) {
            insert(*begin);
        }
    }

    /// Same as insert(), but contains(key) MUST be false
    void insert_unique(KeyT key)
    {
        assert(!contains(key));
        check_expand_need();
        const size_t bucket = find_empty_bucket(key);
        // Construct first: if the constructor throws the bucket must not be marked FILLED.
        ::new (static_cast<void*>(_keys + bucket)) KeyT(std::move(key));
        _states[bucket] = State::FILLED;
        _num_filled++;
    }

    // -------------------------------------------------------

    /// Erase an element from the hash set.
    /// return false if element was not found.
    bool erase(const KeyT& key)
    {
        const size_t bucket = find_filled_bucket(key);
        if (bucket == no_bucket()) {
            return false;
        }
        _states[bucket] = State::ACTIVE; // tombstone: keeps probe chains through this bucket alive
        _keys[bucket].~KeyT();
        _num_filled -= 1;
        return true;
    }

    /// Erase an element using an iterator.
    /// Returns an iterator to the next element (or end()).
    iterator erase(iterator it)
    {
        assert(it._set == this);
        assert(it._bucket < _num_buckets);
        _states[it._bucket] = State::ACTIVE; // tombstone, see erase(const KeyT&)
        _keys[it._bucket].~KeyT();
        _num_filled -= 1;
        return ++it;
    }

    /// Remove all elements, keeping full capacity.
    void clear()
    {
        for (size_t bucket = 0; bucket < _num_buckets; ++bucket) {
            if (_states[bucket] == State::FILLED) {
                _keys[bucket].~KeyT();
            }
            // With no elements left there are no probe chains, so tombstones are dropped too.
            _states[bucket] = State::INACTIVE;
        }
        _num_filled = 0;
        _max_probe_length = -1;
    }

    /// Make room for this many elements.
    /// Does nothing if the table is already large enough; otherwise allocates a larger
    /// table and moves every key into it. Calls std::abort() if allocation fails.
    void reserve(size_t num_elems)
    {
        const size_t required = required_buckets(num_elems);
        if (required <= _num_buckets) {
            return;
        }

        // Capacity is always a power of two so that `& _mask` can replace `% _num_buckets`.
        size_t num_buckets = 4;
        while (num_buckets < required) { num_buckets *= 2; }

        auto new_states = static_cast<State*>(std::malloc(num_buckets * sizeof(State)));
        auto new_keys   = static_cast<KeyT*>(std::malloc(num_buckets * sizeof(KeyT)));

        if (!new_states || !new_keys) {
            std::free(new_states);
            std::free(new_keys);
            // throw std::bad_alloc();
            std::abort();
        }

        const size_t old_num_buckets = _num_buckets;
        State* const old_states      = _states;
        KeyT* const  old_keys        = _keys;

        _num_filled  = 0;
        _num_buckets = num_buckets;
        _mask        = _num_buckets - 1;
        _states      = new_states;
        _keys        = new_keys;

        std::fill_n(_states, num_buckets, State::INACTIVE);

        _max_probe_length = -1;

        // Re-insert every key into the new table; tombstones are not carried over.
        for (size_t src_bucket = 0; src_bucket < old_num_buckets; ++src_bucket) {
            if (old_states[src_bucket] != State::FILLED) {
                continue;
            }
            KeyT& src = old_keys[src_bucket];

            const size_t dst_bucket = find_empty_bucket(src);
            assert(dst_bucket != no_bucket());
            assert(_states[dst_bucket] != State::FILLED);
            ::new (static_cast<void*>(_keys + dst_bucket)) KeyT(std::move(src));
            _states[dst_bucket] = State::FILLED;
            _num_filled += 1;

            src.~KeyT();
        }

        std::free(old_states);
        std::free(old_keys);
    }

private:
    /// Sentinel returned by the lookup helpers when no bucket matches.
    static constexpr size_t no_bucket()
    {
        return static_cast<size_t>(-1);
    }

    /// Minimum bucket count reserve() accepts for `num_elems` elements.
    static constexpr size_t required_buckets(size_t num_elems)
    {
        return num_elems + num_elems / 2 + 1;
    }

    /// Index of the first FILLED bucket, or `_num_buckets` if there is none.
    size_t first_filled_bucket() const
    {
        size_t bucket = 0;
        while (bucket < _num_buckets && _states[bucket] != State::FILLED) {
            ++bucket;
        }
        return bucket;
    }

    /// Shared implementation of both insert() overloads.
    /// `K` is deduced as `const KeyT&` (copy) or `KeyT` (move).
    template<typename K>
    std::pair<iterator, bool> insert_impl(K&& key)
    {
        if (required_buckets(_num_filled + 1) > _num_buckets) {
            // The table would have to grow. Growing frees the old key array, and `key`
            // may refer to an element stored in it, so look for a duplicate first.
            // A key that is not in the set cannot alias one of its elements.
            const size_t existing = find_filled_bucket(key);
            if (existing != no_bucket()) {
                return { iterator(this, existing), false };
            }
            check_expand_need();
        }

        const size_t bucket = find_or_allocate(key);
        if (_states[bucket] == State::FILLED) {
            return { iterator(this, bucket), false };
        }

        // Construct first: if the constructor throws the bucket must not be marked FILLED.
        ::new (static_cast<void*>(_keys + bucket)) KeyT(std::forward<K>(key));
        _states[bucket] = State::FILLED;
        _num_filled++;
        return { iterator(this, bucket), true };
    }

    // Can we fit another element?
    void check_expand_need()
    {
        reserve(_num_filled + 1);
    }

    // Find the bucket with this key, or return (size_t)-1
    size_t find_filled_bucket(const KeyT& key) const
    {
        if (empty()) { return no_bucket(); } // Optimization

        auto hash_value = _hasher(key);
        for (int offset = 0; offset <= _max_probe_length; ++offset) {
            auto bucket = (hash_value + offset) & _mask;
            if (_states[bucket] == State::FILLED) {
                if (_eq(_keys[bucket], key)) {
                    return bucket;
                }
            } else if (_states[bucket] == State::INACTIVE) {
                return no_bucket(); // End of the chain!
            }
        }
        return no_bucket();
    }

    // Find the bucket with this key, or return a good empty bucket to place the key in.
    // In the latter case, the bucket is expected to be filled.
    size_t find_or_allocate(const KeyT& key)
    {
        auto hash_value = _hasher(key);
        size_t hole = no_bucket();
        int offset = 0;
        for (; offset <= _max_probe_length; ++offset) {
            auto bucket = (hash_value + offset) & _mask;

            if (_states[bucket] == State::FILLED) {
                if (_eq(_keys[bucket], key)) {
                    return bucket;
                }
            } else if (_states[bucket] == State::INACTIVE) {
                return bucket;
            } else {
                // ACTIVE: keep searching
                if (hole == no_bucket()) {
                    hole = bucket;
                }
            }
        }

        // No key found - but maybe a hole for it
        assert(offset == _max_probe_length + 1);

        if (hole != no_bucket()) {
            return hole;
        }

        // No hole found within _max_probe_length
        for (; ; ++offset) {
            auto bucket = (hash_value + offset) & _mask;

            if (_states[bucket] != State::FILLED) {
                _max_probe_length = offset;
                return bucket;
            }
        }
    }

    // key is not in this map. Find a place to put it.
    size_t find_empty_bucket(const KeyT& key)
    {
        auto hash_value = _hasher(key);
        for (int offset = 0; ; ++offset) {
            auto bucket = (hash_value + offset) & _mask;
            if (_states[bucket] != State::FILLED) {
                if (offset > _max_probe_length) {
                    _max_probe_length = offset;
                }
                return bucket;
            }
        }
    }

private:
    enum class State : std::uint8_t
    {
        INACTIVE, // Never been touched
        ACTIVE,   // Is inside a search-chain, but is empty
        FILLED    // Is set with key/value
    };

    HashT  _hasher;
    EqT    _eq;
    State* _states           = nullptr;
    KeyT*  _keys             = nullptr;
    size_t _num_buckets      = 0;
    size_t _num_filled       = 0;
    int    _max_probe_length = -1; // Our longest bucket-brigade is this long. ONLY when we have zero elements is this ever negative (-1).
    size_t _mask             = 0;  // _num_buckets minus one
};

} // namespace emilib