Start work on hash_set
This commit is contained in:
@ -12,6 +12,7 @@ cc_library(
|
||||
"format.hpp",
|
||||
"functional.hpp",
|
||||
"hash.hpp",
|
||||
"hash_set.hpp",
|
||||
"integers.hpp",
|
||||
"io.hpp",
|
||||
"layout.hpp",
|
||||
@ -59,6 +60,7 @@ cc_library(
|
||||
"format",
|
||||
"functional",
|
||||
"hash",
|
||||
"hash_set",
|
||||
"integers",
|
||||
"maybe_uninit",
|
||||
"meta",
|
||||
|
255
asl/hash_set.hpp
Normal file
255
asl/hash_set.hpp
Normal file
@ -0,0 +1,255 @@
|
||||
#pragma once
|
||||
|
||||
#include "asl/annotations.hpp"
|
||||
#include "asl/meta.hpp"
|
||||
#include "asl/utility.hpp"
|
||||
#include "asl/maybe_uninit.hpp"
|
||||
#include "asl/hash.hpp"
|
||||
#include "asl/allocator.hpp"
|
||||
#include "asl/memory.hpp"
|
||||
|
||||
namespace asl
|
||||
{
|
||||
|
||||
template<is_object T, allocator Allocator = DefaultAllocator>
|
||||
requires hashable<T> && move_constructible<T> && move_assignable<T> && equality_comparable<T>
|
||||
class hash_set
|
||||
{
|
||||
static constexpr uint8_t kHasValue = 0x80;
|
||||
static constexpr uint8_t kHashMask = 0x7f;
|
||||
static constexpr uint8_t kEmpty = 0x00;
|
||||
static constexpr uint8_t kTombstone = 0x01;
|
||||
|
||||
static constexpr isize_t kMinCapacity = 8;
|
||||
|
||||
// Important so we can memzero the tags
|
||||
static_assert(kEmpty == 0);
|
||||
|
||||
uint8_t* m_tags{};
|
||||
maybe_uninit<T>* m_values{};
|
||||
isize_t m_capacity{};
|
||||
isize_t m_size{};
|
||||
|
||||
ASL_NO_UNIQUE_ADDRESS Allocator m_allocator;
|
||||
|
||||
constexpr isize_t max_size() const
|
||||
{
|
||||
// Max load factor is 75%
|
||||
return (m_capacity >> 1) + (m_capacity >> 2);
|
||||
}
|
||||
|
||||
static void insert_inner(
|
||||
T&& value,
|
||||
uint8_t* tags,
|
||||
maybe_uninit<T>* values,
|
||||
isize_t capacity,
|
||||
isize_t* size)
|
||||
{
|
||||
ASL_ASSERT(*size < capacity);
|
||||
ASL_ASSERT(is_pow2(capacity));
|
||||
|
||||
const isize_t capacity_mask = capacity - 1;
|
||||
const uint64_t hash = hash_value(value);
|
||||
const uint8_t tag = static_cast<uint8_t>(hash & kHashMask) | kHasValue;
|
||||
const auto starting_index = static_cast<isize_t>(hash >> 7) & capacity_mask;
|
||||
|
||||
isize_t first_available_index = -1;
|
||||
isize_t already_present_index = -1;
|
||||
|
||||
// NOLINTBEGIN(*-pointer-arithmetic)
|
||||
|
||||
for (
|
||||
isize_t i = starting_index;
|
||||
i != starting_index || first_available_index < 0;
|
||||
i = (i + 1) & capacity_mask)
|
||||
{
|
||||
uint8_t t = tags[i];
|
||||
|
||||
if ((t & kHasValue) == 0 && first_available_index < 0)
|
||||
{
|
||||
first_available_index = i;
|
||||
}
|
||||
|
||||
if (t == tag && values[i].as_init_unsafe() == value)
|
||||
{
|
||||
ASL_ASSERT(already_present_index < 0);
|
||||
already_present_index = i;
|
||||
if (first_available_index < 0)
|
||||
{
|
||||
first_available_index = i;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (t == kEmpty) { break; }
|
||||
}
|
||||
|
||||
ASL_ASSERT(first_available_index >= 0 || already_present_index >= 0);
|
||||
|
||||
if (already_present_index == first_available_index)
|
||||
{
|
||||
ASL_ASSERT((tags[already_present_index] & kHasValue) != 0);
|
||||
values[already_present_index].assign_unsafe(ASL_MOVE(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
ASL_ASSERT((tags[first_available_index] & kHasValue) == 0);
|
||||
if (already_present_index >= 0)
|
||||
{
|
||||
ASL_ASSERT((tags[already_present_index] & kHasValue) != 0);
|
||||
values[already_present_index].destroy_unsafe();
|
||||
tags[already_present_index] = kTombstone;
|
||||
}
|
||||
else
|
||||
{
|
||||
*size += 1;
|
||||
}
|
||||
|
||||
values[first_available_index].construct_unsafe(ASL_MOVE(value));
|
||||
tags[first_available_index] = tag;
|
||||
}
|
||||
|
||||
// NOLINTEND(*-pointer-arithmetic)
|
||||
}
|
||||
|
||||
isize_t find_slot(const T& value) const
|
||||
{
|
||||
ASL_ASSERT(is_pow2(m_capacity));
|
||||
|
||||
const isize_t capacity_mask = m_capacity - 1;
|
||||
const uint64_t hash = hash_value(value);
|
||||
const uint8_t tag = static_cast<uint8_t>(hash & kHashMask) | kHasValue;
|
||||
const auto starting_index = static_cast<isize_t>(hash >> 7) & capacity_mask;
|
||||
|
||||
// NOLINTBEGIN(*-pointer-arithmetic)
|
||||
|
||||
isize_t i = starting_index;
|
||||
do
|
||||
{
|
||||
const uint8_t t = m_tags[i];
|
||||
|
||||
if (t == tag && m_values[i].as_init_unsafe() == value) { return i; }
|
||||
if (t == kEmpty) { break; }
|
||||
|
||||
i = (i + 1) & capacity_mask;
|
||||
} while (i != starting_index);
|
||||
|
||||
// NOLINTEND(*-pointer-arithmetic)
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void grow_and_rehash()
|
||||
{
|
||||
isize_t new_capacity = max(kMinCapacity, m_capacity * 2);
|
||||
|
||||
auto* new_tags = reinterpret_cast<uint8_t*>(m_allocator.alloc(layout::array<uint8_t>(new_capacity)));
|
||||
auto* new_values = reinterpret_cast<maybe_uninit<T>*>(m_allocator.alloc(layout::array<maybe_uninit<T>>(new_capacity)));
|
||||
asl::memzero(new_tags, new_capacity);
|
||||
|
||||
isize_t new_size = 0;
|
||||
|
||||
// NOLINTBEGIN(*-pointer-arithmetic)
|
||||
for (isize_t i = 0; i < m_capacity; ++i)
|
||||
{
|
||||
if ((m_tags[i] & kHasValue) == 0) { continue; }
|
||||
|
||||
insert_inner(ASL_MOVE(m_values[i].as_init_unsafe()), new_tags, new_values, new_capacity, &new_size);
|
||||
|
||||
// Destroy now so that destroy() has less things to do
|
||||
m_values[i].destroy_unsafe();
|
||||
m_tags[i] = kTombstone;
|
||||
}
|
||||
// NOLINTEND(*-pointer-arithmetic)
|
||||
|
||||
ASL_ASSERT(new_size == m_size);
|
||||
|
||||
m_size = 0;
|
||||
destroy();
|
||||
|
||||
m_tags = new_tags;
|
||||
m_values = new_values;
|
||||
m_capacity = new_capacity;
|
||||
m_size = new_size;
|
||||
}
|
||||
|
||||
void clear_values()
|
||||
{
|
||||
if constexpr (!trivially_destructible<T>)
|
||||
{
|
||||
if (m_size > 0)
|
||||
{
|
||||
for (isize_t i = 0; i < m_capacity; ++i)
|
||||
{
|
||||
if ((m_tags[i] & kHasValue) != 0) // NOLINT(*-pointer-arithmetic)
|
||||
{
|
||||
m_values[i].destroy_unsafe(); // NOLINT(*-pointer-arithmetic)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
constexpr hash_set() requires default_constructible<Allocator>
|
||||
: m_allocator{}
|
||||
{}
|
||||
|
||||
explicit constexpr hash_set(Allocator allocator)
|
||||
: m_allocator{ASL_MOVE(allocator)}
|
||||
{}
|
||||
|
||||
// @Todo Copy, move
|
||||
|
||||
~hash_set()
|
||||
{
|
||||
destroy();
|
||||
}
|
||||
|
||||
void destroy()
|
||||
{
|
||||
clear_values();
|
||||
m_size = 0;
|
||||
|
||||
if (m_capacity > 0)
|
||||
{
|
||||
m_allocator.dealloc(m_tags, layout::array<uint8_t>(m_capacity));
|
||||
m_allocator.dealloc(m_values, layout::array<maybe_uninit<T>>(m_capacity));
|
||||
m_capacity = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
clear_values();
|
||||
m_size = 0;
|
||||
|
||||
if (m_capacity > 0)
|
||||
{
|
||||
asl::memzero(m_tags, m_capacity);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr isize_t size() const { return m_size; }
|
||||
|
||||
template<typename... Args>
|
||||
void insert(Args&&... args)
|
||||
requires constructible_from<T, Args&&...>
|
||||
{
|
||||
if (m_size >= max_size())
|
||||
{
|
||||
grow_and_rehash();
|
||||
}
|
||||
ASL_ASSERT(m_size < max_size());
|
||||
insert_inner(ASL_MOVE(T{ASL_FWD(args)...}), m_tags, m_values, m_capacity, &m_size);
|
||||
}
|
||||
|
||||
bool contains(const T& value) const
|
||||
{
|
||||
if (m_size == 0) { return false; }
|
||||
return find_slot(value) >= 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace asl
|
||||
|
@ -23,6 +23,11 @@ constexpr void memcpy(void* dst, const void* src, isize_t size)
|
||||
__builtin_memcpy(dst, src, static_cast<size_t>(size));
|
||||
}
|
||||
|
||||
inline void memzero(void* dst, isize_t size)
|
||||
{
|
||||
__builtin_memset(dst, 0, static_cast<size_t>(size));
|
||||
}
|
||||
|
||||
constexpr isize_t strlen(const char* s)
|
||||
{
|
||||
return static_cast<isize_t>(__builtin_strlen(s));
|
||||
|
59
asl/tests/hash_set_tests.cpp
Normal file
59
asl/tests/hash_set_tests.cpp
Normal file
@ -0,0 +1,59 @@
|
||||
#include "asl/hash_set.hpp"
|
||||
#include "asl/testing/testing.hpp"
|
||||
#include "asl/string.hpp"
|
||||
#include "asl/string_view.hpp"
|
||||
|
||||
ASL_TEST(empty)
|
||||
{
|
||||
asl::hash_set<int> set;
|
||||
|
||||
ASL_TEST_EXPECT(set.size() == 0);
|
||||
|
||||
for (int i = 0; i < 100; ++i)
|
||||
{
|
||||
ASL_TEST_EXPECT(!set.contains(i));
|
||||
}
|
||||
}
|
||||
|
||||
ASL_TEST(a_bunch_of_strings)
|
||||
{
|
||||
asl::hash_set<asl::string<>> set;
|
||||
|
||||
set.insert("Hello, world!");
|
||||
ASL_TEST_EXPECT(set.size() == 1);
|
||||
|
||||
set.insert("Hello, puppy!");
|
||||
ASL_TEST_EXPECT(set.size() == 2);
|
||||
|
||||
set.insert("Hello, puppy!");
|
||||
ASL_TEST_EXPECT(set.size() == 2);
|
||||
|
||||
ASL_TEST_EXPECT(set.contains("Hello, world!"_sv));
|
||||
ASL_TEST_EXPECT(set.contains("Hello, puppy!"_sv));
|
||||
ASL_TEST_EXPECT(!set.contains("Hello, Steven!"_sv));
|
||||
|
||||
}
|
||||
|
||||
ASL_TEST(a_bunch_of_ints)
|
||||
{
|
||||
asl::hash_set<int> set;
|
||||
|
||||
int count = 3000;
|
||||
|
||||
for (int i = 0; i < count; ++i)
|
||||
{
|
||||
set.insert(i);
|
||||
}
|
||||
|
||||
ASL_TEST_EXPECT(set.size() == count);
|
||||
|
||||
for (int i = 0; i < count * 2; ++i)
|
||||
{
|
||||
ASL_TEST_EXPECT(set.contains(i) == (i < count));
|
||||
}
|
||||
}
|
||||
|
||||
// @Todo Remove elements
|
||||
|
||||
// @Todo Test destructors
|
||||
|
@ -37,11 +37,17 @@ constexpr U bit_cast(T value) requires (size_of<T> == size_of<U>)
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T min(T a, T b)
|
||||
constexpr T min(T a, T b)
|
||||
{
|
||||
return (a <= b) ? a : b;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
constexpr T max(T a, T b)
|
||||
{
|
||||
return (a >= b) ? a : b;
|
||||
}
|
||||
|
||||
constexpr uint64_t round_up_pow2(uint64_t v)
|
||||
{
|
||||
ASL_ASSERT(v <= 0x8000'0000'0000'0000);
|
||||
@ -58,6 +64,11 @@ constexpr uint64_t round_up_pow2(uint64_t v)
|
||||
return v + 1;
|
||||
}
|
||||
|
||||
constexpr bool is_pow2(isize_t v)
|
||||
{
|
||||
return v > 0 && ((v - 1) & v) == 0;
|
||||
}
|
||||
|
||||
#define ASL_DELETE_COPY(T) \
|
||||
T(const T&) = delete; \
|
||||
T& operator=(const T&) = delete;
|
||||
|
Reference in New Issue
Block a user