diff options
author | Steven Le Rouzic <steven.lerouzic@gmail.com> | 2025-01-14 00:01:55 +0100 |
---|---|---|
committer | Steven Le Rouzic <steven.lerouzic@gmail.com> | 2025-01-14 00:02:02 +0100 |
commit | 1c00f6ed444dab15430a955e41cf155049e3cec4 (patch) | |
tree | 1be0c44df0e102dfa577f4c11abc56ef4fe57bcc /asl | |
parent | 5d6e9aac39551db657df5f48ae2baa7b77c5c093 (diff) |
Start work on hash_set
Diffstat (limited to 'asl')
-rw-r--r-- | asl/BUILD.bazel | 2 | ||||
-rw-r--r-- | asl/hash_set.hpp | 255 | ||||
-rw-r--r-- | asl/memory.hpp | 5 | ||||
-rw-r--r-- | asl/tests/hash_set_tests.cpp | 59 | ||||
-rw-r--r-- | asl/utility.hpp | 13 |
5 files changed, 333 insertions, 1 deletions
diff --git a/asl/BUILD.bazel b/asl/BUILD.bazel index 5621cbd..468ad58 100644 --- a/asl/BUILD.bazel +++ b/asl/BUILD.bazel @@ -12,6 +12,7 @@ cc_library( "format.hpp",
"functional.hpp",
"hash.hpp",
+ "hash_set.hpp",
"integers.hpp",
"io.hpp",
"layout.hpp",
@@ -59,6 +60,7 @@ cc_library( "format",
"functional",
"hash",
+ "hash_set",
"integers",
"maybe_uninit",
"meta",
diff --git a/asl/hash_set.hpp b/asl/hash_set.hpp new file mode 100644 index 0000000..22e47e0 --- /dev/null +++ b/asl/hash_set.hpp @@ -0,0 +1,255 @@ +#pragma once
+
+#include "asl/annotations.hpp"
+#include "asl/meta.hpp"
+#include "asl/utility.hpp"
+#include "asl/maybe_uninit.hpp"
+#include "asl/hash.hpp"
+#include "asl/allocator.hpp"
+#include "asl/memory.hpp"
+
+namespace asl
+{
+
+template<is_object T, allocator Allocator = DefaultAllocator>
+requires hashable<T> && move_constructible<T> && move_assignable<T> && equality_comparable<T>
+class hash_set
+{
+ static constexpr uint8_t kHasValue = 0x80;
+ static constexpr uint8_t kHashMask = 0x7f;
+ static constexpr uint8_t kEmpty = 0x00;
+ static constexpr uint8_t kTombstone = 0x01;
+
+ static constexpr isize_t kMinCapacity = 8;
+
+ // Important so we can memzero the tags
+ static_assert(kEmpty == 0);
+
+ uint8_t* m_tags{};
+ maybe_uninit<T>* m_values{};
+ isize_t m_capacity{};
+ isize_t m_size{};
+
+ ASL_NO_UNIQUE_ADDRESS Allocator m_allocator;
+
+ constexpr isize_t max_size() const
+ {
+ // Max load factor is 75%
+ return (m_capacity >> 1) + (m_capacity >> 2);
+ }
+
+ static void insert_inner(
+ T&& value,
+ uint8_t* tags,
+ maybe_uninit<T>* values,
+ isize_t capacity,
+ isize_t* size)
+ {
+ ASL_ASSERT(*size < capacity);
+ ASL_ASSERT(is_pow2(capacity));
+
+ const isize_t capacity_mask = capacity - 1;
+ const uint64_t hash = hash_value(value);
+ const uint8_t tag = static_cast<uint8_t>(hash & kHashMask) | kHasValue;
+ const auto starting_index = static_cast<isize_t>(hash >> 7) & capacity_mask;
+
+ isize_t first_available_index = -1;
+ isize_t already_present_index = -1;
+
+ // NOLINTBEGIN(*-pointer-arithmetic)
+
+ for (
+ isize_t i = starting_index;
+ i != starting_index || first_available_index < 0;
+ i = (i + 1) & capacity_mask)
+ {
+ uint8_t t = tags[i];
+
+ if ((t & kHasValue) == 0 && first_available_index < 0)
+ {
+ first_available_index = i;
+ }
+
+ if (t == tag && values[i].as_init_unsafe() == value)
+ {
+ ASL_ASSERT(already_present_index < 0);
+ already_present_index = i;
+ if (first_available_index < 0)
+ {
+ first_available_index = i;
+ }
+ break;
+ }
+
+ if (t == kEmpty) { break; }
+ }
+
+ ASL_ASSERT(first_available_index >= 0 || already_present_index >= 0);
+
+ if (already_present_index == first_available_index)
+ {
+ ASL_ASSERT((tags[already_present_index] & kHasValue) != 0);
+ values[already_present_index].assign_unsafe(ASL_MOVE(value));
+ }
+ else
+ {
+ ASL_ASSERT((tags[first_available_index] & kHasValue) == 0);
+ if (already_present_index >= 0)
+ {
+ ASL_ASSERT((tags[already_present_index] & kHasValue) != 0);
+ values[already_present_index].destroy_unsafe();
+ tags[already_present_index] = kTombstone;
+ }
+ else
+ {
+ *size += 1;
+ }
+
+ values[first_available_index].construct_unsafe(ASL_MOVE(value));
+ tags[first_available_index] = tag;
+ }
+
+ // NOLINTEND(*-pointer-arithmetic)
+ }
+
+ isize_t find_slot(const T& value) const
+ {
+ ASL_ASSERT(is_pow2(m_capacity));
+
+ const isize_t capacity_mask = m_capacity - 1;
+ const uint64_t hash = hash_value(value);
+ const uint8_t tag = static_cast<uint8_t>(hash & kHashMask) | kHasValue;
+ const auto starting_index = static_cast<isize_t>(hash >> 7) & capacity_mask;
+
+ // NOLINTBEGIN(*-pointer-arithmetic)
+
+ isize_t i = starting_index;
+ do
+ {
+ const uint8_t t = m_tags[i];
+
+ if (t == tag && m_values[i].as_init_unsafe() == value) { return i; }
+ if (t == kEmpty) { break; }
+
+ i = (i + 1) & capacity_mask;
+ } while (i != starting_index);
+
+ // NOLINTEND(*-pointer-arithmetic)
+
+ return -1;
+ }
+
+ void grow_and_rehash()
+ {
+ isize_t new_capacity = max(kMinCapacity, m_capacity * 2);
+
+ auto* new_tags = reinterpret_cast<uint8_t*>(m_allocator.alloc(layout::array<uint8_t>(new_capacity)));
+ auto* new_values = reinterpret_cast<maybe_uninit<T>*>(m_allocator.alloc(layout::array<maybe_uninit<T>>(new_capacity)));
+ asl::memzero(new_tags, new_capacity);
+
+ isize_t new_size = 0;
+
+ // NOLINTBEGIN(*-pointer-arithmetic)
+ for (isize_t i = 0; i < m_capacity; ++i)
+ {
+ if ((m_tags[i] & kHasValue) == 0) { continue; }
+
+ insert_inner(ASL_MOVE(m_values[i].as_init_unsafe()), new_tags, new_values, new_capacity, &new_size);
+
+ // Destroy now so that destroy() has less things to do
+ m_values[i].destroy_unsafe();
+ m_tags[i] = kTombstone;
+ }
+ // NOLINTEND(*-pointer-arithmetic)
+
+ ASL_ASSERT(new_size == m_size);
+
+ m_size = 0;
+ destroy();
+
+ m_tags = new_tags;
+ m_values = new_values;
+ m_capacity = new_capacity;
+ m_size = new_size;
+ }
+
+ void clear_values()
+ {
+ if constexpr (!trivially_destructible<T>)
+ {
+ if (m_size > 0)
+ {
+ for (isize_t i = 0; i < m_capacity; ++i)
+ {
+ if ((m_tags[i] & kHasValue) != 0) // NOLINT(*-pointer-arithmetic)
+ {
+ m_values[i].destroy_unsafe(); // NOLINT(*-pointer-arithmetic)
+ }
+ }
+ }
+ }
+ }
+
+public:
+ constexpr hash_set() requires default_constructible<Allocator>
+ : m_allocator{}
+ {}
+
+ explicit constexpr hash_set(Allocator allocator)
+ : m_allocator{ASL_MOVE(allocator)}
+ {}
+
+ // @Todo Copy, move
+
+ ~hash_set()
+ {
+ destroy();
+ }
+
+ void destroy()
+ {
+ clear_values();
+ m_size = 0;
+
+ if (m_capacity > 0)
+ {
+ m_allocator.dealloc(m_tags, layout::array<uint8_t>(m_capacity));
+ m_allocator.dealloc(m_values, layout::array<maybe_uninit<T>>(m_capacity));
+ m_capacity = 0;
+ }
+ }
+
+ void clear()
+ {
+ clear_values();
+ m_size = 0;
+
+ if (m_capacity > 0)
+ {
+ asl::memzero(m_tags, m_capacity);
+ }
+ }
+
+ constexpr isize_t size() const { return m_size; }
+
+ template<typename... Args>
+ void insert(Args&&... args)
+ requires constructible_from<T, Args&&...>
+ {
+ if (m_size >= max_size())
+ {
+ grow_and_rehash();
+ }
+ ASL_ASSERT(m_size < max_size());
+ insert_inner(ASL_MOVE(T{ASL_FWD(args)...}), m_tags, m_values, m_capacity, &m_size);
+ }
+
+ bool contains(const T& value) const
+ {
+ if (m_size == 0) { return false; }
+ return find_slot(value) >= 0;
+ }
+};
+
+} // namespace asl
+
diff --git a/asl/memory.hpp b/asl/memory.hpp index 209f1d1..8a8a6bf 100644 --- a/asl/memory.hpp +++ b/asl/memory.hpp @@ -23,6 +23,11 @@ constexpr void memcpy(void* dst, const void* src, isize_t size) __builtin_memcpy(dst, src, static_cast<size_t>(size));
}
+inline void memzero(void* dst, isize_t size)
+{
+ __builtin_memset(dst, 0, static_cast<size_t>(size));
+}
+
constexpr isize_t strlen(const char* s)
{
return static_cast<isize_t>(__builtin_strlen(s));
diff --git a/asl/tests/hash_set_tests.cpp b/asl/tests/hash_set_tests.cpp new file mode 100644 index 0000000..e8d744b --- /dev/null +++ b/asl/tests/hash_set_tests.cpp @@ -0,0 +1,59 @@ +#include "asl/hash_set.hpp"
+#include "asl/testing/testing.hpp"
+#include "asl/string.hpp"
+#include "asl/string_view.hpp"
+
+ASL_TEST(empty)
+{
+ asl::hash_set<int> set;
+
+ ASL_TEST_EXPECT(set.size() == 0);
+
+ for (int i = 0; i < 100; ++i)
+ {
+ ASL_TEST_EXPECT(!set.contains(i));
+ }
+}
+
+ASL_TEST(a_bunch_of_strings)
+{
+ asl::hash_set<asl::string<>> set;
+
+ set.insert("Hello, world!");
+ ASL_TEST_EXPECT(set.size() == 1);
+
+ set.insert("Hello, puppy!");
+ ASL_TEST_EXPECT(set.size() == 2);
+
+ set.insert("Hello, puppy!");
+ ASL_TEST_EXPECT(set.size() == 2);
+
+ ASL_TEST_EXPECT(set.contains("Hello, world!"_sv));
+ ASL_TEST_EXPECT(set.contains("Hello, puppy!"_sv));
+ ASL_TEST_EXPECT(!set.contains("Hello, Steven!"_sv));
+
+}
+
+ASL_TEST(a_bunch_of_ints)
+{
+ asl::hash_set<int> set;
+
+ int count = 3000;
+
+ for (int i = 0; i < count; ++i)
+ {
+ set.insert(i);
+ }
+
+ ASL_TEST_EXPECT(set.size() == count);
+
+ for (int i = 0; i < count * 2; ++i)
+ {
+ ASL_TEST_EXPECT(set.contains(i) == (i < count));
+ }
+}
+
+// @Todo Remove elements
+
+// @Todo Test destructors
+
diff --git a/asl/utility.hpp b/asl/utility.hpp index 4f27017..6a43852 100644 --- a/asl/utility.hpp +++ b/asl/utility.hpp @@ -37,11 +37,17 @@ constexpr U bit_cast(T value) requires (size_of<T> == size_of<U>) }
template<typename T>
-T min(T a, T b)
+constexpr T min(T a, T b)
{
return (a <= b) ? a : b;
}
+template<typename T>
+constexpr T max(T a, T b)
+{
+ return (a >= b) ? a : b;
+}
+
constexpr uint64_t round_up_pow2(uint64_t v)
{
ASL_ASSERT(v <= 0x8000'0000'0000'0000);
@@ -58,6 +64,11 @@ constexpr uint64_t round_up_pow2(uint64_t v) return v + 1;
}
+constexpr bool is_pow2(isize_t v)
+{
+ return v > 0 && ((v - 1) & v) == 0;
+}
+
#define ASL_DELETE_COPY(T) \
T(const T&) = delete; \
T& operator=(const T&) = delete;
|