From 1c00f6ed444dab15430a955e41cf155049e3cec4 Mon Sep 17 00:00:00 2001 From: Steven Le Rouzic Date: Tue, 14 Jan 2025 00:01:55 +0100 Subject: Start work on hash_set --- asl/BUILD.bazel | 2 + asl/hash_set.hpp | 255 +++++++++++++++++++++++++++++++++++++++++++ asl/memory.hpp | 5 + asl/tests/hash_set_tests.cpp | 59 ++++++++++ asl/utility.hpp | 13 ++- 5 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 asl/hash_set.hpp create mode 100644 asl/tests/hash_set_tests.cpp (limited to 'asl') diff --git a/asl/BUILD.bazel b/asl/BUILD.bazel index 5621cbd..468ad58 100644 --- a/asl/BUILD.bazel +++ b/asl/BUILD.bazel @@ -12,6 +12,7 @@ cc_library( "format.hpp", "functional.hpp", "hash.hpp", + "hash_set.hpp", "integers.hpp", "io.hpp", "layout.hpp", @@ -59,6 +60,7 @@ cc_library( "format", "functional", "hash", + "hash_set", "integers", "maybe_uninit", "meta", diff --git a/asl/hash_set.hpp b/asl/hash_set.hpp new file mode 100644 index 0000000..22e47e0 --- /dev/null +++ b/asl/hash_set.hpp @@ -0,0 +1,255 @@ +#pragma once + +#include "asl/annotations.hpp" +#include "asl/meta.hpp" +#include "asl/utility.hpp" +#include "asl/maybe_uninit.hpp" +#include "asl/hash.hpp" +#include "asl/allocator.hpp" +#include "asl/memory.hpp" + +namespace asl +{ + +template +requires hashable && move_constructible && move_assignable && equality_comparable +class hash_set +{ + static constexpr uint8_t kHasValue = 0x80; + static constexpr uint8_t kHashMask = 0x7f; + static constexpr uint8_t kEmpty = 0x00; + static constexpr uint8_t kTombstone = 0x01; + + static constexpr isize_t kMinCapacity = 8; + + // Important so we can memzero the tags + static_assert(kEmpty == 0); + + uint8_t* m_tags{}; + maybe_uninit* m_values{}; + isize_t m_capacity{}; + isize_t m_size{}; + + ASL_NO_UNIQUE_ADDRESS Allocator m_allocator; + + constexpr isize_t max_size() const + { + // Max load factor is 75% + return (m_capacity >> 1) + (m_capacity >> 2); + } + + static void insert_inner( + T&& value, + uint8_t* tags, + maybe_uninit* values, + isize_t capacity, + isize_t* size) + { + ASL_ASSERT(*size < capacity); + ASL_ASSERT(is_pow2(capacity)); + + const isize_t capacity_mask = capacity - 1; + const uint64_t hash = hash_value(value); + const uint8_t tag = static_cast(hash & kHashMask) | kHasValue; + const auto starting_index = static_cast(hash >> 7) & capacity_mask; + + isize_t first_available_index = -1; + isize_t already_present_index = -1; + + // NOLINTBEGIN(*-pointer-arithmetic) + + for ( + isize_t i = starting_index; + i != starting_index || first_available_index < 0; + i = (i + 1) & capacity_mask) + { + uint8_t t = tags[i]; + + if ((t & kHasValue) == 0 && first_available_index < 0) + { + first_available_index = i; + } + + if (t == tag && values[i].as_init_unsafe() == value) + { + ASL_ASSERT(already_present_index < 0); + already_present_index = i; + if (first_available_index < 0) + { + first_available_index = i; + } + break; + } + + if (t == kEmpty) { break; } + } + + ASL_ASSERT(first_available_index >= 0 || already_present_index >= 0); + + if (already_present_index == first_available_index) + { + ASL_ASSERT((tags[already_present_index] & kHasValue) != 0); + values[already_present_index].assign_unsafe(ASL_MOVE(value)); + } + else + { + ASL_ASSERT((tags[first_available_index] & kHasValue) == 0); + if (already_present_index >= 0) + { + ASL_ASSERT((tags[already_present_index] & kHasValue) != 0); + values[already_present_index].destroy_unsafe(); + tags[already_present_index] = kTombstone; + } + else + { + *size += 1; + } + + values[first_available_index].construct_unsafe(ASL_MOVE(value)); + tags[first_available_index] = tag; + } + + // NOLINTEND(*-pointer-arithmetic) + } + + isize_t find_slot(const T& value) const + { + ASL_ASSERT(is_pow2(m_capacity)); + + const isize_t capacity_mask = m_capacity - 1; + const uint64_t hash = hash_value(value); + const uint8_t tag = static_cast(hash & kHashMask) | kHasValue; + const auto starting_index = static_cast(hash >> 7) & capacity_mask; + + // NOLINTBEGIN(*-pointer-arithmetic) + + isize_t i = starting_index; + do + { + const uint8_t t = m_tags[i]; + + if (t == tag && m_values[i].as_init_unsafe() == value) { return i; } + if (t == kEmpty) { break; } + + i = (i + 1) & capacity_mask; + } while (i != starting_index); + + // NOLINTEND(*-pointer-arithmetic) + + return -1; + } + + void grow_and_rehash() + { + isize_t new_capacity = max(kMinCapacity, m_capacity * 2); + + auto* new_tags = reinterpret_cast(m_allocator.alloc(layout::array(new_capacity))); + auto* new_values = reinterpret_cast*>(m_allocator.alloc(layout::array>(new_capacity))); + asl::memzero(new_tags, new_capacity); + + isize_t new_size = 0; + + // NOLINTBEGIN(*-pointer-arithmetic) + for (isize_t i = 0; i < m_capacity; ++i) + { + if ((m_tags[i] & kHasValue) == 0) { continue; } + + insert_inner(ASL_MOVE(m_values[i].as_init_unsafe()), new_tags, new_values, new_capacity, &new_size); + + // Destroy now so that destroy() has less things to do + m_values[i].destroy_unsafe(); + m_tags[i] = kTombstone; + } + // NOLINTEND(*-pointer-arithmetic) + + ASL_ASSERT(new_size == m_size); + + m_size = 0; + destroy(); + + m_tags = new_tags; + m_values = new_values; + m_capacity = new_capacity; + m_size = new_size; + } + + void clear_values() + { + if constexpr (!trivially_destructible) + { + if (m_size > 0) + { + for (isize_t i = 0; i < m_capacity; ++i) + { + if ((m_tags[i] & kHasValue) != 0) // NOLINT(*-pointer-arithmetic) + { + m_values[i].destroy_unsafe(); // NOLINT(*-pointer-arithmetic) + } + } + } + } + } + +public: + constexpr hash_set() requires default_constructible + : m_allocator{} + {} + + explicit constexpr hash_set(Allocator allocator) + : m_allocator{ASL_MOVE(allocator)} + {} + + // @Todo Copy, move + + ~hash_set() + { + destroy(); + } + + void destroy() + { + clear_values(); + m_size = 0; + + if (m_capacity > 0) + { + m_allocator.dealloc(m_tags, layout::array(m_capacity)); + m_allocator.dealloc(m_values, layout::array>(m_capacity)); + m_capacity = 0; + } + } + + void clear() + { + clear_values(); + m_size = 0; + + if (m_capacity > 0) + { + asl::memzero(m_tags, m_capacity); + } + } + + constexpr isize_t size() const { return m_size; } + + template + void insert(Args&&... args) + requires constructible_from + { + if (m_size >= max_size()) + { + grow_and_rehash(); + } + ASL_ASSERT(m_size < max_size()); + insert_inner(ASL_MOVE(T{ASL_FWD(args)...}), m_tags, m_values, m_capacity, &m_size); + } + + bool contains(const T& value) const + { + if (m_size == 0) { return false; } + return find_slot(value) >= 0; + } +}; + +} // namespace asl + diff --git a/asl/memory.hpp b/asl/memory.hpp index 209f1d1..8a8a6bf 100644 --- a/asl/memory.hpp +++ b/asl/memory.hpp @@ -23,6 +23,11 @@ constexpr void memcpy(void* dst, const void* src, isize_t size) __builtin_memcpy(dst, src, static_cast(size)); } +inline void memzero(void* dst, isize_t size) +{ + __builtin_memset(dst, 0, static_cast(size)); +} + constexpr isize_t strlen(const char* s) { return static_cast(__builtin_strlen(s)); diff --git a/asl/tests/hash_set_tests.cpp b/asl/tests/hash_set_tests.cpp new file mode 100644 index 0000000..e8d744b --- /dev/null +++ b/asl/tests/hash_set_tests.cpp @@ -0,0 +1,59 @@ +#include "asl/hash_set.hpp" +#include "asl/testing/testing.hpp" +#include "asl/string.hpp" +#include "asl/string_view.hpp" + +ASL_TEST(empty) +{ + asl::hash_set set; + + ASL_TEST_EXPECT(set.size() == 0); + + for (int i = 0; i < 100; ++i) + { + ASL_TEST_EXPECT(!set.contains(i)); + } +} + +ASL_TEST(a_bunch_of_strings) +{ + asl::hash_set> set; + + set.insert("Hello, world!"); + ASL_TEST_EXPECT(set.size() == 1); + + set.insert("Hello, puppy!"); + ASL_TEST_EXPECT(set.size() == 2); + + set.insert("Hello, puppy!"); + ASL_TEST_EXPECT(set.size() == 2); + + ASL_TEST_EXPECT(set.contains("Hello, world!"_sv)); + ASL_TEST_EXPECT(set.contains("Hello, puppy!"_sv)); + ASL_TEST_EXPECT(!set.contains("Hello, Steven!"_sv)); + +} + +ASL_TEST(a_bunch_of_ints) +{ + asl::hash_set set; + + int count = 3000; + + for (int i = 0; i < count; ++i) + { + set.insert(i); + } + + ASL_TEST_EXPECT(set.size() == count); + + for (int i = 0; i < count * 2; ++i) + { + ASL_TEST_EXPECT(set.contains(i) == (i < count)); + } +} + +// @Todo Remove elements + +// @Todo Test destructors + diff --git a/asl/utility.hpp b/asl/utility.hpp index 4f27017..6a43852 100644 --- a/asl/utility.hpp +++ b/asl/utility.hpp @@ -37,11 +37,17 @@ constexpr U bit_cast(T value) requires (size_of == size_of) } template -T min(T a, T b) +constexpr T min(T a, T b) { return (a <= b) ? a : b; } +template +constexpr T max(T a, T b) +{ + return (a >= b) ? a : b; +} + constexpr uint64_t round_up_pow2(uint64_t v) { ASL_ASSERT(v <= 0x8000'0000'0000'0000); @@ -58,6 +64,11 @@ constexpr uint64_t round_up_pow2(uint64_t v) return v + 1; } +constexpr bool is_pow2(isize_t v) +{ + return v > 0 && ((v - 1) & v) == 0; +} + #define ASL_DELETE_COPY(T) \ T(const T&) = delete; \ T& operator=(const T&) = delete; -- cgit