diff options
author | Steven Le Rouzic <steven.lerouzic@gmail.com> | 2025-07-03 18:37:18 +0200 |
---|---|---|
committer | Steven Le Rouzic <steven.lerouzic@gmail.com> | 2025-07-04 20:44:04 +0200 |
commit | bcdad5b8762060c82a0b7840cb905e69ddb9a65e (patch) | |
tree | 468694d1662c61c12f813689520f43c8e1767538 /asl | |
parent | cca2e267241a90f238e424e47501b1e8613a5955 (diff) |
Diffstat (limited to 'asl')
-rw-r--r-- | asl/strings/BUILD.bazel | 20 | ||||
-rw-r--r-- | asl/strings/parse_number.cpp | 162 | ||||
-rw-r--r-- | asl/strings/parse_number.hpp | 34 | ||||
-rw-r--r-- | asl/strings/parse_number_float.cpp | 30 | ||||
-rw-r--r-- | asl/strings/parse_number_integer_table.py | 24 | ||||
-rw-r--r-- | asl/strings/parse_number_tests.cpp | 203 | ||||
-rw-r--r-- | asl/strings/string_view.hpp | 7 |
7 files changed, 480 insertions, 0 deletions
diff --git a/asl/strings/BUILD.bazel b/asl/strings/BUILD.bazel
index 79fc631..d6b83b0 100644
--- a/asl/strings/BUILD.bazel
+++ b/asl/strings/BUILD.bazel
@@ -45,6 +45,24 @@ cc_library(
     visibility = ["//visibility:public"],
 )
 
+cc_library(
+    name = "parse_number",
+    hdrs = [
+        "parse_number.hpp",
+    ],
+    srcs = [
+        "parse_number_float.cpp",
+        "parse_number.cpp",
+    ],
+    deps = [
+        "//asl/base",
+        "//asl/types:status",
+        ":string_view",
+        "//vendor/fast_float",
+    ],
+    visibility = ["//visibility:public"],
+)
+
 [cc_test(
     name = "%s_tests" % name,
     srcs = [
@@ -54,6 +72,7 @@ cc_library(
         ":string",
         ":string_builder",
         ":string_view",
+        ":parse_number",
         "//asl/tests:utils",
         "//asl/testing",
     ],
@@ -61,4 +80,5 @@ cc_library(
     "string",
     "string_view",
     "string_builder",
+    "parse_number",
 ]]
diff --git a/asl/strings/parse_number.cpp b/asl/strings/parse_number.cpp
new file mode 100644
index 0000000..3b6b689
--- /dev/null
+++ b/asl/strings/parse_number.cpp
@@ -0,0 +1,162 @@
+// Copyright 2025 Steven Le Rouzic
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include "asl/strings/parse_number.hpp"
+
+namespace asl
+{
+
+// Floating point backends. Both take the input range [*begin, end), set
+// *begin to the position where parsing stopped, store the result through the
+// last parameter and return true on success.
+bool parse_float_impl(const char** begin, const char* end, float*);
+bool parse_double_impl(const char** begin, const char* end, double*);
+
+} // namespace asl
+
+// Parses a single-precision floating point number at the start of `sv`.
+//
+// On success, returns the parsed value together with the part of `sv` that
+// was not consumed. On failure, returns an invalid_argument error.
+asl::status_or<asl::parse_number_result<float>> asl::parse_float(asl::string_view sv)
+{
+    const auto* begin = sv.data();
+    // NOLINTNEXTLINE(*-pointer-arithmetic)
+    const auto* end = begin + sv.size();
+    if (float value{}; parse_float_impl(&begin, end, &value))
+    {
+        return parse_number_result<float>{
+            .value = value,
+            .remaining = string_view{begin, end},
+        };
+    }
+    return invalid_argument_error();
+}
+
+// Parses a double-precision floating point number at the start of `sv`.
+//
+// On success, returns the parsed value together with the part of `sv` that
+// was not consumed. On failure, returns an invalid_argument error.
+asl::status_or<asl::parse_number_result<double>> asl::parse_double(asl::string_view sv)
+{
+    const auto* begin = sv.data();
+    // NOLINTNEXTLINE(*-pointer-arithmetic)
+    const auto* end = begin + sv.size();
+    // The value must be parsed as a double by the double backend: going
+    // through a float would round the result to float precision before it is
+    // widened into the returned double.
+    if (double value{}; parse_double_impl(&begin, end, &value))
+    {
+        return parse_number_result<double>{
+            .value = value,
+            .remaining = string_view{begin, end},
+        };
+    }
+    return invalid_argument_error();
+}
+
+namespace
+{
+
+// Maps a byte to its value as a digit ('0'-'9' give 0-9, 'a'-'f' and 'A'-'F'
+// give 10-15). Every other byte maps to -1.
+constexpr int8_t kBase16Table[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+// @Todo Maybe monomorph this for common bases (2, 16, 10)?
+
+// Parses an integer of type T, written in the given base, at the start of `sv`.
+//
+// A leading '-' is only accepted when T is a signed integer type. Digits are
+// then consumed up to the first character that is not a digit of `base`.
+// `base` must be in [2, 16] (asserted).
+//
+// On success, returns the value and the part of the input following the
+// digits. Returns an invalid_argument error if the input is empty, if no
+// digit was consumed, or if the value does not fit in T ("overflow").
+template<typename T>
+asl::status_or<asl::parse_number_result<T>> parse_integer(asl::string_view sv, int base)
+{
+    ASL_ASSERT(base >= 2 && base <= 16);
+
+    if (sv.is_empty()) { return asl::invalid_argument_error(); }
+
+    T value = 0;
+    bool is_negative = false;
+
+    // For unsigned T the '-' is left in place; it is not a digit, so the loop
+    // below stops immediately and the input is rejected.
+    if (asl::is_signed_integer<T> && sv[0] == '-')
+    {
+        is_negative = true;
+        sv = sv.substr(1);
+    }
+
+    isize_t cursor = 0;
+    while (cursor < sv.size())
+    {
+        // NOLINTNEXTLINE(*-array-index)
+        int8_t digit = kBase16Table[static_cast<uint8_t>(sv[cursor])];
+        // Stop at the first byte that is not a digit of this base.
+        if (digit < 0 || digit >= base) { break; }
+
+        if (__builtin_mul_overflow(value, static_cast<T>(base), &value))
+        {
+            return asl::invalid_argument_error("overflow");
+        }
+
+        // Negative numbers are accumulated downward by adding negated digits,
+        // so the most negative value of T can be reached without overflowing.
+        if (asl::is_signed_integer<T> && is_negative)
+        {
+            digit = static_cast<T>(-digit);
+        }
+
+        if (__builtin_add_overflow(value, static_cast<T>(digit), &value))
+        {
+            return asl::invalid_argument_error("overflow");
+        }
+
+        cursor += 1;
+    }
+
+    // No digit was consumed (this also covers an input made of only '-').
+    if (cursor == 0)
+    {
+        return asl::invalid_argument_error();
+    }
+
+    return asl::parse_number_result<T>{
+        .value = value,
+        .remaining = sv.substr(cursor),
+    };
+}
+
+} // anonymous namespace
+
+// Public entry points: one wrapper per integer type, each forwarding to
+// parse_integer<T>.
+
+asl::status_or<asl::parse_number_result<uint8_t>> asl::parse_uint8(string_view sv, int base)
+{
+    return parse_integer<uint8_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<uint16_t>> asl::parse_uint16(string_view sv, int base)
+{
+    return parse_integer<uint16_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<uint32_t>> asl::parse_uint32(string_view sv, int base)
+{
+    return parse_integer<uint32_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<uint64_t>> asl::parse_uint64(string_view sv, int base)
+{
+    return parse_integer<uint64_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<int8_t>> asl::parse_int8(string_view sv, int base)
+{
+    return parse_integer<int8_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<int16_t>> asl::parse_int16(string_view sv, int base)
+{
+    return parse_integer<int16_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<int32_t>> asl::parse_int32(string_view sv, int base)
+{
+    return parse_integer<int32_t>(sv, base);
+}
+
+asl::status_or<asl::parse_number_result<int64_t>> asl::parse_int64(string_view sv, int base)
+{
+    return parse_integer<int64_t>(sv, base);
+}
+
diff --git a/asl/strings/parse_number.hpp b/asl/strings/parse_number.hpp
new file mode 100644
index 0000000..fae0841
--- /dev/null
+++ b/asl/strings/parse_number.hpp
@@ -0,0 +1,34 @@
+// Copyright 2025 Steven Le Rouzic
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#pragma once
+
+#include "asl/types/status_or.hpp"
+#include "asl/strings/string_view.hpp"
+
+namespace asl
+{
+
+// Result of a successful parse: the parsed value, and the part of the input
+// that follows the parsed number.
+template<typename T>
+struct parse_number_result
+{
+    T value;
+    string_view remaining;
+};
+
+// Parse a floating point number at the start of the given string.
+status_or<parse_number_result<float>> parse_float(string_view);
+status_or<parse_number_result<double>> parse_double(string_view);
+
+// Parse an unsigned integer written in `base` (2 to 16, default 10) at the
+// start of the given string.
+status_or<parse_number_result<uint8_t>> parse_uint8(string_view, int base = 10);
+status_or<parse_number_result<uint16_t>> parse_uint16(string_view, int base = 10);
+status_or<parse_number_result<uint32_t>> parse_uint32(string_view, int base = 10);
+status_or<parse_number_result<uint64_t>> parse_uint64(string_view, int base = 10);
+
+// Parse a signed integer, with an optional leading '-', written in `base`
+// (2 to 16, default 10) at the start of the given string.
+status_or<parse_number_result<int8_t>> parse_int8(string_view, int base = 10);
+status_or<parse_number_result<int16_t>> parse_int16(string_view, int base = 10);
+status_or<parse_number_result<int32_t>> parse_int32(string_view, int base = 10);
+status_or<parse_number_result<int64_t>> parse_int64(string_view, int base = 10);
+
+} // namespace asl
+
diff --git a/asl/strings/parse_number_float.cpp b/asl/strings/parse_number_float.cpp
new file mode 100644
index 0000000..4568278
--- /dev/null
+++ b/asl/strings/parse_number_float.cpp
@@ -0,0 +1,30 @@
+// Copyright 2025 Steven Le Rouzic
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include <fast_float.h>
+
+// We need to isolate fast_float.h completely from asl
+// because it conflicts with our redefinitions of things
+// from the STL. In this case it's operator new, but there
+// might be other conflicts.
+
+namespace asl
+{
+
+// Parses a float from [*begin, end) using fast_float::from_chars.
+// *begin is set to the position fast_float reports it stopped at.
+// Returns true when fast_float reports no error.
+extern bool parse_float_impl(const char** begin, const char* end, float* value)
+{
+    auto res = fast_float::from_chars(*begin, end, *value);
+    *begin = res.ptr;
+    return res.ec == std::errc{};
+}
+
+// Same as parse_float_impl, for double.
+extern bool parse_double_impl(const char** begin, const char* end, double* value)
+{
+    auto res = fast_float::from_chars(*begin, end, *value);
+    *begin = res.ptr;
+    return res.ec == std::errc{};
+}
+
+} // namespace asl
+
diff --git a/asl/strings/parse_number_integer_table.py b/asl/strings/parse_number_integer_table.py
new file mode 100644
index 0000000..7db41fb
--- /dev/null
+++ b/asl/strings/parse_number_integer_table.py
@@ -0,0 +1,24 @@
+# Prints a 16x16 table giving, for each byte value from 0 to 255, its value
+# as a hexadecimal digit, or -1 if the byte is not a hexadecimal digit.
+# Presumably used to generate kBase16Table in parse_number.cpp - TODO confirm.
+a = ord('a')
+f = ord('f')
+A = ord('A')
+F = ord('F')
+n0 = ord('0')
+n9 = ord('9')
+
+output = ""
+
+for i in range(0, 16):
+    for j in range(0, 16):
+        # v is the byte value for row i, column j.
+        v = i * 16 + j
+        n = -1
+        if v >= a and v <= f:
+            n = v - a + 10
+        elif v >= A and v <= F:
+            n = v - A + 10
+        elif v >= n0 and v <= n9:
+            n = v - n0
+        output += f"{n:>2}, "
+    output += "\n"
+
+print(output)
+
diff --git a/asl/strings/parse_number_tests.cpp b/asl/strings/parse_number_tests.cpp
new file mode 100644
index 0000000..df759e9
--- /dev/null
+++ b/asl/strings/parse_number_tests.cpp
@@ -0,0 +1,203 @@
+// Copyright 2025 Steven Le Rouzic
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include "asl/strings/parse_number.hpp"
+#include "asl/testing/testing.hpp"
+
+// @Todo Once we have an equivalent of std::numeric_limits,
+// properly compare floating point values in these tests.
+
+// NOTE(review): every floating point test below calls asl::parse_float;
+// asl::parse_double is not exercised by this file.
+
+ASL_TEST(parse_float_error)
+{
+    const asl::string_view sv = "this is not a number lmao";
+    auto res = asl::parse_float(sv);
+    ASL_TEST_EXPECT(!res.ok());
+}
+
+ASL_TEST(parse_float_empty)
+{
+    const asl::string_view sv = "";
+    auto res = asl::parse_float(sv);
+    ASL_TEST_EXPECT(!res.ok());
+}
+
+ASL_TEST(parse_float_simple)
+{
+    const asl::string_view sv = "3.1415";
+    auto res = asl::parse_float(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 3.1415F);
+    ASL_TEST_EXPECT(res.value().remaining.size() == 0);
+}
+
+ASL_TEST(parse_float_integer)
+{
+    const asl::string_view sv = "31415";
+    auto res = asl::parse_float(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 31415.0F);
+    ASL_TEST_EXPECT(res.value().remaining.size() == 0);
+}
+
+ASL_TEST(parse_float_scientific)
+{
+    const asl::string_view sv = "314.15e-2";
+    auto res = asl::parse_float(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 3.1415F);
+    ASL_TEST_EXPECT(res.value().remaining.size() == 0);
+}
+
+// Text following the number is not an error: it is returned in `remaining`.
+ASL_TEST(parse_float_suffix)
+{
+    const asl::string_view sv = "3.1415 yoyoyo";
+    auto res = asl::parse_float(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 3.1415F);
+    ASL_TEST_EXPECT(res.value().remaining == " yoyoyo");
+}
+
+ASL_TEST(parse_int)
+{
+    const asl::string_view sv = "926473";
+    auto res = asl::parse_uint32(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 926473);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_int_negative)
+{
+    const asl::string_view sv = "-926473";
+    auto res = asl::parse_int32(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == -926473);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_int_suffix)
+{
+    const asl::string_view sv = "926473 what's this then";
+    auto res = asl::parse_uint32(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 926473);
+    ASL_TEST_EXPECT(res.value().remaining == " what's this then");
+}
+
+// A leading '-' is rejected when parsing an unsigned type.
+ASL_TEST(parse_uint_with_minus)
+{
+    const asl::string_view sv = "-926473";
+    auto res = asl::parse_uint32(sv);
+    ASL_TEST_EXPECT(!res.ok());
+}
+
+// A sign that is not followed by any digit is an error.
+ASL_TEST(parse_int_with_only_minus)
+{
+    const asl::string_view sv = "-@";
+    auto res = asl::parse_int32(sv);
+    ASL_TEST_EXPECT(!res.ok());
+}
+
+// Letters are not digits in the default base 10.
+ASL_TEST(parse_uint_invalid)
+{
+    const asl::string_view sv = "abcd";
+    auto res = asl::parse_uint32(sv);
+    ASL_TEST_EXPECT(!res.ok());
+}
+
+ASL_TEST(parse_uint_empty)
+{
+    const asl::string_view sv = "";
+    auto res = asl::parse_uint32(sv);
+    ASL_TEST_EXPECT(!res.ok());
+}
+
+// 65536 is the first value that does not fit in a uint16_t.
+ASL_TEST(parse_uint_overflow)
+{
+    ASL_TEST_EXPECT(!asl::parse_uint16("80000").ok());
+    ASL_TEST_EXPECT(!asl::parse_uint16("65536").ok());
+}
+
+ASL_TEST(parse_uint16_max)
+{
+    const asl::string_view sv = "65535";
+    auto res = asl::parse_uint16(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 65535);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_uint16_zero)
+{
+    const asl::string_view sv = "0";
+    auto res = asl::parse_uint16(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 0);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_uint16_zeros)
+{
+    const asl::string_view sv = "00000";
+    auto res = asl::parse_uint16(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 0);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+// 32768 and -32769 are the first values outside the int16_t range.
+ASL_TEST(parse_int_overflow)
+{
+    ASL_TEST_EXPECT(!asl::parse_int16("80000").ok());
+    ASL_TEST_EXPECT(!asl::parse_int16("40000").ok());
+    ASL_TEST_EXPECT(!asl::parse_int16("32768").ok());
+    ASL_TEST_EXPECT(!asl::parse_int16("-80000").ok());
+    ASL_TEST_EXPECT(!asl::parse_int16("-40000").ok());
+    ASL_TEST_EXPECT(!asl::parse_int16("-32769").ok());
+}
+
+ASL_TEST(parse_int16_max)
+{
+    const asl::string_view sv = "32767";
+    auto res = asl::parse_int16(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 32767);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_int16_min)
+{
+    const asl::string_view sv = "-32768";
+    auto res = asl::parse_int16(sv);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == -32768);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+// 0x1000a == 65536 + 10.
+ASL_TEST(parse_hex)
+{
+    const asl::string_view sv = "1000a";
+    auto res = asl::parse_uint32(sv, 16);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 65546);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_bin)
+{
+    const asl::string_view sv = "101010";
+    auto res = asl::parse_uint32(sv, 2);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 42);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
+ASL_TEST(parse_oct)
+{
+    const asl::string_view sv = "644";
+    auto res = asl::parse_uint32(sv, 8);
+    ASL_TEST_EXPECT(res.ok());
+    ASL_TEST_EXPECT(res.value().value == 6 * 64 + 4 * 8 + 4);
+    ASL_TEST_EXPECT(res.value().remaining.is_empty());
+}
+
diff --git a/asl/strings/string_view.hpp b/asl/strings/string_view.hpp
index 287b88f..d614512 100644
--- a/asl/strings/string_view.hpp
+++ b/asl/strings/string_view.hpp
@@ -28,6 +28,13 @@ public:
         , m_size{size}
     {}
 
+    // Builds a view over the range [begin, end).
+    // `begin` must not be greater than `end` (asserted).
+    constexpr string_view(const char* begin, const char* end)
+        : m_data{begin}
+        , m_size{end - begin}
+    {
+        ASL_ASSERT(begin <= end);
+    }
+
     template<isize_t kSize>
     constexpr string_view(const char (&str)[kSize]) // NOLINT(*explicit*)
         requires (kSize >= 1) |