Add numbers parsing

This commit is contained in:
2025-07-03 18:37:18 +02:00
parent cca2e26724
commit bcdad5b876
12 changed files with 4977 additions and 1 deletions

View File

@ -21,3 +21,4 @@ Checks:
- "-cppcoreguidelines-pro-type-union-access" - "-cppcoreguidelines-pro-type-union-access"
- "-*-copy-assignment-signature" - "-*-copy-assignment-signature"
- "-*-unconventional-assign-operator" - "-*-unconventional-assign-operator"
- "-readability-math-missing-parentheses"

View File

@ -45,6 +45,24 @@ cc_library(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
# Number parsing library. The sources are split in two: one file implements
# the string-to-number entry points, the other is the only translation unit
# that depends on the vendored fast_float header (floating-point parsing).
cc_library(
name = "parse_number",
hdrs = [
"parse_number.hpp",
],
srcs = [
"parse_number_float.cpp",
"parse_number.cpp",
],
deps = [
"//asl/base",
"//asl/types:status",
":string_view",
"//vendor/fast_float",
],
visibility = ["//visibility:public"],
)
[cc_test( [cc_test(
name = "%s_tests" % name, name = "%s_tests" % name,
srcs = [ srcs = [
@ -54,6 +72,7 @@ cc_library(
":string", ":string",
":string_builder", ":string_builder",
":string_view", ":string_view",
":parse_number",
"//asl/tests:utils", "//asl/tests:utils",
"//asl/testing", "//asl/testing",
], ],
@ -61,4 +80,5 @@ cc_library(
"string", "string",
"string_view", "string_view",
"string_builder", "string_builder",
"parse_number",
]] ]]

View File

@ -0,0 +1,162 @@
// Copyright 2025 Steven Le Rouzic
//
// SPDX-License-Identifier: BSD-3-Clause
#include "asl/strings/parse_number.hpp"
// Forward declarations of the floating-point parsing backends. They are
// declared here rather than in a header; the definitions live in a separate
// translation unit that includes fast_float.h.
namespace asl
{
// Both functions read from [*begin, end), store the parsed number through the
// last parameter and update *begin to where parsing stopped. They return true
// when the backend reported no error.
bool parse_float_impl(const char** begin, const char* end, float*);
bool parse_double_impl(const char** begin, const char* end, double*);
} // namespace asl
// Parses a single-precision floating-point number from the beginning of `sv`.
//
// On success the result holds the parsed value together with the part of the
// input that was not consumed. When the backend cannot parse a number, an
// invalid-argument status is returned instead.
asl::status_or<asl::parse_number_result<float>> asl::parse_float(asl::string_view sv)
{
    const char* cursor = sv.data();
    // NOLINTNEXTLINE(*-pointer-arithmetic)
    const char* const limit = cursor + sv.size();

    float parsed{};
    if (!parse_float_impl(&cursor, limit, &parsed))
    {
        return invalid_argument_error();
    }

    // `cursor` was advanced by the backend; everything after it is left over.
    return parse_number_result<float>{
        .value = parsed,
        .remaining = string_view{cursor, limit},
    };
}
// Parses a double-precision floating-point number from the beginning of `sv`.
//
// On success the result holds the parsed value together with the part of the
// input that was not consumed. When the backend cannot parse a number, an
// invalid-argument status is returned instead.
asl::status_or<asl::parse_number_result<double>> asl::parse_double(asl::string_view sv)
{
    const auto* begin = sv.data();
    // NOLINTNEXTLINE(*-pointer-arithmetic)
    const auto* end = begin + sv.size();

    // Parse directly into a double through the double backend. Going through
    // a float (as a copy of parse_float would) silently truncates the result
    // to single precision and overflows to infinity far too early.
    if (double value{}; parse_double_impl(&begin, end, &value))
    {
        return parse_number_result<double>{
            .value = value,
            .remaining = string_view{begin, end},
        };
    }

    return invalid_argument_error();
}
namespace
{
// Lookup table indexed by an unsigned character code. Entries for '0'-'9'
// hold 0-9, entries for 'A'-'F' and 'a'-'f' hold 10-15, and every other
// entry is -1 (not a digit). Each row below covers 16 consecutive codes.
constexpr int8_t kBase16Table[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
// @Todo Maybe monomorphize this for common bases (2, 16, 10)?
// Parses an integer of type T, written in `base`, from the beginning of `sv`.
//
// `base` must be within [2, 16] (asserted). For signed T a single leading '-'
// is accepted. Digits are consumed until the first character that is not a
// valid digit for `base`; whatever follows is returned as `remaining`.
//
// Returns an invalid-argument status when the input is empty, when no digit
// could be consumed, or (with the message "overflow") when the value does not
// fit in T.
template<typename T>
asl::status_or<asl::parse_number_result<T>> parse_integer(asl::string_view sv, int base)
{
    ASL_ASSERT(base >= 2 && base <= 16);

    if (sv.is_empty()) { return asl::invalid_argument_error(); }

    // Only signed types recognise the minus sign; for unsigned types the '-'
    // is left in place and fails the digit lookup below.
    const bool negative = asl::is_signed_integer<T> && sv[0] == '-';
    if (negative) { sv = sv.substr(1); }

    T accumulator = 0;
    isize_t consumed = 0;

    for (; consumed < sv.size(); ++consumed)
    {
        // NOLINTNEXTLINE(*-array-index)
        const int8_t digit = kBase16Table[static_cast<uint8_t>(sv[consumed])];
        if (digit < 0 || digit >= base) { break; }

        if (__builtin_mul_overflow(accumulator, static_cast<T>(base), &accumulator))
        {
            return asl::invalid_argument_error("overflow");
        }

        // Negative numbers are accumulated downwards so that the minimum
        // value of T can be represented without overflowing on the way.
        const T addend = static_cast<T>(negative ? -digit : digit);
        if (__builtin_add_overflow(accumulator, addend, &accumulator))
        {
            return asl::invalid_argument_error("overflow");
        }
    }

    if (consumed == 0)
    {
        return asl::invalid_argument_error();
    }

    return asl::parse_number_result<T>{
        .value = accumulator,
        .remaining = sv.substr(consumed),
    };
}
} // anonymous namespace
// Parses an unsigned 8-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<uint8_t>> asl::parse_uint8(string_view sv, int base)
{
return parse_integer<uint8_t>(sv, base);
}
// Parses an unsigned 16-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<uint16_t>> asl::parse_uint16(string_view sv, int base)
{
return parse_integer<uint16_t>(sv, base);
}
// Parses an unsigned 32-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<uint32_t>> asl::parse_uint32(string_view sv, int base)
{
return parse_integer<uint32_t>(sv, base);
}
// Parses an unsigned 64-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<uint64_t>> asl::parse_uint64(string_view sv, int base)
{
return parse_integer<uint64_t>(sv, base);
}
// Parses a signed 8-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<int8_t>> asl::parse_int8(string_view sv, int base)
{
return parse_integer<int8_t>(sv, base);
}
// Parses a signed 16-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<int16_t>> asl::parse_int16(string_view sv, int base)
{
return parse_integer<int16_t>(sv, base);
}
// Parses a signed 32-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<int32_t>> asl::parse_int32(string_view sv, int base)
{
return parse_integer<int32_t>(sv, base);
}
// Parses a signed 64-bit integer written in `base`; forwards to parse_integer.
asl::status_or<asl::parse_number_result<int64_t>> asl::parse_int64(string_view sv, int base)
{
return parse_integer<int64_t>(sv, base);
}

View File

@ -0,0 +1,34 @@
// Copyright 2025 Steven Le Rouzic
//
// SPDX-License-Identifier: BSD-3-Clause
#pragma once
#include "asl/types/status_or.hpp"
#include "asl/strings/string_view.hpp"
namespace asl
{
// Result of a successful parse: the parsed number plus the part of the input
// that was not consumed.
template<typename T>
struct parse_number_result
{
// The parsed number.
T value;
// Remainder of the input following the parsed number.
string_view remaining;
};
// Parses a floating-point number from the start of the given string.
// Returns an invalid-argument status when no number can be parsed.
status_or<parse_number_result<float>> parse_float(string_view);
// Same contract as parse_float, with the value returned as a double.
status_or<parse_number_result<double>> parse_double(string_view);
// Integer parsers. `base` defaults to 10 and must be within [2, 16]
// (asserted by the implementation). Parsing stops at the first character
// that is not a digit of `base`. An invalid-argument status is returned when
// the input is empty, contains no leading digit, or the value overflows the
// target type. Unsigned variants do not accept a sign.
status_or<parse_number_result<uint8_t>> parse_uint8(string_view, int base = 10);
status_or<parse_number_result<uint16_t>> parse_uint16(string_view, int base = 10);
status_or<parse_number_result<uint32_t>> parse_uint32(string_view, int base = 10);
status_or<parse_number_result<uint64_t>> parse_uint64(string_view, int base = 10);
// Signed variants additionally accept a single leading '-'.
status_or<parse_number_result<int8_t>> parse_int8(string_view, int base = 10);
status_or<parse_number_result<int16_t>> parse_int16(string_view, int base = 10);
status_or<parse_number_result<int32_t>> parse_int32(string_view, int base = 10);
status_or<parse_number_result<int64_t>> parse_int64(string_view, int base = 10);
} // namespace asl

View File

@ -0,0 +1,30 @@
// Copyright 2025 Steven Le Rouzic
//
// SPDX-License-Identifier: BSD-3-Clause
#include <fast_float.h>
// We need to isolate fast_float.h completely from asl
// because it conflicts with our redefinitions of things
// from the STL. In this case it's operator new, but there
// might be other conflicts.
namespace asl
{
// Bridge to fast_float for single-precision values.
//
// Reads from [*begin, end), writes the parsed number to *value, and moves
// *begin to the position reported by fast_float. Returns true when fast_float
// reported no error.
extern bool parse_float_impl(const char** begin, const char* end, float* value)
{
    const auto outcome = fast_float::from_chars(*begin, end, *value);
    const bool succeeded = (outcome.ec == std::errc{});
    *begin = outcome.ptr;
    return succeeded;
}
// Bridge to fast_float for double-precision values.
//
// Reads from [*begin, end), writes the parsed number to *value, and moves
// *begin to the position reported by fast_float. Returns true when fast_float
// reported no error.
extern bool parse_double_impl(const char** begin, const char* end, double* value)
{
    const auto outcome = fast_float::from_chars(*begin, end, *value);
    const bool succeeded = (outcome.ec == std::errc{});
    *begin = outcome.ptr;
    return succeeded;
}
} // namespace asl

View File

@ -0,0 +1,24 @@
# Prints a 16x16 grid of hexadecimal digit values, one entry per character
# code from 0 to 255: 0-9 for '0'-'9', 10-15 for 'a'-'f' and 'A'-'F', and -1
# for everything else. Presumably used to generate the digit lookup table of
# the integer parser -- confirm before relying on it.


def _hex_digit_value(code):
    """Return the base-16 value of character code `code`, or -1 if none."""
    if ord('a') <= code <= ord('f'):
        return code - ord('a') + 10
    if ord('A') <= code <= ord('F'):
        return code - ord('A') + 10
    if ord('0') <= code <= ord('9'):
        return code - ord('0')
    return -1


rows = []
for high in range(16):
    # Each entry is right-aligned to width 2 and followed by ", ".
    cells = [f"{_hex_digit_value(high * 16 + low):>2}, " for low in range(16)]
    rows.append("".join(cells) + "\n")

print("".join(rows))

View File

@ -0,0 +1,203 @@
// Copyright 2025 Steven Le Rouzic
//
// SPDX-License-Identifier: BSD-3-Clause
#include "asl/strings/parse_number.hpp"
#include "asl/testing/testing.hpp"
// @Todo Once we have an equivalent of std::numeric_limits,
// properly compare floating point values in these tests.
// Text that does not start with a number must be rejected.
ASL_TEST(parse_float_error)
{
const asl::string_view sv = "this is not a number lmao";
auto res = asl::parse_float(sv);
ASL_TEST_EXPECT(!res.ok());
}
// Empty input must be rejected.
ASL_TEST(parse_float_empty)
{
const asl::string_view sv = "";
auto res = asl::parse_float(sv);
ASL_TEST_EXPECT(!res.ok());
}
// A plain decimal number is parsed and consumes the whole input.
ASL_TEST(parse_float_simple)
{
const asl::string_view sv = "3.1415";
auto res = asl::parse_float(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 3.1415F);
ASL_TEST_EXPECT(res.value().remaining.size() == 0);
}
// Input without a fractional part is still accepted as a float.
ASL_TEST(parse_float_integer)
{
const asl::string_view sv = "31415";
auto res = asl::parse_float(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 31415.0F);
ASL_TEST_EXPECT(res.value().remaining.size() == 0);
}
// Scientific notation with a negative exponent is supported.
ASL_TEST(parse_float_scientific)
{
const asl::string_view sv = "314.15e-2";
auto res = asl::parse_float(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 3.1415F);
ASL_TEST_EXPECT(res.value().remaining.size() == 0);
}
// Trailing text after the number is returned untouched in `remaining`.
ASL_TEST(parse_float_suffix)
{
const asl::string_view sv = "3.1415 yoyoyo";
auto res = asl::parse_float(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 3.1415F);
ASL_TEST_EXPECT(res.value().remaining == " yoyoyo");
}
// A decimal unsigned integer is parsed and consumes the whole input.
ASL_TEST(parse_int)
{
const asl::string_view sv = "926473";
auto res = asl::parse_uint32(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 926473);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Signed parsers accept a leading minus sign.
ASL_TEST(parse_int_negative)
{
const asl::string_view sv = "-926473";
auto res = asl::parse_int32(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == -926473);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Parsing stops at the first non-digit; the rest is returned in `remaining`.
ASL_TEST(parse_int_suffix)
{
const asl::string_view sv = "926473 what's this then";
auto res = asl::parse_uint32(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 926473);
ASL_TEST_EXPECT(res.value().remaining == " what's this then");
}
// Unsigned parsers must reject a leading minus sign.
ASL_TEST(parse_uint_with_minus)
{
const asl::string_view sv = "-926473";
auto res = asl::parse_uint32(sv);
ASL_TEST_EXPECT(!res.ok());
}
// A minus sign that is not followed by any digit is an error.
ASL_TEST(parse_int_with_only_minus)
{
const asl::string_view sv = "-@";
auto res = asl::parse_int32(sv);
ASL_TEST_EXPECT(!res.ok());
}
// Letters are not digits in the default base 10, so nothing is consumed.
ASL_TEST(parse_uint_invalid)
{
const asl::string_view sv = "abcd";
auto res = asl::parse_uint32(sv);
ASL_TEST_EXPECT(!res.ok());
}
// Empty input must be rejected.
ASL_TEST(parse_uint_empty)
{
const asl::string_view sv = "";
auto res = asl::parse_uint32(sv);
ASL_TEST_EXPECT(!res.ok());
}
// Values above the 16-bit unsigned maximum (65535) are reported as errors.
ASL_TEST(parse_uint_overflow)
{
ASL_TEST_EXPECT(!asl::parse_uint16("80000").ok());
ASL_TEST_EXPECT(!asl::parse_uint16("65536").ok());
}
// The largest 16-bit unsigned value parses without overflow.
ASL_TEST(parse_uint16_max)
{
const asl::string_view sv = "65535";
auto res = asl::parse_uint16(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 65535);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// A single zero digit is a valid number.
ASL_TEST(parse_uint16_zero)
{
const asl::string_view sv = "0";
auto res = asl::parse_uint16(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 0);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Repeated zeros are all consumed and still yield zero.
ASL_TEST(parse_uint16_zeros)
{
const asl::string_view sv = "00000";
auto res = asl::parse_uint16(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 0);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Values outside the 16-bit signed range [-32768, 32767] are errors, both
// far from the limits and exactly one past them.
ASL_TEST(parse_int_overflow)
{
ASL_TEST_EXPECT(!asl::parse_int16("80000").ok());
ASL_TEST_EXPECT(!asl::parse_int16("40000").ok());
ASL_TEST_EXPECT(!asl::parse_int16("32768").ok());
ASL_TEST_EXPECT(!asl::parse_int16("-80000").ok());
ASL_TEST_EXPECT(!asl::parse_int16("-40000").ok());
ASL_TEST_EXPECT(!asl::parse_int16("-32769").ok());
}
// The largest 16-bit signed value parses without overflow.
ASL_TEST(parse_int16_max)
{
const asl::string_view sv = "32767";
auto res = asl::parse_int16(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 32767);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// The smallest 16-bit signed value parses without overflow.
ASL_TEST(parse_int16_min)
{
const asl::string_view sv = "-32768";
auto res = asl::parse_int16(sv);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == -32768);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Base 16: "1000a" is 0x1000a, i.e. 65546.
ASL_TEST(parse_hex)
{
const asl::string_view sv = "1000a";
auto res = asl::parse_uint32(sv, 16);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 65546);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Base 2: "101010" is 42.
ASL_TEST(parse_bin)
{
const asl::string_view sv = "101010";
auto res = asl::parse_uint32(sv, 2);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 42);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}
// Base 8: "644" is 6*64 + 4*8 + 4.
ASL_TEST(parse_oct)
{
const asl::string_view sv = "644";
auto res = asl::parse_uint32(sv, 8);
ASL_TEST_EXPECT(res.ok());
ASL_TEST_EXPECT(res.value().value == 6 * 64 + 4 * 8 + 4);
ASL_TEST_EXPECT(res.value().remaining.is_empty());
}

View File

@ -28,6 +28,13 @@ public:
, m_size{size} , m_size{size}
{} {}
// Builds a view from a pair of pointers: the data starts at `begin` and the
// size is the distance `end - begin`. `begin` must not be greater than `end`
// (checked with ASL_ASSERT).
constexpr string_view(const char* begin, const char* end)
: m_data{begin}
, m_size{end - begin}
{
ASL_ASSERT(begin <= end);
}
template<isize_t kSize> template<isize_t kSize>
constexpr string_view(const char (&str)[kSize]) // NOLINT(*explicit*) constexpr string_view(const char (&str)[kSize]) // NOLINT(*explicit*)
requires (kSize >= 1) requires (kSize >= 1)

View File

@ -18,7 +18,7 @@ cc_library(
name = "dragonbox", name = "dragonbox",
hdrs = ["dragonbox.h"], hdrs = ["dragonbox.h"],
includes = ["."], includes = ["."],
visibility = ["//:__subpackages__"], visibility = ["//asl:__subpackages__"],
applicable_licenses = [ applicable_licenses = [
":license", ":license",
], ],

25
vendor/fast_float/BUILD.bazel vendored Normal file
View File

@ -0,0 +1,25 @@
# Copyright 2025 Steven Le Rouzic
#
# SPDX-License-Identifier: BSD-3-Clause
load("@rules_license//rules:license.bzl", "license")
# License metadata for the vendored fast_float library (MIT, see LICENSE.txt).
license(
name = "license",
license_kinds = [
"@rules_license//licenses/spdx:MIT",
],
license_text = "LICENSE.txt",
package_name = "fast_float",
package_url = "https://github.com/fastfloat/fast_float",
)
# Header-only target for the vendored fast_float.h. Visibility is restricted
# to packages under //asl, and the license target above is attached to it.
cc_library(
name = "fast_float",
hdrs = ["fast_float.h"],
includes = ["."],
visibility = ["//asl:__subpackages__"],
applicable_licenses = [
":license",
],
)

27
vendor/fast_float/LICENSE.txt vendored Normal file
View File

@ -0,0 +1,27 @@
MIT License
Copyright (c) 2021 The fast_float authors
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

4443
vendor/fast_float/fast_float.h vendored Normal file

File diff suppressed because it is too large Load Diff