From 27c3969e69f5525ccb0bc462a615a14142b5f178 Mon Sep 17 00:00:00 2001 From: Steven Le Rouzic Date: Thu, 28 Nov 2024 23:53:55 +0100 Subject: Add float utilities, & float formatting with dragonbox --- asl/BUILD.bazel | 5 + asl/float.hpp | 17 + asl/format.cpp | 19 +- asl/format.hpp | 2 + asl/format_float.cpp | 97 +- asl/meta.hpp | 6 + asl/string_view.hpp | 10 + asl/tests/float_tests.cpp | 23 + asl/tests/format_tests.cpp | 27 +- asl/tests/meta_tests.cpp | 6 + asl/tests/string_view_tests.cpp | 32 + vendor/dragonbox/BUILD.bazel | 6 + vendor/dragonbox/LICENSE.txt | 218 ++ vendor/dragonbox/dragonbox.h | 4205 +++++++++++++++++++++++++++++++++++++++ 14 files changed, 4653 insertions(+), 20 deletions(-) create mode 100644 asl/float.hpp create mode 100644 asl/tests/float_tests.cpp create mode 100644 vendor/dragonbox/BUILD.bazel create mode 100644 vendor/dragonbox/LICENSE.txt create mode 100644 vendor/dragonbox/dragonbox.h diff --git a/asl/BUILD.bazel b/asl/BUILD.bazel index b9875ab..391ae9d 100644 --- a/asl/BUILD.bazel +++ b/asl/BUILD.bazel @@ -6,6 +6,7 @@ cc_library( "assert.hpp", "box.hpp", "config.hpp", + "float.hpp", "format.hpp", "functional.hpp", "integers.hpp", @@ -26,6 +27,9 @@ cc_library( "format_float.cpp", "print.cpp", ], + deps = [ + "//vendor/dragonbox", + ], visibility = ["//visibility:public"], ) @@ -41,6 +45,7 @@ cc_library( ], ) for name in [ "box", + "float", "format", "functional", "integers", diff --git a/asl/float.hpp b/asl/float.hpp new file mode 100644 index 0000000..8ecf5fa --- /dev/null +++ b/asl/float.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include "asl/meta.hpp" + +namespace asl +{ + +template constexpr T infinity() { return __builtin_inf(); } + +template constexpr T nan() { return static_cast(__builtin_nanf("")); } + +template constexpr bool is_infinity(T f) { return __builtin_isinf(f); } + +template constexpr bool is_nan(T f) { return __builtin_isnan(f); } + +} // namespace asl + diff --git a/asl/format.cpp b/asl/format.cpp index 6a3e5de..1371431 100644 --- a/asl/format.cpp +++ b/asl/format.cpp @@ -106,7 +106,9 @@ void asl::AslFormat(Formatter& f, uint32_t v) AslFormat(f, static_cast(v)); } -void asl::AslFormat(Formatter& f, uint64_t v) +static constexpr int32_t kMaxUint64Digits = 20; + +asl::string_view asl::format_uint64(uint64_t v, asl::span buffer) { static constexpr char s_pairs_storage[] = { '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', @@ -132,10 +134,7 @@ void asl::AslFormat(Formatter& f, uint64_t v) }; static constexpr span s_pairs = s_pairs_storage; - - static constexpr int32_t kMaxDigits = 20; - char buffer[kMaxDigits]; - int32_t cursor = kMaxDigits; + int32_t cursor = kMaxUint64Digits; auto write_two = [&buffer, &cursor](span str) { @@ -161,13 +160,19 @@ void asl::AslFormat(Formatter& f, uint64_t v) { write_two(s_pairs.subspan(static_cast(v * 2)).first<2>()); } - else if (v > 0 || cursor == kMaxDigits) + else if (v > 0 || cursor == kMaxUint64Digits) { ASL_ASSERT(v < 10); write_one(static_cast('0' + v)); } - f.write(string_view(buffer, kMaxDigits).substr(cursor)); + return string_view(buffer.data(), kMaxUint64Digits).substr(cursor); +} + +void asl::AslFormat(Formatter& f, uint64_t v) +{ + char buffer[kMaxUint64Digits]; + f.write(format_uint64(v, buffer)); } void asl::AslFormat(Formatter& f, int8_t v) diff --git a/asl/format.hpp b/asl/format.hpp index 9504701..ac51693 100644 --- a/asl/format.hpp +++ b/asl/format.hpp @@ -102,4 +102,6 @@ void AslFormat(Formatter& f, int16_t); void AslFormat(Formatter& f, int32_t); void AslFormat(Formatter& f, int64_t); +string_view format_uint64(uint64_t value, span buffer); + } // namespace asl diff --git a/asl/format_float.cpp b/asl/format_float.cpp index 425586f..0a081d5 100644 --- a/asl/format_float.cpp +++ b/asl/format_float.cpp @@ -1,21 +1,98 @@ #include "asl/format.hpp" +#include "asl/float.hpp" -#include +#define JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED 0 +#define JKJ_STATIC_DATA_SECTION_DEFINED 0 +#include -// @Todo Use something like ryu or dragonbox +static constexpr isize_t kZeroCount = 100; +static constexpr char kZeros[kZeroCount] = { + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', +}; + +template +static void format_float(asl::Formatter& f, T value) +{ + if (asl::is_infinity(value)) + { + if (value > 0) + { + f.write("Infinity"_sv); + } + else + { + f.write("-Infinity"_sv); + } + return; + } + + if (value == static_cast(0)) + { + f.write("0"_sv); + return; + } + + if (asl::is_nan(value)) + { + f.write("NaN"_sv); + return; + } + + auto decimal = jkj::dragonbox::to_decimal(value); + + if (decimal.is_negative) { f.write("-"); } + + char buffer[20]; + asl::string_view digits = asl::format_uint64(decimal.significand, buffer); + + if (decimal.exponent >= 0) + { + f.write(digits); + while (decimal.exponent > 0) + { + isize_t to_write = asl::min(static_cast(decimal.exponent), kZeroCount); + f.write(asl::string_view(kZeros, to_write)); + decimal.exponent -= to_write; + } + } + else + { + if (digits.size() <= -decimal.exponent) + { + f.write("0."); + decimal.exponent = -decimal.exponent - static_cast(digits.size()); + while (decimal.exponent > 0) + { + isize_t to_write = asl::min(static_cast(decimal.exponent), kZeroCount); + f.write(asl::string_view(kZeros, to_write)); + decimal.exponent -= to_write; + } + f.write(digits); + } + else + { + f.write(digits.first(digits.size() + decimal.exponent)); + f.write("."); + f.write(digits.last(-decimal.exponent)); + } + } +} void asl::AslFormat(Formatter& f, float value) { - static constexpr isize_t kBufferLength = 64; - char buffer[kBufferLength]; - int output_length = snprintf(buffer, kBufferLength, "%f", (double)value); - f.write(string_view(buffer, output_length)); + format_float(f, value); } void asl::AslFormat(Formatter& f, double value) { - static constexpr isize_t kBufferLength = 64; - char buffer[kBufferLength]; - int output_length = snprintf(buffer, kBufferLength, "%f", value); - f.write(string_view(buffer, output_length)); + format_float(f, value); } diff --git a/asl/meta.hpp b/asl/meta.hpp index ab25a88..35e238b 100644 --- a/asl/meta.hpp +++ b/asl/meta.hpp @@ -160,6 +160,12 @@ template struct _is_array_helper : true_type {}; template concept is_array = _is_array_helper::value; +template struct _is_floating_point_helper : false_type {}; +template<> struct _is_floating_point_helper : true_type {}; +template<> struct _is_floating_point_helper : true_type {}; + +template concept is_floating_point = _is_floating_point_helper>::value; + struct niche {}; template diff --git a/asl/string_view.hpp b/asl/string_view.hpp index 51d4234..c75695c 100644 --- a/asl/string_view.hpp +++ b/asl/string_view.hpp @@ -70,6 +70,16 @@ public: return string_view{m_data + offset, m_size - offset}; // NOLINT(*-pointer-arithmetic) } + constexpr string_view first(isize_t size) const + { + return substr(0, size); + } + + constexpr string_view last(isize_t size) const + { + return substr(m_size - size); + } + constexpr bool operator==(string_view other) const { if (m_size != other.m_size) { return false; } diff --git a/asl/tests/float_tests.cpp b/asl/tests/float_tests.cpp new file mode 100644 index 0000000..981279f --- /dev/null +++ b/asl/tests/float_tests.cpp @@ -0,0 +1,23 @@ +#include + +#include + +ASL_TEST(is_infinity) +{ + ASL_TEST_EXPECT(!asl::is_infinity(0.0F)); + ASL_TEST_EXPECT(!asl::is_infinity(-25.0F)); + ASL_TEST_EXPECT(asl::is_infinity(45.0F / 0.0F)); + ASL_TEST_EXPECT(asl::is_infinity(-45.0F / 0.0F)); + ASL_TEST_EXPECT(asl::is_infinity(asl::infinity())); + ASL_TEST_EXPECT(asl::is_infinity(-asl::infinity())); +} + +ASL_TEST(is_nan) +{ + ASL_TEST_EXPECT(!asl::is_nan(0.0F)); + ASL_TEST_EXPECT(!asl::is_nan(-25.0F)); + ASL_TEST_EXPECT(!asl::is_nan(45.0F / 0.0F)); + ASL_TEST_EXPECT(asl::is_nan(asl::nan())); + ASL_TEST_EXPECT(asl::is_nan(asl::nan())); +} + diff --git a/asl/tests/format_tests.cpp b/asl/tests/format_tests.cpp index dd3db1c..906fce6 100644 --- a/asl/tests/format_tests.cpp +++ b/asl/tests/format_tests.cpp @@ -1,6 +1,7 @@ #include "asl/format.hpp" #include "asl/testing/testing.hpp" #include "asl/allocator.hpp" +#include "asl/float.hpp" static_assert(asl::formattable); @@ -110,11 +111,31 @@ ASL_TEST(format_floats) sink.reset(); asl::format(&sink, "{} {} {}", 0.0F, 1.0, 2.0F); - ASL_TEST_EXPECT(sink.str() == "0.000000 1.000000 2.000000"_sv); + ASL_TEST_EXPECT(sink.str() == "0 1 2"_sv); + + sink.reset(); + asl::format(&sink, "{} {} {}", 0.1F, 0.001F, 0.123F); + ASL_TEST_EXPECT(sink.str() == "0.1 0.001 0.123"_sv); + + sink.reset(); + asl::format(&sink, "{} {}", 1.25F, -22.3); + ASL_TEST_EXPECT(sink.str() == "1.25 -22.3"_sv); + + sink.reset(); + asl::format(&sink, "{}", 1e32); + ASL_TEST_EXPECT(sink.str() == "100000000000000000000000000000000"_sv); + + sink.reset(); + asl::format(&sink, "{}", 123e-8); + ASL_TEST_EXPECT(sink.str() == "0.00000123"_sv); + + sink.reset(); + asl::format(&sink, "{} {}", asl::infinity(), -asl::infinity()); + ASL_TEST_EXPECT(sink.str() == "Infinity -Infinity"_sv); sink.reset(); - asl::format(&sink, "{} {}", 10.25F, -22.3); - ASL_TEST_EXPECT(sink.str() == "10.250000 -22.300000"_sv); + asl::format(&sink, "{}", asl::nan()); + ASL_TEST_EXPECT(sink.str() == "NaN"_sv); } ASL_TEST(format_boolean) diff --git a/asl/tests/meta_tests.cpp b/asl/tests/meta_tests.cpp index 354cea6..fcafe1c 100644 --- a/asl/tests/meta_tests.cpp +++ b/asl/tests/meta_tests.cpp @@ -203,3 +203,9 @@ static_assert(asl::is_const); static_assert(!asl::is_const); static_assert(asl::is_const); +static_assert(asl::is_floating_point); +static_assert(asl::is_floating_point); +static_assert(asl::is_floating_point); +static_assert(!asl::is_floating_point); +static_assert(!asl::is_floating_point); +static_assert(!asl::is_floating_point); diff --git a/asl/tests/string_view_tests.cpp b/asl/tests/string_view_tests.cpp index 1340247..708877d 100644 --- a/asl/tests/string_view_tests.cpp +++ b/asl/tests/string_view_tests.cpp @@ -58,6 +58,38 @@ ASL_TEST(substr2) ASL_TEST_ASSERT(s2.size() == 0); } +ASL_TEST(first) +{ + asl::string_view s1 = "abcd"; + + asl::string_view s2 = s1.first(0); + ASL_TEST_ASSERT(s2.size() == 0); + + s2 = s1.first(2); + ASL_TEST_ASSERT(s2.size() == 2); + ASL_TEST_EXPECT(asl::memcmp(s2.data(), "ab", 2) == 0); + + s2 = s1.first(4); + ASL_TEST_ASSERT(s2.size() == 4); + ASL_TEST_EXPECT(asl::memcmp(s2.data(), "abcd", 4) == 0); +} + +ASL_TEST(last) +{ + asl::string_view s1 = "abcd"; + + asl::string_view s2 = s1.last(0); + ASL_TEST_ASSERT(s2.size() == 0); + + s2 = s1.last(2); + ASL_TEST_ASSERT(s2.size() == 2); + ASL_TEST_EXPECT(asl::memcmp(s2.data(), "cd", 2) == 0); + + s2 = s1.last(4); + ASL_TEST_ASSERT(s2.size() == 4); + ASL_TEST_EXPECT(asl::memcmp(s2.data(), "abcd", 4) == 0); +} + ASL_TEST(equal) { ASL_TEST_EXPECT("abc"_sv == "abc"_sv); diff --git a/vendor/dragonbox/BUILD.bazel b/vendor/dragonbox/BUILD.bazel new file mode 100644 index 0000000..7a531d4 --- /dev/null +++ b/vendor/dragonbox/BUILD.bazel @@ -0,0 +1,6 @@ +cc_library( + name = "dragonbox", + hdrs = ["dragonbox.h"], + includes = ["."], + visibility = ["//:__subpackages__"], +) diff --git a/vendor/dragonbox/LICENSE.txt b/vendor/dragonbox/LICENSE.txt new file mode 100644 index 0000000..bd8b243 --- /dev/null +++ b/vendor/dragonbox/LICENSE.txt @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. diff --git a/vendor/dragonbox/dragonbox.h b/vendor/dragonbox/dragonbox.h new file mode 100644 index 0000000..7cd8d19 --- /dev/null +++ b/vendor/dragonbox/dragonbox.h @@ -0,0 +1,4205 @@ +// Copyright 2020-2024 Junekey Jeon +// +// The contents of this file may be used under the terms of +// the Apache License v2.0 with LLVM Exceptions. +// +// (See accompanying file LICENSE-Apache or copy at +// https://llvm.org/foundation/relicensing/LICENSE.txt) +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// (See accompanying file LICENSE-Boost or copy at +// https://www.boost.org/LICENSE_1_0.txt) +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. + + +#ifndef JKJ_HEADER_DRAGONBOX +#define JKJ_HEADER_DRAGONBOX + +// Attribute for storing static data into a dedicated place, e.g. flash memory. Every ODR-used +// static data declaration will be decorated with this macro. The users may define this macro, +// before including the library headers, into whatever they want. +#ifndef JKJ_STATIC_DATA_SECTION + #define JKJ_STATIC_DATA_SECTION +#else + #define JKJ_STATIC_DATA_SECTION_DEFINED 1 +#endif + +// To use the library with toolchains without standard C++ headers, the users may define this macro +// into their custom namespace which contains the definitions of all the standard C++ library +// features used in this header. (The list can be found below.) +#ifndef JKJ_STD_REPLACEMENT_NAMESPACE + #define JKJ_STD_REPLACEMENT_NAMESPACE std + #include + #include + #include + #include + #include + + #ifdef __has_include + #if __has_include() + #include + #endif + #endif +#else + #define JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED 1 +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// Language feature detections. +//////////////////////////////////////////////////////////////////////////////////////// + +// C++14 constexpr +#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L + #define JKJ_HAS_CONSTEXPR14 1 +#elif __cplusplus >= 201402L + #define JKJ_HAS_CONSTEXPR14 1 +#elif defined(_MSC_VER) && _MSC_VER >= 1910 && _MSVC_LANG >= 201402L + #define JKJ_HAS_CONSTEXPR14 1 +#else + #define JKJ_HAS_CONSTEXPR14 0 +#endif + +#if JKJ_HAS_CONSTEXPR14 + #define JKJ_CONSTEXPR14 constexpr +#else + #define JKJ_CONSTEXPR14 +#endif + +// C++17 constexpr lambdas +#if defined(__cpp_constexpr) && __cpp_constexpr >= 201603L + #define JKJ_HAS_CONSTEXPR17 1 +#elif __cplusplus >= 201703L + #define JKJ_HAS_CONSTEXPR17 1 +#elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L + #define JKJ_HAS_CONSTEXPR17 1 +#else + #define JKJ_HAS_CONSTEXPR17 0 +#endif + +// C++17 inline variables +#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L + #define JKJ_HAS_INLINE_VARIABLE 1 +#elif __cplusplus >= 201703L + #define JKJ_HAS_INLINE_VARIABLE 1 +#elif defined(_MSC_VER) && _MSC_VER >= 1912 && _MSVC_LANG >= 201703L + #define JKJ_HAS_INLINE_VARIABLE 1 +#else + #define JKJ_HAS_INLINE_VARIABLE 0 +#endif + +#if JKJ_HAS_INLINE_VARIABLE + #define JKJ_INLINE_VARIABLE inline constexpr +#else + #define JKJ_INLINE_VARIABLE static constexpr +#endif + +// C++17 if constexpr +#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L + #define JKJ_HAS_IF_CONSTEXPR 1 +#elif __cplusplus >= 201703L + #define JKJ_HAS_IF_CONSTEXPR 1 +#elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L + #define JKJ_HAS_IF_CONSTEXPR 1 +#else + #define JKJ_HAS_IF_CONSTEXPR 0 +#endif + +#if JKJ_HAS_IF_CONSTEXPR + #define JKJ_IF_CONSTEXPR if constexpr +#else + #define JKJ_IF_CONSTEXPR if +#endif + +// C++20 std::bit_cast +#if JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED + #if JKJ_STD_REPLACEMENT_HAS_BIT_CAST + #define JKJ_HAS_BIT_CAST 1 + #else + #define JKJ_HAS_BIT_CAST 0 + #endif +#elif defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L + #include + #define JKJ_HAS_BIT_CAST 1 +#else + #define JKJ_HAS_BIT_CAST 0 +#endif + +// C++23 if consteval or C++20 std::is_constant_evaluated +#if defined(__cpp_if_consteval) && __cpp_is_consteval >= 202106L + #define JKJ_IF_CONSTEVAL if consteval + #define JKJ_IF_NOT_CONSTEVAL if !consteval + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 + #define JKJ_USE_IS_CONSTANT_EVALUATED 0 +#elif JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED + #if JKJ_STD_REPLACEMENT_HAS_IS_CONSTANT_EVALUATED + #define JKJ_IF_CONSTEVAL if (stdr::is_constant_evaluated()) + #define JKJ_IF_NOT_CONSTEVAL if (!stdr::is_constant_evaluated()) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 + #define JKJ_USE_IS_CONSTANT_EVALUATED 1 + #elif JKJ_HAS_IF_CONSTEXPR + #define JKJ_IF_CONSTEVAL if constexpr (false) + #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #define JKJ_USE_IS_CONSTANT_EVALUATED 0 + #else + #define JKJ_IF_CONSTEVAL if (false) + #define JKJ_IF_NOT_CONSTEVAL if (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #define JKJ_USE_IS_CONSTANT_EVALUATED 0 + #endif +#else + #if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L + #define JKJ_IF_CONSTEVAL if (stdr::is_constant_evaluated()) + #define JKJ_IF_NOT_CONSTEVAL if (!stdr::is_constant_evaluated()) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 + #define JKJ_USE_IS_CONSTANT_EVALUATED 1 + #elif JKJ_HAS_IF_CONSTEXPR + #define JKJ_IF_CONSTEVAL if constexpr (false) + #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #define JKJ_USE_IS_CONSTANT_EVALUATED 0 + #else + #define JKJ_IF_CONSTEVAL if (false) + #define JKJ_IF_NOT_CONSTEVAL if (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #define JKJ_USE_IS_CONSTANT_EVALUATED 0 + #endif +#endif + +#if JKJ_CAN_BRANCH_ON_CONSTEVAL && JKJ_HAS_BIT_CAST + #define JKJ_CONSTEXPR20 constexpr +#else + #define JKJ_CONSTEXPR20 +#endif + +// Suppress additional buffer overrun check. +// I have no idea why MSVC thinks some functions here are vulnerable to the buffer overrun +// attacks. No, they aren't. +#if defined(__GNUC__) || defined(__clang__) + #define JKJ_SAFEBUFFERS + #define JKJ_FORCEINLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) + #define JKJ_SAFEBUFFERS __declspec(safebuffers) + #define JKJ_FORCEINLINE __forceinline +#else + #define JKJ_SAFEBUFFERS + #define JKJ_FORCEINLINE inline +#endif + +#if defined(__has_builtin) + #define JKJ_HAS_BUILTIN(x) __has_builtin(x) +#else + #define JKJ_HAS_BUILTIN(x) false +#endif + +#if defined(_MSC_VER) + #include +#elif defined(__INTEL_COMPILER) + #include +#endif + +namespace jkj { + namespace dragonbox { + //////////////////////////////////////////////////////////////////////////////////////// + // The Compatibility layer for toolchains without standard C++ headers. + //////////////////////////////////////////////////////////////////////////////////////// + namespace detail { + namespace stdr { + // +#if JKJ_HAS_BIT_CAST + using JKJ_STD_REPLACEMENT_NAMESPACE::bit_cast; +#endif + + // + // We need assert() macro, but it is not namespaced anyway, so nothing to do here. + + // + using JKJ_STD_REPLACEMENT_NAMESPACE::int_least8_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::int_least16_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::int_least32_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::int_fast8_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::int_fast16_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::int_fast32_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least8_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least16_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least32_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least64_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_fast8_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_fast16_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_fast32_t; + // We need INT32_C, UINT32_C and UINT64_C macros too, but again there is nothing to do + // here. + + // + using JKJ_STD_REPLACEMENT_NAMESPACE::size_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::memcpy; + + // + template + using numeric_limits = JKJ_STD_REPLACEMENT_NAMESPACE::numeric_limits; + + // + template + using enable_if = JKJ_STD_REPLACEMENT_NAMESPACE::enable_if; + template + using add_rvalue_reference = JKJ_STD_REPLACEMENT_NAMESPACE::add_rvalue_reference; + template + using conditional = JKJ_STD_REPLACEMENT_NAMESPACE::conditional; +#if JKJ_USE_IS_CONSTANT_EVALUATED + using JKJ_STD_REPLACEMENT_NAMESPACE::is_constant_evaluated; +#endif + template + using is_same = JKJ_STD_REPLACEMENT_NAMESPACE::is_same; +#if !JKJ_HAS_BIT_CAST + template + using is_trivially_copyable = JKJ_STD_REPLACEMENT_NAMESPACE::is_trivially_copyable; +#endif + template + using is_integral = JKJ_STD_REPLACEMENT_NAMESPACE::is_integral; + template + using is_signed = JKJ_STD_REPLACEMENT_NAMESPACE::is_signed; + template + using is_unsigned = JKJ_STD_REPLACEMENT_NAMESPACE::is_unsigned; + } + } + + + //////////////////////////////////////////////////////////////////////////////////////// + // Some general utilities for C++11-compatibility. + //////////////////////////////////////////////////////////////////////////////////////// + namespace detail { +#if !JKJ_HAS_CONSTEXPR17 + template + struct index_sequence {}; + + template + struct make_index_sequence_impl { + using type = typename make_index_sequence_impl::type; + }; + + template + struct make_index_sequence_impl { + using type = index_sequence; + }; + + template + using make_index_sequence = typename make_index_sequence_impl<0, N, void>::type; +#endif + + // Available since C++11, but including just for this is an overkill. + template + typename stdr::add_rvalue_reference::type declval() noexcept; + + // Similarly, including is an overkill. + template + struct array { + T data_[N]; + constexpr T operator[](stdr::size_t idx) const noexcept { return data_[idx]; } + JKJ_CONSTEXPR14 T& operator[](stdr::size_t idx) noexcept { return data_[idx]; } + }; + } + + + //////////////////////////////////////////////////////////////////////////////////////// + // Some basic features for encoding/decoding IEEE-754 formats. + //////////////////////////////////////////////////////////////////////////////////////// + namespace detail { + template + struct physical_bits { + static constexpr stdr::size_t value = + sizeof(T) * stdr::numeric_limits::digits; + }; + template + struct value_bits { + static constexpr stdr::size_t value = stdr::numeric_limits< + typename stdr::enable_if::value, T>::type>::digits; + }; + + template + JKJ_CONSTEXPR20 To bit_cast(const From& from) { +#if JKJ_HAS_BIT_CAST + return stdr::bit_cast(from); +#else + static_assert(sizeof(From) == sizeof(To), ""); + static_assert(stdr::is_trivially_copyable::value, ""); + static_assert(stdr::is_trivially_copyable::value, ""); + To to; + stdr::memcpy(&to, &from, sizeof(To)); + return to; +#endif + } + } + + // These classes expose encoding specs of IEEE-754-like floating-point formats. + // Currently available formats are IEEE-754 binary32 & IEEE-754 binary64. + + struct ieee754_binary32 { + static constexpr int total_bits = 32; + static constexpr int significand_bits = 23; + static constexpr int exponent_bits = 8; + static constexpr int min_exponent = -126; + static constexpr int max_exponent = 127; + static constexpr int exponent_bias = -127; + static constexpr int decimal_significand_digits = 9; + static constexpr int decimal_exponent_digits = 2; + }; + struct ieee754_binary64 { + static constexpr int total_bits = 64; + static constexpr int significand_bits = 52; + static constexpr int exponent_bits = 11; + static constexpr int min_exponent = -1022; + static constexpr int max_exponent = 1023; + static constexpr int exponent_bias = -1023; + static constexpr int decimal_significand_digits = 17; + static constexpr int decimal_exponent_digits = 3; + }; + + // A floating-point format traits class defines ways to interpret a bit pattern of given size as + // an encoding of floating-point number. This is an implementation of such a traits class, + // supporting ways to interpret IEEE-754 binary floating-point numbers. + template + struct ieee754_binary_traits { + // CarrierUInt needs to have enough size to hold the entire contents of floating-point + // numbers. The actual bits are assumed to be aligned to the LSB, and every other bits are + // assumed to be zeroed. + static_assert(detail::value_bits::value >= Format::total_bits, + "jkj::dragonbox: insufficient number of bits"); + static_assert(detail::stdr::is_unsigned::value, ""); + + // ExponentUInt needs to be large enough to hold (unsigned) exponent bits as well as the + // (signed) actual exponent. + // TODO: static overflow guard against intermediate computations. + static_assert(detail::value_bits::value >= Format::exponent_bits + 1, + "jkj::dragonbox: insufficient number of bits"); + static_assert(detail::stdr::is_signed::value, ""); + + using format = Format; + using carrier_uint = CarrierUInt; + static constexpr int carrier_bits = int(detail::value_bits::value); + using exponent_int = ExponentInt; + + // Extract exponent bits from a bit pattern. + // The result must be aligned to the LSB so that there is no additional zero paddings + // on the right. This function does not do bias adjustment. + static constexpr exponent_int extract_exponent_bits(carrier_uint u) noexcept { + return exponent_int((u >> format::significand_bits) & + ((exponent_int(1) << format::exponent_bits) - 1)); + } + + // Extract significand bits from a bit pattern. + // The result must be aligned to the LSB so that there is no additional zero paddings + // on the right. The result does not contain the implicit bit. + static constexpr carrier_uint extract_significand_bits(carrier_uint u) noexcept { + return carrier_uint(u & ((carrier_uint(1) << format::significand_bits) - 1u)); + } + + // Remove the exponent bits and extract significand bits together with the sign bit. + static constexpr carrier_uint remove_exponent_bits(carrier_uint u) noexcept { + return carrier_uint(u & ~(((carrier_uint(1) << format::exponent_bits) - 1u) + << format::significand_bits)); + } + + // Shift the obtained signed significand bits to the left by 1 to remove the sign bit. + static constexpr carrier_uint remove_sign_bit_and_shift(carrier_uint u) noexcept { + return carrier_uint((carrier_uint(u) << 1) & + ((((carrier_uint(1) << (Format::total_bits - 1)) - 1u) << 1) | 1u)); + } + + // Obtain the actual value of the binary exponent from the extracted exponent bits. + static constexpr exponent_int binary_exponent(exponent_int exponent_bits) noexcept { + return exponent_int(exponent_bits == 0 ? format::min_exponent + : exponent_bits + format::exponent_bias); + } + + // Obtain the actual value of the binary significand from the extracted significand bits + // and exponent bits. + static constexpr carrier_uint binary_significand(carrier_uint significand_bits, + exponent_int exponent_bits) noexcept { + return carrier_uint( + exponent_bits == 0 + ? significand_bits + : (significand_bits | (carrier_uint(1) << format::significand_bits))); + } + + /* Various boolean observer functions */ + + static constexpr bool is_nonzero(carrier_uint u) noexcept { + return (u & ((carrier_uint(1) << (format::significand_bits + format::exponent_bits)) - + 1u)) != 0; + } + static constexpr bool is_positive(carrier_uint u) noexcept { + return u < (carrier_uint(1) << (format::significand_bits + format::exponent_bits)); + } + static constexpr bool is_negative(carrier_uint u) noexcept { return !is_positive(u); } + static constexpr bool is_finite(exponent_int exponent_bits) noexcept { + return exponent_bits != ((exponent_int(1) << format::exponent_bits) - 1); + } + static constexpr bool has_all_zero_significand_bits(carrier_uint u) noexcept { + return ((u << 1) & + ((((carrier_uint(1) << (Format::total_bits - 1)) - 1u) << 1) | 1u)) == 0; + } + static constexpr bool has_even_significand_bits(carrier_uint u) noexcept { + return u % 2 == 0; + } + }; + + // Convert between bit patterns stored in carrier_uint and instances of an actual + // floating-point type. Depending on format and carrier_uint, this operation might not + // be possible for some specific bit patterns. However, the contract is that u always + // denotes a valid bit pattern, so the functions here are assumed to be noexcept. + // Users might specialize this class to change the behavior for certain types. + // The default provided by the library is to treat the given floating-point type Float as either + // IEEE-754 binary32 or IEEE-754 binary64, depending on the bitwise size of Float. + template + struct default_float_bit_carrier_conversion_traits { + // Guards against types that have different internal representations than IEEE-754 + // binary32/64. I don't know if there is a truly reliable way of detecting IEEE-754 binary + // formats. I just did my best here. Note that in some cases + // numeric_limits::is_iec559 may report false even if the internal representation is + // IEEE-754 compatible. In such a case, the user can specialize this traits template and + // remove this static sanity check in order to make Dragonbox work for Float. + static_assert(detail::stdr::numeric_limits::is_iec559 && + detail::stdr::numeric_limits::radix == 2 && + (detail::physical_bits::value == 32 || + detail::physical_bits::value == 64), + "jkj::dragonbox: Float may not be of IEEE-754 binary32/binary64"); + + // Specifies the unsigned integer type to hold bitwise value of Float. + using carrier_uint = + typename detail::stdr::conditional::value == 32, + detail::stdr::uint_least32_t, + detail::stdr::uint_least64_t>::type; + + // Specifies the floating-point format. + using format = typename detail::stdr::conditional::value == 32, + ieee754_binary32, ieee754_binary64>::type; + + // Converts the floating-point type into the bit-carrier unsigned integer type. + static JKJ_CONSTEXPR20 carrier_uint float_to_carrier(Float x) noexcept { + return detail::bit_cast(x); + } + + // Converts the bit-carrier unsigned integer type into the floating-point type. + static JKJ_CONSTEXPR20 Float carrier_to_float(carrier_uint x) noexcept { + return detail::bit_cast(x); + } + }; + + // Convenient wrappers for floating-point traits classes. + // In order to reduce the argument passing overhead, these classes should be as simple as + // possible (e.g., no inheritance, no private non-static data member, etc.; this is an + // unfortunate fact about common ABI convention). + + template + struct signed_significand_bits { + using format_traits = FormatTraits; + using carrier_uint = typename format_traits::carrier_uint; + + carrier_uint u; + + signed_significand_bits() = default; + constexpr explicit signed_significand_bits(carrier_uint bit_pattern) noexcept + : u{bit_pattern} {} + + // Shift the obtained signed significand bits to the left by 1 to remove the sign bit. + constexpr carrier_uint remove_sign_bit_and_shift() const noexcept { + return format_traits::remove_sign_bit_and_shift(u); + } + + constexpr bool is_positive() const noexcept { return format_traits::is_positive(u); } + constexpr bool is_negative() const noexcept { return format_traits::is_negative(u); } + constexpr bool has_all_zero_significand_bits() const noexcept { + return format_traits::has_all_zero_significand_bits(u); + } + constexpr bool has_even_significand_bits() const noexcept { + return format_traits::has_even_significand_bits(u); + } + }; + + template + struct float_bits { + using format_traits = FormatTraits; + using carrier_uint = typename format_traits::carrier_uint; + using exponent_int = typename format_traits::exponent_int; + + carrier_uint u; + + float_bits() = default; + constexpr explicit float_bits(carrier_uint bit_pattern) noexcept : u{bit_pattern} {} + + // Extract exponent bits from a bit pattern. + // The result must be aligned to the LSB so that there is no additional zero paddings + // on the right. This function does not do bias adjustment. + constexpr exponent_int extract_exponent_bits() const noexcept { + return format_traits::extract_exponent_bits(u); + } + + // Extract significand bits from a bit pattern. + // The result must be aligned to the LSB so that there is no additional zero paddings + // on the right. The result does not contain the implicit bit. + constexpr carrier_uint extract_significand_bits() const noexcept { + return format_traits::extract_significand_bits(u); + } + + // Remove the exponent bits and extract significand bits together with the sign bit. + constexpr signed_significand_bits remove_exponent_bits() const noexcept { + return signed_significand_bits(format_traits::remove_exponent_bits(u)); + } + + // Obtain the actual value of the binary exponent from the extracted exponent bits. + static constexpr exponent_int binary_exponent(exponent_int exponent_bits) noexcept { + return format_traits::binary_exponent(exponent_bits); + } + constexpr exponent_int binary_exponent() const noexcept { + return binary_exponent(extract_exponent_bits()); + } + + // Obtain the actual value of the binary exponent from the extracted significand bits + // and exponent bits. + static constexpr carrier_uint binary_significand(carrier_uint significand_bits, + exponent_int exponent_bits) noexcept { + return format_traits::binary_significand(significand_bits, exponent_bits); + } + constexpr carrier_uint binary_significand() const noexcept { + return binary_significand(extract_significand_bits(), extract_exponent_bits()); + } + + constexpr bool is_nonzero() const noexcept { return format_traits::is_nonzero(u); } + constexpr bool is_positive() const noexcept { return format_traits::is_positive(u); } + constexpr bool is_negative() const noexcept { return format_traits::is_negative(u); } + constexpr bool is_finite(exponent_int exponent_bits) const noexcept { + return format_traits::is_finite(exponent_bits); + } + constexpr bool is_finite() const noexcept { + return format_traits::is_finite(extract_exponent_bits()); + } + constexpr bool has_even_significand_bits() const noexcept { + return format_traits::has_even_significand_bits(u); + } + }; + + template , + class FormatTraits = ieee754_binary_traits> + JKJ_CONSTEXPR20 float_bits make_float_bits(Float x) noexcept { + return float_bits(ConversionTraits::float_to_carrier(x)); + } + + namespace detail { + //////////////////////////////////////////////////////////////////////////////////////// + // Bit operation intrinsics. + //////////////////////////////////////////////////////////////////////////////////////// + + namespace bits { + // Most compilers should be able to optimize this into the ROR instruction. + // n is assumed to be at most of bit_width bits. + template + JKJ_CONSTEXPR14 UInt rotr(UInt n, unsigned int r) noexcept { + static_assert(bit_width > 0, "jkj::dragonbox: rotation bit-width must be positive"); + static_assert(bit_width <= value_bits::value, + "jkj::dragonbox: rotation bit-width is too large"); + r &= (bit_width - 1); + return (n >> r) | (n << ((bit_width - r) & (bit_width - 1))); + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + // Utilities for wide unsigned integer arithmetic. + //////////////////////////////////////////////////////////////////////////////////////// + + namespace wuint { + // Compilers might support built-in 128-bit integer types. However, it seems that + // emulating them with a pair of 64-bit integers actually produces a better code, + // so we avoid using those built-ins. That said, they are still useful for + // implementing 64-bit x 64-bit -> 128-bit multiplication. + + // clang-format off +#if defined(__SIZEOF_INT128__) + // To silence "error: ISO C++ does not support '__int128' for 'type name' + // [-Wpedantic]" +#if defined(__GNUC__) + __extension__ +#endif + using builtin_uint128_t = unsigned __int128; +#endif + // clang-format on + + struct uint128 { + uint128() = default; + + stdr::uint_least64_t high_; + stdr::uint_least64_t low_; + + constexpr uint128(stdr::uint_least64_t high, stdr::uint_least64_t low) noexcept + : high_{high}, low_{low} {} + + constexpr stdr::uint_least64_t high() const noexcept { return high_; } + constexpr stdr::uint_least64_t low() const noexcept { return low_; } + + JKJ_CONSTEXPR20 uint128& operator+=(stdr::uint_least64_t n) & noexcept { + auto const generic_impl = [&] { + auto const sum = (low_ + n) & UINT64_C(0xffffffffffffffff); + high_ += (sum < low_ ? 1 : 0); + low_ = sum; + }; + // To suppress warning. + static_cast(generic_impl); + + JKJ_IF_CONSTEXPR(value_bits::value > 64) { + generic_impl(); + return *this; + } + + JKJ_IF_CONSTEVAL { + generic_impl(); + return *this; + } + + // See https://github.com/fmtlib/fmt/pull/2985. +#if JKJ_HAS_BUILTIN(__builtin_addcll) && !defined(__ibmxl__) + JKJ_IF_CONSTEXPR( + stdr::is_same::value) { + unsigned long long carry{}; + low_ = stdr::uint_least64_t(__builtin_addcll(low_, n, 0, &carry)); + high_ = stdr::uint_least64_t(__builtin_addcll(high_, 0, carry, &carry)); + return *this; + } +#endif +#if JKJ_HAS_BUILTIN(__builtin_addcl) && !defined(__ibmxl__) + JKJ_IF_CONSTEXPR(stdr::is_same::value) { + unsigned long carry{}; + low_ = stdr::uint_least64_t( + __builtin_addcl(static_cast(low_), + static_cast(n), 0, &carry)); + high_ = stdr::uint_least64_t( + __builtin_addcl(static_cast(high_), 0, carry, &carry)); + return *this; + } +#endif +#if JKJ_HAS_BUILTIN(__builtin_addc) && !defined(__ibmxl__) + JKJ_IF_CONSTEXPR(stdr::is_same::value) { + unsigned int carry{}; + low_ = stdr::uint_least64_t(__builtin_addc(static_cast(low_), + static_cast(n), 0, + &carry)); + high_ = stdr::uint_least64_t( + __builtin_addc(static_cast(high_), 0, carry, &carry)); + return *this; + } +#endif + +#if JKJ_HAS_BUILTIN(__builtin_ia32_addcarry_u64) + // __builtin_ia32_addcarry_u64 is not documented, but it seems it takes unsigned + // long long arguments. + unsigned long long result{}; + auto const carry = __builtin_ia32_addcarry_u64(0, low_, n, &result); + low_ = stdr::uint_least64_t(result); + __builtin_ia32_addcarry_u64(carry, high_, 0, &result); + high_ = stdr::uint_least64_t(result); +#elif defined(_MSC_VER) && defined(_M_X64) + // On MSVC, uint_least64_t and __int64 must be unsigned long long; see + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/standard-types + // and https://learn.microsoft.com/en-us/cpp/cpp/int8-int16-int32-int64. + static_assert(stdr::is_same::value, + ""); + auto const carry = _addcarry_u64(0, low_, n, &low_); + _addcarry_u64(carry, high_, 0, &high_); +#elif defined(__INTEL_COMPILER) && (defined(_M_X64) || defined(__x86_64)) + // Cannot find any documentation on how things are defined, but hopefully this + // is always true... + static_assert(stdr::is_same::value, ""); + auto const carry = _addcarry_u64(0, low_, n, &low_); + _addcarry_u64(carry, high_, 0, &high_); +#else + generic_impl(); +#endif + return *this; + } + }; + + inline JKJ_CONSTEXPR20 stdr::uint_least64_t umul64(stdr::uint_least32_t x, + stdr::uint_least32_t y) noexcept { +#if defined(_MSC_VER) && defined(_M_IX86) + JKJ_IF_NOT_CONSTEVAL { return __emulu(x, y); } +#endif + return x * stdr::uint_least64_t(y); + } + + // Get 128-bit result of multiplication of two 64-bit unsigned integers. + JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 + umul128(stdr::uint_least64_t x, stdr::uint_least64_t y) noexcept { + auto const generic_impl = [=]() -> uint128 { + auto const a = stdr::uint_least32_t(x >> 32); + auto const b = stdr::uint_least32_t(x); + auto const c = stdr::uint_least32_t(y >> 32); + auto const d = stdr::uint_least32_t(y); + + auto const ac = umul64(a, c); + auto const bc = umul64(b, c); + auto const ad = umul64(a, d); + auto const bd = umul64(b, d); + + auto const intermediate = + (bd >> 32) + stdr::uint_least32_t(ad) + stdr::uint_least32_t(bc); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + stdr::uint_least32_t(bd)}; + }; + // To silence warning. + static_cast(generic_impl); + +#if defined(__SIZEOF_INT128__) + auto const result = builtin_uint128_t(x) * builtin_uint128_t(y); + return {stdr::uint_least64_t(result >> 64), stdr::uint_least64_t(result)}; +#elif defined(_MSC_VER) && defined(_M_X64) + JKJ_IF_CONSTEVAL { + // This redundant variable is to workaround MSVC's codegen bug caused by the + // interaction of NRVO and intrinsics. + auto const result = generic_impl(); + return result; + } + uint128 result; + #if defined(__AVX2__) + result.low_ = _mulx_u64(x, y, &result.high_); + #else + result.low_ = _umul128(x, y, &result.high_); + #endif + return result; +#else + return generic_impl(); +#endif + } + + // Get high half of the 128-bit result of multiplication of two 64-bit unsigned + // integers. + JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 stdr::uint_least64_t + umul128_upper64(stdr::uint_least64_t x, stdr::uint_least64_t y) noexcept { + auto const generic_impl = [=]() -> stdr::uint_least64_t { + auto const a = stdr::uint_least32_t(x >> 32); + auto const b = stdr::uint_least32_t(x); + auto const c = stdr::uint_least32_t(y >> 32); + auto const d = stdr::uint_least32_t(y); + + auto const ac =