From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1888) id CB6E93858D3C; Mon, 17 Jan 2022 19:33:29 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org CB6E93858D3C MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Patrick Palka To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r12-6645] libstdc++: Directly implement hexfloat std::from_chars for binary32/64 X-Act-Checkin: gcc X-Git-Author: Patrick Palka X-Git-Refname: refs/heads/master X-Git-Oldrev: 6cb61e50ade62f1e7c8653b8d6fc6a632fd3977f X-Git-Newrev: cc3bf3404e4b1cdd1110e450bd5df45fdaaaae85 Message-Id: <20220117193329.CB6E93858D3C@sourceware.org> Date: Mon, 17 Jan 2022 19:33:29 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 17 Jan 2022 19:33:29 -0000 https://gcc.gnu.org/g:cc3bf3404e4b1cdd1110e450bd5df45fdaaaae85 commit r12-6645-gcc3bf3404e4b1cdd1110e450bd5df45fdaaaae85 Author: Patrick Palka Date: Mon Jan 17 14:32:13 2022 -0500 libstdc++: Directly implement hexfloat std::from_chars for binary32/64 libstdc++-v3/ChangeLog: * src/c++17/floating_from_chars.cc: Include <bit>. (ascii_to_hexit, starts_with_ci): Conditionally define. (__floating_from_chars_hex): Conditionally define. (from_chars): Use __floating_from_chars_hex for chars_format::hex parsing of binary32/64 float/double. (testsuite/20_util/from_chars/7.cc): New test. 
Diff: --- libstdc++-v3/src/c++17/floating_from_chars.cc | 374 +++++++++++++++++++++++++ libstdc++-v3/testsuite/20_util/from_chars/7.cc | 152 ++++++++++ 2 files changed, 526 insertions(+) diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc index 479e042bb5f..3ad5b409da7 100644 --- a/libstdc++-v3/src/c++17/floating_from_chars.cc +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc @@ -31,6 +31,7 @@ #define _GLIBCXX_USE_CXX11_ABI 1 #include +#include #include #include #include @@ -396,6 +397,369 @@ namespace } #endif +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 + // If the given ASCII character represents a hexit, return that hexit. + // Otherwise return -1. + int + ascii_to_hexit(char ch) + { + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'a' && ch <= 'f') + return ch - 'a' + 10; + if (ch >= 'A' && ch <= 'F') + return ch - 'A' + 10; + return -1; + } + + // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. + bool + starts_with_ci(const char* first, const char* last, string_view prefix) + { + __glibcxx_requires_valid_range(first, last); + + for (char ch : prefix) + { + __glibcxx_assert(ch >= 'a' && ch <= 'z'); + if (first == last || (*first != ch && *first != ch - 32)) + return false; + ++first; + } + + return true; + } + + // An implementation of hexadecimal float parsing for binary32/64. + template + from_chars_result + __floating_from_chars_hex(const char* first, const char* last, T& value) + { + static_assert(is_same_v || is_same_v); + + using uint_t = conditional_t, uint32_t, uint64_t>; + constexpr int mantissa_bits = is_same_v ? 23 : 52; + constexpr int exponent_bits = is_same_v ? 8 : 11; + constexpr int exponent_bias = (1 << (exponent_bits - 1)) - 1; + + __glibcxx_requires_valid_range(first, last); + if (first == last) + return {first, errc::invalid_argument}; + + // Consume the sign bit. 
+ const char* const orig_first = first; + bool sign_bit = false; + if (*first == '-') + { + sign_bit = true; + ++first; + } + + // Handle "inf", "infinity", "NaN" and variants thereof. + if (first != last) + if (*first == 'i' || *first == 'I' || *first == 'n' || *first == 'N') [[unlikely]] + { + if (starts_with_ci(first, last, "inf"sv)) + { + first += strlen("inf"); + if (starts_with_ci(first, last, "inity"sv)) + first += strlen("inity"); + + uint_t result = 0; + result |= sign_bit; + result <<= exponent_bits; + result |= (1ull << exponent_bits) - 1; + result <<= mantissa_bits; + memcpy(&value, &result, sizeof(result)); + + return {first, errc{}}; + } + else if (starts_with_ci(first, last, "nan")) + { + first += strlen("nan"); + + if (first != last && *first == '(') + { + // Tentatively consume the '(' as we look for an optional + // n-char-sequence followed by a ')'. + const char* const fallback_first = first; + for (;;) + { + ++first; + if (first == last) + { + first = fallback_first; + break; + } + + char ch = *first; + if (ch == ')') + { + ++first; + break; + } + else if ((ch >= '0' && ch <= '9') + || (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || ch == '_') + continue; + else + { + first = fallback_first; + break; + } + } + } + + // We make the implementation-defined decision of ignoring the + // sign bit and the n-char-sequence when assembling the NaN. + uint_t result = 0; + result <<= exponent_bits; + result |= (1ull << exponent_bits) - 1; + result <<= mantissa_bits; + result |= (1ull << (mantissa_bits - 1)) | 1; + memcpy(&value, &result, sizeof(result)); + + return {first, errc{}}; + } + } + + // Consume all insignificant leading zeros in the whole part of the + // mantissa. 
+ bool seen_hexit = false; + while (first != last && *first == '0') + { + seen_hexit = true; + ++first; + } + + // Now consume the rest of the written mantissa, populating MANTISSA with + // the first MANTISSA_BITS+k significant bits of the written mantissa, where + // 1 <= k <= 4 is the bit width of the leading significant written hexit. + // + // Examples: + // After parsing "1.2f3", MANTISSA is 0x12f30000000000 (bit_width=52+1). + // After parsing ".0000f0e", MANTISSA is 0xf0e00000000000 (bit_width=52+4). + // After parsing ".1234567890abcd8", MANTISSA is 0x1234567890abcd (bit_width=52+1) + // and MIDPOINT_BIT is true (and NONZERO_TAIL is false). + uint_t mantissa = 0; + int mantissa_idx = mantissa_bits; // The current bit index into MANTISSA + // into which we'll write the next hexit. + int exponent_adjustment = 0; // How much we'd have to adjust the written + // exponent in order to represent the mantissa + // in scientific form h.hhhhhhhhhhhhh. + bool midpoint_bit = false; // Whether the MANTISSA_BITS+k+1 significant + // bit is set in the written mantissa. + bool nonzero_tail = false; // Whether some bit thereafter is set in the + // written mantissa. + bool seen_decimal_point = false; + for (; first != last; ++first) + { + char ch = *first; + if (ch == '.' 
&& !seen_decimal_point) + { + seen_decimal_point = true; + continue; + } + + int hexit = ascii_to_hexit(ch); + if (hexit == -1) + break; + seen_hexit = true; + + if (!seen_decimal_point && mantissa != 0) + exponent_adjustment += 4; + else if (seen_decimal_point && mantissa == 0) + { + exponent_adjustment -= 4; + if (hexit == 0x0) + continue; + } + + if (mantissa_idx >= 0) + mantissa |= uint_t(hexit) << mantissa_idx; + else if (mantissa_idx >= -4) + { + if constexpr (is_same_v) + { + __glibcxx_assert(mantissa_idx == -1); + mantissa |= hexit >> 1; + midpoint_bit = (hexit & 0b0001) != 0; + } + else + { + __glibcxx_assert(mantissa_idx == -4); + midpoint_bit = (hexit & 0b1000) != 0; + nonzero_tail = (hexit & 0b0111) != 0; + } + } + else + nonzero_tail |= (hexit != 0x0); + + mantissa_idx -= 4; + } + if (mantissa != 0) + __glibcxx_assert(__bit_width(mantissa) >= mantissa_bits + 1 + && __bit_width(mantissa) <= mantissa_bits + 4); + else + __glibcxx_assert(!midpoint_bit && !nonzero_tail); + + if (!seen_hexit) + // If we haven't seen any hexit at this point, the parse failed. + return {orig_first, errc::invalid_argument}; + + // Parse the written exponent. + int written_exponent = 0; + if (first != last && *first == 'p') + { + // Tentatively consume the 'p' and try to parse a decimal number. + const char* const fallback_first = first; + ++first; + if (first != last && *first == '+') + ++first; + from_chars_result fcr = from_chars(first, last, written_exponent, 10); + if (fcr.ptr == first) + // The parse failed, so undo consuming the 'p' and carry on as if the + // exponent was omitted (i.e. is 0). + first = fallback_first; + else + { + first = fcr.ptr; + if (mantissa != 0 && fcr.ec == errc::result_out_of_range) + // Punt on very large exponents for now. FIXME + return {first, errc::result_out_of_range}; + } + } + int biased_exponent = written_exponent + exponent_bias; + if (exponent_adjustment != 0) + // The mantissa wasn't written in scientific form. 
Adjust the exponent + // so that we may assume scientific form. + // + // Examples: + // For input "a.bcp5", EXPONENT_ADJUSTMENT would be 0 since this + // written mantissa is already in scientific form. + // For input "ab.cp5", EXPONENT_ADJUSTMENT would be 4 since the + // scientific form is "a.bcp9". + // For input "0.0abcp5", EXPONENT_ADJUSTMENT would be -8 since the + // scientific form is "a.bcp-3". + biased_exponent += exponent_adjustment; + + // Shifts the mantissa to the right by AMOUNT while updating + // BIASED_EXPONENT, MIDPOINT_BIT and NONZERO_TAIL accordingly. + auto shift_mantissa = [&] (int amount) { + __glibcxx_assert(amount >= 0); + if (amount > mantissa_bits + 1) + { + // Shifting the mantissa by an amount greater than its precision. + nonzero_tail |= midpoint_bit; + nonzero_tail |= mantissa != 0; + midpoint_bit = false; + mantissa = 0; + biased_exponent += amount; + } + else if (amount != 0) + { + nonzero_tail |= midpoint_bit; + nonzero_tail |= (mantissa & ((1ull << (amount - 1)) - 1)) != 0; + midpoint_bit = (mantissa & (1ull << (amount - 1))) != 0; + mantissa >>= amount; + biased_exponent += amount; + } + }; + + if (mantissa != 0) + { + // If the leading hexit is not '1', shift MANTISSA to make it so. + // This normalizes input like "4.08p0" into "1.02p2". + const int leading_hexit = mantissa >> mantissa_bits; + const int leading_hexit_width = __bit_width(leading_hexit); // FIXME: optimize? + __glibcxx_assert(leading_hexit_width >= 1 && leading_hexit_width <= 4); + shift_mantissa(leading_hexit_width - 1); + // After this adjustment, we can assume the leading hexit is '1'. + __glibcxx_assert((mantissa >> mantissa_bits) == 0x1); + } + + if (biased_exponent <= 0) + { + // This number is too small to be represented as a normal number, so + // try for a subnormal number by shifting the mantissa sufficiently. 
+ // We need to shift by 1 more than -BIASED_EXPONENT because the leading + // mantissa bit is omitted in the representation of a normal number but + // not in a subnormal number. + shift_mantissa(-biased_exponent + 1); + __glibcxx_assert(!(mantissa & (1ull << mantissa_bits))); + __glibcxx_assert(biased_exponent == 1); + biased_exponent = 0; + } + + // Perform round-to-nearest, tie-to-even rounding according to + // MIDPOINT_BIT and NONZERO_TAIL. + if (midpoint_bit && (nonzero_tail || (mantissa % 2) != 0)) + { + // Rounding away from zero. + ++mantissa; + midpoint_bit = false; + nonzero_tail = false; + + // Deal with a couple of corner cases after rounding. + if (mantissa == (1ull << mantissa_bits)) + { + // We rounded the subnormal number 1.fffffffffffff...p-1023 + // up to the normal number 1p-1022. + __glibcxx_assert(biased_exponent == 0); + ++biased_exponent; + } + else if (mantissa & (1ull << (mantissa_bits + 1))) + { + // We rounded the normal number 1.fffffffffffff8pN (with maximal + // mantissa) up to 1p(N+1). + mantissa >>= 1; + ++biased_exponent; + } + } + else + { + // Rounding toward zero. + + if (mantissa == 0 && (midpoint_bit || nonzero_tail)) + { + // A nonzero number that rounds to zero is unrepresentable. + __glibcxx_assert(biased_exponent == 0); + return {first, errc::result_out_of_range}; + } + + midpoint_bit = false; + nonzero_tail = false; + } + + if (mantissa != 0 && biased_exponent >= (1 << exponent_bits) - 1) + // The exponent of this number is too large to be representable. + return {first, errc::result_out_of_range}; + + uint_t result = 0; + if (mantissa == 0) + { + // Assemble a (possibly signed) zero. + if (sign_bit) + result |= 1ull << (exponent_bits + mantissa_bits); + } + else + { + // Assemble a nonzero normal or subnormal value. 
+ result |= sign_bit; + result <<= exponent_bits; + result |= biased_exponent; + result <<= mantissa_bits; + result |= mantissa & ((1ull << mantissa_bits) - 1); + // The implicit leading mantissa bit is set iff the number is normal. + __glibcxx_assert(((mantissa & (1ull << mantissa_bits)) != 0) + == (biased_exponent != 0)); + } + memcpy(&value, &result, sizeof(result)); + + return {first, errc{}}; + } +#endif + } // namespace // FIXME: This should be reimplemented so it doesn't use strtod and newlocale. @@ -406,6 +770,11 @@ from_chars_result from_chars(const char* first, const char* last, float& value, chars_format fmt) noexcept { +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 + if (fmt == chars_format::hex) + return __floating_from_chars_hex(first, last, value); +#endif + errc ec = errc::invalid_argument; #if _GLIBCXX_USE_CXX11_ABI buffer_resource mr; @@ -432,6 +801,11 @@ from_chars_result from_chars(const char* first, const char* last, double& value, chars_format fmt) noexcept { +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 + if (fmt == chars_format::hex) + return __floating_from_chars_hex(first, last, value); +#endif + errc ec = errc::invalid_argument; #if _GLIBCXX_USE_CXX11_ABI buffer_resource mr; diff --git a/libstdc++-v3/testsuite/20_util/from_chars/7.cc b/libstdc++-v3/testsuite/20_util/from_chars/7.cc new file mode 100644 index 00000000000..2a78c7441e2 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/from_chars/7.cc @@ -0,0 +1,152 @@ +// Testcases for binary64 hexfloat std::from_chars. 
+// { dg-do run { target c++17 } } +// { dg-require-effective-target ieee_floats } + +#include + +#include +#include +#include +#include +#include +#include + +struct testcase { + const char* input; + size_t correct_idx; + std::errc correct_ec; + double correct_value; +}; + +constexpr testcase testcases[] = { + { "1.fffffffffffff8p0", 18, {}, 0x1.fffffffffffff8p0 }, + { "0.fffffffffffff8p-1022", 22, std::errc{}, 0x0.fffffffffffffep-1022 }, + { "inf", 3, {}, std::numeric_limits::infinity() }, + { "inff", 3, {}, std::numeric_limits::infinity() }, + { "-inf", 4, {}, -std::numeric_limits::infinity() }, + { "-inff", 4, {}, -std::numeric_limits::infinity() }, + { "NAN", 3, {}, std::numeric_limits::quiet_NaN() }, + { "-NAN", 4, {}, std::numeric_limits::quiet_NaN() }, + { "NAN()", 5, {}, std::numeric_limits::quiet_NaN() }, + { "-NAN()", 6, {}, std::numeric_limits::quiet_NaN() }, + { "-NAN(test)", 10, {}, std::numeric_limits::quiet_NaN() }, + { "-NAN(test", 4, {}, std::numeric_limits::quiet_NaN() }, + { "-NAN(", 4, {}, std::numeric_limits::quiet_NaN() }, + { "0.000000000000001p-100000000000000000", 37, std::errc::result_out_of_range, 0 }, + { "-lol", 0, std::errc::invalid_argument, 1 }, + { " 0", 0, std::errc::invalid_argument, 1 }, + { "", 0, std::errc::invalid_argument, 0 }, + { "1", 1, {}, 1 }, + { "2", 1, {}, 2 }, + { "3", 1, {}, 3 }, + { "4", 1, {}, 4 }, + { "5", 1, {}, 5 }, + { "6", 1, {}, 6 }, + { "7", 1, {}, 7 }, + { "8", 1, {}, 8 }, + { "9", 1, {}, 9 }, + { "a", 1, {}, 0xa }, + { "b", 1, {}, 0xb }, + { "c", 1, {}, 0xc }, + { "d", 1, {}, 0xd }, + { "e", 1, {}, 0xe }, + { "f", 1, {}, 0xf }, + { "0.000000000000000000000000000000000000000000001p-1022", 53, + std::errc::result_out_of_range, 0 }, + { "0.00000000000000p-1022", 22, {}, 0 }, + { "0.00000000000009", 16, {}, 0x0.00000000000009p0 }, + { "0.0", 3, {}, 0 }, + { "1p10000000000000000000000", 25, std::errc::result_out_of_range, 0 }, + { "-0.0", 4, {}, -0.0 }, + { "0.00000000000000", 16, {}, 0 }, + { 
"0.0000000000000p-1022", 21, {}, 0 }, + { ".", 0, std::errc::invalid_argument, 0 }, + { "-.", 0, std::errc::invalid_argument, 0 }, + { "0", 1, {}, 0 }, + { "00", 2, {}, 0 }, + { "00.", 3, {}, 0 }, + { "0.", 2, {}, 0 }, + { "1.ffffFFFFFFFFFF", 16, {}, 0x2 }, + { "1.ffffffffffffff", 16, {}, 0x2 }, + { "1.00000000000029", 16, {}, 0x1.0000000000003p0 }, + { "0.00000000000008p-1022", 22, std::errc::result_out_of_range, 0 }, + { "1.fffffffffffffp-1023", 21, {}, 0x1p-1022 }, + { "1.fffffffffffff8p+1023", 22, std::errc::result_out_of_range, 0 }, + { "0.ffffffffffffe8p-1022", 22, {}, 0x0.ffffffffffffep-1022 }, + { "2.11111111111111", 16, {}, 0x1.0888888888889p+1 }, + { "1.1111111111111", 15, {}, 0x1.1111111111111p0 }, + { "1.11111111111111", 16, {}, 0x1.1111111111111p0 }, + { "1.11111111111118", 16, {}, 0x1.1111111111112p0 }, + { "1.11111111111128", 16, {}, 0x1.1111111111112p0 }, + { "1.1111111111112801", 18, {}, 0x1.1111111111113p0 }, + { "1.08888888888888", 16, {}, 0x1.0888888888888p0 }, + { "1.088888888888888", 17, {}, 0x1.0888888888889p0 }, + { "2.00000000000029", 16, {}, 0x2.0000000000002p0 }, + { "0.ffffffffffffep-1022", 21, {}, 0x0.ffffffffffffep-1022 }, + { "3.ffffffffffffep-1024", 21, {}, 0x1p-1022 }, + { "1.00000000000008p+0", 19, {}, 1 }, + { "1p-1023", 7, {}, 0x0.8p-1022 }, + { "1p-1022", 7, {}, 0x1p-1022 }, + { "1.1p-1033", 9, {}, 0x1.1p-1033 }, // 0.0022p-1022 + { "22p-1038", 8, {}, 0x1.1p-1033 }, + { "5", 1, {}, 0x5 }, + { "a", 1, {}, 0xa }, + { "1", 1, {}, 1.0 }, + { "1p1", 3, {}, 0x1p1 }, + { "1p-1", 4, {}, 0x1p-1 }, + { "0", 1, {}, 0.0 }, + { "A", 1, {}, 0xA }, + { "-1", 2, {}, -1.0 }, + { "-0", 2, {}, -0.0 }, + { "42", 2, {}, 0x42p0 }, + { "-42", 3, {}, -0x42p0 }, + { ".1", 2, {}, 0x0.1p0 }, + { "22p-1000", 8, {}, 0x22p-1000 }, + { ".0000008", 8, {}, 0x.0000008p0 }, + { ".0000008p-1022", 14, {}, 0x.0000008p-1022 }, + { "1p-1074", 7, {}, 0x.0000000000001p-1022 }, + { "9999999999999", 13, {}, 0x9999999999999p0 }, + { "1.000000000000a000", 18, {}, 
0x1.000000000000ap0 }, + { "1.000000000000a001", 18, {}, 0x1.000000000000ap0 }, + { "1.000000000000a800", 18, {}, 0x1.000000000000ap0 }, + { "1.000000000000a801", 18, {}, 0x1.000000000000bp0 }, + { "1.000000000000b800", 18, {}, 0x1.000000000000cp0 }, + { "000000", 6, {}, 0x0 }, + { "1p", 1, {}, 0x1 }, + { "0p99999999999999999999", 22, {}, 0 }, + { "1p99999999999999999999", 22, std::errc::result_out_of_range, 0 }, + { "0p-99999999999999999999", 23, {}, 0 }, + { "1p-99999999999999999999", 23, std::errc::result_out_of_range, 0 }, + { "99999999999999999999999", 23, {}, 0x99999999999999999999999p0 }, + { "-1.fffffffffffffp-1023", 22, {}, -0x1p-1022 }, + { "1.337", 5, {}, 0x1.337p0 }, +}; + +void +test01() +{ + for (auto [input,correct_idx,correct_ec,correct_value] : testcases) + { + double value; + auto [ptr,ec] = std::from_chars(input, input+strlen(input), + value, std::chars_format::hex); + VERIFY( ptr == input + correct_idx ); + VERIFY( ec == correct_ec ); + if (ec == std::errc{}) + { + if (std::isnan(value) || std::isnan(correct_value)) + VERIFY( std::isnan(value) && std::isnan(correct_value) ); + else + { + VERIFY( value == correct_value ); + VERIFY( !memcmp(&value, &correct_value, sizeof(double)) ); + } + } + } +} + +int +main() +{ + test01(); +}