From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1888) id 19C703870C27; Thu, 21 Apr 2022 16:12:22 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 19C703870C27 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Patrick Palka To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r12-8218] libstdc++: Avoid ASCII assumptions in floating_from_chars.cc X-Act-Checkin: gcc X-Git-Author: Patrick Palka X-Git-Refname: refs/heads/master X-Git-Oldrev: 605a80bb733b225e5e53db0f0298374213cdcd95 X-Git-Newrev: 93dd7f36f2066ec52137178ee52052f293e5e743 Message-Id: <20220421161222.19C703870C27@sourceware.org> Date: Thu, 21 Apr 2022 16:12:22 +0000 (GMT) X-BeenThere: libstdc++-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 21 Apr 2022 16:12:22 -0000 https://gcc.gnu.org/g:93dd7f36f2066ec52137178ee52052f293e5e743 commit r12-8218-g93dd7f36f2066ec52137178ee52052f293e5e743 Author: Patrick Palka Date: Thu Apr 21 12:11:01 2022 -0400 libstdc++: Avoid ASCII assumptions in floating_from_chars.cc In starts_with_ci and in __floating_from_chars_hex's inf/nan handling, we were assuming that the letters are contiguous and that 'A' + 32 == 'a' which is true for ASCII but not for other character encodings. This patch fixes starts_with_ci by using a constexpr lookup table that maps uppercase letters to lowercase, and fixes __floating_from_chars_hex by using __from_chars_alnum_to_val. libstdc++-v3/ChangeLog: * include/std/charconv (__from_chars_alnum_to_val_table): Simplify initialization of __lower/__upper_letters. (__from_chars_alnum_to_val): Default the template parameter to false. * src/c++17/floating_from_chars.cc (starts_with_ci): Don't assume the uppercase and lowercase letters are contiguous. (__floating_from_chars_hex): Likewise. 
Diff: --- libstdc++-v3/include/std/charconv | 12 +++------- libstdc++-v3/src/c++17/floating_from_chars.cc | 33 ++++++++++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv index 561234cb2fc..218813e4797 100644 --- a/libstdc++-v3/include/std/charconv +++ b/libstdc++-v3/include/std/charconv @@ -412,14 +412,8 @@ namespace __detail constexpr auto __from_chars_alnum_to_val_table() { - constexpr unsigned char __lower_letters[] - = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', - 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', - 'u', 'v', 'w', 'x', 'y', 'z' }; - constexpr unsigned char __upper_letters[] - = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', - 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', - 'U', 'V', 'W', 'X', 'Y', 'Z' }; + constexpr unsigned char __lower_letters[27] = "abcdefghijklmnopqrstuvwxyz"; + constexpr unsigned char __upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; struct { unsigned char __data[1u << __CHAR_BIT__] = {}; } __table; for (auto& __entry : __table.__data) __entry = 127; @@ -437,7 +431,7 @@ namespace __detail // return its corresponding base-10 value, otherwise return a value >= 127. // If _DecOnly is false: if the character is an alphanumeric digit, then // return its corresponding base-36 value, otherwise return a value >= 127. - template<bool _DecOnly> + template<bool _DecOnly = false> unsigned char __from_chars_alnum_to_val(unsigned char __c) { diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc index 0f5183aa9b5..13de1e346ab 100644 --- a/libstdc++-v3/src/c++17/floating_from_chars.cc +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc @@ -30,6 +30,7 @@ // Prefer to use std::pmr::string if possible, which requires the cxx11 ABI. 
#define _GLIBCXX_USE_CXX11_ABI 1 +#include <array> #include <charconv> #include <bit> #include <string> @@ -451,15 +452,33 @@ namespace #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. + // PREFIX is assumed to not contain any uppercase letters. bool starts_with_ci(const char* first, const char* last, string_view prefix) { __glibcxx_requires_valid_range(first, last); - for (char ch : prefix) + // A lookup table that maps uppercase letters to lowercase and + // is otherwise the identity mapping. + static constexpr auto upper_to_lower_table = [] { + constexpr unsigned char lower_letters[27] = "abcdefghijklmnopqrstuvwxyz"; + constexpr unsigned char upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::array<unsigned char, (1u << __CHAR_BIT__)> table = {}; + for (unsigned i = 0; i < table.size(); ++i) + table[i] = i; + for (unsigned i = 0; i < 26; ++i) + table[upper_letters[i]] = lower_letters[i]; + return table; + }(); + + if (last - first < static_cast<ptrdiff_t>(prefix.length())) + return false; + + for (const unsigned char pch : prefix) { - __glibcxx_assert(ch >= 'a' && ch <= 'z'); - if (first == last || (*first != ch && *first != ch - 32)) + // __glibcxx_assert(pch == upper_to_lower_table[pch]); + const unsigned char ch = *first; + if (ch != pch && upper_to_lower_table[ch] != pch) return false; ++first; } @@ -535,10 +554,8 @@ namespace ++first; break; } - else if ((ch >= '0' && ch <= '9') - || (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '_') + else if (ch == '_' + || __detail::__from_chars_alnum_to_val(ch) < 127) continue; else { @@ -599,7 +616,7 @@ namespace continue; } - int hexit = __detail::__from_chars_alnum_to_val<false>(ch); + int hexit = __detail::__from_chars_alnum_to_val(ch); if (hexit >= 16) break; seen_hexit = true;