public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-8218] libstdc++: Avoid ASCII assumptions in floating_from_chars.cc
@ 2022-04-21 16:12 Patrick Palka
  0 siblings, 0 replies; only message in thread
From: Patrick Palka @ 2022-04-21 16:12 UTC (permalink / raw)
  To: gcc-cvs, libstdc++-cvs

https://gcc.gnu.org/g:93dd7f36f2066ec52137178ee52052f293e5e743

commit r12-8218-g93dd7f36f2066ec52137178ee52052f293e5e743
Author: Patrick Palka <ppalka@redhat.com>
Date:   Thu Apr 21 12:11:01 2022 -0400

    libstdc++: Avoid ASCII assumptions in floating_from_chars.cc
    
    In starts_with_ci and in __floating_from_chars_hex's inf/nan handling,
    we were assuming that the letters are contiguous and that 'A' + 32 == 'a',
    which is true for ASCII but not for other character encodings (e.g.
    EBCDIC).
    
    This patch fixes starts_with_ci by using a constexpr lookup table that
    maps uppercase letters to lowercase, and fixes __floating_from_chars_hex
    by using __from_chars_alnum_to_val.
    
    libstdc++-v3/ChangeLog:
    
            * include/std/charconv (__from_chars_alnum_to_val_table):
            Simplify initialization of __lower/__upper_letters.
            (__from_chars_alnum_to_val): Default the template parameter to
            false.
            * src/c++17/floating_from_chars.cc (starts_with_ci): Don't
            assume the uppercase and lowercase letters are contiguous.
            (__floating_from_chars_hex): Likewise.

Diff:
---
 libstdc++-v3/include/std/charconv             | 12 +++-------
 libstdc++-v3/src/c++17/floating_from_chars.cc | 33 ++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv
index 561234cb2fc..218813e4797 100644
--- a/libstdc++-v3/include/std/charconv
+++ b/libstdc++-v3/include/std/charconv
@@ -412,14 +412,8 @@ namespace __detail
   constexpr auto
   __from_chars_alnum_to_val_table()
   {
-    constexpr unsigned char __lower_letters[]
-      = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
-	  'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-	  'u', 'v', 'w', 'x', 'y', 'z' };
-    constexpr unsigned char __upper_letters[]
-      = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
-	  'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
-	  'U', 'V', 'W', 'X', 'Y', 'Z' };
+    constexpr unsigned char __lower_letters[27] = "abcdefghijklmnopqrstuvwxyz";
+    constexpr unsigned char __upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
     struct { unsigned char __data[1u << __CHAR_BIT__] = {}; } __table;
     for (auto& __entry : __table.__data)
       __entry = 127;
@@ -437,7 +431,7 @@ namespace __detail
   // return its corresponding base-10 value, otherwise return a value >= 127.
   // If _DecOnly is false: if the character is an alphanumeric digit, then
   // return its corresponding base-36 value, otherwise return a value >= 127.
-  template<bool _DecOnly>
+  template<bool _DecOnly = false>
     unsigned char
     __from_chars_alnum_to_val(unsigned char __c)
     {
diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc
index 0f5183aa9b5..13de1e346ab 100644
--- a/libstdc++-v3/src/c++17/floating_from_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_from_chars.cc
@@ -30,6 +30,7 @@
 // Prefer to use std::pmr::string if possible, which requires the cxx11 ABI.
 #define _GLIBCXX_USE_CXX11_ABI 1
 
+#include <array>
 #include <charconv>
 #include <bit>
 #include <string>
@@ -451,15 +452,33 @@ namespace
 
 #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
   // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case.
+  // PREFIX is assumed to not contain any uppercase letters.
   bool
   starts_with_ci(const char* first, const char* last, string_view prefix)
   {
     __glibcxx_requires_valid_range(first, last);
 
-    for (char ch : prefix)
+    // A lookup table that maps uppercase letters to lowercase and
+    // is otherwise the identity mapping.
+    static constexpr auto upper_to_lower_table = [] {
+      constexpr unsigned char lower_letters[27] = "abcdefghijklmnopqrstuvwxyz";
+      constexpr unsigned char upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+      std::array<unsigned char, (1u << __CHAR_BIT__)> table = {};
+      for (unsigned i = 0; i < table.size(); ++i)
+	table[i] = i;
+      for (unsigned i = 0; i < 26; ++i)
+	table[upper_letters[i]] = lower_letters[i];
+      return table;
+    }();
+
+    if (last - first < static_cast<ptrdiff_t>(prefix.length()))
+      return false;
+
+    for (const unsigned char pch : prefix)
       {
-	__glibcxx_assert(ch >= 'a' && ch <= 'z');
-	if (first == last || (*first != ch && *first != ch - 32))
+	// __glibcxx_assert(pch == upper_to_lower_table[pch]);
+	const unsigned char ch = *first;
+	if (ch != pch && upper_to_lower_table[ch] != pch)
 	  return false;
 	++first;
       }
@@ -535,10 +554,8 @@ namespace
 			  ++first;
 			  break;
 			}
-		      else if ((ch >= '0' && ch <= '9')
-			       || (ch >= 'a' && ch <= 'z')
-			       || (ch >= 'A' && ch <= 'Z')
-			       || ch == '_')
+		      else if (ch == '_'
+			       || __detail::__from_chars_alnum_to_val(ch) < 127)
 			continue;
 		      else
 			{
@@ -599,7 +616,7 @@ namespace
 	    continue;
 	  }
 
-	int hexit = __detail::__from_chars_alnum_to_val<false>(ch);
+	int hexit = __detail::__from_chars_alnum_to_val(ch);
 	if (hexit >= 16)
 	  break;
 	seen_hexit = true;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-04-21 16:12 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-21 16:12 [gcc r12-8218] libstdc++: Avoid ASCII assumptions in floating_from_chars.cc Patrick Palka

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).