public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-8648] libstdc++: Add "ASCII" as an alias for std::text_encoding::id::ASCII
@ 2024-01-31  9:50 Jonathan Wakely
  0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2024-01-31  9:50 UTC (permalink / raw)
  To: gcc-cvs, libstdc++-cvs

https://gcc.gnu.org/g:358fd42aabec56e471ed3c8e6f3dccbc305ff6f7

commit r14-8648-g358fd42aabec56e471ed3c8e6f3dccbc305ff6f7
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Tue Jan 23 14:57:15 2024 +0000

    libstdc++: Add "ASCII" as an alias for std::text_encoding::id::ASCII
    
    As noted in LWG 4043, "ASCII" is not an alias for any known registered
    character encoding, so std::text_encoding("ASCII").mib() == id::other.
    Add the alias "ASCII" to the implementation-defined superset of aliases
    for that encoding.
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/text_encoding-data.h: Regenerate.
            * scripts/gen_text_encoding_data.py: Add extra_aliases dict
            containing "ASCII".
            * testsuite/std/text_encoding/cons.cc: Check "ascii" is known.
    
    Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
    Signed-off-by: Ewan Higgs <ewan.higgs@gmail.com>

Diff:
---
 libstdc++-v3/include/bits/text_encoding-data.h   |  3 ++-
 libstdc++-v3/scripts/gen_text_encoding_data.py   | 24 +++++++++++++++++++++++-
 libstdc++-v3/testsuite/std/text_encoding/cons.cc |  5 +++++
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/text_encoding-data.h b/libstdc++-v3/include/bits/text_encoding-data.h
index 7ac2e9dc3d92..5041e738d214 100644
--- a/libstdc++-v3/include/bits/text_encoding-data.h
+++ b/libstdc++-v3/include/bits/text_encoding-data.h
@@ -14,6 +14,7 @@
   {    3, "IBM367" },
   {    3, "cp367" },
   {    3, "csASCII" },
+  {    3, "ASCII" }, // libstdc++ extension
   {    4, "ISO_8859-1:1987" },
   {    4, "iso-ir-100" },
   {    4, "ISO_8859-1" },
@@ -417,7 +418,7 @@
   {  104, "csISO2022CN" },
   {  105, "ISO-2022-CN-EXT" },
   {  105, "csISO2022CNEXT" },
-#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET 413
+#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET 414
   {  106, "UTF-8" },
   {  106, "csUTF8" },
   {  109, "ISO-8859-13" },
diff --git a/libstdc++-v3/scripts/gen_text_encoding_data.py b/libstdc++-v3/scripts/gen_text_encoding_data.py
index 2d6f3e4077a4..f0ebb42d8c20 100755
--- a/libstdc++-v3/scripts/gen_text_encoding_data.py
+++ b/libstdc++-v3/scripts/gen_text_encoding_data.py
@@ -36,6 +36,18 @@ print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
 print('# error "This is not a public header, do not include it directly"')
 print("#endif\n")
 
+# We need to generate a list of initializers of the form { mib, alias }, e.g.,
+# { 3, "US-ASCII" },
+# { 3, "ISO646-US" },
+# { 3, "csASCII" },
+# { 4, "ISO_8859-1:1987" },
+# { 4, "latin1" },
+# The initializers must be sorted by the mib value. The first entry for
+# a given mib must be the primary name for the encoding. Any aliases for
+# the encoding come after the primary name.
+# We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the
+# offset into the list of the mib=106, alias="UTF-8" entry. This is used
+# to optimize the common case, so we don't need to search for "UTF-8".
 
 charsets = {}
 with open(sys.argv[1], newline='') as f:
@@ -52,10 +64,15 @@ with open(sys.argv[1], newline='') as f:
             aliases.remove(name)
         charsets[mib] = [name] + aliases
 
-# Remove "NATS-DANO" and "NATS-DANO-ADD"
+# Remove "NATS-DANO" and "NATS-DANO-ADD" as specified by the C++ standard.
 charsets.pop(33, None)
 charsets.pop(34, None)
 
+# This is not an official IANA alias, but we include it in the
+# implementation-defined superset of aliases for US-ASCII.
+# See also LWG 4043.
+extra_aliases = {3: ["ASCII"]}
+
 count = 0
 for mib in sorted(charsets.keys()):
     names = charsets[mib]
@@ -64,6 +81,11 @@ for mib in sorted(charsets.keys()):
     for name in names:
         print('  {{ {:4}, "{}" }},'.format(mib, name))
     count += len(names)
+    if mib in extra_aliases:
+        names = extra_aliases[mib]
+        for name in names:
+            print('  {{ {:4}, "{}" }}, // libstdc++ extension'.format(mib, name))
+        count += len(names)
 
 # <text_encoding> gives an error if this macro is left defined.
 # Do this last, so that the generated output is not usable unless we reach here.
diff --git a/libstdc++-v3/testsuite/std/text_encoding/cons.cc b/libstdc++-v3/testsuite/std/text_encoding/cons.cc
index b9d93641de44..8fcc2ec8c3b9 100644
--- a/libstdc++-v3/testsuite/std/text_encoding/cons.cc
+++ b/libstdc++-v3/testsuite/std/text_encoding/cons.cc
@@ -53,6 +53,11 @@ test_construct_by_name()
   VERIFY( e4.name() == s );
   VERIFY( ! e4.aliases().empty() );
   VERIFY( e4.aliases().front() == "US-ASCII"sv ); // primary name
+
+  s = "ascii";
+  std::text_encoding e5(s);
+  VERIFY( e5.mib() == std::text_encoding::ASCII );
+  VERIFY( e5.name() == s );
 }
 
 constexpr void

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-01-31  9:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-31  9:50 [gcc r14-8648] libstdc++: Add "ASCII" as an alias for std::text_encoding::id::ASCII Jonathan Wakely

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).