public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] libcpp: Update to Unicode 15
@ 2022-11-04  8:55 Jakub Jelinek
  2022-11-04 13:38 ` Marek Polacek
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2022-11-04  8:55 UTC (permalink / raw)
  To: Jason Merrill, Joseph S. Myers, Marek Polacek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 21727 bytes --]

Hi!

The following patch regenerates the libcpp tables with Unicode 15.0.0,
which added 4489 new characters.  The uname2c.h part included in this
mail is just a pseudo patch with a lot of changes replaced with ...
because it is too large, but the important changes like
-static const char uname2c_dict[59418] =
+static const char uname2c_dict[59891] =
-static const unsigned char uname2c_tree[208765] = {
+static const unsigned char uname2c_tree[210697] = {
are shown; the full patch is attached, xz compressed.

As mentioned previously, this isn't just a matter of running the
two libcpp/make*.cc programs on the new Unicode files; one also needs
to manually update a table inside of makeuname2c.cc according to
Table 4-8 in the Unicode text (which is partially reflected in the text
files, but e.g. in Unicode 14.0.0 not 100% accurately, whereas in 15.0.0
it is reflected accurately).
I've also added a randomly chosen subset of those 4489 new
characters to a testcase.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-11-04  Jakub Jelinek  <jakub@redhat.com>

gcc/testsuite/
	* c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some
	characters newly added in Unicode 15.0.0.
libcpp/
	* makeuname2c.cc (struct generated): Update from Unicode 15.0.0
	table 4-8.
	* ucnid.h: Regenerated for Unicode 15.0.0.
	* uname2c.h: Likewise.

--- gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c.jj	2022-08-27 23:01:28.319565957 +0200
+++ gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c	2022-11-04 09:41:45.908527440 +0100
@@ -117,6 +117,27 @@ typedef __CHAR32_TYPE__ char32_t;
     || U'\u0FD0' != U'\N{TIBETAN MARK BSKA- SHOG GI MGO RGYAN}' \
     || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}' \
     || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}' \
+    || U'\u0CF3' != U'\N{KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT}' \
+    || U'\u0ECE' != U'\N{LAO YAMAKKAN}' \
+    || U'\U00010EFE' != U'\N{ARABIC SMALL LOW WORD QASR}' \
+    || U'\U00011241' != U'\N{KHOJKI VOWEL SIGN VOCALIC R}' \
+    || U'\U00011B06' != U'\N{DEVANAGARI SIGN WESTERN FIVE-LIKE BHALE}' \
+    || U'\U00011F0B' != U'\N{KAWI LETTER VOCALIC RR}' \
+    || U'\U0001342F' != U'\N{EGYPTIAN HIEROGLYPH V011D}' \
+    || U'\U00013451' != U'\N{EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT START AND BOTTOM}' \
+    || U'\U0001B132' != U'\N{HIRAGANA LETTER SMALL KO}' \
+    || U'\U0001B155' != U'\N{KATAKANA LETTER SMALL KO}' \
+    || U'\U0001D2C4' != U'\N{KAKTOVIK NUMERAL FOUR}' \
+    || U'\U0001DF27' != U'\N{LATIN SMALL LETTER N WITH MID-HEIGHT LEFT HOOK}' \
+    || U'\U0001E036' != U'\N{MODIFIER LETTER CYRILLIC SMALL ZHE}' \
+    || U'\U0001E05B' != U'\N{CYRILLIC SUBSCRIPT SMALL LETTER EL}' \
+    || U'\U0001E4E5' != U'\N{NAG MUNDARI LETTER ENN}' \
+    || U'\U0001F6DC' != U'\N{WIRELESS}' \
+    || U'\U0001F77E' != U'\N{QUAOAR}' \
+    || U'\U0001F7D9' != U'\N{NINE POINTED WHITE STAR}' \
+    || U'\U0001FA76' != U'\N{GREY HEART}' \
+    || U'\U0001FA88' != U'\N{FLUTE}' \
+    || U'\U0001FABC' != U'\N{JELLYFISH}' \
     || U'\uAC00' != U'\N{HANGUL SYLLABLE GA}' \
     || U'\uAC02' != U'\N{HANGUL SYLLABLE GAGG}' \
     || U'\uAD8D' != U'\N{HANGUL SYLLABLE GWEONJ}' \
@@ -134,6 +155,7 @@ typedef __CHAR32_TYPE__ char32_t;
     || U'\U0002A6DD' != U'\N{CJK UNIFIED IDEOGRAPH-2A6DD}' \
     || U'\U00020700' != U'\N{CJK UNIFIED IDEOGRAPH-20700}' \
     || U'\U0002B734' != U'\N{CJK UNIFIED IDEOGRAPH-2B734}' \
+    || U'\U0002B739' != U'\N{CJK UNIFIED IDEOGRAPH-2B739}' \
     || U'\U0002B740' != U'\N{CJK UNIFIED IDEOGRAPH-2B740}' \
     || U'\U0002B81D' != U'\N{CJK UNIFIED IDEOGRAPH-2B81D}' \
     || U'\U0002B820' != U'\N{CJK UNIFIED IDEOGRAPH-2B820}' \
@@ -142,6 +164,8 @@ typedef __CHAR32_TYPE__ char32_t;
     || U'\U0002EBE0' != U'\N{CJK UNIFIED IDEOGRAPH-2EBE0}' \
     || U'\U00030000' != U'\N{CJK UNIFIED IDEOGRAPH-30000}' \
     || U'\U0003134A' != U'\N{CJK UNIFIED IDEOGRAPH-3134A}' \
+    || U'\U00031350' != U'\N{CJK UNIFIED IDEOGRAPH-31350}' \
+    || U'\U000323AF' != U'\N{CJK UNIFIED IDEOGRAPH-323AF}' \
     || U'\U00017000' != U'\N{TANGUT IDEOGRAPH-17000}' \
     || U'\U000187F7' != U'\N{TANGUT IDEOGRAPH-187F7}' \
     || U'\U00018D00' != U'\N{TANGUT IDEOGRAPH-18D00}' \
--- libcpp/makeuname2c.cc.jj	2022-08-31 10:22:33.439166029 +0200
+++ libcpp/makeuname2c.cc	2022-11-03 10:38:03.341964913 +0100
@@ -69,7 +69,7 @@ struct entry { const char *name; unsigne
 static struct entry *entries;
 static unsigned long num_allocated, num_entries;
 
-/* Unicode 14 Table 4-8.  */
+/* Unicode 15 Table 4-8.  */
 struct generated {
   const char *prefix;
   /* max_high is a workaround for UnicodeData.txt inconsistencies
@@ -81,13 +81,14 @@ struct generated {
 static struct generated generated_ranges[] =
 { { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
   { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
-  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
-  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
-  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
   { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
   { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
   { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },
--- libcpp/ucnid.h.jj	2022-06-29 11:43:36.163905476 +0200
+++ libcpp/ucnid.h	2022-11-03 10:39:44.997575818 +0100
@@ -766,6 +766,7 @@ static const struct ucnrange ucnranges[]
 { C99|N99|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0cef },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0cf0 },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x0cf2 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0cf3 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0cff },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0d01 },
 { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0d03 },
@@ -889,6 +890,7 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0ec7 },
 { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 122, 0x0ecb },
 { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ecd },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ece },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0ecf },
 { C99|N99|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ed9 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0edb },
@@ -2085,7 +2087,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x10eac },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10eaf },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10eb1 },
-{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10eff },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10efc },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 220, 0x10eff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10f1c },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10f26 },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10f27 },
@@ -2180,6 +2183,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11237 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1123d },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1123e },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11240 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11241 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1127f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11286 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11287 },
@@ -2420,6 +2425,19 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11edf },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11ef2 },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11ef6 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11eff },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f01 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f02 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f03 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f10 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f11 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f33 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f3a },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f3d },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f40 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   9, 0x11f42 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f4f },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f59 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11faf },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11fb0 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11fff },
@@ -2431,7 +2449,11 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x12f8f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x12ff0 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x12fff },
-{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1342e },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1342f },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1343f },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x13440 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x13446 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x13455 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x143ff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x14646 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x167ff },
@@ -2491,8 +2513,12 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1affe },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1afff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b122 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b131 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b132 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b14f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b152 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b154 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b155 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b163 },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b167 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b16f },
@@ -2606,6 +2632,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1daaf },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1deff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1df1e },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1df24 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1df2a },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1dfff },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e006 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e007 },
@@ -2616,6 +2644,10 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e024 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e025 },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e02a },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e02f },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|  0|  0,   0, 0x1e06d },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e08e },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e08f },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e0ff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e12c },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e12f },
@@ -2632,6 +2664,12 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e2eb },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e2ef },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1e2f9 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e4cf },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e4eb },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 232, 0x1e4ed },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 220, 0x1e4ee },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e4ef },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1e4f9 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e7df },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e7e6 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e7e7 },
@@ -2741,7 +2779,7 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1ffff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2a6df },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2a6ff },
-{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b738 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b739 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2b73f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b81d },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2b81f },
@@ -2753,6 +2791,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2fffd },
 {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2ffff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x3134a },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3134f },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x323af },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3fffd },
 {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3ffff },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x4fffd },
--- libcpp/uname2c.h.jj	2022-08-26 09:24:12.133615373 +0200
+++ libcpp/uname2c.h	2022-11-03 10:40:33.840908387 +0100
@@ -52,7 +52,7 @@
    use or other dealings in these Data Files or Software without prior
    written authorization of the copyright holder.  */
 
-static const char uname2c_dict[59418] =
+static const char uname2c_dict[59891] =
 "DIVIDED BY HORIZONTAL BAR AND TOP HALF DIVIDED BY VERTICAL BARUIGHUR KIRGHIZ "
 "YEH WITH HAMZA ABOVE WITH ALEF MAKSURA LANTED EQUAL ABOVE GREATER-THAN ABOVE "
 "SLANTED EQUAL WITH EXCLAMATION MARK WITH LEFT RIGHT ARROW ABOVELANTED EQUAL A"
@@ -77,435 +77,439 @@ static const char uname2c_dict[59418] =
 "IGHTWARDS ARROW ABOVE LEFTWARDS ARROWURNED SWIRL BIRGA WITH DOUBLE ORNAMENTWA"
 "RDS AND LEFTWARDS OPEN CIRCLE ARROWSWO DOTS VERTICALLY BELOW AND SMALL TAHIOU"
 "S FACE WITH SYMBOLS COVERING MOUTHONCAVE-POINTED BLACK RIGHTWARDS ARROWOVER R"
-"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGE AND LOWE"
-"R HALF INVERSE MEDIUM SHADE WITH HORIZONTAL MIDDLE BLACK STRIPEDOUBLE-LINE EQ"
-"UAL ABOVE GREATER-THANGREATER-THAN ABOVE DOUBLE-LINE EQUALIGHT TORTOISE SHELL"
-" BRACKET ORNAMENTLEFT TORTOISE SHELL BRACKET ORNAMENTOR LOWER RIGHT CURLY BRA"
-"CKET SECTIONOVER LEFTWARDS TRIANGLE-HEADED ARROWPART BETWEEN MIDDLE AND RING "
-"FINGERSSINGLE COMMA QUOTATION MARK ORNAMENTSMALL ARABIC LETTER TAH AND TWO DO"
-"TSTURNED COMMA QUOTATION MARK ORNAMENTBESIDE AND JOINED WITH INTERSECTIONBOTT"
-"OM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KASHMIRI INDEPENDENT SVARITAND UPPER AN"
-"D LOWER ONE EIGHTH BLOCKOR LOWER LEFT CURLY BRACKET SECTIONORNER ARROWS CIRCL"
-"ING ANTICLOCKWISEOUNDED HIGH STOP WITH FILLED CENTRERIGHT-POINTING ANGLE QUOT"
-"ATION MARKTWO HORIZONTAL STROKES TO THE RIGHT TWO DOTS OVER ONE DOT PUNCTUATI"
-"ONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHUR KAZAKH KIRGHIZ ALEF MAKSURA MODIFIE"
-"R LETTER LABIALIZATION MARKOVER IGI SHIR OVER SHIR UD OVER UDOVER TAB NI OVER"
...
+"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGESMALL LET"
+"TER BYELORUSSIAN-UKRAINIAN I AND LOWER HALF INVERSE MEDIUM SHADE WITH HORIZON"
+"TAL MIDDLE BLACK STRIPEDOUBLE-LINE EQUAL ABOVE GREATER-THANGREATER-THAN ABOVE"
+" DOUBLE-LINE EQUALIGHT TORTOISE SHELL BRACKET ORNAMENTLEFT TORTOISE SHELL BRA"
+"CKET ORNAMENTOR LOWER RIGHT CURLY BRACKET SECTIONOVER LEFTWARDS TRIANGLE-HEAD"
+"ED ARROWPART BETWEEN MIDDLE AND RING FINGERSSINGLE COMMA QUOTATION MARK ORNAM"
+"ENTSMALL ARABIC LETTER TAH AND TWO DOTSTURNED COMMA QUOTATION MARK ORNAMENTBE"
+"SIDE AND JOINED WITH INTERSECTIONBOTTOM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KA"
+"SHMIRI INDEPENDENT SVARITAND UPPER AND LOWER ONE EIGHTH BLOCKOR LOWER LEFT CU"
+"RLY BRACKET SECTIONORNER ARROWS CIRCLING ANTICLOCKWISEOUNDED HIGH STOP WITH F"
+"ILLED CENTRERIGHT-POINTING ANGLE QUOTATION MARKTWO HORIZONTAL STROKES TO THE "
+"RIGHT TWO DOTS OVER ONE DOT PUNCTUATIONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHU"
+"R KAZAKH KIRGHIZ ALEF MAKSURA MODIFIER LETTER LABIALIZATION MARKOVER IGI SHIR"
...
+"U5-05505575B66-06206D77 O72C8 A8 I8F09819E3A7AAEFAL2ALKAUJAWXAZUB57B89BIBBXGC"
+"A9CAHCAICIGCWICYAD42D70DA2DE6DIBDJADZEE80EEGEIEEYKEZHF14F8CFAJFLYFOMFUEHAQHOJ"
+"HOXI-IIMNIWRJAHJEUJHAK00KUEKUGLFALK LULNIIOAYOIXPOQPUQQ00QARQIFQIGQOFQOTQUFSI"
+"ISUUTJETUJUDYUEZUMXUOPUQAVAUVNOVOKVOYVUUWAUWOQX00XAUXEHXWGXWVY00YOTZJEZOOZORZ"
+"UPZZE16171D1F343638394048494B4E6490929599C0D0G0G3G9LXPVVDVWZ0";
 
-static const unsigned char uname2c_tree[208765] = {
-  0xa1, 0xc5, 0xd5, 0x0b, 0xa2, 0xad, 0xdb, 0x0a, 0xa3, 0xcf, 0x8c, 0x09,
-  0xa4, 0xd7, 0xdb, 0x08, 0xa5, 0xd9, 0x94, 0x08, 0xa6, 0xcf, 0x87, 0x08,
-  0xa7, 0xf4, 0xc7, 0x07, 0xa8, 0xbf, 0xfe, 0x06, 0xa9, 0xf9, 0xe9, 0x06,
-  0xaa, 0x86, 0xe4, 0x06, 0xab, 0x93, 0xb6, 0x06, 0xac, 0xd3, 0xb3, 0x05,
-  0xad, 0xa1, 0x8e, 0x04, 0xae, 0xf4, 0xea, 0x03, 0xaf, 0xe6, 0xbf, 0x03,
-  0xb0, 0xb4, 0x93, 0x03, 0x02, 0x7c, 0x00, 0x8d, 0x92, 0x03, 0xb2, 0xb6,
-  0xf1, 0x02, 0xb3, 0xb1, 0xec, 0x01, 0xb4, 0xde, 0x80, 0x01, 0xb5, 0xda,
-  0x74, 0xb6, 0x8a, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3,
...
+static const unsigned char uname2c_tree[210697] = {
+  0xa1, 0xb5, 0xe4, 0x0b, 0xa2, 0x9d, 0xea, 0x0a, 0xa3, 0x9d, 0x9a, 0x09,
+  0xa4, 0xd2, 0xe8, 0x08, 0xa5, 0xfd, 0x9f, 0x08, 0xa6, 0xe0, 0x92, 0x08,
+  0xa7, 0xee, 0xd2, 0x07, 0xa8, 0xa2, 0x89, 0x07, 0xa9, 0xdc, 0xf4, 0x06,
+  0xaa, 0xe1, 0xee, 0x06, 0xab, 0x97, 0xbb, 0x06, 0xac, 0xf7, 0xb7, 0x05,
+  0xad, 0xdb, 0x90, 0x04, 0xae, 0xb6, 0xeb, 0x03, 0xaf, 0x9c, 0xc0, 0x03,
+  0xb0, 0xde, 0x93, 0x03, 0x02, 0x7c, 0x00, 0xb1, 0x92, 0x03, 0xb2, 0xd1,
+  0xf1, 0x02, 0xb3, 0xc0, 0xec, 0x01, 0xb4, 0xed, 0x80, 0x01, 0xb5, 0xe9,
+  0x74, 0xb6, 0x99, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3,
+  0x10, 0xba, 0x01, 0xff, 0x0a, 0x5d, 0x1b, 0xe9, 0x0f, 0x10, 0xe9, 0x5a,
+  0x80, 0x0c, 0xa5, 0xd1, 0x0b, 0x50, 0xd9, 0x5d, 0x10, 0xf9, 0x01, 0x08,
+  0xa5, 0xbe, 0x06, 0x45, 0xb8, 0xde, 0xdf, 0xf9, 0x41, 0x0a, 0x99, 0x53,
+  0xaa, 0x07, 0x06, 0xb5, 0xd3, 0x23, 0x11, 0x62, 0x57, 0x01, 0xff, 0x4e,
+  0x12, 0x70, 0x44, 0xcf, 0x01, 0x45, 0xdb, 0x80, 0x45, 0xcf, 0x01, 0x06,
...
   0xff, 0x45, 0xe0, 0x03, 0xb4, 0x00, 0x00, 0x44, 0x05, 0x02, 0x9f, 0x29,
-  0x40, 0x53, 0x89, 0x42, 0x6d, 0x20, 0x00, 0x52, 0x8b, 0x4f, 0x6b, 0x20,
-  0x40, 0x45, 0xae, 0xdd, 0x97, 0xfa, 0x01, 0x46, 0xcb, 0x63, 0x00, 0x21,
+  0x40, 0x53, 0xcb, 0x42, 0x6d, 0x20, 0x00, 0x52, 0xdf, 0x4f, 0x6b, 0x20,
+  0x40, 0x45, 0x6c, 0xdf, 0x97, 0xfa, 0x01, 0x46, 0x5f, 0x64, 0x00, 0x21,
   0x40 };
 
 static const cppchar_t uname2c_pairs[] = {
   0xac00, 0xd7a3 /* HANGUL SYLLABLE  */, 0,
   0x3400, 0x4dbf /* CJK UNIFIED IDEOGRAPH- */,
-  0x4e00, 0x9ffc /* CJK UNIFIED IDEOGRAPH- */,
-  0x20000, 0x2a6dd /* CJK UNIFIED IDEOGRAPH- */,
-  0x2a700, 0x2b734 /* CJK UNIFIED IDEOGRAPH- */,
+  0x4e00, 0x9fff /* CJK UNIFIED IDEOGRAPH- */,
+  0x20000, 0x2a6df /* CJK UNIFIED IDEOGRAPH- */,
+  0x2a700, 0x2b739 /* CJK UNIFIED IDEOGRAPH- */,
   0x2b740, 0x2b81d /* CJK UNIFIED IDEOGRAPH- */,
   0x2b820, 0x2cea1 /* CJK UNIFIED IDEOGRAPH- */,
   0x2ceb0, 0x2ebe0 /* CJK UNIFIED IDEOGRAPH- */,
-  0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */, 0,
+  0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */,
+  0x31350, 0x323af /* CJK UNIFIED IDEOGRAPH- */, 0,
   0x17000, 0x187f7 /* TANGUT IDEOGRAPH- */,
   0x18d00, 0x18d08 /* TANGUT IDEOGRAPH- */, 0,
   0x18b00, 0x18cd5 /* KHITAN SMALL SCRIPT CHARACTER- */, 0,
@@ -18247,10 +18415,10 @@ static const cppchar_t uname2c_pairs[] =
 static const unsigned char uname2c_generated[] = {
   0 /* HANGUL SYLLABLE  */,
   3 /* CJK UNIFIED IDEOGRAPH- */,
-  20 /* TANGUT IDEOGRAPH- */,
-  25 /* KHITAN SMALL SCRIPT CHARACTER- */,
-  28 /* NUSHU CHARACTER- */,
-  31 /* CJK COMPATIBILITY IDEOGRAPH- */ };
+  22 /* TANGUT IDEOGRAPH- */,
+  27 /* KHITAN SMALL SCRIPT CHARACTER- */,
+  30 /* NUSHU CHARACTER- */,
+  33 /* CJK COMPATIBILITY IDEOGRAPH- */ };
 
 static const unsigned int uname2c_max_name_len = 88;
 

	Jakub

[-- Attachment #2: R638.xz --]
[-- Type: application/x-xz, Size: 293200 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Update to Unicode 15
  2022-11-04  8:55 [PATCH] libcpp: Update to Unicode 15 Jakub Jelinek
@ 2022-11-04 13:38 ` Marek Polacek
  0 siblings, 0 replies; 6+ messages in thread
From: Marek Polacek @ 2022-11-04 13:38 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jason Merrill, Joseph S. Myers, gcc-patches

On Fri, Nov 04, 2022 at 09:55:55AM +0100, Jakub Jelinek wrote:
> Hi!
> 
> The following patch (included in the mail is for uname2c.h part
> just a pseudo patch with a lot of changes replaced with ...
> because it is too large but the important changes like
> -static const char uname2c_dict[59418] =
> +static const char uname2c_dict[59891] =
> -static const unsigned char uname2c_tree[208765] = {
> +static const unsigned char uname2c_tree[210697] = {
> are shown, full patch xz compressed attached) regenerates the
> libcpp tables with Unicode 15.0.0 which added 4489 new characters.
> 
> As mentioned previously, this isn't just a matter of running the
> two libcpp/make*.cc programs on the new Unicode files, but one needs
> to manually update a table inside of makeuname2c.cc according to
> a table in Unicode text (which is partially reflected in the text
> files, but e.g. in Unicode 14.0.0 not 100% accurately, in 15.0.0
> actually accurately).
> I've also added some randomly chosen subset of those 4489 new
> characters to a testcase.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK, thanks.
 
> 2022-11-04  Jakub Jelinek  <jakub@redhat.com>
> 
> gcc/testsuite/
> 	* c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some
> 	characters newly added in Unicode 15.0.0.

Cool, thanks for updating the test as well.

> libcpp/
> 	* makeuname2c.cc (struct generated): Update from Unicode 15.0.0
> 	table 4-8.
> 	* ucnid.h: Regenerated for Unicode 15.0.0.
> 	* uname2c.h: Likewise.
> 
> --- gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c.jj	2022-08-27 23:01:28.319565957 +0200
> +++ gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c	2022-11-04 09:41:45.908527440 +0100
> @@ -117,6 +117,27 @@ typedef __CHAR32_TYPE__ char32_t;
>      || U'\u0FD0' != U'\N{TIBETAN MARK BSKA- SHOG GI MGO RGYAN}' \
>      || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}' \
>      || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}' \
> +    || U'\u0CF3' != U'\N{KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT}' \
> +    || U'\u0ECE' != U'\N{LAO YAMAKKAN}' \
> +    || U'\U00010EFE' != U'\N{ARABIC SMALL LOW WORD QASR}' \
> +    || U'\U00011241' != U'\N{KHOJKI VOWEL SIGN VOCALIC R}' \
> +    || U'\U00011B06' != U'\N{DEVANAGARI SIGN WESTERN FIVE-LIKE BHALE}' \
> +    || U'\U00011F0B' != U'\N{KAWI LETTER VOCALIC RR}' \
> +    || U'\U0001342F' != U'\N{EGYPTIAN HIEROGLYPH V011D}' \
> +    || U'\U00013451' != U'\N{EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT START AND BOTTOM}' \
> +    || U'\U0001B132' != U'\N{HIRAGANA LETTER SMALL KO}' \
> +    || U'\U0001B155' != U'\N{KATAKANA LETTER SMALL KO}' \
> +    || U'\U0001D2C4' != U'\N{KAKTOVIK NUMERAL FOUR}' \
> +    || U'\U0001DF27' != U'\N{LATIN SMALL LETTER N WITH MID-HEIGHT LEFT HOOK}' \
> +    || U'\U0001E036' != U'\N{MODIFIER LETTER CYRILLIC SMALL ZHE}' \
> +    || U'\U0001E05B' != U'\N{CYRILLIC SUBSCRIPT SMALL LETTER EL}' \
> +    || U'\U0001E4E5' != U'\N{NAG MUNDARI LETTER ENN}' \
> +    || U'\U0001F6DC' != U'\N{WIRELESS}' \
> +    || U'\U0001F77E' != U'\N{QUAOAR}' \
> +    || U'\U0001F7D9' != U'\N{NINE POINTED WHITE STAR}' \
> +    || U'\U0001FA76' != U'\N{GREY HEART}' \
> +    || U'\U0001FA88' != U'\N{FLUTE}' \
> +    || U'\U0001FABC' != U'\N{JELLYFISH}' \
>      || U'\uAC00' != U'\N{HANGUL SYLLABLE GA}' \
>      || U'\uAC02' != U'\N{HANGUL SYLLABLE GAGG}' \
>      || U'\uAD8D' != U'\N{HANGUL SYLLABLE GWEONJ}' \
> @@ -134,6 +155,7 @@ typedef __CHAR32_TYPE__ char32_t;
>      || U'\U0002A6DD' != U'\N{CJK UNIFIED IDEOGRAPH-2A6DD}' \
>      || U'\U00020700' != U'\N{CJK UNIFIED IDEOGRAPH-20700}' \
>      || U'\U0002B734' != U'\N{CJK UNIFIED IDEOGRAPH-2B734}' \
> +    || U'\U0002B739' != U'\N{CJK UNIFIED IDEOGRAPH-2B739}' \
>      || U'\U0002B740' != U'\N{CJK UNIFIED IDEOGRAPH-2B740}' \
>      || U'\U0002B81D' != U'\N{CJK UNIFIED IDEOGRAPH-2B81D}' \
>      || U'\U0002B820' != U'\N{CJK UNIFIED IDEOGRAPH-2B820}' \
> @@ -142,6 +164,8 @@ typedef __CHAR32_TYPE__ char32_t;
>      || U'\U0002EBE0' != U'\N{CJK UNIFIED IDEOGRAPH-2EBE0}' \
>      || U'\U00030000' != U'\N{CJK UNIFIED IDEOGRAPH-30000}' \
>      || U'\U0003134A' != U'\N{CJK UNIFIED IDEOGRAPH-3134A}' \
> +    || U'\U00031350' != U'\N{CJK UNIFIED IDEOGRAPH-31350}' \
> +    || U'\U000323AF' != U'\N{CJK UNIFIED IDEOGRAPH-323AF}' \
>      || U'\U00017000' != U'\N{TANGUT IDEOGRAPH-17000}' \
>      || U'\U000187F7' != U'\N{TANGUT IDEOGRAPH-187F7}' \
>      || U'\U00018D00' != U'\N{TANGUT IDEOGRAPH-18D00}' \
> --- libcpp/makeuname2c.cc.jj	2022-08-31 10:22:33.439166029 +0200
> +++ libcpp/makeuname2c.cc	2022-11-03 10:38:03.341964913 +0100
> @@ -69,7 +69,7 @@ struct entry { const char *name; unsigne
>  static struct entry *entries;
>  static unsigned long num_allocated, num_entries;
>  
> -/* Unicode 14 Table 4-8.  */
> +/* Unicode 15 Table 4-8.  */
>  struct generated {
>    const char *prefix;
>    /* max_high is a workaround for UnicodeData.txt inconsistencies
> @@ -81,13 +81,14 @@ struct generated {
>  static struct generated generated_ranges[] =
>  { { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
>    { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
> -  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
> -  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
> -  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
> +  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
> +  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
> +  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
>    { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
>    { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
>    { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
>    { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
> +  { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
>    { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
>    { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
>    { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },
> --- libcpp/ucnid.h.jj	2022-06-29 11:43:36.163905476 +0200
> +++ libcpp/ucnid.h	2022-11-03 10:39:44.997575818 +0100
> @@ -766,6 +766,7 @@ static const struct ucnrange ucnranges[]
>  { C99|N99|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0cef },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0cf0 },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x0cf2 },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0cf3 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0cff },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0d01 },
>  { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0d03 },
> @@ -889,6 +890,7 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0ec7 },
>  { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 122, 0x0ecb },
>  { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ecd },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ece },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0ecf },
>  { C99|N99|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ed9 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0edb },
> @@ -2085,7 +2087,8 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x10eac },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10eaf },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10eb1 },
> -{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10eff },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10efc },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 220, 0x10eff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10f1c },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10f26 },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10f27 },
> @@ -2180,6 +2183,8 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11237 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1123d },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1123e },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11240 },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11241 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1127f },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11286 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11287 },
> @@ -2420,6 +2425,19 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11edf },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11ef2 },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11ef6 },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11eff },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f01 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f02 },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f03 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f10 },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f11 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f33 },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f3a },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f3d },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f40 },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   9, 0x11f42 },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f4f },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f59 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11faf },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11fb0 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11fff },
> @@ -2431,7 +2449,11 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x12f8f },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x12ff0 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x12fff },
> -{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1342e },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1342f },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1343f },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x13440 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x13446 },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x13455 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x143ff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x14646 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x167ff },
> @@ -2491,8 +2513,12 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1affe },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1afff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b122 },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b131 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b132 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b14f },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b152 },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b154 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b155 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b163 },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b167 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b16f },
> @@ -2606,6 +2632,8 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1daaf },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1deff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1df1e },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1df24 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1df2a },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1dfff },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e006 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e007 },
> @@ -2616,6 +2644,10 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e024 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e025 },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e02a },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e02f },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|  0|  0,   0, 0x1e06d },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e08e },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e08f },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e0ff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e12c },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e12f },
> @@ -2632,6 +2664,12 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e2eb },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e2ef },
>  {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1e2f9 },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e4cf },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e4eb },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 232, 0x1e4ed },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 220, 0x1e4ee },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e4ef },
> +{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1e4f9 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e7df },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e7e6 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e7e7 },
> @@ -2741,7 +2779,7 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1ffff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2a6df },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2a6ff },
> -{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b738 },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b739 },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2b73f },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b81d },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2b81f },
> @@ -2753,6 +2791,8 @@ static const struct ucnrange ucnranges[]
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2fffd },
>  {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2ffff },
>  {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x3134a },
> +{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3134f },
> +{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x323af },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3fffd },
>  {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3ffff },
>  {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x4fffd },
> --- libcpp/uname2c.h.jj	2022-08-26 09:24:12.133615373 +0200
> +++ libcpp/uname2c.h	2022-11-03 10:40:33.840908387 +0100
> @@ -52,7 +52,7 @@
>     use or other dealings in these Data Files or Software without prior
>     written authorization of the copyright holder.  */
>  
> -static const char uname2c_dict[59418] =
> +static const char uname2c_dict[59891] =
>  "DIVIDED BY HORIZONTAL BAR AND TOP HALF DIVIDED BY VERTICAL BARUIGHUR KIRGHIZ "
>  "YEH WITH HAMZA ABOVE WITH ALEF MAKSURA LANTED EQUAL ABOVE GREATER-THAN ABOVE "
>  "SLANTED EQUAL WITH EXCLAMATION MARK WITH LEFT RIGHT ARROW ABOVELANTED EQUAL A"
> @@ -77,435 +77,439 @@ static const char uname2c_dict[59418] =
>  "IGHTWARDS ARROW ABOVE LEFTWARDS ARROWURNED SWIRL BIRGA WITH DOUBLE ORNAMENTWA"
>  "RDS AND LEFTWARDS OPEN CIRCLE ARROWSWO DOTS VERTICALLY BELOW AND SMALL TAHIOU"
>  "S FACE WITH SYMBOLS COVERING MOUTHONCAVE-POINTED BLACK RIGHTWARDS ARROWOVER R"
> -"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGE AND LOWE"
> -"R HALF INVERSE MEDIUM SHADE WITH HORIZONTAL MIDDLE BLACK STRIPEDOUBLE-LINE EQ"
> -"UAL ABOVE GREATER-THANGREATER-THAN ABOVE DOUBLE-LINE EQUALIGHT TORTOISE SHELL"
> -" BRACKET ORNAMENTLEFT TORTOISE SHELL BRACKET ORNAMENTOR LOWER RIGHT CURLY BRA"
> -"CKET SECTIONOVER LEFTWARDS TRIANGLE-HEADED ARROWPART BETWEEN MIDDLE AND RING "
> -"FINGERSSINGLE COMMA QUOTATION MARK ORNAMENTSMALL ARABIC LETTER TAH AND TWO DO"
> -"TSTURNED COMMA QUOTATION MARK ORNAMENTBESIDE AND JOINED WITH INTERSECTIONBOTT"
> -"OM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KASHMIRI INDEPENDENT SVARITAND UPPER AN"
> -"D LOWER ONE EIGHTH BLOCKOR LOWER LEFT CURLY BRACKET SECTIONORNER ARROWS CIRCL"
> -"ING ANTICLOCKWISEOUNDED HIGH STOP WITH FILLED CENTRERIGHT-POINTING ANGLE QUOT"
> -"ATION MARKTWO HORIZONTAL STROKES TO THE RIGHT TWO DOTS OVER ONE DOT PUNCTUATI"
> -"ONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHUR KAZAKH KIRGHIZ ALEF MAKSURA MODIFIE"
> -"R LETTER LABIALIZATION MARKOVER IGI SHIR OVER SHIR UD OVER UDOVER TAB NI OVER"
> ...
> +"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGESMALL LET"
> +"TER BYELORUSSIAN-UKRAINIAN I AND LOWER HALF INVERSE MEDIUM SHADE WITH HORIZON"
> +"TAL MIDDLE BLACK STRIPEDOUBLE-LINE EQUAL ABOVE GREATER-THANGREATER-THAN ABOVE"
> +" DOUBLE-LINE EQUALIGHT TORTOISE SHELL BRACKET ORNAMENTLEFT TORTOISE SHELL BRA"
> +"CKET ORNAMENTOR LOWER RIGHT CURLY BRACKET SECTIONOVER LEFTWARDS TRIANGLE-HEAD"
> +"ED ARROWPART BETWEEN MIDDLE AND RING FINGERSSINGLE COMMA QUOTATION MARK ORNAM"
> +"ENTSMALL ARABIC LETTER TAH AND TWO DOTSTURNED COMMA QUOTATION MARK ORNAMENTBE"
> +"SIDE AND JOINED WITH INTERSECTIONBOTTOM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KA"
> +"SHMIRI INDEPENDENT SVARITAND UPPER AND LOWER ONE EIGHTH BLOCKOR LOWER LEFT CU"
> +"RLY BRACKET SECTIONORNER ARROWS CIRCLING ANTICLOCKWISEOUNDED HIGH STOP WITH F"
> +"ILLED CENTRERIGHT-POINTING ANGLE QUOTATION MARKTWO HORIZONTAL STROKES TO THE "
> +"RIGHT TWO DOTS OVER ONE DOT PUNCTUATIONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHU"
> +"R KAZAKH KIRGHIZ ALEF MAKSURA MODIFIER LETTER LABIALIZATION MARKOVER IGI SHIR"
> ...
> +"U5-05505575B66-06206D77 O72C8 A8 I8F09819E3A7AAEFAL2ALKAUJAWXAZUB57B89BIBBXGC"
> +"A9CAHCAICIGCWICYAD42D70DA2DE6DIBDJADZEE80EEGEIEEYKEZHF14F8CFAJFLYFOMFUEHAQHOJ"
> +"HOXI-IIMNIWRJAHJEUJHAK00KUEKUGLFALK LULNIIOAYOIXPOQPUQQ00QARQIFQIGQOFQOTQUFSI"
> +"ISUUTJETUJUDYUEZUMXUOPUQAVAUVNOVOKVOYVUUWAUWOQX00XAUXEHXWGXWVY00YOTZJEZOOZORZ"
> +"UPZZE16171D1F343638394048494B4E6490929599C0D0G0G3G9LXPVVDVWZ0";
>  
> -static const unsigned char uname2c_tree[208765] = {
> -  0xa1, 0xc5, 0xd5, 0x0b, 0xa2, 0xad, 0xdb, 0x0a, 0xa3, 0xcf, 0x8c, 0x09,
> -  0xa4, 0xd7, 0xdb, 0x08, 0xa5, 0xd9, 0x94, 0x08, 0xa6, 0xcf, 0x87, 0x08,
> -  0xa7, 0xf4, 0xc7, 0x07, 0xa8, 0xbf, 0xfe, 0x06, 0xa9, 0xf9, 0xe9, 0x06,
> -  0xaa, 0x86, 0xe4, 0x06, 0xab, 0x93, 0xb6, 0x06, 0xac, 0xd3, 0xb3, 0x05,
> -  0xad, 0xa1, 0x8e, 0x04, 0xae, 0xf4, 0xea, 0x03, 0xaf, 0xe6, 0xbf, 0x03,
> -  0xb0, 0xb4, 0x93, 0x03, 0x02, 0x7c, 0x00, 0x8d, 0x92, 0x03, 0xb2, 0xb6,
> -  0xf1, 0x02, 0xb3, 0xb1, 0xec, 0x01, 0xb4, 0xde, 0x80, 0x01, 0xb5, 0xda,
> -  0x74, 0xb6, 0x8a, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3,
> ...
> +static const unsigned char uname2c_tree[210697] = {
> +  0xa1, 0xb5, 0xe4, 0x0b, 0xa2, 0x9d, 0xea, 0x0a, 0xa3, 0x9d, 0x9a, 0x09,
> +  0xa4, 0xd2, 0xe8, 0x08, 0xa5, 0xfd, 0x9f, 0x08, 0xa6, 0xe0, 0x92, 0x08,
> +  0xa7, 0xee, 0xd2, 0x07, 0xa8, 0xa2, 0x89, 0x07, 0xa9, 0xdc, 0xf4, 0x06,
> +  0xaa, 0xe1, 0xee, 0x06, 0xab, 0x97, 0xbb, 0x06, 0xac, 0xf7, 0xb7, 0x05,
> +  0xad, 0xdb, 0x90, 0x04, 0xae, 0xb6, 0xeb, 0x03, 0xaf, 0x9c, 0xc0, 0x03,
> +  0xb0, 0xde, 0x93, 0x03, 0x02, 0x7c, 0x00, 0xb1, 0x92, 0x03, 0xb2, 0xd1,
> +  0xf1, 0x02, 0xb3, 0xc0, 0xec, 0x01, 0xb4, 0xed, 0x80, 0x01, 0xb5, 0xe9,
> +  0x74, 0xb6, 0x99, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3,
> +  0x10, 0xba, 0x01, 0xff, 0x0a, 0x5d, 0x1b, 0xe9, 0x0f, 0x10, 0xe9, 0x5a,
> +  0x80, 0x0c, 0xa5, 0xd1, 0x0b, 0x50, 0xd9, 0x5d, 0x10, 0xf9, 0x01, 0x08,
> +  0xa5, 0xbe, 0x06, 0x45, 0xb8, 0xde, 0xdf, 0xf9, 0x41, 0x0a, 0x99, 0x53,
> +  0xaa, 0x07, 0x06, 0xb5, 0xd3, 0x23, 0x11, 0x62, 0x57, 0x01, 0xff, 0x4e,
> +  0x12, 0x70, 0x44, 0xcf, 0x01, 0x45, 0xdb, 0x80, 0x45, 0xcf, 0x01, 0x06,
> ...
>    0xff, 0x45, 0xe0, 0x03, 0xb4, 0x00, 0x00, 0x44, 0x05, 0x02, 0x9f, 0x29,
> -  0x40, 0x53, 0x89, 0x42, 0x6d, 0x20, 0x00, 0x52, 0x8b, 0x4f, 0x6b, 0x20,
> -  0x40, 0x45, 0xae, 0xdd, 0x97, 0xfa, 0x01, 0x46, 0xcb, 0x63, 0x00, 0x21,
> +  0x40, 0x53, 0xcb, 0x42, 0x6d, 0x20, 0x00, 0x52, 0xdf, 0x4f, 0x6b, 0x20,
> +  0x40, 0x45, 0x6c, 0xdf, 0x97, 0xfa, 0x01, 0x46, 0x5f, 0x64, 0x00, 0x21,
>    0x40 };
>  
>  static const cppchar_t uname2c_pairs[] = {
>    0xac00, 0xd7a3 /* HANGUL SYLLABLE  */, 0,
>    0x3400, 0x4dbf /* CJK UNIFIED IDEOGRAPH- */,
> -  0x4e00, 0x9ffc /* CJK UNIFIED IDEOGRAPH- */,
> -  0x20000, 0x2a6dd /* CJK UNIFIED IDEOGRAPH- */,
> -  0x2a700, 0x2b734 /* CJK UNIFIED IDEOGRAPH- */,
> +  0x4e00, 0x9fff /* CJK UNIFIED IDEOGRAPH- */,
> +  0x20000, 0x2a6df /* CJK UNIFIED IDEOGRAPH- */,
> +  0x2a700, 0x2b739 /* CJK UNIFIED IDEOGRAPH- */,
>    0x2b740, 0x2b81d /* CJK UNIFIED IDEOGRAPH- */,
>    0x2b820, 0x2cea1 /* CJK UNIFIED IDEOGRAPH- */,
>    0x2ceb0, 0x2ebe0 /* CJK UNIFIED IDEOGRAPH- */,
> -  0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */, 0,
> +  0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */,
> +  0x31350, 0x323af /* CJK UNIFIED IDEOGRAPH- */, 0,
>    0x17000, 0x187f7 /* TANGUT IDEOGRAPH- */,
>    0x18d00, 0x18d08 /* TANGUT IDEOGRAPH- */, 0,
>    0x18b00, 0x18cd5 /* KHITAN SMALL SCRIPT CHARACTER- */, 0,
> @@ -18247,10 +18415,10 @@ static const cppchar_t uname2c_pairs[] =
>  static const unsigned char uname2c_generated[] = {
>    0 /* HANGUL SYLLABLE  */,
>    3 /* CJK UNIFIED IDEOGRAPH- */,
> -  20 /* TANGUT IDEOGRAPH- */,
> -  25 /* KHITAN SMALL SCRIPT CHARACTER- */,
> -  28 /* NUSHU CHARACTER- */,
> -  31 /* CJK COMPATIBILITY IDEOGRAPH- */ };
> +  22 /* TANGUT IDEOGRAPH- */,
> +  27 /* KHITAN SMALL SCRIPT CHARACTER- */,
> +  30 /* NUSHU CHARACTER- */,
> +  33 /* CJK COMPATIBILITY IDEOGRAPH- */ };
>  
>  static const unsigned int uname2c_max_name_len = 88;
>  
> 
> 	Jakub



Marek


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Update to Unicode 15
  2023-03-09 23:27 ` Lewis Hyatt
@ 2023-03-11 17:31   ` Jeff Law
  0 siblings, 0 replies; 6+ messages in thread
From: Jeff Law @ 2023-03-11 17:31 UTC (permalink / raw)
  To: Lewis Hyatt, gcc-patches; +Cc: Jakub Jelinek



On 3/9/23 16:27, Lewis Hyatt via Gcc-patches wrote:
> On Fri, Nov 04, 2022 at 10:03:13AM +0100, Jakub Jelinek via Gcc-patches wrote:
>> Hi!
>>
>> The following pseudo-patch (for uname2c.h part
>> just a pseudo patch with a lot of changes replaced with ...
>> because it is too large but the important changes like
>> -static const char uname2c_dict[59418] =
>> +static const char uname2c_dict[59891] =
>> -static const unsigned char uname2c_tree[208765] = {
>> +static const unsigned char uname2c_tree[210697] = {
>> are shown, full patch xz compressed will be posted separately
>> due to mail limit) regenerates the libcpp tables with Unicode 15.0.0
>> which added 4489 new characters.
>>
>> As mentioned previously, this isn't just a matter of running the
>> two libcpp/make*.cc programs on the new Unicode files, but one needs
>> to manually update a table inside of makeuname2c.cc according to
>> a table in Unicode text (which is partially reflected in the text
>> files, but e.g. in Unicode 14.0.0 not 100% accurately, in 15.0.0
>> actually accurately).
>> I've also added some randomly chosen subset of those 4489 new
>> characters to a testcase.
>>
>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Hi Jakub-
> 
> In addition to these files you updated last year for Unicode 15, we also need
> to update generated_cpp_wcwidth.h, which implements cpp_wcwidth() for
> diagnostics so we can output correct column numbers. There is a procedure
> outlined in the file contrib/unicode/README that accomplishes this. Is it OK
> to push the attached patch (gzipped since it is large and uninformative),
> which is the result of following the procedure? It went straightforwardly as
> expected, and bootstrap+regtest on x86-64 Linux is clean. Thanks!
Yes, please go ahead and push it.

jeff

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Update to Unicode 15
  2022-11-04  9:03 Jakub Jelinek
  2022-11-04  9:05 ` Jakub Jelinek
@ 2023-03-09 23:27 ` Lewis Hyatt
  2023-03-11 17:31   ` Jeff Law
  1 sibling, 1 reply; 6+ messages in thread
From: Lewis Hyatt @ 2023-03-09 23:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 1691 bytes --]

On Fri, Nov 04, 2022 at 10:03:13AM +0100, Jakub Jelinek via Gcc-patches wrote:
> Hi!
> 
> The following pseudo-patch (for uname2c.h part
> just a pseudo patch with a lot of changes replaced with ...
> because it is too large but the important changes like
> -static const char uname2c_dict[59418] =
> +static const char uname2c_dict[59891] =
> -static const unsigned char uname2c_tree[208765] = {
> +static const unsigned char uname2c_tree[210697] = {
> are shown, full patch xz compressed will be posted separately
> due to mail limit) regenerates the libcpp tables with Unicode 15.0.0
> which added 4489 new characters.
> 
> As mentioned previously, this isn't just a matter of running the
> two libcpp/make*.cc programs on the new Unicode files, but one needs
> to manually update a table inside of makeuname2c.cc according to
> a table in Unicode text (which is partially reflected in the text
> files, but e.g. in Unicode 14.0.0 not 100% accurately, in 15.0.0
> actually accurately).
> I've also added some randomly chosen subset of those 4489 new
> characters to a testcase.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Hi Jakub-

In addition to these files you updated last year for Unicode 15, we also need
to update generated_cpp_wcwidth.h, which implements cpp_wcwidth() for
diagnostics so we can output correct column numbers. There is a procedure
outlined in the file contrib/unicode/README that accomplishes this. Is it OK
to push the attached patch (gzipped since it is large and uninformative),
which is the result of following the procedure? It went straightforwardly as
expected, and bootstrap+regtest on x86-64 Linux is clean. Thanks!

-Lewis

[-- Attachment #2: unicode_15_wcwidth-0.txt --]
[-- Type: text/plain, Size: 521 bytes --]

[PATCH] libcpp: Update cpp_wcwidth() to Unicode 15

Updates cpp_wcwidth() to Unicode 15, following the procedure in
contrib/unicode/README mechanically without incident.

contrib/ChangeLog:

	* unicode/DerivedCoreProperties.txt: Update to Unicode 15.
	* unicode/DerivedNormalizationProps.txt: Likewise.
	* unicode/EastAsianWidth.txt: Likewise.
	* unicode/PropList.txt: Likewise.
	* unicode/README: Likewise.
	* unicode/UnicodeData.txt: Likewise.

libcpp/ChangeLog:

	* generated_cpp_wcwidth.h: Regenerated for Unicode 15.

[-- Attachment #3: unicode_15_wcwidth-1.txt.gz --]
[-- Type: application/x-gunzip, Size: 25776 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Update to Unicode 15
  2022-11-04  9:03 Jakub Jelinek
@ 2022-11-04  9:05 ` Jakub Jelinek
  2023-03-09 23:27 ` Lewis Hyatt
  1 sibling, 0 replies; 6+ messages in thread
From: Jakub Jelinek @ 2022-11-04  9:05 UTC (permalink / raw)
  To: Jason Merrill, Joseph S. Myers, Marek Polacek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 50 bytes --]

And here is the full xz compressed patch.

	Jakub

[-- Attachment #2: R638.xz --]
[-- Type: application/x-xz, Size: 293200 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] libcpp: Update to Unicode 15
@ 2022-11-04  9:03 Jakub Jelinek
  2022-11-04  9:05 ` Jakub Jelinek
  2023-03-09 23:27 ` Lewis Hyatt
  0 siblings, 2 replies; 6+ messages in thread
From: Jakub Jelinek @ 2022-11-04  9:03 UTC (permalink / raw)
  To: gcc-patches

Hi!

The following pseudo-patch (the uname2c.h part is only a pseudo-patch,
with most of the changes replaced by ... because it is too large,
but the important changes such as
-static const char uname2c_dict[59418] =
+static const char uname2c_dict[59891] =
-static const unsigned char uname2c_tree[208765] = {
+static const unsigned char uname2c_tree[210697] = {
are shown; the full xz-compressed patch will be posted separately
due to the mail size limit) regenerates the libcpp tables with Unicode 15.0.0,
which added 4489 new characters.

As mentioned previously, this isn't just a matter of running the
two libcpp/make*.cc programs on the new Unicode files; one also needs
to manually update a table inside makeuname2c.cc according to
table 4-8 in the Unicode text (which is only partially reflected in the
text files: in Unicode 14.0.0 not 100% accurately, but in 15.0.0
accurately).
I've also added a randomly chosen subset of those 4489 new
characters to a testcase.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-11-04  Jakub Jelinek  <jakub@redhat.com>

gcc/testsuite/
	* c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some
	characters newly added in Unicode 15.0.0.
libcpp/
	* makeuname2c.cc (struct generated): Update from Unicode 15.0.0
	table 4-8.
	* ucnid.h: Regenerated for Unicode 15.0.0.
	* uname2c.h: Likewise.

--- gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c.jj	2022-08-27 23:01:28.319565957 +0200
+++ gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c	2022-11-04 09:41:45.908527440 +0100
@@ -117,6 +117,27 @@ typedef __CHAR32_TYPE__ char32_t;
     || U'\u0FD0' != U'\N{TIBETAN MARK BSKA- SHOG GI MGO RGYAN}' \
     || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}' \
     || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}' \
+    || U'\u0CF3' != U'\N{KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT}' \
+    || U'\u0ECE' != U'\N{LAO YAMAKKAN}' \
+    || U'\U00010EFE' != U'\N{ARABIC SMALL LOW WORD QASR}' \
+    || U'\U00011241' != U'\N{KHOJKI VOWEL SIGN VOCALIC R}' \
+    || U'\U00011B06' != U'\N{DEVANAGARI SIGN WESTERN FIVE-LIKE BHALE}' \
+    || U'\U00011F0B' != U'\N{KAWI LETTER VOCALIC RR}' \
+    || U'\U0001342F' != U'\N{EGYPTIAN HIEROGLYPH V011D}' \
+    || U'\U00013451' != U'\N{EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT START AND BOTTOM}' \
+    || U'\U0001B132' != U'\N{HIRAGANA LETTER SMALL KO}' \
+    || U'\U0001B155' != U'\N{KATAKANA LETTER SMALL KO}' \
+    || U'\U0001D2C4' != U'\N{KAKTOVIK NUMERAL FOUR}' \
+    || U'\U0001DF27' != U'\N{LATIN SMALL LETTER N WITH MID-HEIGHT LEFT HOOK}' \
+    || U'\U0001E036' != U'\N{MODIFIER LETTER CYRILLIC SMALL ZHE}' \
+    || U'\U0001E05B' != U'\N{CYRILLIC SUBSCRIPT SMALL LETTER EL}' \
+    || U'\U0001E4E5' != U'\N{NAG MUNDARI LETTER ENN}' \
+    || U'\U0001F6DC' != U'\N{WIRELESS}' \
+    || U'\U0001F77E' != U'\N{QUAOAR}' \
+    || U'\U0001F7D9' != U'\N{NINE POINTED WHITE STAR}' \
+    || U'\U0001FA76' != U'\N{GREY HEART}' \
+    || U'\U0001FA88' != U'\N{FLUTE}' \
+    || U'\U0001FABC' != U'\N{JELLYFISH}' \
     || U'\uAC00' != U'\N{HANGUL SYLLABLE GA}' \
     || U'\uAC02' != U'\N{HANGUL SYLLABLE GAGG}' \
     || U'\uAD8D' != U'\N{HANGUL SYLLABLE GWEONJ}' \
@@ -134,6 +155,7 @@ typedef __CHAR32_TYPE__ char32_t;
     || U'\U0002A6DD' != U'\N{CJK UNIFIED IDEOGRAPH-2A6DD}' \
     || U'\U00020700' != U'\N{CJK UNIFIED IDEOGRAPH-20700}' \
     || U'\U0002B734' != U'\N{CJK UNIFIED IDEOGRAPH-2B734}' \
+    || U'\U0002B739' != U'\N{CJK UNIFIED IDEOGRAPH-2B739}' \
     || U'\U0002B740' != U'\N{CJK UNIFIED IDEOGRAPH-2B740}' \
     || U'\U0002B81D' != U'\N{CJK UNIFIED IDEOGRAPH-2B81D}' \
     || U'\U0002B820' != U'\N{CJK UNIFIED IDEOGRAPH-2B820}' \
@@ -142,6 +164,8 @@ typedef __CHAR32_TYPE__ char32_t;
     || U'\U0002EBE0' != U'\N{CJK UNIFIED IDEOGRAPH-2EBE0}' \
     || U'\U00030000' != U'\N{CJK UNIFIED IDEOGRAPH-30000}' \
     || U'\U0003134A' != U'\N{CJK UNIFIED IDEOGRAPH-3134A}' \
+    || U'\U00031350' != U'\N{CJK UNIFIED IDEOGRAPH-31350}' \
+    || U'\U000323AF' != U'\N{CJK UNIFIED IDEOGRAPH-323AF}' \
     || U'\U00017000' != U'\N{TANGUT IDEOGRAPH-17000}' \
     || U'\U000187F7' != U'\N{TANGUT IDEOGRAPH-187F7}' \
     || U'\U00018D00' != U'\N{TANGUT IDEOGRAPH-18D00}' \
--- libcpp/makeuname2c.cc.jj	2022-08-31 10:22:33.439166029 +0200
+++ libcpp/makeuname2c.cc	2022-11-03 10:38:03.341964913 +0100
@@ -69,7 +69,7 @@ struct entry { const char *name; unsigne
 static struct entry *entries;
 static unsigned long num_allocated, num_entries;
 
-/* Unicode 14 Table 4-8.  */
+/* Unicode 15 Table 4-8.  */
 struct generated {
   const char *prefix;
   /* max_high is a workaround for UnicodeData.txt inconsistencies
@@ -81,13 +81,14 @@ struct generated {
 static struct generated generated_ranges[] =
 { { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
   { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
-  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
-  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
-  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
   { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
   { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
   { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },
--- libcpp/ucnid.h.jj	2022-06-29 11:43:36.163905476 +0200
+++ libcpp/ucnid.h	2022-11-03 10:39:44.997575818 +0100
@@ -766,6 +766,7 @@ static const struct ucnrange ucnranges[]
 { C99|N99|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0cef },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0cf0 },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x0cf2 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0cf3 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0cff },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0d01 },
 { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0d03 },
@@ -889,6 +890,7 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0ec7 },
 { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 122, 0x0ecb },
 { C99|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ecd },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ece },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0ecf },
 { C99|N99|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x0ed9 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x0edb },
@@ -2085,7 +2087,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x10eac },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10eaf },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10eb1 },
-{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10eff },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10efc },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 220, 0x10eff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10f1c },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x10f26 },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x10f27 },
@@ -2180,6 +2183,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11237 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1123d },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1123e },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11240 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11241 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1127f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11286 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11287 },
@@ -2420,6 +2425,19 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11edf },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11ef2 },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11ef6 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11eff },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f01 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f02 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f03 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f10 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f11 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11f33 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f3a },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f3d },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f40 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   9, 0x11f42 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11f4f },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x11f59 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11faf },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x11fb0 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x11fff },
@@ -2431,7 +2449,11 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x12f8f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x12ff0 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x12fff },
-{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1342e },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1342f },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1343f },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x13440 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x13446 },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x13455 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x143ff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x14646 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x167ff },
@@ -2491,8 +2513,12 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1affe },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1afff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b122 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b131 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b132 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b14f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b152 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b154 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b155 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b163 },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1b167 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1b16f },
@@ -2606,6 +2632,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1daaf },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1deff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1df1e },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1df24 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1df2a },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1dfff },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e006 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e007 },
@@ -2616,6 +2644,10 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e024 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e025 },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e02a },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e02f },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|  0|  0,   0, 0x1e06d },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e08e },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e08f },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e0ff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e12c },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e12f },
@@ -2632,6 +2664,12 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e2eb },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e2ef },
 {   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1e2f9 },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e4cf },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e4eb },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 232, 0x1e4ed },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 220, 0x1e4ee },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0, 230, 0x1e4ef },
+{   0|  0|  0|C11|  0|CXX23|NXX23|CID|NFC|NKC|  0,   0, 0x1e4f9 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e7df },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x1e7e6 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1e7e7 },
@@ -2741,7 +2779,7 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x1ffff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2a6df },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2a6ff },
-{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b738 },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b739 },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2b73f },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x2b81d },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2b81f },
@@ -2753,6 +2791,8 @@ static const struct ucnrange ucnranges[]
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2fffd },
 {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x2ffff },
 {   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x3134a },
+{   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3134f },
+{   0|  0|  0|C11|  0|CXX23|    0|CID|NFC|NKC|  0,   0, 0x323af },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3fffd },
 {   0|  0|  0|  0|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x3ffff },
 {   0|  0|  0|C11|  0|    0|    0|CID|NFC|NKC|  0,   0, 0x4fffd },
--- libcpp/uname2c.h.jj	2022-08-26 09:24:12.133615373 +0200
+++ libcpp/uname2c.h	2022-11-03 10:40:33.840908387 +0100
@@ -52,7 +52,7 @@
    use or other dealings in these Data Files or Software without prior
    written authorization of the copyright holder.  */
 
-static const char uname2c_dict[59418] =
+static const char uname2c_dict[59891] =
 "DIVIDED BY HORIZONTAL BAR AND TOP HALF DIVIDED BY VERTICAL BARUIGHUR KIRGHIZ "
 "YEH WITH HAMZA ABOVE WITH ALEF MAKSURA LANTED EQUAL ABOVE GREATER-THAN ABOVE "
 "SLANTED EQUAL WITH EXCLAMATION MARK WITH LEFT RIGHT ARROW ABOVELANTED EQUAL A"
@@ -77,435 +77,439 @@ static const char uname2c_dict[59418] =
 "IGHTWARDS ARROW ABOVE LEFTWARDS ARROWURNED SWIRL BIRGA WITH DOUBLE ORNAMENTWA"
 "RDS AND LEFTWARDS OPEN CIRCLE ARROWSWO DOTS VERTICALLY BELOW AND SMALL TAHIOU"
 "S FACE WITH SYMBOLS COVERING MOUTHONCAVE-POINTED BLACK RIGHTWARDS ARROWOVER R"
-"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGE AND LOWE"
-"R HALF INVERSE MEDIUM SHADE WITH HORIZONTAL MIDDLE BLACK STRIPEDOUBLE-LINE EQ"
-"UAL ABOVE GREATER-THANGREATER-THAN ABOVE DOUBLE-LINE EQUALIGHT TORTOISE SHELL"
-" BRACKET ORNAMENTLEFT TORTOISE SHELL BRACKET ORNAMENTOR LOWER RIGHT CURLY BRA"
-"CKET SECTIONOVER LEFTWARDS TRIANGLE-HEADED ARROWPART BETWEEN MIDDLE AND RING "
-"FINGERSSINGLE COMMA QUOTATION MARK ORNAMENTSMALL ARABIC LETTER TAH AND TWO DO"
-"TSTURNED COMMA QUOTATION MARK ORNAMENTBESIDE AND JOINED WITH INTERSECTIONBOTT"
-"OM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KASHMIRI INDEPENDENT SVARITAND UPPER AN"
-"D LOWER ONE EIGHTH BLOCKOR LOWER LEFT CURLY BRACKET SECTIONORNER ARROWS CIRCL"
-"ING ANTICLOCKWISEOUNDED HIGH STOP WITH FILLED CENTRERIGHT-POINTING ANGLE QUOT"
-"ATION MARKTWO HORIZONTAL STROKES TO THE RIGHT TWO DOTS OVER ONE DOT PUNCTUATI"
-"ONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHUR KAZAKH KIRGHIZ ALEF MAKSURA MODIFIE"
-"R LETTER LABIALIZATION MARKOVER IGI SHIR OVER SHIR UD OVER UDOVER TAB NI OVER"
...
+"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGESMALL LET"
+"TER BYELORUSSIAN-UKRAINIAN I AND LOWER HALF INVERSE MEDIUM SHADE WITH HORIZON"
+"TAL MIDDLE BLACK STRIPEDOUBLE-LINE EQUAL ABOVE GREATER-THANGREATER-THAN ABOVE"
+" DOUBLE-LINE EQUALIGHT TORTOISE SHELL BRACKET ORNAMENTLEFT TORTOISE SHELL BRA"
+"CKET ORNAMENTOR LOWER RIGHT CURLY BRACKET SECTIONOVER LEFTWARDS TRIANGLE-HEAD"
+"ED ARROWPART BETWEEN MIDDLE AND RING FINGERSSINGLE COMMA QUOTATION MARK ORNAM"
+"ENTSMALL ARABIC LETTER TAH AND TWO DOTSTURNED COMMA QUOTATION MARK ORNAMENTBE"
+"SIDE AND JOINED WITH INTERSECTIONBOTTOM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KA"
+"SHMIRI INDEPENDENT SVARITAND UPPER AND LOWER ONE EIGHTH BLOCKOR LOWER LEFT CU"
+"RLY BRACKET SECTIONORNER ARROWS CIRCLING ANTICLOCKWISEOUNDED HIGH STOP WITH F"
+"ILLED CENTRERIGHT-POINTING ANGLE QUOTATION MARKTWO HORIZONTAL STROKES TO THE "
+"RIGHT TWO DOTS OVER ONE DOT PUNCTUATIONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHU"
+"R KAZAKH KIRGHIZ ALEF MAKSURA MODIFIER LETTER LABIALIZATION MARKOVER IGI SHIR"
...
+"U5-05505575B66-06206D77 O72C8 A8 I8F09819E3A7AAEFAL2ALKAUJAWXAZUB57B89BIBBXGC"
+"A9CAHCAICIGCWICYAD42D70DA2DE6DIBDJADZEE80EEGEIEEYKEZHF14F8CFAJFLYFOMFUEHAQHOJ"
+"HOXI-IIMNIWRJAHJEUJHAK00KUEKUGLFALK LULNIIOAYOIXPOQPUQQ00QARQIFQIGQOFQOTQUFSI"
+"ISUUTJETUJUDYUEZUMXUOPUQAVAUVNOVOKVOYVUUWAUWOQX00XAUXEHXWGXWVY00YOTZJEZOOZORZ"
+"UPZZE16171D1F343638394048494B4E6490929599C0D0G0G3G9LXPVVDVWZ0";
 
-static const unsigned char uname2c_tree[208765] = {
-  0xa1, 0xc5, 0xd5, 0x0b, 0xa2, 0xad, 0xdb, 0x0a, 0xa3, 0xcf, 0x8c, 0x09,
-  0xa4, 0xd7, 0xdb, 0x08, 0xa5, 0xd9, 0x94, 0x08, 0xa6, 0xcf, 0x87, 0x08,
-  0xa7, 0xf4, 0xc7, 0x07, 0xa8, 0xbf, 0xfe, 0x06, 0xa9, 0xf9, 0xe9, 0x06,
-  0xaa, 0x86, 0xe4, 0x06, 0xab, 0x93, 0xb6, 0x06, 0xac, 0xd3, 0xb3, 0x05,
-  0xad, 0xa1, 0x8e, 0x04, 0xae, 0xf4, 0xea, 0x03, 0xaf, 0xe6, 0xbf, 0x03,
-  0xb0, 0xb4, 0x93, 0x03, 0x02, 0x7c, 0x00, 0x8d, 0x92, 0x03, 0xb2, 0xb6,
-  0xf1, 0x02, 0xb3, 0xb1, 0xec, 0x01, 0xb4, 0xde, 0x80, 0x01, 0xb5, 0xda,
-  0x74, 0xb6, 0x8a, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3,
...
+static const unsigned char uname2c_tree[210697] = {
+  0xa1, 0xb5, 0xe4, 0x0b, 0xa2, 0x9d, 0xea, 0x0a, 0xa3, 0x9d, 0x9a, 0x09,
+  0xa4, 0xd2, 0xe8, 0x08, 0xa5, 0xfd, 0x9f, 0x08, 0xa6, 0xe0, 0x92, 0x08,
+  0xa7, 0xee, 0xd2, 0x07, 0xa8, 0xa2, 0x89, 0x07, 0xa9, 0xdc, 0xf4, 0x06,
+  0xaa, 0xe1, 0xee, 0x06, 0xab, 0x97, 0xbb, 0x06, 0xac, 0xf7, 0xb7, 0x05,
+  0xad, 0xdb, 0x90, 0x04, 0xae, 0xb6, 0xeb, 0x03, 0xaf, 0x9c, 0xc0, 0x03,
+  0xb0, 0xde, 0x93, 0x03, 0x02, 0x7c, 0x00, 0xb1, 0x92, 0x03, 0xb2, 0xd1,
+  0xf1, 0x02, 0xb3, 0xc0, 0xec, 0x01, 0xb4, 0xed, 0x80, 0x01, 0xb5, 0xe9,
+  0x74, 0xb6, 0x99, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3,
+  0x10, 0xba, 0x01, 0xff, 0x0a, 0x5d, 0x1b, 0xe9, 0x0f, 0x10, 0xe9, 0x5a,
+  0x80, 0x0c, 0xa5, 0xd1, 0x0b, 0x50, 0xd9, 0x5d, 0x10, 0xf9, 0x01, 0x08,
+  0xa5, 0xbe, 0x06, 0x45, 0xb8, 0xde, 0xdf, 0xf9, 0x41, 0x0a, 0x99, 0x53,
+  0xaa, 0x07, 0x06, 0xb5, 0xd3, 0x23, 0x11, 0x62, 0x57, 0x01, 0xff, 0x4e,
+  0x12, 0x70, 0x44, 0xcf, 0x01, 0x45, 0xdb, 0x80, 0x45, 0xcf, 0x01, 0x06,
...
   0xff, 0x45, 0xe0, 0x03, 0xb4, 0x00, 0x00, 0x44, 0x05, 0x02, 0x9f, 0x29,
-  0x40, 0x53, 0x89, 0x42, 0x6d, 0x20, 0x00, 0x52, 0x8b, 0x4f, 0x6b, 0x20,
-  0x40, 0x45, 0xae, 0xdd, 0x97, 0xfa, 0x01, 0x46, 0xcb, 0x63, 0x00, 0x21,
+  0x40, 0x53, 0xcb, 0x42, 0x6d, 0x20, 0x00, 0x52, 0xdf, 0x4f, 0x6b, 0x20,
+  0x40, 0x45, 0x6c, 0xdf, 0x97, 0xfa, 0x01, 0x46, 0x5f, 0x64, 0x00, 0x21,
   0x40 };
 
 static const cppchar_t uname2c_pairs[] = {
   0xac00, 0xd7a3 /* HANGUL SYLLABLE  */, 0,
   0x3400, 0x4dbf /* CJK UNIFIED IDEOGRAPH- */,
-  0x4e00, 0x9ffc /* CJK UNIFIED IDEOGRAPH- */,
-  0x20000, 0x2a6dd /* CJK UNIFIED IDEOGRAPH- */,
-  0x2a700, 0x2b734 /* CJK UNIFIED IDEOGRAPH- */,
+  0x4e00, 0x9fff /* CJK UNIFIED IDEOGRAPH- */,
+  0x20000, 0x2a6df /* CJK UNIFIED IDEOGRAPH- */,
+  0x2a700, 0x2b739 /* CJK UNIFIED IDEOGRAPH- */,
   0x2b740, 0x2b81d /* CJK UNIFIED IDEOGRAPH- */,
   0x2b820, 0x2cea1 /* CJK UNIFIED IDEOGRAPH- */,
   0x2ceb0, 0x2ebe0 /* CJK UNIFIED IDEOGRAPH- */,
-  0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */, 0,
+  0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */,
+  0x31350, 0x323af /* CJK UNIFIED IDEOGRAPH- */, 0,
   0x17000, 0x187f7 /* TANGUT IDEOGRAPH- */,
   0x18d00, 0x18d08 /* TANGUT IDEOGRAPH- */, 0,
   0x18b00, 0x18cd5 /* KHITAN SMALL SCRIPT CHARACTER- */, 0,
@@ -18247,10 +18415,10 @@ static const cppchar_t uname2c_pairs[] =
 static const unsigned char uname2c_generated[] = {
   0 /* HANGUL SYLLABLE  */,
   3 /* CJK UNIFIED IDEOGRAPH- */,
-  20 /* TANGUT IDEOGRAPH- */,
-  25 /* KHITAN SMALL SCRIPT CHARACTER- */,
-  28 /* NUSHU CHARACTER- */,
-  31 /* CJK COMPATIBILITY IDEOGRAPH- */ };
+  22 /* TANGUT IDEOGRAPH- */,
+  27 /* KHITAN SMALL SCRIPT CHARACTER- */,
+  30 /* NUSHU CHARACTER- */,
+  33 /* CJK COMPATIBILITY IDEOGRAPH- */ };
 
 static const unsigned int uname2c_max_name_len = 88;
 


	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-03-11 17:31 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-04  8:55 [PATCH] libcpp: Update to Unicode 15 Jakub Jelinek
2022-11-04 13:38 ` Marek Polacek
2022-11-04  9:03 Jakub Jelinek
2022-11-04  9:05 ` Jakub Jelinek
2023-03-09 23:27 ` Lewis Hyatt
2023-03-11 17:31   ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).