Hi! The following patch regenerates the libcpp tables with Unicode 15.0.0, which added 4489 new characters. For the uname2c.h part, the patch included inline in this mail is just a pseudo patch with a lot of changes replaced with ... because it is too large, but the important changes like -static const char uname2c_dict[59418] = +static const char uname2c_dict[59891] = -static const unsigned char uname2c_tree[208765] = { +static const unsigned char uname2c_tree[210697] = { are shown; the full patch is attached xz-compressed. As mentioned previously, this isn't just a matter of running the two libcpp/make*.cc programs on the new Unicode files; one also needs to manually update a table inside of makeuname2c.cc according to a table in the Unicode text (Table 4-8), which is only partially reflected in the text files (e.g. not 100% accurately in Unicode 14.0.0, but actually accurately in 15.0.0). I've also added a randomly chosen subset of those 4489 new characters to a testcase. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-11-04 Jakub Jelinek gcc/testsuite/ * c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some characters newly added in Unicode 15.0.0. libcpp/ * makeuname2c.cc (struct generated): Update from Unicode 15.0.0 table 4-8. * ucnid.h: Regenerated for Unicode 15.0.0. * uname2c.h: Likewise. 
--- gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c.jj 2022-08-27 23:01:28.319565957 +0200 +++ gcc/testsuite/c-c++-common/cpp/named-universal-char-escape-1.c 2022-11-04 09:41:45.908527440 +0100 @@ -117,6 +117,27 @@ typedef __CHAR32_TYPE__ char32_t; || U'\u0FD0' != U'\N{TIBETAN MARK BSKA- SHOG GI MGO RGYAN}' \ || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}' \ || U'\uFE18' != U'\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}' \ + || U'\u0CF3' != U'\N{KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT}' \ + || U'\u0ECE' != U'\N{LAO YAMAKKAN}' \ + || U'\U00010EFE' != U'\N{ARABIC SMALL LOW WORD QASR}' \ + || U'\U00011241' != U'\N{KHOJKI VOWEL SIGN VOCALIC R}' \ + || U'\U00011B06' != U'\N{DEVANAGARI SIGN WESTERN FIVE-LIKE BHALE}' \ + || U'\U00011F0B' != U'\N{KAWI LETTER VOCALIC RR}' \ + || U'\U0001342F' != U'\N{EGYPTIAN HIEROGLYPH V011D}' \ + || U'\U00013451' != U'\N{EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT START AND BOTTOM}' \ + || U'\U0001B132' != U'\N{HIRAGANA LETTER SMALL KO}' \ + || U'\U0001B155' != U'\N{KATAKANA LETTER SMALL KO}' \ + || U'\U0001D2C4' != U'\N{KAKTOVIK NUMERAL FOUR}' \ + || U'\U0001DF27' != U'\N{LATIN SMALL LETTER N WITH MID-HEIGHT LEFT HOOK}' \ + || U'\U0001E036' != U'\N{MODIFIER LETTER CYRILLIC SMALL ZHE}' \ + || U'\U0001E05B' != U'\N{CYRILLIC SUBSCRIPT SMALL LETTER EL}' \ + || U'\U0001E4E5' != U'\N{NAG MUNDARI LETTER ENN}' \ + || U'\U0001F6DC' != U'\N{WIRELESS}' \ + || U'\U0001F77E' != U'\N{QUAOAR}' \ + || U'\U0001F7D9' != U'\N{NINE POINTED WHITE STAR}' \ + || U'\U0001FA76' != U'\N{GREY HEART}' \ + || U'\U0001FA88' != U'\N{FLUTE}' \ + || U'\U0001FABC' != U'\N{JELLYFISH}' \ || U'\uAC00' != U'\N{HANGUL SYLLABLE GA}' \ || U'\uAC02' != U'\N{HANGUL SYLLABLE GAGG}' \ || U'\uAD8D' != U'\N{HANGUL SYLLABLE GWEONJ}' \ @@ -134,6 +155,7 @@ typedef __CHAR32_TYPE__ char32_t; || U'\U0002A6DD' != U'\N{CJK UNIFIED IDEOGRAPH-2A6DD}' \ || U'\U00020700' != U'\N{CJK UNIFIED IDEOGRAPH-20700}' \ || 
U'\U0002B734' != U'\N{CJK UNIFIED IDEOGRAPH-2B734}' \ + || U'\U0002B739' != U'\N{CJK UNIFIED IDEOGRAPH-2B739}' \ || U'\U0002B740' != U'\N{CJK UNIFIED IDEOGRAPH-2B740}' \ || U'\U0002B81D' != U'\N{CJK UNIFIED IDEOGRAPH-2B81D}' \ || U'\U0002B820' != U'\N{CJK UNIFIED IDEOGRAPH-2B820}' \ @@ -142,6 +164,8 @@ typedef __CHAR32_TYPE__ char32_t; || U'\U0002EBE0' != U'\N{CJK UNIFIED IDEOGRAPH-2EBE0}' \ || U'\U00030000' != U'\N{CJK UNIFIED IDEOGRAPH-30000}' \ || U'\U0003134A' != U'\N{CJK UNIFIED IDEOGRAPH-3134A}' \ + || U'\U00031350' != U'\N{CJK UNIFIED IDEOGRAPH-31350}' \ + || U'\U000323AF' != U'\N{CJK UNIFIED IDEOGRAPH-323AF}' \ || U'\U00017000' != U'\N{TANGUT IDEOGRAPH-17000}' \ || U'\U000187F7' != U'\N{TANGUT IDEOGRAPH-187F7}' \ || U'\U00018D00' != U'\N{TANGUT IDEOGRAPH-18D00}' \ --- libcpp/makeuname2c.cc.jj 2022-08-31 10:22:33.439166029 +0200 +++ libcpp/makeuname2c.cc 2022-11-03 10:38:03.341964913 +0100 @@ -69,7 +69,7 @@ struct entry { const char *name; unsigne static struct entry *entries; static unsigned long num_allocated, num_entries; -/* Unicode 14 Table 4-8. */ +/* Unicode 15 Table 4-8. 
*/ struct generated { const char *prefix; /* max_high is a workaround for UnicodeData.txt inconsistencies @@ -81,13 +81,14 @@ struct generated { static struct generated generated_ranges[] = { { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */ { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */ - { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 }, - { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 }, - { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 }, + { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 }, + { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 }, + { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 }, { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 }, { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 }, { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 }, { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 }, + { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 }, { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 }, { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 }, { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 }, --- libcpp/ucnid.h.jj 2022-06-29 11:43:36.163905476 +0200 +++ libcpp/ucnid.h 2022-11-03 10:39:44.997575818 +0100 @@ -766,6 +766,7 @@ static const struct ucnrange ucnranges[] { C99|N99| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x0cef }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x0cf0 }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x0cf2 }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x0cf3 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x0cff }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x0d01 }, { C99| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x0d03 }, @@ -889,6 +890,7 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x0ec7 }, { C99| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 122, 0x0ecb }, { C99| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x0ecd }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 
0, 0, 0x0ece }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x0ecf }, { C99|N99| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x0ed9 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x0edb }, @@ -2085,7 +2087,8 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x10eac }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x10eaf }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x10eb1 }, -{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x10eff }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x10efc }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 220, 0x10eff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x10f1c }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x10f26 }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x10f27 }, @@ -2180,6 +2183,8 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11237 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1123d }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x1123e }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11240 }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11241 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1127f }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11286 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11287 }, @@ -2420,6 +2425,19 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11edf }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11ef2 }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11ef6 }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11eff }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11f01 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11f02 }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11f03 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11f10 }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11f11 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11f33 }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11f3a }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 
0x11f3d }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11f40 }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 9, 0x11f42 }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11f4f }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x11f59 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11faf }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x11fb0 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x11fff }, @@ -2431,7 +2449,11 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x12f8f }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x12ff0 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x12fff }, -{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1342e }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1342f }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1343f }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x13440 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x13446 }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x13455 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x143ff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x14646 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x167ff }, @@ -2491,8 +2513,12 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1affe }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1afff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1b122 }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1b131 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1b132 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1b14f }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1b152 }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1b154 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1b155 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1b163 }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1b167 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1b16f }, @@ -2606,6 +2632,8 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x1daaf }, { 0| 0| 0|C11| 
0| 0| 0|CID|NFC|NKC| 0, 0, 0x1deff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1df1e }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1df24 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1df2a }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1dfff }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x1e006 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e007 }, @@ -2616,6 +2644,10 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x1e024 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e025 }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x1e02a }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e02f }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC| 0| 0, 0, 0x1e06d }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e08e }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x1e08f }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e0ff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1e12c }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e12f }, @@ -2632,6 +2664,12 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1e2eb }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x1e2ef }, { 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x1e2f9 }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e4cf }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1e4eb }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 232, 0x1e4ed }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 220, 0x1e4ee }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 230, 0x1e4ef }, +{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x1e4f9 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e7df }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x1e7e6 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1e7e7 }, @@ -2741,7 +2779,7 @@ static const struct ucnrange ucnranges[] { 0| 0| 0| 0| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1ffff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2a6df }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2a6ff }, -{ 0| 0| 0|C11| 
0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2b738 }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2b739 }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2b73f }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2b81d }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2b81f }, @@ -2753,6 +2791,8 @@ static const struct ucnrange ucnranges[] { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2fffd }, { 0| 0| 0| 0| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2ffff }, { 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x3134a }, +{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x3134f }, +{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x323af }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x3fffd }, { 0| 0| 0| 0| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x3ffff }, { 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x4fffd }, --- libcpp/uname2c.h.jj 2022-08-26 09:24:12.133615373 +0200 +++ libcpp/uname2c.h 2022-11-03 10:40:33.840908387 +0100 @@ -52,7 +52,7 @@ use or other dealings in these Data Files or Software without prior written authorization of the copyright holder. 
*/ -static const char uname2c_dict[59418] = +static const char uname2c_dict[59891] = "DIVIDED BY HORIZONTAL BAR AND TOP HALF DIVIDED BY VERTICAL BARUIGHUR KIRGHIZ " "YEH WITH HAMZA ABOVE WITH ALEF MAKSURA LANTED EQUAL ABOVE GREATER-THAN ABOVE " "SLANTED EQUAL WITH EXCLAMATION MARK WITH LEFT RIGHT ARROW ABOVELANTED EQUAL A" @@ -77,435 +77,439 @@ static const char uname2c_dict[59418] = "IGHTWARDS ARROW ABOVE LEFTWARDS ARROWURNED SWIRL BIRGA WITH DOUBLE ORNAMENTWA" "RDS AND LEFTWARDS OPEN CIRCLE ARROWSWO DOTS VERTICALLY BELOW AND SMALL TAHIOU" "S FACE WITH SYMBOLS COVERING MOUTHONCAVE-POINTED BLACK RIGHTWARDS ARROWOVER R" -"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGE AND LOWE" -"R HALF INVERSE MEDIUM SHADE WITH HORIZONTAL MIDDLE BLACK STRIPEDOUBLE-LINE EQ" -"UAL ABOVE GREATER-THANGREATER-THAN ABOVE DOUBLE-LINE EQUALIGHT TORTOISE SHELL" -" BRACKET ORNAMENTLEFT TORTOISE SHELL BRACKET ORNAMENTOR LOWER RIGHT CURLY BRA" -"CKET SECTIONOVER LEFTWARDS TRIANGLE-HEADED ARROWPART BETWEEN MIDDLE AND RING " -"FINGERSSINGLE COMMA QUOTATION MARK ORNAMENTSMALL ARABIC LETTER TAH AND TWO DO" -"TSTURNED COMMA QUOTATION MARK ORNAMENTBESIDE AND JOINED WITH INTERSECTIONBOTT" -"OM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KASHMIRI INDEPENDENT SVARITAND UPPER AN" -"D LOWER ONE EIGHTH BLOCKOR LOWER LEFT CURLY BRACKET SECTIONORNER ARROWS CIRCL" -"ING ANTICLOCKWISEOUNDED HIGH STOP WITH FILLED CENTRERIGHT-POINTING ANGLE QUOT" -"ATION MARKTWO HORIZONTAL STROKES TO THE RIGHT TWO DOTS OVER ONE DOT PUNCTUATI" -"ONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHUR KAZAKH KIRGHIZ ALEF MAKSURA MODIFIE" -"R LETTER LABIALIZATION MARKOVER IGI SHIR OVER SHIR UD OVER UDOVER TAB NI OVER" ... 
+"IGHTWARDS TRIANGLE-HEADED ARROWOZENGE CONTAINING BLACK SMALL LOZENGESMALL LET" +"TER BYELORUSSIAN-UKRAINIAN I AND LOWER HALF INVERSE MEDIUM SHADE WITH HORIZON" +"TAL MIDDLE BLACK STRIPEDOUBLE-LINE EQUAL ABOVE GREATER-THANGREATER-THAN ABOVE" +" DOUBLE-LINE EQUALIGHT TORTOISE SHELL BRACKET ORNAMENTLEFT TORTOISE SHELL BRA" +"CKET ORNAMENTOR LOWER RIGHT CURLY BRACKET SECTIONOVER LEFTWARDS TRIANGLE-HEAD" +"ED ARROWPART BETWEEN MIDDLE AND RING FINGERSSINGLE COMMA QUOTATION MARK ORNAM" +"ENTSMALL ARABIC LETTER TAH AND TWO DOTSTURNED COMMA QUOTATION MARK ORNAMENTBE" +"SIDE AND JOINED WITH INTERSECTIONBOTTOM-LIGHTED RIGHTWARDS ARROWHEADGVEDIC KA" +"SHMIRI INDEPENDENT SVARITAND UPPER AND LOWER ONE EIGHTH BLOCKOR LOWER LEFT CU" +"RLY BRACKET SECTIONORNER ARROWS CIRCLING ANTICLOCKWISEOUNDED HIGH STOP WITH F" +"ILLED CENTRERIGHT-POINTING ANGLE QUOTATION MARKTWO HORIZONTAL STROKES TO THE " +"RIGHT TWO DOTS OVER ONE DOT PUNCTUATIONDOWNWARDS ARROW WITH TIP LEFTWARDSIGHU" +"R KAZAKH KIRGHIZ ALEF MAKSURA MODIFIER LETTER LABIALIZATION MARKOVER IGI SHIR" ... 
+"U5-05505575B66-06206D77 O72C8 A8 I8F09819E3A7AAEFAL2ALKAUJAWXAZUB57B89BIBBXGC" +"A9CAHCAICIGCWICYAD42D70DA2DE6DIBDJADZEE80EEGEIEEYKEZHF14F8CFAJFLYFOMFUEHAQHOJ" +"HOXI-IIMNIWRJAHJEUJHAK00KUEKUGLFALK LULNIIOAYOIXPOQPUQQ00QARQIFQIGQOFQOTQUFSI" +"ISUUTJETUJUDYUEZUMXUOPUQAVAUVNOVOKVOYVUUWAUWOQX00XAUXEHXWGXWVY00YOTZJEZOOZORZ" +"UPZZE16171D1F343638394048494B4E6490929599C0D0G0G3G9LXPVVDVWZ0"; -static const unsigned char uname2c_tree[208765] = { - 0xa1, 0xc5, 0xd5, 0x0b, 0xa2, 0xad, 0xdb, 0x0a, 0xa3, 0xcf, 0x8c, 0x09, - 0xa4, 0xd7, 0xdb, 0x08, 0xa5, 0xd9, 0x94, 0x08, 0xa6, 0xcf, 0x87, 0x08, - 0xa7, 0xf4, 0xc7, 0x07, 0xa8, 0xbf, 0xfe, 0x06, 0xa9, 0xf9, 0xe9, 0x06, - 0xaa, 0x86, 0xe4, 0x06, 0xab, 0x93, 0xb6, 0x06, 0xac, 0xd3, 0xb3, 0x05, - 0xad, 0xa1, 0x8e, 0x04, 0xae, 0xf4, 0xea, 0x03, 0xaf, 0xe6, 0xbf, 0x03, - 0xb0, 0xb4, 0x93, 0x03, 0x02, 0x7c, 0x00, 0x8d, 0x92, 0x03, 0xb2, 0xb6, - 0xf1, 0x02, 0xb3, 0xb1, 0xec, 0x01, 0xb4, 0xde, 0x80, 0x01, 0xb5, 0xda, - 0x74, 0xb6, 0x8a, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3, ... 
+static const unsigned char uname2c_tree[210697] = { + 0xa1, 0xb5, 0xe4, 0x0b, 0xa2, 0x9d, 0xea, 0x0a, 0xa3, 0x9d, 0x9a, 0x09, + 0xa4, 0xd2, 0xe8, 0x08, 0xa5, 0xfd, 0x9f, 0x08, 0xa6, 0xe0, 0x92, 0x08, + 0xa7, 0xee, 0xd2, 0x07, 0xa8, 0xa2, 0x89, 0x07, 0xa9, 0xdc, 0xf4, 0x06, + 0xaa, 0xe1, 0xee, 0x06, 0xab, 0x97, 0xbb, 0x06, 0xac, 0xf7, 0xb7, 0x05, + 0xad, 0xdb, 0x90, 0x04, 0xae, 0xb6, 0xeb, 0x03, 0xaf, 0x9c, 0xc0, 0x03, + 0xb0, 0xde, 0x93, 0x03, 0x02, 0x7c, 0x00, 0xb1, 0x92, 0x03, 0xb2, 0xd1, + 0xf1, 0x02, 0xb3, 0xc0, 0xec, 0x01, 0xb4, 0xed, 0x80, 0x01, 0xb5, 0xe9, + 0x74, 0xb6, 0x99, 0x56, 0xb7, 0xdf, 0x3f, 0xb8, 0xe5, 0x3e, 0xb9, 0xd3, + 0x10, 0xba, 0x01, 0xff, 0x0a, 0x5d, 0x1b, 0xe9, 0x0f, 0x10, 0xe9, 0x5a, + 0x80, 0x0c, 0xa5, 0xd1, 0x0b, 0x50, 0xd9, 0x5d, 0x10, 0xf9, 0x01, 0x08, + 0xa5, 0xbe, 0x06, 0x45, 0xb8, 0xde, 0xdf, 0xf9, 0x41, 0x0a, 0x99, 0x53, + 0xaa, 0x07, 0x06, 0xb5, 0xd3, 0x23, 0x11, 0x62, 0x57, 0x01, 0xff, 0x4e, + 0x12, 0x70, 0x44, 0xcf, 0x01, 0x45, 0xdb, 0x80, 0x45, 0xcf, 0x01, 0x06, ... 
0xff, 0x45, 0xe0, 0x03, 0xb4, 0x00, 0x00, 0x44, 0x05, 0x02, 0x9f, 0x29, - 0x40, 0x53, 0x89, 0x42, 0x6d, 0x20, 0x00, 0x52, 0x8b, 0x4f, 0x6b, 0x20, - 0x40, 0x45, 0xae, 0xdd, 0x97, 0xfa, 0x01, 0x46, 0xcb, 0x63, 0x00, 0x21, + 0x40, 0x53, 0xcb, 0x42, 0x6d, 0x20, 0x00, 0x52, 0xdf, 0x4f, 0x6b, 0x20, + 0x40, 0x45, 0x6c, 0xdf, 0x97, 0xfa, 0x01, 0x46, 0x5f, 0x64, 0x00, 0x21, 0x40 }; static const cppchar_t uname2c_pairs[] = { 0xac00, 0xd7a3 /* HANGUL SYLLABLE */, 0, 0x3400, 0x4dbf /* CJK UNIFIED IDEOGRAPH- */, - 0x4e00, 0x9ffc /* CJK UNIFIED IDEOGRAPH- */, - 0x20000, 0x2a6dd /* CJK UNIFIED IDEOGRAPH- */, - 0x2a700, 0x2b734 /* CJK UNIFIED IDEOGRAPH- */, + 0x4e00, 0x9fff /* CJK UNIFIED IDEOGRAPH- */, + 0x20000, 0x2a6df /* CJK UNIFIED IDEOGRAPH- */, + 0x2a700, 0x2b739 /* CJK UNIFIED IDEOGRAPH- */, 0x2b740, 0x2b81d /* CJK UNIFIED IDEOGRAPH- */, 0x2b820, 0x2cea1 /* CJK UNIFIED IDEOGRAPH- */, 0x2ceb0, 0x2ebe0 /* CJK UNIFIED IDEOGRAPH- */, - 0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */, 0, + 0x30000, 0x3134a /* CJK UNIFIED IDEOGRAPH- */, + 0x31350, 0x323af /* CJK UNIFIED IDEOGRAPH- */, 0, 0x17000, 0x187f7 /* TANGUT IDEOGRAPH- */, 0x18d00, 0x18d08 /* TANGUT IDEOGRAPH- */, 0, 0x18b00, 0x18cd5 /* KHITAN SMALL SCRIPT CHARACTER- */, 0, @@ -18247,10 +18415,10 @@ static const cppchar_t uname2c_pairs[] = static const unsigned char uname2c_generated[] = { 0 /* HANGUL SYLLABLE */, 3 /* CJK UNIFIED IDEOGRAPH- */, - 20 /* TANGUT IDEOGRAPH- */, - 25 /* KHITAN SMALL SCRIPT CHARACTER- */, - 28 /* NUSHU CHARACTER- */, - 31 /* CJK COMPATIBILITY IDEOGRAPH- */ }; + 22 /* TANGUT IDEOGRAPH- */, + 27 /* KHITAN SMALL SCRIPT CHARACTER- */, + 30 /* NUSHU CHARACTER- */, + 33 /* CJK COMPATIBILITY IDEOGRAPH- */ }; static const unsigned int uname2c_max_name_len = 88; Jakub