public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-7082] libstdc++: Fix Unicode property detection functions
@ 2024-01-09 23:44 Jonathan Wakely
  0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2024-01-09 23:44 UTC (permalink / raw)
  To: gcc-cvs, libstdc++-cvs

https://gcc.gnu.org/g:ea314ccd625aada7ed8a324ac07cfc3a8aa0f03f

commit r14-7082-gea314ccd625aada7ed8a324ac07cfc3a8aa0f03f
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Tue Jan 9 14:43:40 2024 +0000

    libstdc++: Fix Unicode property detection functions
    
    Fix some copy & pasted logic in __is_extended_pictographic. This
    function should yield false for the values before the first edge, not
    true. Also add a missing boundary condition check in __incb_property.
    
    Also fix an off-by-one error in _Utf_iterator::operator++() that would
    make dereferencing a past-the-end iterator undefined (where the intended
    design is that the iterator is always incrementable and dereferenceable,
    for better memory safety).
    
    Also simplify the grapheme view iterator, which still contained some
    remnants of an earlier design I was experimenting with.
    
    Slightly tweak the gen_libstdcxx_unicode_data.py script so that the
    _Gcb_property enumerators are in the order we encounter them in the data
    file, instead of sorting them alphabetically. Start with the "Other"
    property at value 0, because that's the default property for anything
    not in the file. This makes no practical difference, but seems cleaner.
    It causes the values in the __gcb_edges table to change, so it can only
    be done now, before anybody is using this code. The enumerator values
    and table entries become ABI artefacts for the function using them.
    
    contrib/ChangeLog:
    
            * unicode/gen_libstdcxx_unicode_data.py: Print out Gcb_property
            enumerators in the order they're seen, not alphabetical order.
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/unicode-data.h: Regenerate.
            * include/bits/unicode.h (_Utf_iterator::operator++()): Fix
            off-by-one error.
            (__incb_property): Add missing check for values before the
            first edge.
            (__is_extended_pictographic): Invert return values to fix
            copy&pasted logic.
            (_Grapheme_cluster_view::_Iterator): Remove second iterator
            member and find end of cluster lazily.
            * testsuite/ext/unicode/grapheme_view.cc: New test.
            * testsuite/ext/unicode/properties.cc: New test.
            * testsuite/ext/unicode/view.cc: New test.

Diff:
---
 contrib/unicode/gen_libstdcxx_unicode_data.py      |   5 +-
 libstdc++-v3/include/bits/unicode-data.h           | 596 ++++++++++-----------
 libstdc++-v3/include/bits/unicode.h                |  51 +-
 .../testsuite/ext/unicode/grapheme_view.cc         |  95 ++++
 libstdc++-v3/testsuite/ext/unicode/properties.cc   | 128 +++++
 libstdc++-v3/testsuite/ext/unicode/view.cc         |  30 ++
 6 files changed, 581 insertions(+), 324 deletions(-)

diff --git a/contrib/unicode/gen_libstdcxx_unicode_data.py b/contrib/unicode/gen_libstdcxx_unicode_data.py
index 14491451435..f2f2f8a8ec2 100755
--- a/contrib/unicode/gen_libstdcxx_unicode_data.py
+++ b/contrib/unicode/gen_libstdcxx_unicode_data.py
@@ -122,7 +122,10 @@ for line in open("GraphemeBreakProperty.txt", "r"):
         process_code_points(code_points, grapheme_property.strip())
 
 edges = find_edges(all_code_points)
-gcb_props = {p:i+1 for i,p in enumerate(sorted(set([x[1] for x in edges])))}
+gcb_props = {"Other":0}
+for c, p in edges:
+    if p not in gcb_props:
+        gcb_props[p] = len(gcb_props)
 shift_bits = int(math.ceil(math.log2(len(gcb_props))))
 
 # Enum definition for std::__unicode::_Gcb_property
diff --git a/libstdc++-v3/include/bits/unicode-data.h b/libstdc++-v3/include/bits/unicode-data.h
index c0c7e7d86ff..83968096499 100644
--- a/libstdc++-v3/include/bits/unicode-data.h
+++ b/libstdc++-v3/include/bits/unicode-data.h
@@ -37,20 +37,20 @@
   };
 
   enum class _Gcb_property {
-    _Gcb_CR = 1,
-    _Gcb_Control = 2,
-    _Gcb_Extend = 3,
-    _Gcb_L = 4,
-    _Gcb_LF = 5,
-    _Gcb_LV = 6,
-    _Gcb_LVT = 7,
-    _Gcb_Other = 8,
-    _Gcb_Prepend = 9,
-    _Gcb_Regional_Indicator = 10,
-    _Gcb_SpacingMark = 11,
-    _Gcb_T = 12,
-    _Gcb_V = 13,
-    _Gcb_ZWJ = 14,
+    _Gcb_Other = 0,
+    _Gcb_Control = 1,
+    _Gcb_LF = 2,
+    _Gcb_CR = 3,
+    _Gcb_Extend = 4,
+    _Gcb_Prepend = 5,
+    _Gcb_SpacingMark = 6,
+    _Gcb_L = 7,
+    _Gcb_V = 8,
+    _Gcb_T = 9,
+    _Gcb_ZWJ = 10,
+    _Gcb_LV = 11,
+    _Gcb_LVT = 12,
+    _Gcb_Regional_Indicator = 13,
   };
 
   // Values generated by contrib/unicode/gen_std_format_width.py,
@@ -58,290 +58,290 @@
   // Entries are (code_point << shift_bits) + property.
   inline constexpr int __gcb_shift_bits = 0x4;
   inline constexpr uint32_t __gcb_edges[] = {
-    0x2, 0xa5, 0xb2, 0xd1, 0xe2, 0x208,
-    0x7f2, 0xa08, 0xad2, 0xae8, 0x3003, 0x3708,
-    0x4833, 0x48a8, 0x5913, 0x5be8, 0x5bf3, 0x5c08,
-    0x5c13, 0x5c38, 0x5c43, 0x5c68, 0x5c73, 0x5c88,
-    0x6009, 0x6068, 0x6103, 0x61b8, 0x61c2, 0x61d8,
-    0x64b3, 0x6608, 0x6703, 0x6718, 0x6d63, 0x6dd9,
-    0x6de8, 0x6df3, 0x6e58, 0x6e73, 0x6e98, 0x6ea3,
-    0x6ee8, 0x70f9, 0x7108, 0x7113, 0x7128, 0x7303,
-    0x74b8, 0x7a63, 0x7b18, 0x7eb3, 0x7f48, 0x7fd3,
-    0x7fe8, 0x8163, 0x81a8, 0x81b3, 0x8248, 0x8253,
-    0x8288, 0x8293, 0x82e8, 0x8593, 0x85c8, 0x8909,
-    0x8928, 0x8983, 0x8a08, 0x8ca3, 0x8e29, 0x8e33,
-    0x903b, 0x9048, 0x93a3, 0x93bb, 0x93c3, 0x93d8,
-    0x93eb, 0x9413, 0x949b, 0x94d3, 0x94eb, 0x9508,
-    0x9513, 0x9588, 0x9623, 0x9648, 0x9813, 0x982b,
-    0x9848, 0x9bc3, 0x9bd8, 0x9be3, 0x9bfb, 0x9c13,
-    0x9c58, 0x9c7b, 0x9c98, 0x9cbb, 0x9cd3, 0x9ce8,
-    0x9d73, 0x9d88, 0x9e23, 0x9e48, 0x9fe3, 0x9ff8,
-    0xa013, 0xa03b, 0xa048, 0xa3c3, 0xa3d8, 0xa3eb,
-    0xa413, 0xa438, 0xa473, 0xa498, 0xa4b3, 0xa4e8,
-    0xa513, 0xa528, 0xa703, 0xa728, 0xa753, 0xa768,
-    0xa813, 0xa83b, 0xa848, 0xabc3, 0xabd8, 0xabeb,
-    0xac13, 0xac68, 0xac73, 0xac9b, 0xaca8, 0xacbb,
-    0xacd3, 0xace8, 0xae23, 0xae48, 0xafa3, 0xb008,
-    0xb013, 0xb02b, 0xb048, 0xb3c3, 0xb3d8, 0xb3e3,
-    0xb40b, 0xb413, 0xb458, 0xb47b, 0xb498, 0xb4bb,
-    0xb4d3, 0xb4e8, 0xb553, 0xb588, 0xb623, 0xb648,
-    0xb823, 0xb838, 0xbbe3, 0xbbfb, 0xbc03, 0xbc1b,
-    0xbc38, 0xbc6b, 0xbc98, 0xbcab, 0xbcd3, 0xbce8,
-    0xbd73, 0xbd88, 0xc003, 0xc01b, 0xc043, 0xc058,
-    0xc3c3, 0xc3d8, 0xc3e3, 0xc41b, 0xc458, 0xc463,
-    0xc498, 0xc4a3, 0xc4e8, 0xc553, 0xc578, 0xc623,
-    0xc648, 0xc813, 0xc82b, 0xc848, 0xcbc3, 0xcbd8,
-    0xcbeb, 0xcbf3, 0xcc0b, 0xcc23, 0xcc3b, 0xcc58,
-    0xcc63, 0xcc7b, 0xcc98, 0xccab, 0xccc3, 0xcce8,
-    0xcd53, 0xcd78, 0xce23, 0xce48, 0xcf3b, 0xcf48,
-    0xd003, 0xd02b, 0xd048, 0xd3b3, 0xd3d8, 0xd3e3,
-    0xd3fb, 0xd413, 0xd458, 0xd46b, 0xd498, 0xd4ab,
-    0xd4d3, 0xd4e9, 0xd4f8, 0xd573, 0xd588, 0xd623,
-    0xd648, 0xd813, 0xd82b, 0xd848, 0xdca3, 0xdcb8,
-    0xdcf3, 0xdd0b, 0xdd23, 0xdd58, 0xdd63, 0xdd78,
-    0xdd8b, 0xddf3, 0xde08, 0xdf2b, 0xdf48, 0xe313,
-    0xe328, 0xe33b, 0xe343, 0xe3b8, 0xe473, 0xe4f8,
-    0xeb13, 0xeb28, 0xeb3b, 0xeb43, 0xebd8, 0xec83,
-    0xecf8, 0xf183, 0xf1a8, 0xf353, 0xf368, 0xf373,
-    0xf388, 0xf393, 0xf3a8, 0xf3eb, 0xf408, 0xf713,
-    0xf7fb, 0xf803, 0xf858, 0xf863, 0xf888, 0xf8d3,
-    0xf988, 0xf993, 0xfbd8, 0xfc63, 0xfc78, 0x102d3,
-    0x1031b, 0x10323, 0x10388, 0x10393, 0x103bb, 0x103d3,
-    0x103f8, 0x1056b, 0x10583, 0x105a8, 0x105e3, 0x10618,
-    0x10713, 0x10758, 0x10823, 0x10838, 0x1084b, 0x10853,
-    0x10878, 0x108d3, 0x108e8, 0x109d3, 0x109e8, 0x11004,
-    0x1160d, 0x11a8c, 0x12008, 0x135d3, 0x13608, 0x17123,
-    0x1715b, 0x17168, 0x17323, 0x1734b, 0x17358, 0x17523,
-    0x17548, 0x17723, 0x17748, 0x17b43, 0x17b6b, 0x17b73,
-    0x17beb, 0x17c63, 0x17c7b, 0x17c93, 0x17d48, 0x17dd3,
-    0x17de8, 0x180b3, 0x180e2, 0x180f3, 0x18108, 0x18853,
-    0x18878, 0x18a93, 0x18aa8, 0x19203, 0x1923b, 0x19273,
-    0x1929b, 0x192c8, 0x1930b, 0x19323, 0x1933b, 0x19393,
-    0x193c8, 0x1a173, 0x1a19b, 0x1a1b3, 0x1a1c8, 0x1a55b,
-    0x1a563, 0x1a57b, 0x1a583, 0x1a5f8, 0x1a603, 0x1a618,
-    0x1a623, 0x1a638, 0x1a653, 0x1a6db, 0x1a733, 0x1a7d8,
-    0x1a7f3, 0x1a808, 0x1ab03, 0x1acf8, 0x1b003, 0x1b04b,
-    0x1b058, 0x1b343, 0x1b3bb, 0x1b3c3, 0x1b3db, 0x1b423,
-    0x1b43b, 0x1b458, 0x1b6b3, 0x1b748, 0x1b803, 0x1b82b,
-    0x1b838, 0x1ba1b, 0x1ba23, 0x1ba6b, 0x1ba83, 0x1baab,
-    0x1bab3, 0x1bae8, 0x1be63, 0x1be7b, 0x1be83, 0x1beab,
-    0x1bed3, 0x1beeb, 0x1bef3, 0x1bf2b, 0x1bf48, 0x1c24b,
-    0x1c2c3, 0x1c34b, 0x1c363, 0x1c388, 0x1cd03, 0x1cd38,
-    0x1cd43, 0x1ce1b, 0x1ce23, 0x1ce98, 0x1ced3, 0x1cee8,
-    0x1cf43, 0x1cf58, 0x1cf7b, 0x1cf83, 0x1cfa8, 0x1dc03,
-    0x1e008, 0x200b2, 0x200c3, 0x200de, 0x200e2, 0x20108,
-    0x20282, 0x202f8, 0x20602, 0x20708, 0x20d03, 0x20f18,
-    0x2cef3, 0x2cf28, 0x2d7f3, 0x2d808, 0x2de03, 0x2e008,
-    0x302a3, 0x30308, 0x30993, 0x309b8, 0xa66f3, 0xa6738,
-    0xa6743, 0xa67e8, 0xa69e3, 0xa6a08, 0xa6f03, 0xa6f28,
-    0xa8023, 0xa8038, 0xa8063, 0xa8078, 0xa80b3, 0xa80c8,
-    0xa823b, 0xa8253, 0xa827b, 0xa8288, 0xa82c3, 0xa82d8,
-    0xa880b, 0xa8828, 0xa8b4b, 0xa8c43, 0xa8c68, 0xa8e03,
-    0xa8f28, 0xa8ff3, 0xa9008, 0xa9263, 0xa92e8, 0xa9473,
-    0xa952b, 0xa9548, 0xa9604, 0xa97d8, 0xa9803, 0xa983b,
-    0xa9848, 0xa9b33, 0xa9b4b, 0xa9b63, 0xa9bab, 0xa9bc3,
-    0xa9beb, 0xa9c18, 0xa9e53, 0xa9e68, 0xaa293, 0xaa2fb,
-    0xaa313, 0xaa33b, 0xaa353, 0xaa378, 0xaa433, 0xaa448,
-    0xaa4c3, 0xaa4db, 0xaa4e8, 0xaa7c3, 0xaa7d8, 0xaab03,
-    0xaab18, 0xaab23, 0xaab58, 0xaab73, 0xaab98, 0xaabe3,
-    0xaac08, 0xaac13, 0xaac28, 0xaaebb, 0xaaec3, 0xaaeeb,
-    0xaaf08, 0xaaf5b, 0xaaf63, 0xaaf78, 0xabe3b, 0xabe53,
-    0xabe6b, 0xabe83, 0xabe9b, 0xabeb8, 0xabecb, 0xabed3,
-    0xabee8, 0xac006, 0xac017, 0xac1c6, 0xac1d7, 0xac386,
-    0xac397, 0xac546, 0xac557, 0xac706, 0xac717, 0xac8c6,
-    0xac8d7, 0xaca86, 0xaca97, 0xacc46, 0xacc57, 0xace06,
-    0xace17, 0xacfc6, 0xacfd7, 0xad186, 0xad197, 0xad346,
-    0xad357, 0xad506, 0xad517, 0xad6c6, 0xad6d7, 0xad886,
-    0xad897, 0xada46, 0xada57, 0xadc06, 0xadc17, 0xaddc6,
-    0xaddd7, 0xadf86, 0xadf97, 0xae146, 0xae157, 0xae306,
-    0xae317, 0xae4c6, 0xae4d7, 0xae686, 0xae697, 0xae846,
-    0xae857, 0xaea06, 0xaea17, 0xaebc6, 0xaebd7, 0xaed86,
-    0xaed97, 0xaef46, 0xaef57, 0xaf106, 0xaf117, 0xaf2c6,
-    0xaf2d7, 0xaf486, 0xaf497, 0xaf646, 0xaf657, 0xaf806,
-    0xaf817, 0xaf9c6, 0xaf9d7, 0xafb86, 0xafb97, 0xafd46,
-    0xafd57, 0xaff06, 0xaff17, 0xb00c6, 0xb00d7, 0xb0286,
-    0xb0297, 0xb0446, 0xb0457, 0xb0606, 0xb0617, 0xb07c6,
-    0xb07d7, 0xb0986, 0xb0997, 0xb0b46, 0xb0b57, 0xb0d06,
-    0xb0d17, 0xb0ec6, 0xb0ed7, 0xb1086, 0xb1097, 0xb1246,
-    0xb1257, 0xb1406, 0xb1417, 0xb15c6, 0xb15d7, 0xb1786,
-    0xb1797, 0xb1946, 0xb1957, 0xb1b06, 0xb1b17, 0xb1cc6,
-    0xb1cd7, 0xb1e86, 0xb1e97, 0xb2046, 0xb2057, 0xb2206,
-    0xb2217, 0xb23c6, 0xb23d7, 0xb2586, 0xb2597, 0xb2746,
-    0xb2757, 0xb2906, 0xb2917, 0xb2ac6, 0xb2ad7, 0xb2c86,
-    0xb2c97, 0xb2e46, 0xb2e57, 0xb3006, 0xb3017, 0xb31c6,
-    0xb31d7, 0xb3386, 0xb3397, 0xb3546, 0xb3557, 0xb3706,
-    0xb3717, 0xb38c6, 0xb38d7, 0xb3a86, 0xb3a97, 0xb3c46,
-    0xb3c57, 0xb3e06, 0xb3e17, 0xb3fc6, 0xb3fd7, 0xb4186,
-    0xb4197, 0xb4346, 0xb4357, 0xb4506, 0xb4517, 0xb46c6,
-    0xb46d7, 0xb4886, 0xb4897, 0xb4a46, 0xb4a57, 0xb4c06,
-    0xb4c17, 0xb4dc6, 0xb4dd7, 0xb4f86, 0xb4f97, 0xb5146,
-    0xb5157, 0xb5306, 0xb5317, 0xb54c6, 0xb54d7, 0xb5686,
-    0xb5697, 0xb5846, 0xb5857, 0xb5a06, 0xb5a17, 0xb5bc6,
-    0xb5bd7, 0xb5d86, 0xb5d97, 0xb5f46, 0xb5f57, 0xb6106,
-    0xb6117, 0xb62c6, 0xb62d7, 0xb6486, 0xb6497, 0xb6646,
-    0xb6657, 0xb6806, 0xb6817, 0xb69c6, 0xb69d7, 0xb6b86,
-    0xb6b97, 0xb6d46, 0xb6d57, 0xb6f06, 0xb6f17, 0xb70c6,
-    0xb70d7, 0xb7286, 0xb7297, 0xb7446, 0xb7457, 0xb7606,
-    0xb7617, 0xb77c6, 0xb77d7, 0xb7986, 0xb7997, 0xb7b46,
-    0xb7b57, 0xb7d06, 0xb7d17, 0xb7ec6, 0xb7ed7, 0xb8086,
-    0xb8097, 0xb8246, 0xb8257, 0xb8406, 0xb8417, 0xb85c6,
-    0xb85d7, 0xb8786, 0xb8797, 0xb8946, 0xb8957, 0xb8b06,
-    0xb8b17, 0xb8cc6, 0xb8cd7, 0xb8e86, 0xb8e97, 0xb9046,
-    0xb9057, 0xb9206, 0xb9217, 0xb93c6, 0xb93d7, 0xb9586,
-    0xb9597, 0xb9746, 0xb9757, 0xb9906, 0xb9917, 0xb9ac6,
-    0xb9ad7, 0xb9c86, 0xb9c97, 0xb9e46, 0xb9e57, 0xba006,
-    0xba017, 0xba1c6, 0xba1d7, 0xba386, 0xba397, 0xba546,
-    0xba557, 0xba706, 0xba717, 0xba8c6, 0xba8d7, 0xbaa86,
-    0xbaa97, 0xbac46, 0xbac57, 0xbae06, 0xbae17, 0xbafc6,
-    0xbafd7, 0xbb186, 0xbb197, 0xbb346, 0xbb357, 0xbb506,
-    0xbb517, 0xbb6c6, 0xbb6d7, 0xbb886, 0xbb897, 0xbba46,
-    0xbba57, 0xbbc06, 0xbbc17, 0xbbdc6, 0xbbdd7, 0xbbf86,
-    0xbbf97, 0xbc146, 0xbc157, 0xbc306, 0xbc317, 0xbc4c6,
-    0xbc4d7, 0xbc686, 0xbc697, 0xbc846, 0xbc857, 0xbca06,
-    0xbca17, 0xbcbc6, 0xbcbd7, 0xbcd86, 0xbcd97, 0xbcf46,
-    0xbcf57, 0xbd106, 0xbd117, 0xbd2c6, 0xbd2d7, 0xbd486,
-    0xbd497, 0xbd646, 0xbd657, 0xbd806, 0xbd817, 0xbd9c6,
-    0xbd9d7, 0xbdb86, 0xbdb97, 0xbdd46, 0xbdd57, 0xbdf06,
-    0xbdf17, 0xbe0c6, 0xbe0d7, 0xbe286, 0xbe297, 0xbe446,
-    0xbe457, 0xbe606, 0xbe617, 0xbe7c6, 0xbe7d7, 0xbe986,
-    0xbe997, 0xbeb46, 0xbeb57, 0xbed06, 0xbed17, 0xbeec6,
-    0xbeed7, 0xbf086, 0xbf097, 0xbf246, 0xbf257, 0xbf406,
-    0xbf417, 0xbf5c6, 0xbf5d7, 0xbf786, 0xbf797, 0xbf946,
-    0xbf957, 0xbfb06, 0xbfb17, 0xbfcc6, 0xbfcd7, 0xbfe86,
-    0xbfe97, 0xc0046, 0xc0057, 0xc0206, 0xc0217, 0xc03c6,
-    0xc03d7, 0xc0586, 0xc0597, 0xc0746, 0xc0757, 0xc0906,
-    0xc0917, 0xc0ac6, 0xc0ad7, 0xc0c86, 0xc0c97, 0xc0e46,
-    0xc0e57, 0xc1006, 0xc1017, 0xc11c6, 0xc11d7, 0xc1386,
-    0xc1397, 0xc1546, 0xc1557, 0xc1706, 0xc1717, 0xc18c6,
-    0xc18d7, 0xc1a86, 0xc1a97, 0xc1c46, 0xc1c57, 0xc1e06,
-    0xc1e17, 0xc1fc6, 0xc1fd7, 0xc2186, 0xc2197, 0xc2346,
-    0xc2357, 0xc2506, 0xc2517, 0xc26c6, 0xc26d7, 0xc2886,
-    0xc2897, 0xc2a46, 0xc2a57, 0xc2c06, 0xc2c17, 0xc2dc6,
-    0xc2dd7, 0xc2f86, 0xc2f97, 0xc3146, 0xc3157, 0xc3306,
-    0xc3317, 0xc34c6, 0xc34d7, 0xc3686, 0xc3697, 0xc3846,
-    0xc3857, 0xc3a06, 0xc3a17, 0xc3bc6, 0xc3bd7, 0xc3d86,
-    0xc3d97, 0xc3f46, 0xc3f57, 0xc4106, 0xc4117, 0xc42c6,
-    0xc42d7, 0xc4486, 0xc4497, 0xc4646, 0xc4657, 0xc4806,
-    0xc4817, 0xc49c6, 0xc49d7, 0xc4b86, 0xc4b97, 0xc4d46,
-    0xc4d57, 0xc4f06, 0xc4f17, 0xc50c6, 0xc50d7, 0xc5286,
-    0xc5297, 0xc5446, 0xc5457, 0xc5606, 0xc5617, 0xc57c6,
-    0xc57d7, 0xc5986, 0xc5997, 0xc5b46, 0xc5b57, 0xc5d06,
-    0xc5d17, 0xc5ec6, 0xc5ed7, 0xc6086, 0xc6097, 0xc6246,
-    0xc6257, 0xc6406, 0xc6417, 0xc65c6, 0xc65d7, 0xc6786,
-    0xc6797, 0xc6946, 0xc6957, 0xc6b06, 0xc6b17, 0xc6cc6,
-    0xc6cd7, 0xc6e86, 0xc6e97, 0xc7046, 0xc7057, 0xc7206,
-    0xc7217, 0xc73c6, 0xc73d7, 0xc7586, 0xc7597, 0xc7746,
-    0xc7757, 0xc7906, 0xc7917, 0xc7ac6, 0xc7ad7, 0xc7c86,
-    0xc7c97, 0xc7e46, 0xc7e57, 0xc8006, 0xc8017, 0xc81c6,
-    0xc81d7, 0xc8386, 0xc8397, 0xc8546, 0xc8557, 0xc8706,
-    0xc8717, 0xc88c6, 0xc88d7, 0xc8a86, 0xc8a97, 0xc8c46,
-    0xc8c57, 0xc8e06, 0xc8e17, 0xc8fc6, 0xc8fd7, 0xc9186,
-    0xc9197, 0xc9346, 0xc9357, 0xc9506, 0xc9517, 0xc96c6,
-    0xc96d7, 0xc9886, 0xc9897, 0xc9a46, 0xc9a57, 0xc9c06,
-    0xc9c17, 0xc9dc6, 0xc9dd7, 0xc9f86, 0xc9f97, 0xca146,
-    0xca157, 0xca306, 0xca317, 0xca4c6, 0xca4d7, 0xca686,
-    0xca697, 0xca846, 0xca857, 0xcaa06, 0xcaa17, 0xcabc6,
-    0xcabd7, 0xcad86, 0xcad97, 0xcaf46, 0xcaf57, 0xcb106,
-    0xcb117, 0xcb2c6, 0xcb2d7, 0xcb486, 0xcb497, 0xcb646,
-    0xcb657, 0xcb806, 0xcb817, 0xcb9c6, 0xcb9d7, 0xcbb86,
-    0xcbb97, 0xcbd46, 0xcbd57, 0xcbf06, 0xcbf17, 0xcc0c6,
-    0xcc0d7, 0xcc286, 0xcc297, 0xcc446, 0xcc457, 0xcc606,
-    0xcc617, 0xcc7c6, 0xcc7d7, 0xcc986, 0xcc997, 0xccb46,
-    0xccb57, 0xccd06, 0xccd17, 0xccec6, 0xcced7, 0xcd086,
-    0xcd097, 0xcd246, 0xcd257, 0xcd406, 0xcd417, 0xcd5c6,
-    0xcd5d7, 0xcd786, 0xcd797, 0xcd946, 0xcd957, 0xcdb06,
-    0xcdb17, 0xcdcc6, 0xcdcd7, 0xcde86, 0xcde97, 0xce046,
-    0xce057, 0xce206, 0xce217, 0xce3c6, 0xce3d7, 0xce586,
-    0xce597, 0xce746, 0xce757, 0xce906, 0xce917, 0xceac6,
-    0xcead7, 0xcec86, 0xcec97, 0xcee46, 0xcee57, 0xcf006,
-    0xcf017, 0xcf1c6, 0xcf1d7, 0xcf386, 0xcf397, 0xcf546,
-    0xcf557, 0xcf706, 0xcf717, 0xcf8c6, 0xcf8d7, 0xcfa86,
-    0xcfa97, 0xcfc46, 0xcfc57, 0xcfe06, 0xcfe17, 0xcffc6,
-    0xcffd7, 0xd0186, 0xd0197, 0xd0346, 0xd0357, 0xd0506,
-    0xd0517, 0xd06c6, 0xd06d7, 0xd0886, 0xd0897, 0xd0a46,
-    0xd0a57, 0xd0c06, 0xd0c17, 0xd0dc6, 0xd0dd7, 0xd0f86,
-    0xd0f97, 0xd1146, 0xd1157, 0xd1306, 0xd1317, 0xd14c6,
-    0xd14d7, 0xd1686, 0xd1697, 0xd1846, 0xd1857, 0xd1a06,
-    0xd1a17, 0xd1bc6, 0xd1bd7, 0xd1d86, 0xd1d97, 0xd1f46,
-    0xd1f57, 0xd2106, 0xd2117, 0xd22c6, 0xd22d7, 0xd2486,
-    0xd2497, 0xd2646, 0xd2657, 0xd2806, 0xd2817, 0xd29c6,
-    0xd29d7, 0xd2b86, 0xd2b97, 0xd2d46, 0xd2d57, 0xd2f06,
-    0xd2f17, 0xd30c6, 0xd30d7, 0xd3286, 0xd3297, 0xd3446,
-    0xd3457, 0xd3606, 0xd3617, 0xd37c6, 0xd37d7, 0xd3986,
-    0xd3997, 0xd3b46, 0xd3b57, 0xd3d06, 0xd3d17, 0xd3ec6,
-    0xd3ed7, 0xd4086, 0xd4097, 0xd4246, 0xd4257, 0xd4406,
-    0xd4417, 0xd45c6, 0xd45d7, 0xd4786, 0xd4797, 0xd4946,
-    0xd4957, 0xd4b06, 0xd4b17, 0xd4cc6, 0xd4cd7, 0xd4e86,
-    0xd4e97, 0xd5046, 0xd5057, 0xd5206, 0xd5217, 0xd53c6,
-    0xd53d7, 0xd5586, 0xd5597, 0xd5746, 0xd5757, 0xd5906,
-    0xd5917, 0xd5ac6, 0xd5ad7, 0xd5c86, 0xd5c97, 0xd5e46,
-    0xd5e57, 0xd6006, 0xd6017, 0xd61c6, 0xd61d7, 0xd6386,
-    0xd6397, 0xd6546, 0xd6557, 0xd6706, 0xd6717, 0xd68c6,
-    0xd68d7, 0xd6a86, 0xd6a97, 0xd6c46, 0xd6c57, 0xd6e06,
-    0xd6e17, 0xd6fc6, 0xd6fd7, 0xd7186, 0xd7197, 0xd7346,
-    0xd7357, 0xd7506, 0xd7517, 0xd76c6, 0xd76d7, 0xd7886,
-    0xd7897, 0xd7a48, 0xd7b0d, 0xd7c78, 0xd7cbc, 0xd7fc8,
-    0xfb1e3, 0xfb1f8, 0xfe003, 0xfe108, 0xfe203, 0xfe308,
-    0xfeff2, 0xff008, 0xff9e3, 0xffa08, 0xfff02, 0xfffc8,
-    0x101fd3, 0x101fe8, 0x102e03, 0x102e18, 0x103763, 0x1037b8,
-    0x10a013, 0x10a048, 0x10a053, 0x10a078, 0x10a0c3, 0x10a108,
-    0x10a383, 0x10a3b8, 0x10a3f3, 0x10a408, 0x10ae53, 0x10ae78,
-    0x10d243, 0x10d288, 0x10eab3, 0x10ead8, 0x10efd3, 0x10f008,
-    0x10f463, 0x10f518, 0x10f823, 0x10f868, 0x11000b, 0x110013,
-    0x11002b, 0x110038, 0x110383, 0x110478, 0x110703, 0x110718,
-    0x110733, 0x110758, 0x1107f3, 0x11082b, 0x110838, 0x110b0b,
-    0x110b33, 0x110b7b, 0x110b93, 0x110bb8, 0x110bd9, 0x110be8,
-    0x110c23, 0x110c38, 0x110cd9, 0x110ce8, 0x111003, 0x111038,
-    0x111273, 0x1112cb, 0x1112d3, 0x111358, 0x11145b, 0x111478,
-    0x111733, 0x111748, 0x111803, 0x11182b, 0x111838, 0x111b3b,
-    0x111b63, 0x111bfb, 0x111c18, 0x111c29, 0x111c48, 0x111c93,
-    0x111cd8, 0x111ceb, 0x111cf3, 0x111d08, 0x1122cb, 0x1122f3,
-    0x11232b, 0x112343, 0x11235b, 0x112363, 0x112388, 0x1123e3,
-    0x1123f8, 0x112413, 0x112428, 0x112df3, 0x112e0b, 0x112e33,
-    0x112eb8, 0x113003, 0x11302b, 0x113048, 0x1133b3, 0x1133d8,
-    0x1133e3, 0x1133fb, 0x113403, 0x11341b, 0x113458, 0x11347b,
-    0x113498, 0x1134bb, 0x1134e8, 0x113573, 0x113588, 0x11362b,
-    0x113648, 0x113663, 0x1136d8, 0x113703, 0x113758, 0x11435b,
-    0x114383, 0x11440b, 0x114423, 0x11445b, 0x114463, 0x114478,
-    0x1145e3, 0x1145f8, 0x114b03, 0x114b1b, 0x114b33, 0x114b9b,
-    0x114ba3, 0x114bbb, 0x114bd3, 0x114beb, 0x114bf3, 0x114c1b,
-    0x114c23, 0x114c48, 0x115af3, 0x115b0b, 0x115b23, 0x115b68,
-    0x115b8b, 0x115bc3, 0x115beb, 0x115bf3, 0x115c18, 0x115dc3,
-    0x115de8, 0x11630b, 0x116333, 0x1163bb, 0x1163d3, 0x1163eb,
-    0x1163f3, 0x116418, 0x116ab3, 0x116acb, 0x116ad3, 0x116aeb,
-    0x116b03, 0x116b6b, 0x116b73, 0x116b88, 0x1171d3, 0x117208,
-    0x117223, 0x11726b, 0x117273, 0x1172c8, 0x1182cb, 0x1182f3,
-    0x11838b, 0x118393, 0x1183b8, 0x119303, 0x11931b, 0x119368,
-    0x11937b, 0x119398, 0x1193b3, 0x1193db, 0x1193e3, 0x1193f9,
-    0x11940b, 0x119419, 0x11942b, 0x119433, 0x119448, 0x119d1b,
-    0x119d43, 0x119d88, 0x119da3, 0x119dcb, 0x119e03, 0x119e18,
-    0x119e4b, 0x119e58, 0x11a013, 0x11a0b8, 0x11a333, 0x11a39b,
-    0x11a3a9, 0x11a3b3, 0x11a3f8, 0x11a473, 0x11a488, 0x11a513,
-    0x11a57b, 0x11a593, 0x11a5c8, 0x11a849, 0x11a8a3, 0x11a97b,
-    0x11a983, 0x11a9a8, 0x11c2fb, 0x11c303, 0x11c378, 0x11c383,
-    0x11c3eb, 0x11c3f3, 0x11c408, 0x11c923, 0x11ca88, 0x11ca9b,
-    0x11caa3, 0x11cb1b, 0x11cb23, 0x11cb4b, 0x11cb53, 0x11cb78,
-    0x11d313, 0x11d378, 0x11d3a3, 0x11d3b8, 0x11d3c3, 0x11d3e8,
-    0x11d3f3, 0x11d469, 0x11d473, 0x11d488, 0x11d8ab, 0x11d8f8,
-    0x11d903, 0x11d928, 0x11d93b, 0x11d953, 0x11d96b, 0x11d973,
-    0x11d988, 0x11ef33, 0x11ef5b, 0x11ef78, 0x11f003, 0x11f029,
-    0x11f03b, 0x11f048, 0x11f34b, 0x11f363, 0x11f3b8, 0x11f3eb,
-    0x11f403, 0x11f41b, 0x11f423, 0x11f438, 0x134302, 0x134403,
-    0x134418, 0x134473, 0x134568, 0x16af03, 0x16af58, 0x16b303,
-    0x16b378, 0x16f4f3, 0x16f508, 0x16f51b, 0x16f888, 0x16f8f3,
-    0x16f938, 0x16fe43, 0x16fe58, 0x16ff0b, 0x16ff28, 0x1bc9d3,
-    0x1bc9f8, 0x1bca02, 0x1bca48, 0x1cf003, 0x1cf2e8, 0x1cf303,
-    0x1cf478, 0x1d1653, 0x1d166b, 0x1d1673, 0x1d16a8, 0x1d16db,
-    0x1d16e3, 0x1d1732, 0x1d17b3, 0x1d1838, 0x1d1853, 0x1d18c8,
-    0x1d1aa3, 0x1d1ae8, 0x1d2423, 0x1d2458, 0x1da003, 0x1da378,
-    0x1da3b3, 0x1da6d8, 0x1da753, 0x1da768, 0x1da843, 0x1da858,
-    0x1da9b3, 0x1daa08, 0x1daa13, 0x1dab08, 0x1e0003, 0x1e0078,
-    0x1e0083, 0x1e0198, 0x1e01b3, 0x1e0228, 0x1e0233, 0x1e0258,
-    0x1e0263, 0x1e02b8, 0x1e08f3, 0x1e0908, 0x1e1303, 0x1e1378,
-    0x1e2ae3, 0x1e2af8, 0x1e2ec3, 0x1e2f08, 0x1e4ec3, 0x1e4f08,
-    0x1e8d03, 0x1e8d78, 0x1e9443, 0x1e94b8, 0x1f1e6a, 0x1f2008,
-    0x1f3fb3, 0x1f4008, 0xe00002, 0xe00203, 0xe00802, 0xe01003,
-    0xe01f02, 0xe10008,
+    0x1, 0xa2, 0xb1, 0xd3, 0xe1, 0x200,
+    0x7f1, 0xa00, 0xad1, 0xae0, 0x3004, 0x3700,
+    0x4834, 0x48a0, 0x5914, 0x5be0, 0x5bf4, 0x5c00,
+    0x5c14, 0x5c30, 0x5c44, 0x5c60, 0x5c74, 0x5c80,
+    0x6005, 0x6060, 0x6104, 0x61b0, 0x61c1, 0x61d0,
+    0x64b4, 0x6600, 0x6704, 0x6710, 0x6d64, 0x6dd5,
+    0x6de0, 0x6df4, 0x6e50, 0x6e74, 0x6e90, 0x6ea4,
+    0x6ee0, 0x70f5, 0x7100, 0x7114, 0x7120, 0x7304,
+    0x74b0, 0x7a64, 0x7b10, 0x7eb4, 0x7f40, 0x7fd4,
+    0x7fe0, 0x8164, 0x81a0, 0x81b4, 0x8240, 0x8254,
+    0x8280, 0x8294, 0x82e0, 0x8594, 0x85c0, 0x8905,
+    0x8920, 0x8984, 0x8a00, 0x8ca4, 0x8e25, 0x8e34,
+    0x9036, 0x9040, 0x93a4, 0x93b6, 0x93c4, 0x93d0,
+    0x93e6, 0x9414, 0x9496, 0x94d4, 0x94e6, 0x9500,
+    0x9514, 0x9580, 0x9624, 0x9640, 0x9814, 0x9826,
+    0x9840, 0x9bc4, 0x9bd0, 0x9be4, 0x9bf6, 0x9c14,
+    0x9c50, 0x9c76, 0x9c90, 0x9cb6, 0x9cd4, 0x9ce0,
+    0x9d74, 0x9d80, 0x9e24, 0x9e40, 0x9fe4, 0x9ff0,
+    0xa014, 0xa036, 0xa040, 0xa3c4, 0xa3d0, 0xa3e6,
+    0xa414, 0xa430, 0xa474, 0xa490, 0xa4b4, 0xa4e0,
+    0xa514, 0xa520, 0xa704, 0xa720, 0xa754, 0xa760,
+    0xa814, 0xa836, 0xa840, 0xabc4, 0xabd0, 0xabe6,
+    0xac14, 0xac60, 0xac74, 0xac96, 0xaca0, 0xacb6,
+    0xacd4, 0xace0, 0xae24, 0xae40, 0xafa4, 0xb000,
+    0xb014, 0xb026, 0xb040, 0xb3c4, 0xb3d0, 0xb3e4,
+    0xb406, 0xb414, 0xb450, 0xb476, 0xb490, 0xb4b6,
+    0xb4d4, 0xb4e0, 0xb554, 0xb580, 0xb624, 0xb640,
+    0xb824, 0xb830, 0xbbe4, 0xbbf6, 0xbc04, 0xbc16,
+    0xbc30, 0xbc66, 0xbc90, 0xbca6, 0xbcd4, 0xbce0,
+    0xbd74, 0xbd80, 0xc004, 0xc016, 0xc044, 0xc050,
+    0xc3c4, 0xc3d0, 0xc3e4, 0xc416, 0xc450, 0xc464,
+    0xc490, 0xc4a4, 0xc4e0, 0xc554, 0xc570, 0xc624,
+    0xc640, 0xc814, 0xc826, 0xc840, 0xcbc4, 0xcbd0,
+    0xcbe6, 0xcbf4, 0xcc06, 0xcc24, 0xcc36, 0xcc50,
+    0xcc64, 0xcc76, 0xcc90, 0xcca6, 0xccc4, 0xcce0,
+    0xcd54, 0xcd70, 0xce24, 0xce40, 0xcf36, 0xcf40,
+    0xd004, 0xd026, 0xd040, 0xd3b4, 0xd3d0, 0xd3e4,
+    0xd3f6, 0xd414, 0xd450, 0xd466, 0xd490, 0xd4a6,
+    0xd4d4, 0xd4e5, 0xd4f0, 0xd574, 0xd580, 0xd624,
+    0xd640, 0xd814, 0xd826, 0xd840, 0xdca4, 0xdcb0,
+    0xdcf4, 0xdd06, 0xdd24, 0xdd50, 0xdd64, 0xdd70,
+    0xdd86, 0xddf4, 0xde00, 0xdf26, 0xdf40, 0xe314,
+    0xe320, 0xe336, 0xe344, 0xe3b0, 0xe474, 0xe4f0,
+    0xeb14, 0xeb20, 0xeb36, 0xeb44, 0xebd0, 0xec84,
+    0xecf0, 0xf184, 0xf1a0, 0xf354, 0xf360, 0xf374,
+    0xf380, 0xf394, 0xf3a0, 0xf3e6, 0xf400, 0xf714,
+    0xf7f6, 0xf804, 0xf850, 0xf864, 0xf880, 0xf8d4,
+    0xf980, 0xf994, 0xfbd0, 0xfc64, 0xfc70, 0x102d4,
+    0x10316, 0x10324, 0x10380, 0x10394, 0x103b6, 0x103d4,
+    0x103f0, 0x10566, 0x10584, 0x105a0, 0x105e4, 0x10610,
+    0x10714, 0x10750, 0x10824, 0x10830, 0x10846, 0x10854,
+    0x10870, 0x108d4, 0x108e0, 0x109d4, 0x109e0, 0x11007,
+    0x11608, 0x11a89, 0x12000, 0x135d4, 0x13600, 0x17124,
+    0x17156, 0x17160, 0x17324, 0x17346, 0x17350, 0x17524,
+    0x17540, 0x17724, 0x17740, 0x17b44, 0x17b66, 0x17b74,
+    0x17be6, 0x17c64, 0x17c76, 0x17c94, 0x17d40, 0x17dd4,
+    0x17de0, 0x180b4, 0x180e1, 0x180f4, 0x18100, 0x18854,
+    0x18870, 0x18a94, 0x18aa0, 0x19204, 0x19236, 0x19274,
+    0x19296, 0x192c0, 0x19306, 0x19324, 0x19336, 0x19394,
+    0x193c0, 0x1a174, 0x1a196, 0x1a1b4, 0x1a1c0, 0x1a556,
+    0x1a564, 0x1a576, 0x1a584, 0x1a5f0, 0x1a604, 0x1a610,
+    0x1a624, 0x1a630, 0x1a654, 0x1a6d6, 0x1a734, 0x1a7d0,
+    0x1a7f4, 0x1a800, 0x1ab04, 0x1acf0, 0x1b004, 0x1b046,
+    0x1b050, 0x1b344, 0x1b3b6, 0x1b3c4, 0x1b3d6, 0x1b424,
+    0x1b436, 0x1b450, 0x1b6b4, 0x1b740, 0x1b804, 0x1b826,
+    0x1b830, 0x1ba16, 0x1ba24, 0x1ba66, 0x1ba84, 0x1baa6,
+    0x1bab4, 0x1bae0, 0x1be64, 0x1be76, 0x1be84, 0x1bea6,
+    0x1bed4, 0x1bee6, 0x1bef4, 0x1bf26, 0x1bf40, 0x1c246,
+    0x1c2c4, 0x1c346, 0x1c364, 0x1c380, 0x1cd04, 0x1cd30,
+    0x1cd44, 0x1ce16, 0x1ce24, 0x1ce90, 0x1ced4, 0x1cee0,
+    0x1cf44, 0x1cf50, 0x1cf76, 0x1cf84, 0x1cfa0, 0x1dc04,
+    0x1e000, 0x200b1, 0x200c4, 0x200da, 0x200e1, 0x20100,
+    0x20281, 0x202f0, 0x20601, 0x20700, 0x20d04, 0x20f10,
+    0x2cef4, 0x2cf20, 0x2d7f4, 0x2d800, 0x2de04, 0x2e000,
+    0x302a4, 0x30300, 0x30994, 0x309b0, 0xa66f4, 0xa6730,
+    0xa6744, 0xa67e0, 0xa69e4, 0xa6a00, 0xa6f04, 0xa6f20,
+    0xa8024, 0xa8030, 0xa8064, 0xa8070, 0xa80b4, 0xa80c0,
+    0xa8236, 0xa8254, 0xa8276, 0xa8280, 0xa82c4, 0xa82d0,
+    0xa8806, 0xa8820, 0xa8b46, 0xa8c44, 0xa8c60, 0xa8e04,
+    0xa8f20, 0xa8ff4, 0xa9000, 0xa9264, 0xa92e0, 0xa9474,
+    0xa9526, 0xa9540, 0xa9607, 0xa97d0, 0xa9804, 0xa9836,
+    0xa9840, 0xa9b34, 0xa9b46, 0xa9b64, 0xa9ba6, 0xa9bc4,
+    0xa9be6, 0xa9c10, 0xa9e54, 0xa9e60, 0xaa294, 0xaa2f6,
+    0xaa314, 0xaa336, 0xaa354, 0xaa370, 0xaa434, 0xaa440,
+    0xaa4c4, 0xaa4d6, 0xaa4e0, 0xaa7c4, 0xaa7d0, 0xaab04,
+    0xaab10, 0xaab24, 0xaab50, 0xaab74, 0xaab90, 0xaabe4,
+    0xaac00, 0xaac14, 0xaac20, 0xaaeb6, 0xaaec4, 0xaaee6,
+    0xaaf00, 0xaaf56, 0xaaf64, 0xaaf70, 0xabe36, 0xabe54,
+    0xabe66, 0xabe84, 0xabe96, 0xabeb0, 0xabec6, 0xabed4,
+    0xabee0, 0xac00b, 0xac01c, 0xac1cb, 0xac1dc, 0xac38b,
+    0xac39c, 0xac54b, 0xac55c, 0xac70b, 0xac71c, 0xac8cb,
+    0xac8dc, 0xaca8b, 0xaca9c, 0xacc4b, 0xacc5c, 0xace0b,
+    0xace1c, 0xacfcb, 0xacfdc, 0xad18b, 0xad19c, 0xad34b,
+    0xad35c, 0xad50b, 0xad51c, 0xad6cb, 0xad6dc, 0xad88b,
+    0xad89c, 0xada4b, 0xada5c, 0xadc0b, 0xadc1c, 0xaddcb,
+    0xadddc, 0xadf8b, 0xadf9c, 0xae14b, 0xae15c, 0xae30b,
+    0xae31c, 0xae4cb, 0xae4dc, 0xae68b, 0xae69c, 0xae84b,
+    0xae85c, 0xaea0b, 0xaea1c, 0xaebcb, 0xaebdc, 0xaed8b,
+    0xaed9c, 0xaef4b, 0xaef5c, 0xaf10b, 0xaf11c, 0xaf2cb,
+    0xaf2dc, 0xaf48b, 0xaf49c, 0xaf64b, 0xaf65c, 0xaf80b,
+    0xaf81c, 0xaf9cb, 0xaf9dc, 0xafb8b, 0xafb9c, 0xafd4b,
+    0xafd5c, 0xaff0b, 0xaff1c, 0xb00cb, 0xb00dc, 0xb028b,
+    0xb029c, 0xb044b, 0xb045c, 0xb060b, 0xb061c, 0xb07cb,
+    0xb07dc, 0xb098b, 0xb099c, 0xb0b4b, 0xb0b5c, 0xb0d0b,
+    0xb0d1c, 0xb0ecb, 0xb0edc, 0xb108b, 0xb109c, 0xb124b,
+    0xb125c, 0xb140b, 0xb141c, 0xb15cb, 0xb15dc, 0xb178b,
+    0xb179c, 0xb194b, 0xb195c, 0xb1b0b, 0xb1b1c, 0xb1ccb,
+    0xb1cdc, 0xb1e8b, 0xb1e9c, 0xb204b, 0xb205c, 0xb220b,
+    0xb221c, 0xb23cb, 0xb23dc, 0xb258b, 0xb259c, 0xb274b,
+    0xb275c, 0xb290b, 0xb291c, 0xb2acb, 0xb2adc, 0xb2c8b,
+    0xb2c9c, 0xb2e4b, 0xb2e5c, 0xb300b, 0xb301c, 0xb31cb,
+    0xb31dc, 0xb338b, 0xb339c, 0xb354b, 0xb355c, 0xb370b,
+    0xb371c, 0xb38cb, 0xb38dc, 0xb3a8b, 0xb3a9c, 0xb3c4b,
+    0xb3c5c, 0xb3e0b, 0xb3e1c, 0xb3fcb, 0xb3fdc, 0xb418b,
+    0xb419c, 0xb434b, 0xb435c, 0xb450b, 0xb451c, 0xb46cb,
+    0xb46dc, 0xb488b, 0xb489c, 0xb4a4b, 0xb4a5c, 0xb4c0b,
+    0xb4c1c, 0xb4dcb, 0xb4ddc, 0xb4f8b, 0xb4f9c, 0xb514b,
+    0xb515c, 0xb530b, 0xb531c, 0xb54cb, 0xb54dc, 0xb568b,
+    0xb569c, 0xb584b, 0xb585c, 0xb5a0b, 0xb5a1c, 0xb5bcb,
+    0xb5bdc, 0xb5d8b, 0xb5d9c, 0xb5f4b, 0xb5f5c, 0xb610b,
+    0xb611c, 0xb62cb, 0xb62dc, 0xb648b, 0xb649c, 0xb664b,
+    0xb665c, 0xb680b, 0xb681c, 0xb69cb, 0xb69dc, 0xb6b8b,
+    0xb6b9c, 0xb6d4b, 0xb6d5c, 0xb6f0b, 0xb6f1c, 0xb70cb,
+    0xb70dc, 0xb728b, 0xb729c, 0xb744b, 0xb745c, 0xb760b,
+    0xb761c, 0xb77cb, 0xb77dc, 0xb798b, 0xb799c, 0xb7b4b,
+    0xb7b5c, 0xb7d0b, 0xb7d1c, 0xb7ecb, 0xb7edc, 0xb808b,
+    0xb809c, 0xb824b, 0xb825c, 0xb840b, 0xb841c, 0xb85cb,
+    0xb85dc, 0xb878b, 0xb879c, 0xb894b, 0xb895c, 0xb8b0b,
+    0xb8b1c, 0xb8ccb, 0xb8cdc, 0xb8e8b, 0xb8e9c, 0xb904b,
+    0xb905c, 0xb920b, 0xb921c, 0xb93cb, 0xb93dc, 0xb958b,
+    0xb959c, 0xb974b, 0xb975c, 0xb990b, 0xb991c, 0xb9acb,
+    0xb9adc, 0xb9c8b, 0xb9c9c, 0xb9e4b, 0xb9e5c, 0xba00b,
+    0xba01c, 0xba1cb, 0xba1dc, 0xba38b, 0xba39c, 0xba54b,
+    0xba55c, 0xba70b, 0xba71c, 0xba8cb, 0xba8dc, 0xbaa8b,
+    0xbaa9c, 0xbac4b, 0xbac5c, 0xbae0b, 0xbae1c, 0xbafcb,
+    0xbafdc, 0xbb18b, 0xbb19c, 0xbb34b, 0xbb35c, 0xbb50b,
+    0xbb51c, 0xbb6cb, 0xbb6dc, 0xbb88b, 0xbb89c, 0xbba4b,
+    0xbba5c, 0xbbc0b, 0xbbc1c, 0xbbdcb, 0xbbddc, 0xbbf8b,
+    0xbbf9c, 0xbc14b, 0xbc15c, 0xbc30b, 0xbc31c, 0xbc4cb,
+    0xbc4dc, 0xbc68b, 0xbc69c, 0xbc84b, 0xbc85c, 0xbca0b,
+    0xbca1c, 0xbcbcb, 0xbcbdc, 0xbcd8b, 0xbcd9c, 0xbcf4b,
+    0xbcf5c, 0xbd10b, 0xbd11c, 0xbd2cb, 0xbd2dc, 0xbd48b,
+    0xbd49c, 0xbd64b, 0xbd65c, 0xbd80b, 0xbd81c, 0xbd9cb,
+    0xbd9dc, 0xbdb8b, 0xbdb9c, 0xbdd4b, 0xbdd5c, 0xbdf0b,
+    0xbdf1c, 0xbe0cb, 0xbe0dc, 0xbe28b, 0xbe29c, 0xbe44b,
+    0xbe45c, 0xbe60b, 0xbe61c, 0xbe7cb, 0xbe7dc, 0xbe98b,
+    0xbe99c, 0xbeb4b, 0xbeb5c, 0xbed0b, 0xbed1c, 0xbeecb,
+    0xbeedc, 0xbf08b, 0xbf09c, 0xbf24b, 0xbf25c, 0xbf40b,
+    0xbf41c, 0xbf5cb, 0xbf5dc, 0xbf78b, 0xbf79c, 0xbf94b,
+    0xbf95c, 0xbfb0b, 0xbfb1c, 0xbfccb, 0xbfcdc, 0xbfe8b,
+    0xbfe9c, 0xc004b, 0xc005c, 0xc020b, 0xc021c, 0xc03cb,
+    0xc03dc, 0xc058b, 0xc059c, 0xc074b, 0xc075c, 0xc090b,
+    0xc091c, 0xc0acb, 0xc0adc, 0xc0c8b, 0xc0c9c, 0xc0e4b,
+    0xc0e5c, 0xc100b, 0xc101c, 0xc11cb, 0xc11dc, 0xc138b,
+    0xc139c, 0xc154b, 0xc155c, 0xc170b, 0xc171c, 0xc18cb,
+    0xc18dc, 0xc1a8b, 0xc1a9c, 0xc1c4b, 0xc1c5c, 0xc1e0b,
+    0xc1e1c, 0xc1fcb, 0xc1fdc, 0xc218b, 0xc219c, 0xc234b,
+    0xc235c, 0xc250b, 0xc251c, 0xc26cb, 0xc26dc, 0xc288b,
+    0xc289c, 0xc2a4b, 0xc2a5c, 0xc2c0b, 0xc2c1c, 0xc2dcb,
+    0xc2ddc, 0xc2f8b, 0xc2f9c, 0xc314b, 0xc315c, 0xc330b,
+    0xc331c, 0xc34cb, 0xc34dc, 0xc368b, 0xc369c, 0xc384b,
+    0xc385c, 0xc3a0b, 0xc3a1c, 0xc3bcb, 0xc3bdc, 0xc3d8b,
+    0xc3d9c, 0xc3f4b, 0xc3f5c, 0xc410b, 0xc411c, 0xc42cb,
+    0xc42dc, 0xc448b, 0xc449c, 0xc464b, 0xc465c, 0xc480b,
+    0xc481c, 0xc49cb, 0xc49dc, 0xc4b8b, 0xc4b9c, 0xc4d4b,
+    0xc4d5c, 0xc4f0b, 0xc4f1c, 0xc50cb, 0xc50dc, 0xc528b,
+    0xc529c, 0xc544b, 0xc545c, 0xc560b, 0xc561c, 0xc57cb,
+    0xc57dc, 0xc598b, 0xc599c, 0xc5b4b, 0xc5b5c, 0xc5d0b,
+    0xc5d1c, 0xc5ecb, 0xc5edc, 0xc608b, 0xc609c, 0xc624b,
+    0xc625c, 0xc640b, 0xc641c, 0xc65cb, 0xc65dc, 0xc678b,
+    0xc679c, 0xc694b, 0xc695c, 0xc6b0b, 0xc6b1c, 0xc6ccb,
+    0xc6cdc, 0xc6e8b, 0xc6e9c, 0xc704b, 0xc705c, 0xc720b,
+    0xc721c, 0xc73cb, 0xc73dc, 0xc758b, 0xc759c, 0xc774b,
+    0xc775c, 0xc790b, 0xc791c, 0xc7acb, 0xc7adc, 0xc7c8b,
+    0xc7c9c, 0xc7e4b, 0xc7e5c, 0xc800b, 0xc801c, 0xc81cb,
+    0xc81dc, 0xc838b, 0xc839c, 0xc854b, 0xc855c, 0xc870b,
+    0xc871c, 0xc88cb, 0xc88dc, 0xc8a8b, 0xc8a9c, 0xc8c4b,
+    0xc8c5c, 0xc8e0b, 0xc8e1c, 0xc8fcb, 0xc8fdc, 0xc918b,
+    0xc919c, 0xc934b, 0xc935c, 0xc950b, 0xc951c, 0xc96cb,
+    0xc96dc, 0xc988b, 0xc989c, 0xc9a4b, 0xc9a5c, 0xc9c0b,
+    0xc9c1c, 0xc9dcb, 0xc9ddc, 0xc9f8b, 0xc9f9c, 0xca14b,
+    0xca15c, 0xca30b, 0xca31c, 0xca4cb, 0xca4dc, 0xca68b,
+    0xca69c, 0xca84b, 0xca85c, 0xcaa0b, 0xcaa1c, 0xcabcb,
+    0xcabdc, 0xcad8b, 0xcad9c, 0xcaf4b, 0xcaf5c, 0xcb10b,
+    0xcb11c, 0xcb2cb, 0xcb2dc, 0xcb48b, 0xcb49c, 0xcb64b,
+    0xcb65c, 0xcb80b, 0xcb81c, 0xcb9cb, 0xcb9dc, 0xcbb8b,
+    0xcbb9c, 0xcbd4b, 0xcbd5c, 0xcbf0b, 0xcbf1c, 0xcc0cb,
+    0xcc0dc, 0xcc28b, 0xcc29c, 0xcc44b, 0xcc45c, 0xcc60b,
+    0xcc61c, 0xcc7cb, 0xcc7dc, 0xcc98b, 0xcc99c, 0xccb4b,
+    0xccb5c, 0xccd0b, 0xccd1c, 0xccecb, 0xccedc, 0xcd08b,
+    0xcd09c, 0xcd24b, 0xcd25c, 0xcd40b, 0xcd41c, 0xcd5cb,
+    0xcd5dc, 0xcd78b, 0xcd79c, 0xcd94b, 0xcd95c, 0xcdb0b,
+    0xcdb1c, 0xcdccb, 0xcdcdc, 0xcde8b, 0xcde9c, 0xce04b,
+    0xce05c, 0xce20b, 0xce21c, 0xce3cb, 0xce3dc, 0xce58b,
+    0xce59c, 0xce74b, 0xce75c, 0xce90b, 0xce91c, 0xceacb,
+    0xceadc, 0xcec8b, 0xcec9c, 0xcee4b, 0xcee5c, 0xcf00b,
+    0xcf01c, 0xcf1cb, 0xcf1dc, 0xcf38b, 0xcf39c, 0xcf54b,
+    0xcf55c, 0xcf70b, 0xcf71c, 0xcf8cb, 0xcf8dc, 0xcfa8b,
+    0xcfa9c, 0xcfc4b, 0xcfc5c, 0xcfe0b, 0xcfe1c, 0xcffcb,
+    0xcffdc, 0xd018b, 0xd019c, 0xd034b, 0xd035c, 0xd050b,
+    0xd051c, 0xd06cb, 0xd06dc, 0xd088b, 0xd089c, 0xd0a4b,
+    0xd0a5c, 0xd0c0b, 0xd0c1c, 0xd0dcb, 0xd0ddc, 0xd0f8b,
+    0xd0f9c, 0xd114b, 0xd115c, 0xd130b, 0xd131c, 0xd14cb,
+    0xd14dc, 0xd168b, 0xd169c, 0xd184b, 0xd185c, 0xd1a0b,
+    0xd1a1c, 0xd1bcb, 0xd1bdc, 0xd1d8b, 0xd1d9c, 0xd1f4b,
+    0xd1f5c, 0xd210b, 0xd211c, 0xd22cb, 0xd22dc, 0xd248b,
+    0xd249c, 0xd264b, 0xd265c, 0xd280b, 0xd281c, 0xd29cb,
+    0xd29dc, 0xd2b8b, 0xd2b9c, 0xd2d4b, 0xd2d5c, 0xd2f0b,
+    0xd2f1c, 0xd30cb, 0xd30dc, 0xd328b, 0xd329c, 0xd344b,
+    0xd345c, 0xd360b, 0xd361c, 0xd37cb, 0xd37dc, 0xd398b,
+    0xd399c, 0xd3b4b, 0xd3b5c, 0xd3d0b, 0xd3d1c, 0xd3ecb,
+    0xd3edc, 0xd408b, 0xd409c, 0xd424b, 0xd425c, 0xd440b,
+    0xd441c, 0xd45cb, 0xd45dc, 0xd478b, 0xd479c, 0xd494b,
+    0xd495c, 0xd4b0b, 0xd4b1c, 0xd4ccb, 0xd4cdc, 0xd4e8b,
+    0xd4e9c, 0xd504b, 0xd505c, 0xd520b, 0xd521c, 0xd53cb,
+    0xd53dc, 0xd558b, 0xd559c, 0xd574b, 0xd575c, 0xd590b,
+    0xd591c, 0xd5acb, 0xd5adc, 0xd5c8b, 0xd5c9c, 0xd5e4b,
+    0xd5e5c, 0xd600b, 0xd601c, 0xd61cb, 0xd61dc, 0xd638b,
+    0xd639c, 0xd654b, 0xd655c, 0xd670b, 0xd671c, 0xd68cb,
+    0xd68dc, 0xd6a8b, 0xd6a9c, 0xd6c4b, 0xd6c5c, 0xd6e0b,
+    0xd6e1c, 0xd6fcb, 0xd6fdc, 0xd718b, 0xd719c, 0xd734b,
+    0xd735c, 0xd750b, 0xd751c, 0xd76cb, 0xd76dc, 0xd788b,
+    0xd789c, 0xd7a40, 0xd7b08, 0xd7c70, 0xd7cb9, 0xd7fc0,
+    0xfb1e4, 0xfb1f0, 0xfe004, 0xfe100, 0xfe204, 0xfe300,
+    0xfeff1, 0xff000, 0xff9e4, 0xffa00, 0xfff01, 0xfffc0,
+    0x101fd4, 0x101fe0, 0x102e04, 0x102e10, 0x103764, 0x1037b0,
+    0x10a014, 0x10a040, 0x10a054, 0x10a070, 0x10a0c4, 0x10a100,
+    0x10a384, 0x10a3b0, 0x10a3f4, 0x10a400, 0x10ae54, 0x10ae70,
+    0x10d244, 0x10d280, 0x10eab4, 0x10ead0, 0x10efd4, 0x10f000,
+    0x10f464, 0x10f510, 0x10f824, 0x10f860, 0x110006, 0x110014,
+    0x110026, 0x110030, 0x110384, 0x110470, 0x110704, 0x110710,
+    0x110734, 0x110750, 0x1107f4, 0x110826, 0x110830, 0x110b06,
+    0x110b34, 0x110b76, 0x110b94, 0x110bb0, 0x110bd5, 0x110be0,
+    0x110c24, 0x110c30, 0x110cd5, 0x110ce0, 0x111004, 0x111030,
+    0x111274, 0x1112c6, 0x1112d4, 0x111350, 0x111456, 0x111470,
+    0x111734, 0x111740, 0x111804, 0x111826, 0x111830, 0x111b36,
+    0x111b64, 0x111bf6, 0x111c10, 0x111c25, 0x111c40, 0x111c94,
+    0x111cd0, 0x111ce6, 0x111cf4, 0x111d00, 0x1122c6, 0x1122f4,
+    0x112326, 0x112344, 0x112356, 0x112364, 0x112380, 0x1123e4,
+    0x1123f0, 0x112414, 0x112420, 0x112df4, 0x112e06, 0x112e34,
+    0x112eb0, 0x113004, 0x113026, 0x113040, 0x1133b4, 0x1133d0,
+    0x1133e4, 0x1133f6, 0x113404, 0x113416, 0x113450, 0x113476,
+    0x113490, 0x1134b6, 0x1134e0, 0x113574, 0x113580, 0x113626,
+    0x113640, 0x113664, 0x1136d0, 0x113704, 0x113750, 0x114356,
+    0x114384, 0x114406, 0x114424, 0x114456, 0x114464, 0x114470,
+    0x1145e4, 0x1145f0, 0x114b04, 0x114b16, 0x114b34, 0x114b96,
+    0x114ba4, 0x114bb6, 0x114bd4, 0x114be6, 0x114bf4, 0x114c16,
+    0x114c24, 0x114c40, 0x115af4, 0x115b06, 0x115b24, 0x115b60,
+    0x115b86, 0x115bc4, 0x115be6, 0x115bf4, 0x115c10, 0x115dc4,
+    0x115de0, 0x116306, 0x116334, 0x1163b6, 0x1163d4, 0x1163e6,
+    0x1163f4, 0x116410, 0x116ab4, 0x116ac6, 0x116ad4, 0x116ae6,
+    0x116b04, 0x116b66, 0x116b74, 0x116b80, 0x1171d4, 0x117200,
+    0x117224, 0x117266, 0x117274, 0x1172c0, 0x1182c6, 0x1182f4,
+    0x118386, 0x118394, 0x1183b0, 0x119304, 0x119316, 0x119360,
+    0x119376, 0x119390, 0x1193b4, 0x1193d6, 0x1193e4, 0x1193f5,
+    0x119406, 0x119415, 0x119426, 0x119434, 0x119440, 0x119d16,
+    0x119d44, 0x119d80, 0x119da4, 0x119dc6, 0x119e04, 0x119e10,
+    0x119e46, 0x119e50, 0x11a014, 0x11a0b0, 0x11a334, 0x11a396,
+    0x11a3a5, 0x11a3b4, 0x11a3f0, 0x11a474, 0x11a480, 0x11a514,
+    0x11a576, 0x11a594, 0x11a5c0, 0x11a845, 0x11a8a4, 0x11a976,
+    0x11a984, 0x11a9a0, 0x11c2f6, 0x11c304, 0x11c370, 0x11c384,
+    0x11c3e6, 0x11c3f4, 0x11c400, 0x11c924, 0x11ca80, 0x11ca96,
+    0x11caa4, 0x11cb16, 0x11cb24, 0x11cb46, 0x11cb54, 0x11cb70,
+    0x11d314, 0x11d370, 0x11d3a4, 0x11d3b0, 0x11d3c4, 0x11d3e0,
+    0x11d3f4, 0x11d465, 0x11d474, 0x11d480, 0x11d8a6, 0x11d8f0,
+    0x11d904, 0x11d920, 0x11d936, 0x11d954, 0x11d966, 0x11d974,
+    0x11d980, 0x11ef34, 0x11ef56, 0x11ef70, 0x11f004, 0x11f025,
+    0x11f036, 0x11f040, 0x11f346, 0x11f364, 0x11f3b0, 0x11f3e6,
+    0x11f404, 0x11f416, 0x11f424, 0x11f430, 0x134301, 0x134404,
+    0x134410, 0x134474, 0x134560, 0x16af04, 0x16af50, 0x16b304,
+    0x16b370, 0x16f4f4, 0x16f500, 0x16f516, 0x16f880, 0x16f8f4,
+    0x16f930, 0x16fe44, 0x16fe50, 0x16ff06, 0x16ff20, 0x1bc9d4,
+    0x1bc9f0, 0x1bca01, 0x1bca40, 0x1cf004, 0x1cf2e0, 0x1cf304,
+    0x1cf470, 0x1d1654, 0x1d1666, 0x1d1674, 0x1d16a0, 0x1d16d6,
+    0x1d16e4, 0x1d1731, 0x1d17b4, 0x1d1830, 0x1d1854, 0x1d18c0,
+    0x1d1aa4, 0x1d1ae0, 0x1d2424, 0x1d2450, 0x1da004, 0x1da370,
+    0x1da3b4, 0x1da6d0, 0x1da754, 0x1da760, 0x1da844, 0x1da850,
+    0x1da9b4, 0x1daa00, 0x1daa14, 0x1dab00, 0x1e0004, 0x1e0070,
+    0x1e0084, 0x1e0190, 0x1e01b4, 0x1e0220, 0x1e0234, 0x1e0250,
+    0x1e0264, 0x1e02b0, 0x1e08f4, 0x1e0900, 0x1e1304, 0x1e1370,
+    0x1e2ae4, 0x1e2af0, 0x1e2ec4, 0x1e2f00, 0x1e4ec4, 0x1e4f00,
+    0x1e8d04, 0x1e8d70, 0x1e9444, 0x1e94b0, 0x1f1e6d, 0x1f2000,
+    0x1f3fb4, 0x1f4000, 0xe00001, 0xe00204, 0xe00801, 0xe01004,
+    0xe01f01, 0xe10000,
   };
 
   inline constexpr char32_t __incb_linkers[] = {
diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h
index e49498a0531..f1b2b359bdf 100644
--- a/libstdc++-v3/include/bits/unicode.h
+++ b/libstdc++-v3/include/bits/unicode.h
@@ -163,7 +163,7 @@ namespace __unicode
 	    else
 	      _M_read();
 	  }
-	else if (_M_buf_index + 1 <= _M_buf_last)
+	else if (_M_buf_index + 1 < _M_buf_last)
 	  ++_M_buf_index;
 	return *this;
       }
@@ -603,6 +603,7 @@ inline namespace __v15_1_0
     return (__p - __width_edges) % 2 + 1;
   }
 
+  // @pre c <= 0x10FFFF
   constexpr _Gcb_property
   __grapheme_cluster_break_property(char32_t __c) noexcept
   {
@@ -621,9 +622,13 @@ inline namespace __v15_1_0
     return std::find(__incb_linkers, __end, __c) != __end;
   }
 
+  // @pre c <= 0x10FFFF
   constexpr _InCB
   __incb_property(char32_t __c) noexcept
   {
+    if ((__c << 2) < __incb_edges[0]) [[likely]]
+      return _InCB(0);
+
     constexpr uint32_t __mask = 0x3;
     auto* __end = std::end(__incb_edges);
     auto* __p = std::lower_bound(__incb_edges, __end, (__c << 2) | __mask);
@@ -634,10 +639,10 @@ inline namespace __v15_1_0
   __is_extended_pictographic(char32_t __c)
   {
     if (__c < __xpicto_edges[0]) [[likely]]
-      return 1;
+      return 0;
 
     auto* __p = std::upper_bound(__xpicto_edges, std::end(__xpicto_edges), __c);
-    return (__p - __xpicto_edges) % 2 + 1;
+    return (__p - __xpicto_edges) % 2;
   }
 
   struct _Grapheme_cluster_iterator_base
@@ -732,22 +737,22 @@ inline namespace __v15_1_0
 
       public:
 	// TODO: Change value_type to be subrange<_U32_iterator> instead?
-	// That would be the whole cluster, not just the first code point.
-	// Would need to change type of _M_start to _U32_iterator, so that
-	// operator* just does return value_type{_M_start, _M_next}.
 	// Alternatively, value_type could be _Utf32_view<iterator_t<_View>>.
+	// That would be the whole cluster, not just the first code point.
+	// Would need to store two iterators and find end of current cluster
+	// on increment, so operator* returns value_type(_M_base, _M_next).
 	using value_type = char32_t;
 	using iterator_concept = forward_iterator_tag;
+	using difference_type = ptrdiff_t;
 
 	constexpr
 	_Iterator(_U32_iterator __i)
-	: _M_start(__i.base()), _M_next(__i)
+	: _M_base(__i)
 	{
-	  if (_M_start != __i.end())
+	  if (__i != __i.end())
 	    {
 	      _M_c = *__i;
 	      _M_prop = __grapheme_cluster_break_property(_M_c);
-	      operator++(); // Finds the end of the first cluster.
 	    }
 	}
 
@@ -764,11 +769,11 @@ inline namespace __v15_1_0
 	constexpr _Iterator&
 	operator++()
 	{
-	  const auto __end = _M_next.end();
-	  if (_M_next != __end)
+	  const auto __end = _M_base.end();
+	  if (_M_base != __end)
 	    {
 	      auto __p_prev = _M_prop;
-	      auto __it = _M_next;
+	      auto __it = _M_base;
 	      while (++__it != __end)
 		{
 		  char32_t __c = *__it;
@@ -784,11 +789,8 @@ inline namespace __v15_1_0
 		    }
 		  __p_prev = __p;
 		}
-	      _M_start = _M_next.base();
-	      _M_next = __it;
+	      _M_base = __it;
 	    }
-	  else
-	    _M_start = __end;
 	  return *this;
 	}
 
@@ -802,18 +804,18 @@ inline namespace __v15_1_0
 
 	constexpr bool
 	operator==(const _Iterator& __i) const
-	{ return _M_start == __i._M_start; }
+	{ return _M_base == __i._M_base; }
 
 	// This supports iter != iter.end()
 	constexpr bool
 	operator==(const ranges::sentinel_t<_View>& __i) const
-	{ return _M_start == __i; }
+	{ return _M_base == __i; }
 
 	// Iterator to the start of the current cluster.
-	constexpr auto base() const { return _M_start; }
+	constexpr auto base() const { return _M_base.base(); }
 
 	// The end of the underlying view (not the end of the current cluster!)
-	constexpr auto end() const { return _M_next.end(); }
+	constexpr auto end() const { return _M_base.end(); }
 
 	// Field width of the first code point in the cluster.
 	constexpr int
@@ -821,8 +823,7 @@ inline namespace __v15_1_0
 	{ return __field_width(_M_c); }
 
       private:
-	ranges::iterator_t<_View> _M_start;
-	_U32_iterator _M_next;
+	_U32_iterator _M_base;
 
 	// Implement the Grapheme Cluster Boundary Rules from Unicode Annex #29
 	// http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
@@ -891,13 +892,13 @@ inline namespace __v15_1_0
 	  // Do not break within certain combinations with
 	  // Indic_Conjunct_Break (InCB)=Linker.
 	  if (_M_incb_linker_seen
-		&& __incb_property(*_M_start) == _InCB::_Consonant
+		&& __incb_property(_M_c) == _InCB::_Consonant
 		&& __incb_property(*__curr) == _InCB::_Consonant)
 	    {
-	      // Match [_M_start, __curr] against regular expression
+	      // Match [_M_base, __curr] against regular expression
 	      // Consonant ([Extend Linker]* Linker [Extend Linker]* Consonant)+
 	      bool __have_linker = false;
-	      auto __it = _M_start;
+	      auto __it = _M_base;
 	      while (++__it != __curr)
 		{
 		  if (__is_incb_linker(*__it))
diff --git a/libstdc++-v3/testsuite/ext/unicode/grapheme_view.cc b/libstdc++-v3/testsuite/ext/unicode/grapheme_view.cc
new file mode 100644
index 00000000000..ac1e8c50b05
--- /dev/null
+++ b/libstdc++-v3/testsuite/ext/unicode/grapheme_view.cc
@@ -0,0 +1,95 @@
+// { dg-do compile { target c++20 } }
+
+#include <format> // includes <bits/unicode.h>
+#include <string_view>
+#include <ranges>
+#include <testsuite_hooks.h>
+
+namespace uc = std::__unicode;
+using namespace std::string_view_literals;
+
+constexpr void
+test_breaks() // Exercises __field_width, __truncate and _Grapheme_cluster_view.
+{
+  VERIFY(uc::__field_width(u8"\N{LATIN SMALL LETTER E WITH ACUTE}"sv) == 1 );
+
+  auto sv = u8"ee\N{COMBINING ACUTE ACCENT}e"sv;
+  auto data = sv.data(); // Kept so that sv can be compared/reset below.
+  VERIFY( uc::__field_width(sv) == 3 ); // The combining accent adds no width.
+  VERIFY( uc::__truncate(sv, 3) == 3 );
+  VERIFY( uc::__truncate(sv, 4) == 3 );
+  VERIFY( sv == data ); // Still the whole string, nothing was cut off.
+
+  VERIFY( uc::__truncate(sv, 2) == 2 );
+  VERIFY( sv == u8"ee\N{COMBINING ACUTE ACCENT}"sv ); // Accent kept with its 'e'.
+
+  sv = data; // Restore the full string.
+  sv.remove_prefix(1); // Drop the leading 'e'.
+  VERIFY( uc::__field_width(sv) == 2 );
+  VERIFY( uc::__truncate(sv, 3) == 2 );
+  VERIFY( sv == data+1 );
+
+  sv = u8"\N{REGIONAL INDICATOR SYMBOL LETTER G}"
+	 "\N{REGIONAL INDICATOR SYMBOL LETTER B}"; // GB flag emoji
+  data = sv.data();
+  VERIFY( uc::__field_width(sv) == 1 );
+  VERIFY( uc::__truncate(sv, 2) == 1 );
+  VERIFY( sv == data );
+  VERIFY( uc::__truncate(sv, 1) == 1 ); // Do not break inside a flag emoji.
+  VERIFY( sv == data );
+
+  sv = u8"abcd"
+    "\N{REGIONAL INDICATOR SYMBOL LETTER G}" // 4 bytes
+    "\N{REGIONAL INDICATOR SYMBOL LETTER B}" // 4 bytes
+    "\N{DEVANAGARI LETTER KA}"               // 3 bytes
+    "\N{DEVANAGARI SIGN VIRAMA}"             // 3 bytes
+    "\N{DEVANAGARI LETTER RA}"               // 3 bytes
+    "\N{MAN}\N{ZERO WIDTH JOINER}"           // 4+3 bytes
+    "\N{WOMAN}\N{ZERO WIDTH JOINER}"         // 4+3 bytes
+    "\N{GIRL}\N{ZERO WIDTH JOINER}"          // 4+3 bytes
+    "\N{BOY}\N{ZERO WIDTH JOINER}"           // 4+3 bytes
+    "\N{HANGUL CHOSEONG KIYEOK}"             // 3 bytes
+    "\N{HANGUL CHOSEONG KIYEOK}"             // 3 bytes
+    "\N{HANGUL CHOSEONG KIYEOK}"             // 3 bytes
+    "\N{HANGUL CHOSEONG KIYEOK}"             // 3 bytes
+    "\N{HANGUL JUNGSEONG A}"                 // 3 bytes
+    "\N{HANGUL JONGSEONG KIYEOK}"            // 3 bytes
+    "\N{HANGUL JONGSEONG KIYEOK}"            // 3 bytes
+    "\N{HANGUL JONGSEONG KIYEOK}";           // 3 bytes
+
+  uc::_Grapheme_cluster_view gv(sv);
+  auto iter = gv.begin();
+  VERIFY( iter.base() == sv.data() );
+  VERIFY( *iter == U'a' );
+  std::ranges::advance(iter, 3);
+  VERIFY( *iter == U'd' );
+  VERIFY( iter.base() == sv.data() + 3 );
+  ++iter;
+  VERIFY( *iter == U'\N{REGIONAL INDICATOR SYMBOL LETTER G}' );
+  VERIFY( iter.base() == sv.data() + 4 ); // 4 bytes: "abcd".
+  ++iter; // One step moves past both regional indicators.
+  VERIFY( *iter == U'\N{DEVANAGARI LETTER KA}' );
+  VERIFY( iter.base() == sv.data() + 4 + 8 ); // 8 bytes: the G+B pair.
+  ++iter;
+  VERIFY( *iter == U'\N{MAN}' );
+  VERIFY( iter.base() == sv.data() + 4 + 8 + 9 ); // 9 bytes: KA, VIRAMA, RA.
+  ++iter;
+  VERIFY( iter.base() == sv.data() + 4 + 8 + 9 + 28 ); // 28 bytes: 4 x (emoji+ZWJ).
+  VERIFY( *iter == U'\N{HANGUL CHOSEONG KIYEOK}' );
+  ++iter;
+  VERIFY( iter.base() == sv.data() + 4 + 8 + 9 + 28 + 24 ); // 24 bytes: 8 jamo.
+  VERIFY( iter == gv.end() );
+  ++iter; // Incrementing at the end leaves the iterator at the end.
+  VERIFY( iter == gv.end() );
+}
+
+int main()
+{
+  auto run_tests = []{ // Wrapper returning bool so it can feed static_assert.
+    test_breaks();
+    return true;
+  };
+
+  VERIFY( run_tests() ); // Run-time evaluation. NOTE(review): the file is marked dg-do compile, so this is presumably never executed by the testsuite - confirm.
+  static_assert( run_tests() ); // Constant evaluation of the same checks.
+}
diff --git a/libstdc++-v3/testsuite/ext/unicode/properties.cc b/libstdc++-v3/testsuite/ext/unicode/properties.cc
new file mode 100644
index 00000000000..8600a3d31d9
--- /dev/null
+++ b/libstdc++-v3/testsuite/ext/unicode/properties.cc
@@ -0,0 +1,128 @@
+// { dg-do compile { target c++20 } }
+
+#include <format> // includes <bits/unicode.h>
+#include <string_view>
+#include <ranges>
+#include <testsuite_hooks.h>
+
+namespace uc = std::__unicode;
+using namespace std::string_view_literals;
+
+constexpr char32_t riA = U'\N{REGIONAL INDICATOR SYMBOL LETTER A}';
+constexpr char32_t riZ = U'\N{REGIONAL INDICATOR SYMBOL LETTER Z}';
+
+static_assert( uc::__field_width(U'\0') == 1 );
+static_assert( uc::__field_width(U'1') == 1 );
+static_assert( uc::__field_width(U'a') == 1 );
+static_assert( uc::__field_width(riA) == 1 );
+static_assert( uc::__field_width(U'\N{OBLIQUE HYPHEN}') == 1 );
+static_assert( uc::__field_width(U'\N{CIRCLED NUMBER EIGHTY ON BLACK SQUARE}')
+    == 1 );
+
+static_assert( uc::__field_width(U'\N{SESQUIQUADRATE}') == 1 );
+static_assert( uc::__field_width(U'\N{SOCCER BALL}') == 2 );
+static_assert( uc::__field_width(U'\N{BASEBALL}') == 2 );
+static_assert( uc::__field_width(U'\N{SQUARED KEY}') == 1 );
+static_assert( uc::__field_width(U'\N{BLACK DRAUGHTS KING}') == 1 );
+static_assert( uc::__field_width(U'\N{SNOWMAN WITHOUT SNOW}') == 2 );
+
+static_assert( uc::__field_width(U'\N{IDEOGRAPHIC SPACE}') == 2 );
+static_assert( uc::__field_width(U'\N{IDEOGRAPHIC COMMA}') == 2 );
+static_assert( uc::__field_width(U'\N{CIRCLED IDEOGRAPH ONE}') == 2 );
+
+// EastAsianWidth.txt says these are normal width, but C++ says width 2:
+static_assert( uc::__field_width(U'\u4DC0') == 2 );
+static_assert( uc::__field_width(U'\u4DC1') == 2 );
+static_assert( uc::__field_width(U'\u4DFF') == 2 );
+// EastAsianWidth.txt says W and C++ says 2:
+static_assert( uc::__field_width(U'\U0001F300') == 2 );
+static_assert( uc::__field_width(U'\U0001F320') == 2 );
+// EastAsianWidth.txt says N but C++ says 2:
+static_assert( uc::__field_width(U'\U0001F321') == 2 );
+static_assert( uc::__field_width(U'\U0001F5FA') == 2 );
+// EastAsianWidth.txt says W and C++ says 2:
+static_assert( uc::__field_width(U'\U0001F5FF') == 2 );
+static_assert( uc::__field_width(U'\U0001F600') == 2 );
+
+static_assert( uc::__field_width(U'\U0001F900') == 2 );
+static_assert( uc::__field_width(U'\U0001F90B') == 2 );
+static_assert( uc::__field_width(U'\U0001F90C') == 2 );
+static_assert( uc::__field_width(U'\U0001F93B') == 2 );
+static_assert( uc::__field_width(U'\U0001F9FF') == 2 );
+static_assert( uc::__field_width(U'\U0001FA00') == 1 );
+static_assert( uc::__field_width(U'\U0001FA69') == 1 );
+static_assert( uc::__field_width(U'\U0001FA70') == 2 );
+static_assert( uc::__field_width(U'\U0001FAF8') == 2 );
+static_assert( uc::__field_width(U'\U0001FAF9') == 1 );
+
+using enum uc::_Gcb_property;
+static_assert( uc::__grapheme_cluster_break_property(U'\0') == _Gcb_Control );
+static_assert( uc::__grapheme_cluster_break_property(U'a') == _Gcb_Other );
+static_assert( uc::__grapheme_cluster_break_property(riA)
+    == _Gcb_Regional_Indicator );
+static_assert( uc::__grapheme_cluster_break_property(riZ)
+    == _Gcb_Regional_Indicator );
+static_assert( uc::__grapheme_cluster_break_property(riA - 1) == _Gcb_Other );
+static_assert( uc::__grapheme_cluster_break_property(riZ + 1) == _Gcb_Other );
+static_assert( uc::__grapheme_cluster_break_property(U'\uD788') == _Gcb_LV );
+static_assert( uc::__grapheme_cluster_break_property(U'\uD7A3') == _Gcb_LVT );
+static_assert( uc::__grapheme_cluster_break_property(U'\u200D') == _Gcb_ZWJ );
+static_assert( uc::__grapheme_cluster_break_property(U'\U0001D16D')
+    == _Gcb_SpacingMark );
+static_assert( uc::__grapheme_cluster_break_property(U'\U0001D16E')
+    == _Gcb_Extend );
+static_assert( uc::__grapheme_cluster_break_property(U'\U000E01EF')
+    == _Gcb_Extend );
+static_assert( uc::__grapheme_cluster_break_property(U'\U000E01F0')
+    == _Gcb_Control );
+static_assert( uc::__grapheme_cluster_break_property(U'\U000E0FFF')
+    == _Gcb_Control );
+static_assert( uc::__grapheme_cluster_break_property(U'\U000E1000')
+    == _Gcb_Other );
+
+static_assert( uc::__incb_property(U'\0') == uc::_InCB{0} );
+static_assert( uc::__incb_property(U'a') == uc::_InCB{0} );
+static_assert( uc::__incb_property(U'\N{DEVANAGARI LETTER KA}')
+		== uc::_InCB::_Consonant );
+static_assert( uc::__incb_property(U'\N{DEVANAGARI LETTER RA}')
+		== uc::_InCB::_Consonant );
+static_assert( uc::__incb_property(U'\N{DEVANAGARI LETTER YYA}')
+		== uc::_InCB::_Consonant );
+static_assert( uc::__incb_property(U'\N{DEVANAGARI LETTER YYA}' + 1)
+		== uc::_InCB{0} );
+static_assert( uc::__incb_property(U'\N{DEVANAGARI SIGN NUKTA}')
+		== uc::_InCB::_Extend );
+static_assert( uc::__incb_property(U'\N{DEVANAGARI SIGN NUKTA}' + 1)
+		== uc::_InCB{0} );
+static_assert( uc::__incb_property(U'\U0001E94A') == uc::_InCB::_Extend );
+static_assert( uc::__incb_property(U'\U0001E94B') == uc::_InCB{0} );
+
+static_assert( ! uc::__is_incb_linker(U'\0') );
+static_assert( ! uc::__is_incb_linker(U'a') );
+static_assert( uc::__is_incb_linker(U'\N{DEVANAGARI SIGN VIRAMA}') );
+static_assert( ! uc::__is_incb_linker(U'\N{DEVANAGARI SIGN VIRAMA}' + 1) );
+static_assert( ! uc::__is_incb_linker(U'\N{DEVANAGARI SIGN VIRAMA}' - 1) );
+static_assert( ! uc::__is_incb_linker(U'\u0FFF') );
+static_assert( ! uc::__is_incb_linker(U'\uFFFD') );
+
+static_assert( ! uc::__is_extended_pictographic(U'\0') );
+static_assert( ! uc::__is_extended_pictographic(U'a') );
+static_assert( ! uc::__is_extended_pictographic(riA) );
+static_assert( ! uc::__is_extended_pictographic(riZ) );
+static_assert( ! uc::__is_extended_pictographic(U'\N{COPYRIGHT SIGN}' - 1) );
+static_assert( uc::__is_extended_pictographic(U'\N{COPYRIGHT SIGN}') );
+static_assert( ! uc::__is_extended_pictographic(U'\N{COPYRIGHT SIGN}' + 1) );
+static_assert( ! uc::__is_extended_pictographic(U'\N{INFORMATION SOURCE}' - 1) );
+static_assert( uc::__is_extended_pictographic(U'\N{INFORMATION SOURCE}') );
+static_assert( ! uc::__is_extended_pictographic(U'\N{INFORMATION SOURCE}' + 1) );
+static_assert( ! uc::__is_extended_pictographic(U'\N{LEFT RIGHT ARROW}' - 1) );
+static_assert( uc::__is_extended_pictographic(U'\N{LEFT RIGHT ARROW}') );
+static_assert( uc::__is_extended_pictographic(U'\N{LEFT RIGHT ARROW}' + 1) );
+static_assert( uc::__is_extended_pictographic(U'\N{SOUTH WEST ARROW}') );
+static_assert( ! uc::__is_extended_pictographic(U'\N{SOUTH WEST ARROW}' + 1) );
+static_assert( uc::__is_extended_pictographic(U'\N{POSTBOX}') );
+static_assert( ! uc::__is_extended_pictographic(U'\U0001EFFF') );
+static_assert( uc::__is_extended_pictographic(U'\U0001F000') );
+static_assert( uc::__is_extended_pictographic(U'\U0001FFFD') );
+static_assert( ! uc::__is_extended_pictographic(U'\U0001FFFE') );
+static_assert( ! uc::__is_extended_pictographic(U'\U0001FFFF') );
diff --git a/libstdc++-v3/testsuite/ext/unicode/view.cc b/libstdc++-v3/testsuite/ext/unicode/view.cc
index eaab5c79edc..79ea2bbc6b7 100644
--- a/libstdc++-v3/testsuite/ext/unicode/view.cc
+++ b/libstdc++-v3/testsuite/ext/unicode/view.cc
@@ -85,6 +85,35 @@ test_illformed_utf32()
   VERIFY( std::ranges::equal(uc::_Utf32_view(s), U"\uFFFD"sv) );
 }
 
+constexpr void
+test_past_the_end() // Iterators at end() must stay incrementable and dereferenceable.
+{
+  const auto s8 = u8"1234"sv;
+  uc::_Utf32_view v(s8);
+  auto iter = v.begin();
+  std::advance(iter, 4);
+  VERIFY( iter == v.end() );
+  // Incrementing past the end has well-defined behaviour.
+  ++iter;
+  VERIFY( iter == v.end() );
+  VERIFY( *iter == U'4' ); // Still dereferenceable.
+  ++iter;
+  VERIFY( iter == v.end() );
+  VERIFY( *iter == U'4' );
+  iter++; // Post-increment must behave like pre-increment here.
+  VERIFY( iter == v.end() );
+  VERIFY( *iter == U'4' );
+
+  std::string_view empty;
+  uc::_Utf32_view v2(empty);
+  auto iter2 = v2.begin();
+  VERIFY( iter2 == v2.end() ); // begin() == end() for an empty view.
+  VERIFY( *iter2 == U'\0' );
+  iter2++; // Increment the empty view's iterator (was mistakenly `iter`).
+  VERIFY( iter2 == v2.end() );
+  VERIFY( *iter2 == U'\0' );
+}
+
 int main()
 {
   auto run_tests = []{
@@ -94,6 +123,7 @@ int main()
     test_illformed_utf8();
     test_illformed_utf16();
     test_illformed_utf32();
+    test_past_the_end();
     return true;
   };

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-01-09 23:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-09 23:44 [gcc r14-7082] libstdc++: Fix Unicode property detection functions Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).