--- libcpp/charset.cc.jj 2022-08-21 19:03:48.747215419 +0200 +++ libcpp/charset.cc 2022-08-21 19:39:04.717421624 +0200 @@ -944,6 +944,7 @@ struct uname2c_data { char *canon_name; char prev_char; + char *spellcheck; }; /* Map NAME, a Unicode character name or correction/control/alternate @@ -1006,6 +1007,11 @@ _cpp_uname2c (const char *name, size_t l ret = memcmp (name, key, len > key_len ? key_len : len); len_adj = key_len; } + else if (data->spellcheck) + { + ret = 0; + len_adj = 0; + } else { const char *p = name, *q = key; @@ -1076,7 +1082,54 @@ _cpp_uname2c (const char *name, size_t l { name += len_adj; len -= len_adj; - if (codepoint == 0xd800) + if (__builtin_expect (data != NULL, 0) && data->spellcheck) + { + memcpy (data->canon_name, key, key_len); + if (codepoint == 0xd800) + { + /* NR1 - Hangul syllables. */ + size_t i, j, k, s1 = hangul_count[0] + hangul_count[1]; + size_t s2 = s1 + hangul_count[2]; + + for (i = 0; i < (size_t) hangul_count[0]; ++i) + { + strcpy (data->canon_name + key_len, + hangul_syllables[i]); + char *p1 = strchr (data->canon_name + key_len, '\0'); + for (j = hangul_count[0]; j < s1; ++j) + { + strcpy (p1, hangul_syllables[j]); + char *p2 = strchr (p1, '\0'); + for (k = s1; k < s2; ++k) + { + strcpy (p2, hangul_syllables[k]); + /* In place of get_edit_distance etc. */ + fprintf (stderr, "%s\n", data->spellcheck); + } + } + } + } + else + { + /* NR2 - prefix followed by hexadecimal codepoint. */ + const cppchar_t *p + = (uname2c_pairs + + uname2c_generated[codepoint - 0xd800]); + for (; *p; p += 2) + { + cppchar_t c; + + for (c = p[0]; c <= p[1]; ++c) + { + sprintf (data->canon_name + key_len, "%lX", + (long) c); + /* In place of get_edit_distance etc. */ + fprintf (stderr, "%s\n", data->spellcheck); + } + } + } + } + else if (codepoint == 0xd800) { /* NR1 - Hangul syllables. */ size_t start = 0, end, i, j; @@ -1158,7 +1211,17 @@ _cpp_uname2c (const char *name, size_t l } else if (__builtin_expect (data != NULL, 0)) { - if (len == len_adj) + if (data->spellcheck) + { + if (codepoint != (cppchar_t) -1) + { + memcpy (data->canon_name, key, key_len); + data->canon_name[key_len] = '\0'; + /* In place of get_edit_distance etc. */ + fprintf (stderr, "%s\n", data->spellcheck); + } + } + else if (len == len_adj) { memcpy (data->canon_name, key, key_len); data->canon_name[key_len] = '\0'; @@ -1490,6 +1553,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const struct uname2c_data data; data.canon_name = canon_name; data.prev_char = ' '; + data.spellcheck = NULL; /* Hangul Jungseong O- E after UAX44-LM2 should be HANGULJUNGSEONGO-E and so should match U+1180. */