public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "jakub at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug c++/100977] [C++23] Implement C++ Identifier Syntax using Unicode Standard Annex 31 Date: Wed, 04 Aug 2021 16:14:50 +0000 [thread overview] Message-ID: <bug-100977-4-9xdw2hjG5w@http.gcc.gnu.org/bugzilla/> (raw) In-Reply-To: <bug-100977-4@http.gcc.gnu.org/bugzilla/> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100977 --- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> --- Incrementally, here is a makeucnid.c patch to also emit CXX23 and NXX23 flags (CXX23 for valid as C++23 identifier and NXX23 for valid as C++23 identifier but not as the first character), but doesn't contain changes to actually handle it on the libcpp side. --- libcpp/makeucnid.c.jj 2021-08-04 17:35:35.995944075 +0200 +++ libcpp/makeucnid.c 2021-08-04 18:13:56.399062234 +0200 @@ -17,7 +17,7 @@ along with this program; see the file CO /* Run this program as ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \ - > ucnid.h + DerivedCoreProperties.txt > ucnid.h */ #include <stdio.h> @@ -32,10 +32,12 @@ enum { N99 = 4, C11 = 8, N11 = 16, - all_languages = C99 | CXX | C11, - not_NFC = 32, - not_NFKC = 64, - maybe_not_NFC = 128 + CXX23 = 32, + NXX23 = 64, + all_languages = C99 | CXX | C11 | CXX23 | NXX23, + not_NFC = 128, + not_NFKC = 256, + maybe_not_NFC = 512 }; #define NUM_CODE_POINTS 0x110000 @@ -241,6 +243,74 @@ read_derived (const char *fname) fclose (f); } +/* Read DerivedCoreProperties.txt and fill in languages version in + flags from the XID_Start and XID_Continue properties. 
*/ + +static void +read_derivedcore (char *fname) +{ + FILE * f = fopen (fname, "r"); + + if (!f) + fail ("opening DerivedCoreProperties.txt"); + for (;;) + { + char line[256]; + unsigned long codepoint_start, codepoint_end; + char *l; + int i, j; + + if (!fgets (line, sizeof (line), f)) + break; + if (line[0] == '#' || line[0] == '\n' || line[0] == '\r') + continue; + codepoint_start = strtoul (line, &l, 16); + if (l == line) + fail ("parsing DerivedCoreProperties.txt, reading code point"); + if (codepoint_start > MAX_CODE_POINT) + fail ("parsing DerivedCoreProperties.txt, code point too large"); + + if (*l == '.' && l[1] == '.') + { + char *l2 = l + 2; + codepoint_end = strtoul (l + 2, &l, 16); + if (l == l2 || codepoint_end < codepoint_start) + fail ("parsing DerivedCoreProperties.txt, reading code point"); + if (codepoint_end > MAX_CODE_POINT) + fail ("parsing DerivedCoreProperties.txt, code point too large"); + } + else + codepoint_end = codepoint_start; + + while (*l == ' ') + l++; + if (*l++ != ';') + fail ("parsing DerivedCoreProperties.txt, reading code point"); + + while (*l == ' ') + l++; + + if (codepoint_end < 0x80) + continue; + + if (strncmp (l, "XID_Start ", 10) == 0) + { + for (; codepoint_start <= codepoint_end; codepoint_start++) + flags[codepoint_start] + = (flags[codepoint_start] | CXX23) & ~NXX23; + } + else if (strncmp (l, "XID_Continue ", 13) == 0) + { + for (; codepoint_start <= codepoint_end; codepoint_start++) + if ((flags[codepoint_start] & CXX23) == 0) + flags[codepoint_start] |= CXX23 | NXX23; + } + } + if (ferror (f)) + fail ("reading DerivedCoreProperties.txt"); + fclose (f); +} + /* Write out the table. The table consists of two words per entry. The first word is the flags for the unicode code points up to and including the second word. 
*/ @@ -261,12 +331,14 @@ write_table (void) || really_safe != (decomp[i][0] == 0) || combining_value[i] != last_combine) { - printf ("{ %s|%s|%s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n", + printf ("{ %s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n", last_flag & C99 ? "C99" : " 0", last_flag & N99 ? "N99" : " 0", last_flag & CXX ? "CXX" : " 0", last_flag & C11 ? "C11" : " 0", last_flag & N11 ? "N11" : " 0", + last_flag & CXX23 ? "CXX23" : " 0", + last_flag & NXX23 ? "NXX23" : " 0", really_safe ? "CID" : " 0", last_flag & not_NFC ? " 0" : "NFC", last_flag & not_NFKC ? " 0" : "NKC", @@ -439,11 +511,12 @@ write_copyright (void) int main(int argc, char ** argv) { - if (argc != 4) + if (argc != 5) fail ("too few arguments to makeucn"); read_ucnid (argv[1]); read_table (argv[2]); read_derived (argv[3]); + read_derivedcore (argv[4]); write_copyright (); write_table ();
next prev parent reply other threads:[~2021-08-04 16:14 UTC|newest] Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-06-08 18:15 [Bug c++/100977] New: " jason at gcc dot gnu.org 2021-06-08 18:19 ` [Bug c++/100977] " mpolacek at gcc dot gnu.org 2021-08-04 13:39 ` jakub at gcc dot gnu.org 2021-08-04 14:08 ` jakub at gcc dot gnu.org 2021-08-04 16:14 ` jakub at gcc dot gnu.org [this message] 2021-08-04 18:34 ` joseph at codesourcery dot com 2021-08-04 18:40 ` jakub at gcc dot gnu.org 2021-08-04 19:06 ` ubizjak at gmail dot com 2021-08-04 19:20 ` jakub at gcc dot gnu.org 2021-08-04 19:25 ` ubizjak at gmail dot com 2021-08-05 10:17 ` jakub at gcc dot gnu.org 2021-08-05 15:34 ` cvs-commit at gcc dot gnu.org 2021-08-05 15:35 ` cvs-commit at gcc dot gnu.org 2021-09-01 20:37 ` cvs-commit at gcc dot gnu.org 2021-09-01 20:38 ` jakub at gcc dot gnu.org 2021-11-30 8:51 ` cvs-commit at gcc dot gnu.org 2021-12-01 9:22 ` cvs-commit at gcc dot gnu.org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-100977-4-9xdw2hjG5w@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).