public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Joseph Myers <jsm28@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r13-3310] preprocessor: C2x identifier rules
Date: Fri, 14 Oct 2022 23:09:12 +0000 (GMT)	[thread overview]
Message-ID: <20221014230912.52F683858D38@sourceware.org> (raw)

https://gcc.gnu.org/g:36d20fa4a83d1a294462c2622ca76eac93465c2c

commit r13-3310-g36d20fa4a83d1a294462c2622ca76eac93465c2c
Author: Joseph Myers <joseph@codesourcery.com>
Date:   Fri Oct 14 23:07:50 2022 +0000

    preprocessor: C2x identifier rules
    
    C2x has, like C++, adopted rules for identifiers based directly on an
    unversioned normative reference to Unicode.  Make libcpp follow those
    rules for c2x / gnu2x standards (this involves bringing back a flag
    separate from the C++ one for whether to use these identifier rules,
    but this time enabled for all C++ language versions since that was the
    conclusion adopted for C++ identifier handling).
    
    There is one change here that affects C++.  I believe the new
    normative requirement for NFC only applies to identifiers, not to the
    use of identifier-continue characters in pp-numbers, where there is no
    such requirement and so the diagnostic ought to be a warning not a
    pedwarn in pp-numbers, and that this is the case for both C and C++.
    
    Bootstrapped with no regressions for x86_64-pc-linux-gnu.
    
    libcpp/
            * charset.cc (ucn_valid_in_identifier): Check xid_identifiers not
            cplusplus to determine whether to use CXX23 and NXX23 flags.
            * include/cpplib.h (struct cpp_options): Add xid_identifiers.
            * init.cc (struct lang_flags, lang_defaults): Add xid_identifiers.
            (cpp_set_lang): Set xid_identifiers.
            * lex.cc (warn_about_normalization): Add parameter identifier.
            Only pedwarn about non-NFC for identifiers, not pp-numbers.
            (_cpp_lex_direct): Update calls to warn_about_normalization.
    
    gcc/testsuite/
            * gcc.dg/cpp/c2x-ucnid-1-utf8.c, gcc.dg/cpp/c2x-ucnid-1.c: New
            tests.

Diff:
---
 gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1-utf8.c | 13 ++++++++
 gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1.c      | 13 ++++++++
 libcpp/charset.cc                           |  4 +--
 libcpp/include/cpplib.h                     |  4 +++
 libcpp/init.cc                              | 52 +++++++++++++++--------------
 libcpp/lex.cc                               | 13 ++++----
 6 files changed, 66 insertions(+), 33 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1-utf8.c b/gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1-utf8.c
new file mode 100644
index 00000000000..55d22819563
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1-utf8.c
@@ -0,0 +1,13 @@
+/* Test C2x (= Unicode) rules for characters in identifiers.  */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+¨
+
+/* The requirement for NFC only applies in identifiers, not pp-numbers.  */
+
+À /* { dg-error "not in NFC" } */
+ÿÀ /* { dg-error "not in NFC" } */
+
+0À /* { dg-warning "not in NFC" } */
+.1À /* { dg-warning "not in NFC" } */
diff --git a/gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1.c b/gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1.c
new file mode 100644
index 00000000000..f9fdbea6ece
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1.c
@@ -0,0 +1,13 @@
+/* Test C2x (= Unicode) rules for characters in identifiers.  */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+\u00A8 /* { dg-error "is not valid in an identifier" } */
+
+/* The requirement for NFC only applies in identifiers, not pp-numbers.  */
+
+A\u0300 /* { dg-error "not in NFC" } */
+\u00ffA\u0300 /* { dg-error "not in NFC" } */
+
+0A\u0300 /* { dg-warning "not in NFC" } */
+.1A\u0300 /* { dg-warning "not in NFC" } */
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index 6834969a919..12a398e7527 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -1291,7 +1291,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
   valid_flags = C99 | CXX | C11 | CXX23;
   if (CPP_PEDANTIC (pfile))
     {
-      if (CPP_OPTION (pfile, cplusplus))
+      if (CPP_OPTION (pfile, xid_identifiers))
 	valid_flags = CXX23;
       else if (CPP_OPTION (pfile, c11_identifiers))
 	valid_flags = C11;
@@ -1355,7 +1355,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
       return 2;
     }
 
-  if (CPP_OPTION (pfile, cplusplus))
+  if (CPP_OPTION (pfile, xid_identifiers))
     invalid_start_flags = NXX23;
   else if (CPP_OPTION (pfile, c11_identifiers))
     invalid_start_flags = N11;
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index e97993e04bc..d5ef12a30ea 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -496,6 +496,10 @@ struct cpp_options
      in C11.  */
   unsigned char c11_identifiers;
 
+  /* Nonzero means extended identifiers allow the characters specified
+     by Unicode XID_Start and XID_Continue properties.  */
+  unsigned char xid_identifiers;
+
   /* Nonzero for C++ 2014 Standard binary constants.  */
   unsigned char binary_constants;
 
diff --git a/libcpp/init.cc b/libcpp/init.cc
index d3b4f00994b..5f34e3515d2 100644
--- a/libcpp/init.cc
+++ b/libcpp/init.cc
@@ -82,6 +82,7 @@ struct lang_flags
   char extended_numbers;
   char extended_identifiers;
   char c11_identifiers;
+  char xid_identifiers;
   char std;
   char digraphs;
   char uliterals;
@@ -102,31 +103,31 @@ struct lang_flags
 };
 
 static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */
-  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
-  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
-  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
-  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
-  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      1,      0,    1 },
-  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
-  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
-  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
-  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
-  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
-  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     0,   1,      0,   1,     1,   0,   1,      1,      0,    1 },
-  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    1 },
-  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0,      0,    1 },
-  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    1 },
-  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0,      0,    1 },
-  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0,      0,    1 },
-  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0,      0,    1 },
-  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0,    1 },
-  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0,      0,    1 },
-  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0,    1 },
-  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0,    1 },
-  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1,      1,    1 },
-  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1,      1,    1 },
-  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0,      0,    0 }
+{ /*              c99 c++ xnum xid c11 xidid std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */
+  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,    0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
+  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,    0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
+  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,    0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
+  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,    0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    0 },
+  /* GNUC2X   */  { 1,  0,  1,  1,  1,  1,    0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      1,      0,    1 },
+  /* STDC89   */  { 0,  0,  0,  0,  0,  0,    1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
+  /* STDC94   */  { 0,  0,  0,  0,  0,  0,    1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
+  /* STDC99   */  { 1,  0,  1,  1,  0,  0,    1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
+  /* STDC11   */  { 1,  0,  1,  1,  1,  0,    1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
+  /* STDC17   */  { 1,  0,  1,  1,  1,  0,    1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0,    0 },
+  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,    1,  1,   1,   0,   0,    1,     1,     0,   1,      0,   1,     1,   0,   1,      1,      0,    1 },
+  /* GNUCXX   */  { 0,  1,  1,  1,  0,  1,    0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    1 },
+  /* CXX98    */  { 0,  1,  0,  1,  0,  1,    1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0,      0,    1 },
+  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  1,    0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0,    1 },
+  /* CXX11    */  { 1,  1,  0,  1,  1,  1,    1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0,      0,    1 },
+  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  1,    0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0,      0,    1 },
+  /* CXX14    */  { 1,  1,  0,  1,  1,  1,    1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0,      0,    1 },
+  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  1,    0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0,    1 },
+  /* CXX17    */  { 1,  1,  1,  1,  1,  1,    1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0,      0,    1 },
+  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  1,    0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0,    1 },
+  /* CXX20    */  { 1,  1,  1,  1,  1,  1,    1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0,    1 },
+  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  1,    0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1,      1,    1 },
+  /* CXX23    */  { 1,  1,  1,  1,  1,  1,    1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1,      1,    1 },
+  /* ASM      */  { 0,  0,  1,  0,  0,  0,    0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0,      0,    0 }
 };
 
 /* Sets internal flags correctly for a given language.  */
@@ -142,6 +143,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
   CPP_OPTION (pfile, extended_numbers)		 = l->extended_numbers;
   CPP_OPTION (pfile, extended_identifiers)	 = l->extended_identifiers;
   CPP_OPTION (pfile, c11_identifiers)		 = l->c11_identifiers;
+  CPP_OPTION (pfile, xid_identifiers)		 = l->xid_identifiers;
   CPP_OPTION (pfile, std)			 = l->std;
   CPP_OPTION (pfile, digraphs)			 = l->digraphs;
   CPP_OPTION (pfile, uliterals)			 = l->uliterals;
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index a429a3d44ce..cc12a52d282 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -2007,7 +2007,8 @@ name_p (cpp_reader *pfile, const cpp_string *string)
 static void
 warn_about_normalization (cpp_reader *pfile, 
 			  const cpp_token *token,
-			  const struct normalize_state *s)
+			  const struct normalize_state *s,
+			  bool identifier)
 {
   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
       && !pfile->state.skipping)
@@ -2043,7 +2044,7 @@ warn_about_normalization (cpp_reader *pfile,
       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
 			"`%.*s' is not in NFKC", (int) sz, buf);
-      else if (CPP_OPTION (pfile, cplusplus))
+      else if (identifier && CPP_OPTION (pfile, xid_identifiers))
 	cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
 				  "`%.*s' is not in NFC", (int) sz, buf);
       else
@@ -3839,7 +3840,7 @@ _cpp_lex_direct (cpp_reader *pfile)
 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 	result->type = CPP_NUMBER;
 	lex_number (pfile, &result->val.str, &nst);
-	warn_about_normalization (pfile, result, &nst);
+	warn_about_normalization (pfile, result, &nst, false);
 	break;
       }
 
@@ -3888,7 +3889,7 @@ _cpp_lex_direct (cpp_reader *pfile)
 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
 						&nst,
 						&result->val.node.spelling);
-	warn_about_normalization (pfile, result, &nst);
+	warn_about_normalization (pfile, result, &nst, true);
       }
 
       /* Convert named operators to their proper types.  */
@@ -4101,7 +4102,7 @@ _cpp_lex_direct (cpp_reader *pfile)
 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 	  result->type = CPP_NUMBER;
 	  lex_number (pfile, &result->val.str, &nst);
-	  warn_about_normalization (pfile, result, &nst);
+	  warn_about_normalization (pfile, result, &nst, false);
 	}
       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
@@ -4192,7 +4193,7 @@ _cpp_lex_direct (cpp_reader *pfile)
 	    result->type = CPP_NAME;
 	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
 						    &result->val.node.spelling);
-	    warn_about_normalization (pfile, result, &nst);
+	    warn_about_normalization (pfile, result, &nst, true);
 	    break;
 	  }

                 reply	other threads:[~2022-10-14 23:09 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221014230912.52F683858D38@sourceware.org \
    --to=jsm28@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).