[PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
@ 2022-08-17  7:17 Jakub Jelinek
  2022-08-17 20:47 ` Jason Merrill
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2022-08-17  7:17 UTC (permalink / raw)
  To: Jason Merrill, Marek Polacek, Joseph S. Myers; +Cc: gcc-patches

Hi!

The following patch implements the C++23 P2290R3 paper.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-08-17  Jakub Jelinek  <jakub@redhat.com>

	PR c++/106645
libcpp/
	* include/cpplib.h (struct cpp_options): Implement
	P2290R3 - Delimited escape sequences.  Add delimited_escape_seqs
	member.
	* init.cc (struct lang_flags): Likewise.
	(lang_defaults): Add delim column.
	(cpp_set_lang): Copy over delimited_escape_seqs.
	* charset.cc (_cpp_valid_ucn): Handle delimited escape sequences.
	(convert_hex): Likewise.
	(convert_oct): Likewise.
	(convert_escape): Call convert_oct even for \o.
	(_cpp_interpret_identifier): Handle delimited escape sequences.
	* lex.cc (get_bidi_ucn_1): Likewise.  Add end argument, fill it in.
	(get_bidi_ucn): Adjust get_bidi_ucn_1 caller.  Use end argument to
	compute num_bytes.
gcc/testsuite/
	* c-c++-common/cpp/delimited-escape-seq-1.c: New test.
	* c-c++-common/cpp/delimited-escape-seq-2.c: New test.
	* c-c++-common/cpp/delimited-escape-seq-3.c: New test.
	* c-c++-common/Wbidi-chars-24.c: New test.
	* gcc.dg/cpp/delimited-escape-seq-1.c: New test.
	* gcc.dg/cpp/delimited-escape-seq-2.c: New test.
	* g++.dg/cpp/delimited-escape-seq-1.C: New test.
	* g++.dg/cpp/delimited-escape-seq-2.C: New test.

--- libcpp/include/cpplib.h.jj	2022-08-10 09:06:53.268209449 +0200
+++ libcpp/include/cpplib.h	2022-08-15 19:32:53.743213474 +0200
@@ -519,6 +519,9 @@ struct cpp_options
   /* Nonzero for C++23 size_t literals.  */
   unsigned char size_t_literals;
 
+  /* Nonzero for C++23 delimited escape sequences.  */
+  unsigned char delimited_escape_seqs;
+
   /* Holds the name of the target (execution) character set.  */
   const char *narrow_charset;
 
--- libcpp/init.cc.jj	2022-08-10 09:06:53.268209449 +0200
+++ libcpp/init.cc	2022-08-15 16:09:01.403020485 +0200
@@ -96,34 +96,35 @@ struct lang_flags
   char dfp_constants;
   char size_t_literals;
   char elifdef;
+  char delimited_escape_seqs;
 };
 
 static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */
-  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1 },
-  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1 },
-  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
-  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
-  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0 },
-  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0 },
-  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
-  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0 },
-  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
-  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
-  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
-  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
-  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0 }
+{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */
+  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      0 },
+  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1,      0 },
+  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
+  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
+  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
+  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
+  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
+  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0 }
 };
 
 /* Sets internal flags correctly for a given language.  */
@@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_
   CPP_OPTION (pfile, dfp_constants)		 = l->dfp_constants;
   CPP_OPTION (pfile, size_t_literals)		 = l->size_t_literals;
   CPP_OPTION (pfile, elifdef)			 = l->elifdef;
+  CPP_OPTION (pfile, delimited_escape_seqs)	 = l->delimited_escape_seqs;
 }
 
 /* Initialize library global state.  */
--- libcpp/charset.cc.jj	2022-08-15 12:52:43.213902801 +0200
+++ libcpp/charset.cc	2022-08-16 11:42:27.729948705 +0200
@@ -1081,6 +1081,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const
   unsigned int length;
   const uchar *str = *pstr;
   const uchar *base = str - 2;
+  bool delimited = false;
 
   if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
     cpp_error (pfile, CPP_DL_WARNING,
@@ -1095,7 +1096,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const
 	         (int) str[-1]);
 
   if (str[-1] == 'u')
-    length = 4;
+    {
+      length = 4;
+      if (str < limit && *str == '{')
+	{
+	  str++;
+	  length = 32;
+	  delimited = true;
+	  if (loc_reader)
+	    char_range->m_finish = loc_reader->get_next ().m_finish;
+	}
+    }
   else if (str[-1] == 'U')
     length = 8;
   else
@@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const
   result = 0;
   do
     {
+      if (str == limit)
+	break;
       c = *str;
       if (!ISXDIGIT (c))
 	break;
@@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const
 	  gcc_assert (char_range);
 	  char_range->m_finish = loc_reader->get_next ().m_finish;
 	}
+      if (delimited)
+	{
+	  if (!result)
+	    /* Accept arbitrary number of leading zeros.  */
+	    length = 16;
+	  else if (length == 8)
+	    {
+	      /* Make sure we detect overflows.  */
+	      result |= 0x8000000;
+	      ++length;
+	    }
+	}
+
       result = (result << 4) + hex_value (c);
     }
-  while (--length && str < limit);
+  while (--length);
+
+  if (delimited
+      && str < limit
+      && *str == '}'
+      && (length != 32 || !identifier_pos))
+    {
+      if (length == 32)
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "empty delimited escape sequence");
+      else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+	       && CPP_OPTION (pfile, cpp_pedantic))
+	cpp_error (pfile, CPP_DL_PEDWARN,
+		   "delimited escape sequences are only valid in C++23");
+      str++;
+      length = 0;
+      delimited = false;
+      if (loc_reader)
+	char_range->m_finish = loc_reader->get_next ().m_finish;
+    }
 
   /* Partial UCNs are not valid in strings, but decompose into
      multiple tokens in identifiers, so we can't give a helpful
@@ -1132,9 +1177,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const
   *pstr = str;
   if (length)
     {
-      cpp_error (pfile, CPP_DL_ERROR,
-		 "incomplete universal character name %.*s",
-		 (int) (str - base), base);
+      if (!delimited)
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "incomplete universal character name %.*s",
+		   (int) (str - base), base);
+      else
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "'\\u{' not terminated with '}' after %.*s",
+		   (int) (str - base), base);
       result = 1;
     }
   /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
@@ -1392,6 +1442,8 @@ convert_hex (cpp_reader *pfile, const uc
   int digits_found = 0;
   size_t width = cvt.width;
   size_t mask = width_to_mask (width);
+  bool delimited = false;
+  const uchar *base = from - 1;
 
   /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
   gcc_assert ((loc_reader != NULL) == (ranges != NULL));
@@ -1407,6 +1459,14 @@ convert_hex (cpp_reader *pfile, const uc
   if (loc_reader)
     char_range.m_finish = loc_reader->get_next ().m_finish;
 
+  if (from < limit && *from == '{')
+    {
+      delimited = true;
+      from++;
+      if (loc_reader)
+	char_range.m_finish = loc_reader->get_next ().m_finish;
+    }
+
   while (from < limit)
     {
       c = *from;
@@ -1420,12 +1480,37 @@ convert_hex (cpp_reader *pfile, const uc
       digits_found = 1;
     }
 
+  if (delimited && from < limit && *from == '}')
+    {
+      from++;
+      if (!digits_found)
+	{
+	  cpp_error (pfile, CPP_DL_ERROR,
+		     "empty delimited escape sequence");
+	  return from;
+	}
+     else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+	      && CPP_OPTION (pfile, cpp_pedantic))
+	cpp_error (pfile, CPP_DL_PEDWARN,
+		   "delimited escape sequences are only valid in C++23");
+      delimited = false;
+      if (loc_reader)
+	char_range.m_finish = loc_reader->get_next ().m_finish;
+    }
+
   if (!digits_found)
     {
       cpp_error (pfile, CPP_DL_ERROR,
 		 "\\x used with no following hex digits");
       return from;
     }
+  else if (delimited)
+    {
+      cpp_error (pfile, CPP_DL_ERROR,
+		 "'\\x{' not terminated with '}' after %.*s",
+		 (int) (from - base), base);
+      return from;
+    }
 
   if (overflow | (n != (n & mask)))
     {
@@ -1459,13 +1544,31 @@ convert_oct (cpp_reader *pfile, const uc
 	     cpp_substring_ranges *ranges)
 {
   size_t count = 0;
-  cppchar_t c, n = 0;
+  cppchar_t c, n = 0, overflow = 0;
   size_t width = cvt.width;
   size_t mask = width_to_mask (width);
+  bool delimited = false;
+  const uchar *base = from - 1;
 
   /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
   gcc_assert ((loc_reader != NULL) == (ranges != NULL));
 
+  if (from < limit && *from == 'o')
+    {
+      from++;
+      if (loc_reader)
+	char_range.m_finish = loc_reader->get_next ().m_finish;
+      if (from == limit || *from != '{')
+	cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
+      else
+	{
+	  from++;
+	  if (loc_reader)
+	    char_range.m_finish = loc_reader->get_next ().m_finish;
+	  delimited = true;
+	}
+    }
+
   while (from < limit && count++ < 3)
     {
       c = *from;
@@ -1474,10 +1577,42 @@ convert_oct (cpp_reader *pfile, const uc
       from++;
       if (loc_reader)
 	char_range.m_finish = loc_reader->get_next ().m_finish;
+      if (delimited)
+	{
+	  count = 2;
+	  overflow |= n ^ (n << 3 >> 3);
+	}
       n = (n << 3) + c - '0';
     }
 
-  if (n != (n & mask))
+  if (delimited)
+    {
+      if (from < limit && *from == '}')
+	{
+	  from++;
+	  if (count == 1)
+	    {
+	      cpp_error (pfile, CPP_DL_ERROR,
+			 "empty delimited escape sequence");
+	      return from;
+	    }
+	  else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+		   && CPP_OPTION (pfile, cpp_pedantic))
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "delimited escape sequences are only valid in C++23");
+	  if (loc_reader)
+	    char_range.m_finish = loc_reader->get_next ().m_finish;
+	}
+      else
+	{
+	  cpp_error (pfile, CPP_DL_ERROR,
+		     "'\\o{' not terminated with '}' after %.*s",
+		     (int) (from - base), base);
+	  return from;
+	}
+    }
+
+  if (overflow | (n != (n & mask)))
     {
       cpp_error (pfile, CPP_DL_PEDWARN,
 		 "octal escape sequence out of range");
@@ -1535,6 +1670,7 @@ convert_escape (cpp_reader *pfile, const
 
     case '0':  case '1':  case '2':  case '3':
     case '4':  case '5':  case '6':  case '7':
+    case 'o':
       return convert_oct (pfile, from, limit, tbuf, cvt,
 			  char_range, loc_reader, ranges);
 
@@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *p
 	cppchar_t value = 0;
 	size_t bufleft = len - (bufp - buf);
 	int rval;
+	bool delimited = false;
 
 	idp += 2;
+	if (length == 4 && id[idp] == '{')
+	  {
+	    delimited = true;
+	    idp++;
+	  }
 	while (length && idp < len && ISXDIGIT (id[idp]))
 	  {
 	    value = (value << 4) + hex_value (id[idp]);
 	    idp++;
-	    length--;
+	    if (!delimited)
+	      length--;
 	  }
-	idp--;
+	if (!delimited)
+	  idp--;
 
 	/* Special case for EBCDIC: if the identifier contains
 	   a '$' specified using a UCN, translate it to EBCDIC.  */
--- libcpp/lex.cc.jj	2022-05-23 10:59:06.235591348 +0200
+++ libcpp/lex.cc	2022-08-16 11:57:53.772823661 +0200
@@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const
 /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
 
 static bidi::kind
-get_bidi_ucn_1 (const unsigned char *p, bool is_U)
+get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
 {
   /* 6.4.3 Universal Character Names
       \u hex-quad
       \U hex-quad hex-quad
+      \u { simple-hexadecimal-digit-sequence }
      where \unnnn means \U0000nnnn.  */
 
+  *end = p + 4;
   if (is_U)
     {
       if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
 	return bidi::kind::NONE;
       /* Skip 4B so we can treat \u and \U the same below.  */
       p += 4;
+      *end += 4;
+    }
+  else if (p[0] == '{')
+    {
+      p++;
+      while (*p == '0')
+	p++;
+      if (p[0] != '2'
+	  || p[1] != '0'
+	  || !ISXDIGIT (p[2])
+	  || !ISXDIGIT (p[3])
+	  || p[4] != '}')
+	return bidi::kind::NONE;
+      *end = p + 5;
     }
 
   /* All code points we are looking for start with 20xx.  */
@@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p,
    If the kind is not NONE, write the location to *OUT.*/
 
 static bidi::kind
-get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
 	      location_t *out)
 {
-  bidi::kind result = get_bidi_ucn_1 (p, is_U);
+  const unsigned char *end;
+  bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
   if (result != bidi::kind::NONE)
     {
       const unsigned char *start = p - 2;
-      size_t num_bytes = 2 + (is_U ? 8 : 4);
+      size_t num_bytes = end - start;
       *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
     }
   return result;
--- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
+++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c	2022-08-16 12:18:42.235477632 +0200
@@ -0,0 +1,92 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do run } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
+const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
+const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
+const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
+const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
+const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
+const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
+const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
+const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
+#ifdef __cplusplus
+const char *j = "\u{34}\u{000000000000000003D}";
+#endif
+const char *k = "\x{34}\x{000000000000000003D}";
+const char *l = "\o{34}\o{000000000000000176}";
+
+#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
+    || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
+    || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
+    || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
+    || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
+    || u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
+    || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
+    || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
+    || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
+    || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
+    || '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
+#error Bad
+#endif
+#ifdef __cplusplus
+#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
+#error Bad
+#endif
+#endif
+
+int
+main ()
+{
+  if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
+      || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
+      || a[2] != a[0]
+      || a[3] != a[1]
+      || b[0] != U'\x1234' || b[0] != U'\x{001234}'
+      || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
+      || b[2] != b[0]
+      || c[0] != U'\x29c' || c[0] != U'\o{001234}'
+      || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
+      || c[2] != c[1])
+    __builtin_abort ();
+  if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
+      || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
+      || d[2] != d[0]
+      || e[0] != u'\x1234' || e[0] != u'\x{001234}'
+      || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
+      || e[2] != e[0]
+      || f[0] != u'\x29c' || f[0] != u'\o{001234}'
+      || f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
+      || f[2] != f[1])
+    __builtin_abort ();
+  if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
+      || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
+      || g[2] != g[0]
+      || h[0] != L'\x1234' || h[0] != L'\x{001234}'
+      || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
+      || h[2] != h[0]
+      || i[0] != L'\x29c' || i[0] != L'\o{001234}'
+      || i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
+      || i[2] != i[1])
+    __builtin_abort ();
+#ifdef __cplusplus
+  if (j[0] != '\u0034' || j[0] != '\u{034}'
+      || j[1] != '\U0000003D' || j[1] != '\u{000003d}')
+    __builtin_abort ();
+#endif
+  if (k[0] != '\x034' || k[0] != '\x{0034}'
+      || k[1] != '\x3D' || k[1] != '\x{3d}'
+      || l[0] != '\x1c' || l[0] != '\o{0034}'
+      || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
+++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c	2022-08-16 12:18:58.807260607 +0200
@@ -0,0 +1,18 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+int jalape\u{f1}o = 42;
+
+int
+caf\u{000e9} (void)
+{
+  return jalape\u00F1o;
+}
+
+int
+test (void)
+{
+  return caf\u00e9 ();
+}
--- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c.jj	2022-08-16 12:18:19.308777922 +0200
+++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c	2022-08-16 12:41:23.693648138 +0200
@@ -0,0 +1,33 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+#ifndef __cplusplus
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+const char32_t *a = U"\u{}";				/* { dg-error "empty delimited escape sequence" } */
+							/* { dg-error "is not a valid universal character" "" { target c } .-1 } */
+const char32_t *b = U"\u{12" "34}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *c = U"\u{0000ffffffff}";		/* { dg-error "is not a valid universal character" } */
+const char32_t *d = U"\u{010000edcb}";			/* { dg-error "is not a valid universal character" } */
+const char32_t *e = U"\u{02000000000000000000edcb}";	/* { dg-error "is not a valid universal character" } */
+const char32_t *f = U"\u{123ghij}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *g = U"\u{123.}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *h = U"\u{.}";				/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *i = U"\x{}";				/* { dg-error "empty delimited escape sequence" } */
+const char32_t *j = U"\x{12" "34}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *k = U"\x{0000ffffffff}";
+const char32_t *l = U"\x{010000edcb}";			/* { dg-warning "hex escape sequence out of range" } */
+const char32_t *m = U"\x{02000000000000000000edcb}";	/* { dg-warning "hex escape sequence out of range" } */
+const char32_t *n = U"\x{123ghij}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *o = U"\x{123.}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *p = U"\o{}";				/* { dg-error "empty delimited escape sequence" } */
+const char32_t *q = U"\o{12" "34}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
+const char32_t *r = U"\o{0000037777777777}";
+const char32_t *s = U"\o{040000166713}";		/* { dg-warning "octal escape sequence out of range" } */
+const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
+const char32_t *u = U"\o{1238}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
+const char32_t *v = U"\o{.}";				/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
--- gcc/testsuite/c-c++-common/Wbidi-chars-24.c.jj	2022-08-16 12:03:19.350561676 +0200
+++ gcc/testsuite/c-c++-common/Wbidi-chars-24.c	2022-08-16 12:06:46.381851525 +0200
@@ -0,0 +1,28 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
+/* Test nesting of bidi chars in various contexts.  */
+
+void
+g1 ()
+{
+  const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int A\u{202a}B\u{2069}C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u{00000202b}B\u{000000002069}c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
--- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
+++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c	2022-08-16 12:46:56.508291006 +0200
@@ -0,0 +1,10 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
+
+typedef __CHAR32_TYPE__ char32_t;
+
+const char32_t *a = U"\u{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
+const char32_t *b = U"\x{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
+const char32_t *c = U"\o{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
--- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
+++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c	2022-08-16 12:47:05.955167423 +0200
@@ -0,0 +1,10 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
+
+typedef __CHAR32_TYPE__ char32_t;
+
+const char32_t *a = U"\u{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
+const char32_t *b = U"\x{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
+const char32_t *c = U"\o{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
--- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C.jj	2022-08-16 12:46:43.368462901 +0200
+++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C	2022-08-16 12:49:21.532393786 +0200
@@ -0,0 +1,8 @@
+// P2290R3 - Delimited escape sequences
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target wchar }
+// { dg-options "-pedantic" }
+
+const char32_t *a = U"\u{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *b = U"\x{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *c = U"\o{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
--- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C.jj	2022-08-16 12:46:46.281424798 +0200
+++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C	2022-08-16 12:49:33.761233803 +0200
@@ -0,0 +1,8 @@
+// P2290R3 - Delimited escape sequences
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target wchar }
+// { dg-options "-pedantic-errors" }
+
+const char32_t *a = U"\u{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *b = U"\x{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *c = U"\o{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }

	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
  2022-08-17  7:17 [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645] Jakub Jelinek
@ 2022-08-17 20:47 ` Jason Merrill
  2022-08-17 21:19   ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Jason Merrill @ 2022-08-17 20:47 UTC (permalink / raw)
  To: Jakub Jelinek, Marek Polacek, Joseph S. Myers; +Cc: gcc-patches

On 8/17/22 00:17, Jakub Jelinek wrote:
> Hi!
> 
> The following patch implements the C++23 P2290R3 paper.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2022-08-17  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR c++/106645
> libcpp/
> 	* include/cpplib.h (struct cpp_options): Implement
> 	P2290R3 - Delimited escape sequences.  Add delimited_escape_seqs
> 	member.
> 	* init.cc (struct lang_flags): Likewise.
> 	(lang_defaults): Add delim column.
> 	(cpp_set_lang): Copy over delimited_escape_seqs.
> 	* charset.cc (_cpp_valid_ucn): Handle delimited escape sequences.
> 	(convert_hex): Likewise.
> 	(convert_oct): Likewise.
> 	(convert_escape): Call convert_oct even for \o.
> 	(_cpp_interpret_identifier): Handle delimited escape sequences.
> 	* lex.cc (get_bidi_ucn_1): Likewise.  Add end argument, fill it in.
> 	(get_bidi_ucn): Adjust get_bidi_ucn_1 caller.  Use end argument to
> 	compute num_bytes.
> gcc/testsuite/
> 	* c-c++-common/cpp/delimited-escape-seq-1.c: New test.
> 	* c-c++-common/cpp/delimited-escape-seq-2.c: New test.
> 	* c-c++-common/cpp/delimited-escape-seq-3.c: New test.
> 	* c-c++-common/Wbidi-chars-24.c: New test.
> 	* gcc.dg/cpp/delimited-escape-seq-1.c: New test.
> 	* gcc.dg/cpp/delimited-escape-seq-2.c: New test.
> 	* g++.dg/cpp/delimited-escape-seq-1.C: New test.
> 	* g++.dg/cpp/delimited-escape-seq-2.C: New test.
> 
> --- libcpp/include/cpplib.h.jj	2022-08-10 09:06:53.268209449 +0200
> +++ libcpp/include/cpplib.h	2022-08-15 19:32:53.743213474 +0200
> @@ -519,6 +519,9 @@ struct cpp_options
>     /* Nonzero for C++23 size_t literals.  */
>     unsigned char size_t_literals;
>   
> +  /* Nonzero for C++23 delimited escape sequences.  */
> +  unsigned char delimited_escape_seqs;
> +
>     /* Holds the name of the target (execution) character set.  */
>     const char *narrow_charset;
>   
> --- libcpp/init.cc.jj	2022-08-10 09:06:53.268209449 +0200
> +++ libcpp/init.cc	2022-08-15 16:09:01.403020485 +0200
> @@ -96,34 +96,35 @@ struct lang_flags
>     char dfp_constants;
>     char size_t_literals;
>     char elifdef;
> +  char delimited_escape_seqs;
>   };
>   
>   static const struct lang_flags lang_defaults[] =
> -{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */
> -  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1 },
> -  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1 },
> -  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
> -  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
> -  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0 },
> -  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0 },
> -  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
> -  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0 },
> -  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
> -  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
> -  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
> -  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
> -  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0 }
> +{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */
> +  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      0 },
> +  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1,      0 },
> +  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
> +  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
> +  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
> +  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
> +  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
> +  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0 }
>   };
>   
>   /* Sets internal flags correctly for a given language.  */
> @@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_
>     CPP_OPTION (pfile, dfp_constants)		 = l->dfp_constants;
>     CPP_OPTION (pfile, size_t_literals)		 = l->size_t_literals;
>     CPP_OPTION (pfile, elifdef)			 = l->elifdef;
> +  CPP_OPTION (pfile, delimited_escape_seqs)	 = l->delimited_escape_seqs;
>   }
>   
>   /* Initialize library global state.  */
> --- libcpp/charset.cc.jj	2022-08-15 12:52:43.213902801 +0200
> +++ libcpp/charset.cc	2022-08-16 11:42:27.729948705 +0200
> @@ -1081,6 +1081,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>     unsigned int length;
>     const uchar *str = *pstr;
>     const uchar *base = str - 2;
> +  bool delimited = false;
>   
>     if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
>       cpp_error (pfile, CPP_DL_WARNING,
> @@ -1095,7 +1096,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>   	         (int) str[-1]);
>   
>     if (str[-1] == 'u')
> -    length = 4;
> +    {
> +      length = 4;
> +      if (str < limit && *str == '{')
> +	{
> +	  str++;
> +	  length = 32;

/* Magic value to indicate no digits seen.  */

> +	  delimited = true;
> +	  if (loc_reader)
> +	    char_range->m_finish = loc_reader->get_next ().m_finish;
> +	}
> +    }
>     else if (str[-1] == 'U')
>       length = 8;
>     else
> @@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>     result = 0;
>     do
>       {
> +      if (str == limit)
> +	break;
>         c = *str;
>         if (!ISXDIGIT (c))
>   	break;
> @@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>   	  gcc_assert (char_range);
>   	  char_range->m_finish = loc_reader->get_next ().m_finish;
>   	}
> +      if (delimited)
> +	{
> +	  if (!result)
> +	    /* Accept arbitrary number of leading zeros.  */
> +	    length = 16;
> +	  else if (length == 8)
> +	    {
> +	      /* Make sure we detect overflows.  */
> +	      result |= 0x8000000;
> +	      ++length;
> +	    }

Is length set to 16 above so that this case triggers once we have read
8 digits after the leading zeroes?

> +	}
> +
>         result = (result << 4) + hex_value (c);
>       }
> -  while (--length && str < limit);
> +  while (--length);
> +
> +  if (delimited
> +      && str < limit
> +      && *str == '}'
> +      && (length != 32 || !identifier_pos))
> +    {
> +      if (length == 32)
> +	cpp_error (pfile, CPP_DL_ERROR,
> +		   "empty delimited escape sequence");
> +      else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> +	       && CPP_OPTION (pfile, cpp_pedantic))
> +	cpp_error (pfile, CPP_DL_PEDWARN,
> +		   "delimited escape sequences are only valid in C++23");
> +      str++;
> +      length = 0;
> +      delimited = false;
> +      if (loc_reader)
> +	char_range->m_finish = loc_reader->get_next ().m_finish;

Here and in other functions, the pattern of incrementing the input pointer
and updating m_finish seems like it should be a macro?

> +    }
>   
>     /* Partial UCNs are not valid in strings, but decompose into
>        multiple tokens in identifiers, so we can't give a helpful
> @@ -1132,9 +1177,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>     *pstr = str;
>     if (length)
>       {
> -      cpp_error (pfile, CPP_DL_ERROR,
> -		 "incomplete universal character name %.*s",
> -		 (int) (str - base), base);
> +      if (!delimited)
> +	cpp_error (pfile, CPP_DL_ERROR,
> +		   "incomplete universal character name %.*s",
> +		   (int) (str - base), base);
> +      else
> +	cpp_error (pfile, CPP_DL_ERROR,
> +		   "'\\u{' not terminated with '}' after %.*s",
> +		   (int) (str - base), base);
>         result = 1;
>       }
>     /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
> @@ -1392,6 +1442,8 @@ convert_hex (cpp_reader *pfile, const uc
>     int digits_found = 0;
>     size_t width = cvt.width;
>     size_t mask = width_to_mask (width);
> +  bool delimited = false;
> +  const uchar *base = from - 1;
>   
>     /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
>     gcc_assert ((loc_reader != NULL) == (ranges != NULL));
> @@ -1407,6 +1459,14 @@ convert_hex (cpp_reader *pfile, const uc
>     if (loc_reader)
>       char_range.m_finish = loc_reader->get_next ().m_finish;
>   
> +  if (from < limit && *from == '{')
> +    {
> +      delimited = true;
> +      from++;
> +      if (loc_reader)
> +	char_range.m_finish = loc_reader->get_next ().m_finish;
> +    }
> +
>     while (from < limit)
>       {
>         c = *from;
> @@ -1420,12 +1480,37 @@ convert_hex (cpp_reader *pfile, const uc
>         digits_found = 1;
>       }
>   
> +  if (delimited && from < limit && *from == '}')
> +    {
> +      from++;
> +      if (!digits_found)
> +	{
> +	  cpp_error (pfile, CPP_DL_ERROR,
> +		     "empty delimited escape sequence");
> +	  return from;
> +	}
> +     else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> +	      && CPP_OPTION (pfile, cpp_pedantic))
> +	cpp_error (pfile, CPP_DL_PEDWARN,
> +		   "delimited escape sequences are only valid in C++23");
> +      delimited = false;
> +      if (loc_reader)
> +	char_range.m_finish = loc_reader->get_next ().m_finish;
> +    }
> +
>     if (!digits_found)
>       {
>         cpp_error (pfile, CPP_DL_ERROR,
>   		 "\\x used with no following hex digits");
>         return from;
>       }
> +  else if (delimited)
> +    {
> +      cpp_error (pfile, CPP_DL_ERROR,
> +		 "'\\x{' not terminated with '}' after %.*s",
> +		 (int) (from - base), base);
> +      return from;
> +    }
>   
>     if (overflow | (n != (n & mask)))
>       {
> @@ -1459,13 +1544,31 @@ convert_oct (cpp_reader *pfile, const uc
>   	     cpp_substring_ranges *ranges)
>   {
>     size_t count = 0;
> -  cppchar_t c, n = 0;
> +  cppchar_t c, n = 0, overflow = 0;
>     size_t width = cvt.width;
>     size_t mask = width_to_mask (width);
> +  bool delimited = false;
> +  const uchar *base = from - 1;
>   
>     /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
>     gcc_assert ((loc_reader != NULL) == (ranges != NULL));
>   
> +  if (from < limit && *from == 'o')
> +    {
> +      from++;
> +      if (loc_reader)
> +	char_range.m_finish = loc_reader->get_next ().m_finish;
> +      if (from == limit || *from != '{')
> +	cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
> +      else
> +	{
> +	  from++;
> +	  if (loc_reader)
> +	    char_range.m_finish = loc_reader->get_next ().m_finish;
> +	  delimited = true;
> +	}
> +    }
> +
>     while (from < limit && count++ < 3)
>       {
>         c = *from;
> @@ -1474,10 +1577,42 @@ convert_oct (cpp_reader *pfile, const uc
>         from++;
>         if (loc_reader)
>   	char_range.m_finish = loc_reader->get_next ().m_finish;
> +      if (delimited)
> +	{
> +	  count = 2;
> +	  overflow |= n ^ (n << 3 >> 3);
> +	}
>         n = (n << 3) + c - '0';
>       }
>   
> -  if (n != (n & mask))
> +  if (delimited)
> +    {
> +      if (from < limit && *from == '}')
> +	{
> +	  from++;
> +	  if (count == 1)
> +	    {
> +	      cpp_error (pfile, CPP_DL_ERROR,
> +			 "empty delimited escape sequence");
> +	      return from;
> +	    }
> +	  else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> +		   && CPP_OPTION (pfile, cpp_pedantic))
> +	    cpp_error (pfile, CPP_DL_PEDWARN,
> +		       "delimited escape sequences are only valid in C++23");
> +	  if (loc_reader)
> +	    char_range.m_finish = loc_reader->get_next ().m_finish;
> +	}
> +      else
> +	{
> +	  cpp_error (pfile, CPP_DL_ERROR,
> +		     "'\\o{' not terminated with '}' after %.*s",
> +		     (int) (from - base), base);
> +	  return from;
> +	}
> +    }
> +
> +  if (overflow | (n != (n & mask)))
>       {
>         cpp_error (pfile, CPP_DL_PEDWARN,
>   		 "octal escape sequence out of range");
> @@ -1535,6 +1670,7 @@ convert_escape (cpp_reader *pfile, const
>   
>       case '0':  case '1':  case '2':  case '3':
>       case '4':  case '5':  case '6':  case '7':
> +    case 'o':
>         return convert_oct (pfile, from, limit, tbuf, cvt,
>   			  char_range, loc_reader, ranges);
>   
> @@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *p
>   	cppchar_t value = 0;
>   	size_t bufleft = len - (bufp - buf);
>   	int rval;
> +	bool delimited = false;
>   
>   	idp += 2;
> +	if (length == 4 && id[idp] == '{')
> +	  {
> +	    delimited = true;
> +	    idp++;
> +	  }
>   	while (length && idp < len && ISXDIGIT (id[idp]))
>   	  {
>   	    value = (value << 4) + hex_value (id[idp]);
>   	    idp++;
> -	    length--;
> +	    if (!delimited)
> +	      length--;
>   	  }
> -	idp--;
> +	if (!delimited)
> +	  idp--;

Don't we need to check that the first non-xdigit is a }?

>   
>   	/* Special case for EBCDIC: if the identifier contains
>   	   a '$' specified using a UCN, translate it to EBCDIC.  */
> --- libcpp/lex.cc.jj	2022-05-23 10:59:06.235591348 +0200
> +++ libcpp/lex.cc	2022-08-16 11:57:53.772823661 +0200
> @@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const
>   /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
>   
>   static bidi::kind
> -get_bidi_ucn_1 (const unsigned char *p, bool is_U)
> +get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
>   {
>     /* 6.4.3 Universal Character Names
>         \u hex-quad
>         \U hex-quad hex-quad
> +      \u { simple-hexadecimal-digit-sequence }
>        where \unnnn means \U0000nnnn.  */
>   
> +  *end = p + 4;
>     if (is_U)
>       {
>         if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
>   	return bidi::kind::NONE;
>         /* Skip 4B so we can treat \u and \U the same below.  */
>         p += 4;
> +      *end += 4;
> +    }
> +  else if (p[0] == '{')
> +    {
> +      p++;
> +      while (*p == '0')
> +	p++;
> +      if (p[0] != '2'
> +	  || p[1] != '0'
> +	  || !ISXDIGIT (p[2])
> +	  || !ISXDIGIT (p[3])
> +	  || p[4] != '}')
> +	return bidi::kind::NONE;
> +      *end = p + 5;
>       }
>   
>     /* All code points we are looking for start with 20xx.  */
> @@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p,
>      If the kind is not NONE, write the location to *OUT.*/
>   
>   static bidi::kind
> -get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
> +get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
>   	      location_t *out)
>   {
> -  bidi::kind result = get_bidi_ucn_1 (p, is_U);
> +  const unsigned char *end;
> +  bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
>     if (result != bidi::kind::NONE)
>       {
>         const unsigned char *start = p - 2;
> -      size_t num_bytes = 2 + (is_U ? 8 : 4);
> +      size_t num_bytes = end - start;
>         *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
>       }
>     return result;
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c	2022-08-16 12:18:42.235477632 +0200
> @@ -0,0 +1,92 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do run } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +#ifndef __cplusplus
> +#include <wchar.h>
> +typedef __CHAR16_TYPE__ char16_t;
> +typedef __CHAR32_TYPE__ char32_t;
> +#endif
> +
> +const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
> +const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
> +const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
> +const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
> +const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
> +const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
> +const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
> +const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
> +const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
> +#ifdef __cplusplus
> +const char *j = "\u{34}\u{000000000000000003D}";
> +#endif
> +const char *k = "\x{34}\x{000000000000000003D}";
> +const char *l = "\o{34}\o{000000000000000176}";
> +
> +#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
> +    || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
> +    || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
> +    || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
> +    || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
> +    || u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
> +    || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
> +    || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
> +    || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
> +    || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
> +    || '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
> +#error Bad
> +#endif
> +#ifdef __cplusplus
> +#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
> +#error Bad
> +#endif
> +#endif
> +
> +int
> +main ()
> +{
> +  if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
> +      || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
> +      || a[2] != a[0]
> +      || a[3] != a[1]
> +      || b[0] != U'\x1234' || b[0] != U'\x{001234}'
> +      || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
> +      || b[2] != b[0]
> +      || c[0] != U'\x29c' || c[0] != U'\o{001234}'
> +      || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
> +      || c[2] != c[1])
> +    __builtin_abort ();
> +  if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
> +      || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
> +      || d[2] != d[0]
> +      || e[0] != u'\x1234' || e[0] != u'\x{001234}'
> +      || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
> +      || e[2] != e[0]
> +      || f[0] != u'\x29c' || f[0] != u'\o{001234}'
> +      || f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
> +      || f[2] != f[1])
> +    __builtin_abort ();
> +  if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
> +      || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
> +      || g[2] != g[0]
> +      || h[0] != L'\x1234' || h[0] != L'\x{001234}'
> +      || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
> +      || h[2] != h[0]
> +      || i[0] != L'\x29c' || i[0] != L'\o{001234}'
> +      || i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
> +      || i[2] != i[1])
> +    __builtin_abort ();
> +#ifdef __cplusplus
> +  if (j[0] != '\u0034' || j[0] != '\u{034}'
> +      || j[1] != '\U0000003D' || j[1] != '\u{000003d}')
> +    __builtin_abort ();
> +#endif
> +  if (k[0] != '\x034' || k[0] != '\x{0034}'
> +      || k[1] != '\x3D' || k[1] != '\x{3d}'
> +      || l[0] != '\x1c' || l[0] != '\o{0034}'
> +      || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c	2022-08-16 12:18:58.807260607 +0200
> @@ -0,0 +1,18 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +int jalape\u{f1}o = 42;
> +
> +int
> +caf\u{000e9} (void)
> +{
> +  return jalape\u00F1o;
> +}
> +
> +int
> +test (void)
> +{
> +  return caf\u00e9 ();
> +}
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c.jj	2022-08-16 12:18:19.308777922 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c	2022-08-16 12:41:23.693648138 +0200
> @@ -0,0 +1,33 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +#ifndef __cplusplus
> +typedef __CHAR32_TYPE__ char32_t;
> +#endif
> +
> +const char32_t *a = U"\u{}";				/* { dg-error "empty delimited escape sequence" } */
> +							/* { dg-error "is not a valid universal character" "" { target c } .-1 } */
> +const char32_t *b = U"\u{12" "34}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *c = U"\u{0000ffffffff}";		/* { dg-error "is not a valid universal character" } */
> +const char32_t *d = U"\u{010000edcb}";			/* { dg-error "is not a valid universal character" } */
> +const char32_t *e = U"\u{02000000000000000000edcb}";	/* { dg-error "is not a valid universal character" } */
> +const char32_t *f = U"\u{123ghij}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *g = U"\u{123.}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *h = U"\u{.}";				/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *i = U"\x{}";				/* { dg-error "empty delimited escape sequence" } */
> +const char32_t *j = U"\x{12" "34}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *k = U"\x{0000ffffffff}";
> +const char32_t *l = U"\x{010000edcb}";			/* { dg-warning "hex escape sequence out of range" } */
> +const char32_t *m = U"\x{02000000000000000000edcb}";	/* { dg-warning "hex escape sequence out of range" } */
> +const char32_t *n = U"\x{123ghij}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *o = U"\x{123.}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *p = U"\o{}";				/* { dg-error "empty delimited escape sequence" } */
> +const char32_t *q = U"\o{12" "34}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> +const char32_t *r = U"\o{0000037777777777}";
> +const char32_t *s = U"\o{040000166713}";		/* { dg-warning "octal escape sequence out of range" } */
> +const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
> +const char32_t *u = U"\o{1238}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> +const char32_t *v = U"\o{.}";				/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> --- gcc/testsuite/c-c++-common/Wbidi-chars-24.c.jj	2022-08-16 12:03:19.350561676 +0200
> +++ gcc/testsuite/c-c++-common/Wbidi-chars-24.c	2022-08-16 12:06:46.381851525 +0200
> @@ -0,0 +1,28 @@
> +/* PR preprocessor/103026 */
> +/* { dg-do compile } */
> +/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
> +/* Test nesting of bidi chars in various contexts.  */
> +
> +void
> +g1 ()
> +{
> +  const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +}
> +
> +int A\u{202a}B\u{2069}C;
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +int a\u{00000202b}B\u{000000002069}c;
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
> +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c	2022-08-16 12:46:56.508291006 +0200
> @@ -0,0 +1,10 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
> +
> +typedef __CHAR32_TYPE__ char32_t;
> +
> +const char32_t *a = U"\u{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
> +const char32_t *b = U"\x{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
> +const char32_t *c = U"\o{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
> --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
> +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c	2022-08-16 12:47:05.955167423 +0200
> @@ -0,0 +1,10 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
> +
> +typedef __CHAR32_TYPE__ char32_t;
> +
> +const char32_t *a = U"\u{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
> +const char32_t *b = U"\x{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
> +const char32_t *c = U"\o{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
> --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C.jj	2022-08-16 12:46:43.368462901 +0200
> +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C	2022-08-16 12:49:21.532393786 +0200
> @@ -0,0 +1,8 @@
> +// P2290R3 - Delimited escape sequences
> +// { dg-do compile { target c++11 } }
> +// { dg-require-effective-target wchar }
> +// { dg-options "-pedantic" }
> +
> +const char32_t *a = U"\u{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *b = U"\x{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *c = U"\o{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C.jj	2022-08-16 12:46:46.281424798 +0200
> +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C	2022-08-16 12:49:33.761233803 +0200
> @@ -0,0 +1,8 @@
> +// P2290R3 - Delimited escape sequences
> +// { dg-do compile { target c++11 } }
> +// { dg-require-effective-target wchar }
> +// { dg-options "-pedantic-errors" }
> +
> +const char32_t *a = U"\u{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *b = U"\x{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *c = U"\o{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> 
> 	Jakub
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
  2022-08-17 20:47 ` Jason Merrill
@ 2022-08-17 21:19   ` Jakub Jelinek
  2022-08-18  2:22     ` Jason Merrill
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2022-08-17 21:19 UTC (permalink / raw)
  To: Jason Merrill; +Cc: Marek Polacek, Joseph S. Myers, gcc-patches

On Wed, Aug 17, 2022 at 04:47:19PM -0400, Jason Merrill via Gcc-patches wrote:
> > +	  length = 32;
> 
> /* Magic value to indicate no digits seen.  */

Indeed, will add the comment.

> > +	  delimited = true;
> > +	  if (loc_reader)
> > +	    char_range->m_finish = loc_reader->get_next ().m_finish;
> > +	}
> > +    }
> >     else if (str[-1] == 'U')
> >       length = 8;
> >     else
> > @@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const
> >     result = 0;
> >     do
> >       {
> > +      if (str == limit)
> > +	break;
> >         c = *str;
> >         if (!ISXDIGIT (c))
> >   	break;
> > @@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const
> >   	  gcc_assert (char_range);
> >   	  char_range->m_finish = loc_reader->get_next ().m_finish;
> >   	}
> > +      if (delimited)
> > +	{
> > +	  if (!result)
> > +	    /* Accept arbitrary number of leading zeros.  */
> > +	    length = 16;
> > +	  else if (length == 8)
> > +	    {
> > +	      /* Make sure we detect overflows.  */
> > +	      result |= 0x8000000;
> > +	      ++length;
> > +	    }
> 
> 16 above so that this case happens after we read 8 digits after leading
> zeroes?

Another magic value, less than the no-digits-seen one and greater than 8,
so that it can count 8 digits starting with the first non-zero one, after
which the overflow flag is or-ed in.  The intent is not to break the loop
if there are further digits, just that there will be overflow.
Another option would be those overflow |= n ^ (n << 4 >> 4);
tests that convert_hex does and just making sure length is never decremented
(except we need a way to distinguish between \u{} and at least one digit).

> > +      if (loc_reader)
> > +	char_range->m_finish = loc_reader->get_next ().m_finish;
> 
> Here and in other functions, the pattern of increment the input pointer and
> update m_finish seems like it should be a macro?

Perhaps or inline function.  Before my patch, there are 5 such ifs
(some with char_range.m_finish and others char_range->m_finish),
the patch adds another 7 such spots.

> > @@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *p
> >   	cppchar_t value = 0;
> >   	size_t bufleft = len - (bufp - buf);
> >   	int rval;
> > +	bool delimited = false;
> >   	idp += 2;
> > +	if (length == 4 && id[idp] == '{')
> > +	  {
> > +	    delimited = true;
> > +	    idp++;
> > +	  }
> >   	while (length && idp < len && ISXDIGIT (id[idp]))
> >   	  {
> >   	    value = (value << 4) + hex_value (id[idp]);
> >   	    idp++;
> > -	    length--;
> > +	    if (!delimited)
> > +	      length--;
> >   	  }
> > -	idp--;
> > +	if (!delimited)
> > +	  idp--;
> 
> Don't we need to check that the first non-xdigit is a }?

The comments and my understanding of the code say that we first
check what is a valid identifier and the above is only called on
a valid identifier.  So, if it were a delimited \u{ not terminated
with }, then it would fail forms_identifier_p and wouldn't be included
in the range.  Thus e.g. the ISXDIGIT (id[idp]) test is probably not needed
unless delimited is true, because we've checked earlier that it has 4 or 8
hex digits.
But sure, if you want a id[idp] == '}' test or assertion, it can be
added.

	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
  2022-08-17 21:19   ` Jakub Jelinek
@ 2022-08-18  2:22     ` Jason Merrill
  2022-08-18  8:17       ` [PATCH] libcpp, v2: " Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Jason Merrill @ 2022-08-18  2:22 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Marek Polacek, Joseph S. Myers, gcc-patches

On 8/17/22 14:19, Jakub Jelinek wrote:
> On Wed, Aug 17, 2022 at 04:47:19PM -0400, Jason Merrill via Gcc-patches wrote:
>>> +	  length = 32;
>>
>> /* Magic value to indicate no digits seen.  */
> 
> Indeed, will add the comment.
> 
>>> +	  delimited = true;
>>> +	  if (loc_reader)
>>> +	    char_range->m_finish = loc_reader->get_next ().m_finish;
>>> +	}
>>> +    }
>>>      else if (str[-1] == 'U')
>>>        length = 8;
>>>      else
>>> @@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>>>      result = 0;
>>>      do
>>>        {
>>> +      if (str == limit)
>>> +	break;
>>>          c = *str;
>>>          if (!ISXDIGIT (c))
>>>    	break;
>>> @@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>>>    	  gcc_assert (char_range);
>>>    	  char_range->m_finish = loc_reader->get_next ().m_finish;
>>>    	}
>>> +      if (delimited)
>>> +	{
>>> +	  if (!result)
>>> +	    /* Accept arbitrary number of leading zeros.  */
>>> +	    length = 16;
>>> +	  else if (length == 8)
>>> +	    {
>>> +	      /* Make sure we detect overflows.  */
>>> +	      result |= 0x8000000;
>>> +	      ++length;
>>> +	    }
>>
>> 16 above so that this case happens after we read 8 digits after leading
>> zeroes?
> 
> Another magic value less than the no digits seen one and >8,
> so that it can count 8 digits with the first non-zero one after
> which to or in the overflow flag.  The intent is not to break the loop
> if there are further digits, just that there will be overflow.
> Another option would be those overflow |= n ^ (n << 4 >> 4);
> tests that convert_hex does and just making sure length is never decremented
> (except we need a way to distinguish between \u{} and at least one digit).

This way is fine, could just use more comment.

>>> +      if (loc_reader)
>>> +	char_range->m_finish = loc_reader->get_next ().m_finish;
>>
>> Here and in other functions, the pattern of increment the input pointer and
>> update m_finish seems like it should be a macro?
> 
> Perhaps or inline function.  Before my patch, there are 5 such ifs
> (some with char_range.m_finish and others char_range->m_finish),
> the patch adds another 7 such spots.

Either way is fine.

>>> @@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *p
>>>    	cppchar_t value = 0;
>>>    	size_t bufleft = len - (bufp - buf);
>>>    	int rval;
>>> +	bool delimited = false;
>>>    	idp += 2;
>>> +	if (length == 4 && id[idp] == '{')
>>> +	  {
>>> +	    delimited = true;
>>> +	    idp++;
>>> +	  }
>>>    	while (length && idp < len && ISXDIGIT (id[idp]))
>>>    	  {
>>>    	    value = (value << 4) + hex_value (id[idp]);
>>>    	    idp++;
>>> -	    length--;
>>> +	    if (!delimited)
>>> +	      length--;
>>>    	  }
>>> -	idp--;
>>> +	if (!delimited)
>>> +	  idp--;
>>
>> Don't we need to check that the first non-xdigit is a }?
> 
> The comments and my understanding of the code say that we first
> check what is a valid identifier and the above is only called on
> a valid identifier.  So, if it would be delimited \u{ not terminated
> with }, then it would fail forms_identifier_p and wouldn't be included
> in the range.  Thus e.g. the ISXDIGIT (id[id]) test is probably not needed
> unless delimited is true because we've checked earlier that it has 4 or 8
> hex digits.
> But sure, if you want a id[idp] == '}' test or assertion, it can be
> added.

OK, a comment mentioning this should be sufficient.

Jason


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] libcpp, v2: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
  2022-08-18  2:22     ` Jason Merrill
@ 2022-08-18  8:17       ` Jakub Jelinek
  2022-08-19  0:34         ` Jason Merrill
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2022-08-18  8:17 UTC (permalink / raw)
  To: Jason Merrill; +Cc: Marek Polacek, Joseph S. Myers, gcc-patches

On Wed, Aug 17, 2022 at 10:22:03PM -0400, Jason Merrill wrote:
> OK, a comment mentioning this should be sufficient.

Here is an updated patch with those changes in.
So far successfully tested with
GXX_TESTSUITE_STDS=98,11,14,17,20,2b make -j32 -k check-gcc check-g++ RUNTESTFLAGS="dg.exp='Wbidi* cpp/*' cpp.exp"
ok if it passes full bootstrap/regtest tonight?

2022-08-18  Jakub Jelinek  <jakub@redhat.com>

	PR c++/106645
libcpp/
	* include/cpplib.h (struct cpp_options): Implement
	P2290R3 - Delimited escape sequences.  Add delimited_escape_seqs
	member.
	* init.cc (struct lang_flags): Likewise.
	(lang_defaults): Add delim column.
	(cpp_set_lang): Copy over delimited_escape_seqs.
	* charset.cc (extend_char_range): New function.
	(_cpp_valid_ucn): Use it.  Handle delimited escape sequences.
	(convert_hex): Likewise.
	(convert_oct): Likewise.
	(convert_ucn): Use extend_char_range.
	(convert_escape): Call convert_oct even for \o.
	(_cpp_interpret_identifier): Handle delimited escape sequences.
	* lex.cc (get_bidi_ucn_1): Likewise.  Add end argument, fill it in.
	(get_bidi_ucn): Adjust get_bidi_ucn_1 caller.  Use end argument to
	compute num_bytes.
gcc/testsuite/
	* c-c++-common/cpp/delimited-escape-seq-1.c: New test.
	* c-c++-common/cpp/delimited-escape-seq-2.c: New test.
	* c-c++-common/cpp/delimited-escape-seq-3.c: New test.
	* c-c++-common/Wbidi-chars-24.c: New test.
	* gcc.dg/cpp/delimited-escape-seq-1.c: New test.
	* gcc.dg/cpp/delimited-escape-seq-2.c: New test.
	* g++.dg/cpp/delimited-escape-seq-1.C: New test.
	* g++.dg/cpp/delimited-escape-seq-2.C: New test.

--- libcpp/include/cpplib.h.jj	2022-08-10 09:06:53.268209449 +0200
+++ libcpp/include/cpplib.h	2022-08-15 19:32:53.743213474 +0200
@@ -519,6 +519,9 @@ struct cpp_options
   /* Nonzero for C++23 size_t literals.  */
   unsigned char size_t_literals;
 
+  /* Nonzero for C++23 delimited escape sequences.  */
+  unsigned char delimited_escape_seqs;
+
   /* Holds the name of the target (execution) character set.  */
   const char *narrow_charset;
 
--- libcpp/init.cc.jj	2022-08-10 09:06:53.268209449 +0200
+++ libcpp/init.cc	2022-08-15 16:09:01.403020485 +0200
@@ -96,34 +96,35 @@ struct lang_flags
   char dfp_constants;
   char size_t_literals;
   char elifdef;
+  char delimited_escape_seqs;
 };
 
 static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */
-  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1 },
-  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
-  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1 },
-  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
-  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
-  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
-  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0 },
-  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0 },
-  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
-  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0 },
-  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
-  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
-  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
-  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
-  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0 }
+{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */
+  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      0 },
+  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
+  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1,      0 },
+  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0 },
+  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
+  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0 },
+  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
+  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
+  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
+  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
+  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0 }
 };
 
 /* Sets internal flags correctly for a given language.  */
@@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_
   CPP_OPTION (pfile, dfp_constants)		 = l->dfp_constants;
   CPP_OPTION (pfile, size_t_literals)		 = l->size_t_literals;
   CPP_OPTION (pfile, elifdef)			 = l->elifdef;
+  CPP_OPTION (pfile, delimited_escape_seqs)	 = l->delimited_escape_seqs;
 }
 
 /* Initialize library global state.  */
--- libcpp/charset.cc.jj	2022-08-15 12:52:43.213902801 +0200
+++ libcpp/charset.cc	2022-08-18 10:01:22.569112418 +0200
@@ -1036,6 +1036,19 @@ ucn_valid_in_identifier (cpp_reader *pfi
   return 1;
 }
 
+/* Increment char_range->m_finish by a single character.  */
+
+static void
+extend_char_range (source_range *char_range,
+		   cpp_string_location_reader *loc_reader)
+{
+  if (loc_reader)
+    {
+      gcc_assert (char_range);
+      char_range->m_finish = loc_reader->get_next ().m_finish;
+    }
+}
+
 /* [lex.charset]: The character designated by the universal character
    name \UNNNNNNNN is that character whose character short name in
    ISO/IEC 10646 is NNNNNNNN; the character designated by the
@@ -1081,6 +1094,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const
   unsigned int length;
   const uchar *str = *pstr;
   const uchar *base = str - 2;
+  bool delimited = false;
 
   if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
     cpp_error (pfile, CPP_DL_WARNING,
@@ -1095,7 +1109,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const
 	         (int) str[-1]);
 
   if (str[-1] == 'u')
-    length = 4;
+    {
+      length = 4;
+      if (str < limit && *str == '{')
+	{
+	  str++;
+	  /* Magic value to indicate no digits seen.  */
+	  length = 32;
+	  delimited = true;
+	  extend_char_range (char_range, loc_reader);
+	}
+    }
   else if (str[-1] == 'U')
     length = 8;
   else
@@ -1107,18 +1131,53 @@ _cpp_valid_ucn (cpp_reader *pfile, const
   result = 0;
   do
     {
+      if (str == limit)
+	break;
       c = *str;
       if (!ISXDIGIT (c))
 	break;
       str++;
-      if (loc_reader)
+      extend_char_range (char_range, loc_reader);
+      if (delimited)
 	{
-	  gcc_assert (char_range);
-	  char_range->m_finish = loc_reader->get_next ().m_finish;
+	  if (!result)
+	    /* Accept arbitrary number of leading zeros.
+	       16 is another magic value, smaller than 32 above
+	       and bigger than 8, so that upon encountering first
+	       non-zero digit we can count 8 digits and after that
+	       or in overflow bit and ensure length doesn't decrease
+	       to 0, as delimited escape sequence doesn't have upper
+	       bound on the number of hex digits.  */
+	    length = 16;
+	  else if (length == 16 - 8)
+	    {
+	      /* Make sure we detect overflows.  */
+	      result |= 0x8000000;
+	      ++length;
+	    }
 	}
+
       result = (result << 4) + hex_value (c);
     }
-  while (--length && str < limit);
+  while (--length);
+
+  if (delimited
+      && str < limit
+      && *str == '}'
+      && (length != 32 || !identifier_pos))
+    {
+      if (length == 32)
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "empty delimited escape sequence");
+      else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+	       && CPP_OPTION (pfile, cpp_pedantic))
+	cpp_error (pfile, CPP_DL_PEDWARN,
+		   "delimited escape sequences are only valid in C++23");
+      str++;
+      length = 0;
+      delimited = false;
+      extend_char_range (char_range, loc_reader);
+    }
 
   /* Partial UCNs are not valid in strings, but decompose into
      multiple tokens in identifiers, so we can't give a helpful
@@ -1132,9 +1191,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const
   *pstr = str;
   if (length)
     {
-      cpp_error (pfile, CPP_DL_ERROR,
-		 "incomplete universal character name %.*s",
-		 (int) (str - base), base);
+      if (!delimited)
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "incomplete universal character name %.*s",
+		   (int) (str - base), base);
+      else
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "'\\u{' not terminated with '}' after %.*s",
+		   (int) (str - base), base);
       result = 1;
     }
   /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
@@ -1212,9 +1276,8 @@ convert_ucn (cpp_reader *pfile, const uc
 
   from++;  /* Skip u/U.  */
 
-  if (loc_reader)
-    /* The u/U is part of the spelling of this character.  */
-    char_range.m_finish = loc_reader->get_next ().m_finish;
+  /* The u/U is part of the spelling of this character.  */
+  extend_char_range (&char_range, loc_reader);
 
   _cpp_valid_ucn (pfile, &from, limit, 0, &nst,
 		  &ucn, &char_range, loc_reader);
@@ -1392,6 +1455,8 @@ convert_hex (cpp_reader *pfile, const uc
   int digits_found = 0;
   size_t width = cvt.width;
   size_t mask = width_to_mask (width);
+  bool delimited = false;
+  const uchar *base = from - 1;
 
   /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
   gcc_assert ((loc_reader != NULL) == (ranges != NULL));
@@ -1404,8 +1469,14 @@ convert_hex (cpp_reader *pfile, const uc
   from++;
 
   /* The 'x' is part of the spelling of this character.  */
-  if (loc_reader)
-    char_range.m_finish = loc_reader->get_next ().m_finish;
+  extend_char_range (&char_range, loc_reader);
+
+  if (from < limit && *from == '{')
+    {
+      delimited = true;
+      from++;
+      extend_char_range (&char_range, loc_reader);
+    }
 
   while (from < limit)
     {
@@ -1413,19 +1484,42 @@ convert_hex (cpp_reader *pfile, const uc
       if (! hex_p (c))
 	break;
       from++;
-      if (loc_reader)
-	char_range.m_finish = loc_reader->get_next ().m_finish;
+      extend_char_range (&char_range, loc_reader);
       overflow |= n ^ (n << 4 >> 4);
       n = (n << 4) + hex_value (c);
       digits_found = 1;
     }
 
+  if (delimited && from < limit && *from == '}')
+    {
+      from++;
+      if (!digits_found)
+	{
+	  cpp_error (pfile, CPP_DL_ERROR,
+		     "empty delimited escape sequence");
+	  return from;
+	}
+     else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+	      && CPP_OPTION (pfile, cpp_pedantic))
+	cpp_error (pfile, CPP_DL_PEDWARN,
+		   "delimited escape sequences are only valid in C++23");
+      delimited = false;
+      extend_char_range (&char_range, loc_reader);
+    }
+
   if (!digits_found)
     {
       cpp_error (pfile, CPP_DL_ERROR,
 		 "\\x used with no following hex digits");
       return from;
     }
+  else if (delimited)
+    {
+      cpp_error (pfile, CPP_DL_ERROR,
+		 "'\\x{' not terminated with '}' after %.*s",
+		 (int) (from - base), base);
+      return from;
+    }
 
   if (overflow | (n != (n & mask)))
     {
@@ -1459,25 +1553,71 @@ convert_oct (cpp_reader *pfile, const uc
 	     cpp_substring_ranges *ranges)
 {
   size_t count = 0;
-  cppchar_t c, n = 0;
+  cppchar_t c, n = 0, overflow = 0;
   size_t width = cvt.width;
   size_t mask = width_to_mask (width);
+  bool delimited = false;
+  const uchar *base = from - 1;
 
   /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
   gcc_assert ((loc_reader != NULL) == (ranges != NULL));
 
+  if (from < limit && *from == 'o')
+    {
+      from++;
+      extend_char_range (&char_range, loc_reader);
+      if (from == limit || *from != '{')
+	cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
+      else
+	{
+	  from++;
+	  extend_char_range (&char_range, loc_reader);
+	  delimited = true;
+	}
+    }
+
   while (from < limit && count++ < 3)
     {
       c = *from;
       if (c < '0' || c > '7')
 	break;
       from++;
-      if (loc_reader)
-	char_range.m_finish = loc_reader->get_next ().m_finish;
+      extend_char_range (&char_range, loc_reader);
+      if (delimited)
+	{
+	  count = 2;
+	  overflow |= n ^ (n << 3 >> 3);
+	}
       n = (n << 3) + c - '0';
     }
 
-  if (n != (n & mask))
+  if (delimited)
+    {
+      if (from < limit && *from == '}')
+	{
+	  from++;
+	  if (count == 1)
+	    {
+	      cpp_error (pfile, CPP_DL_ERROR,
+			 "empty delimited escape sequence");
+	      return from;
+	    }
+	  else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+		   && CPP_OPTION (pfile, cpp_pedantic))
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "delimited escape sequences are only valid in C++23");
+	  extend_char_range (&char_range, loc_reader);
+	}
+      else
+	{
+	  cpp_error (pfile, CPP_DL_ERROR,
+		     "'\\o{' not terminated with '}' after %.*s",
+		     (int) (from - base), base);
+	  return from;
+	}
+    }
+
+  if (overflow | (n != (n & mask)))
     {
       cpp_error (pfile, CPP_DL_PEDWARN,
 		 "octal escape sequence out of range");
@@ -1535,6 +1675,7 @@ convert_escape (cpp_reader *pfile, const
 
     case '0':  case '1':  case '2':  case '3':
     case '4':  case '5':  case '6':  case '7':
+    case 'o':
       return convert_oct (pfile, from, limit, tbuf, cvt,
 			  char_range, loc_reader, ranges);
 
@@ -2119,15 +2260,27 @@ _cpp_interpret_identifier (cpp_reader *p
 	cppchar_t value = 0;
 	size_t bufleft = len - (bufp - buf);
 	int rval;
+	bool delimited = false;
 
 	idp += 2;
+	if (length == 4 && id[idp] == '{')
+	  {
+	    delimited = true;
+	    idp++;
+	  }
 	while (length && idp < len && ISXDIGIT (id[idp]))
 	  {
 	    value = (value << 4) + hex_value (id[idp]);
 	    idp++;
-	    length--;
+	    if (!delimited)
+	      length--;
 	  }
-	idp--;
+	if (!delimited)
+	  idp--;
+        /* else
+	     assert (id[idp] == '}');
+	   As the caller ensures it is a valid identifier, if it is
+	   delimited escape sequence, it must be terminated by }.  */
 
 	/* Special case for EBCDIC: if the identifier contains
 	   a '$' specified using a UCN, translate it to EBCDIC.  */
--- libcpp/lex.cc.jj	2022-05-23 10:59:06.235591348 +0200
+++ libcpp/lex.cc	2022-08-16 11:57:53.772823661 +0200
@@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const
 /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
 
 static bidi::kind
-get_bidi_ucn_1 (const unsigned char *p, bool is_U)
+get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
 {
   /* 6.4.3 Universal Character Names
       \u hex-quad
       \U hex-quad hex-quad
+      \u { simple-hexadecimal-digit-sequence }
      where \unnnn means \U0000nnnn.  */
 
+  *end = p + 4;
   if (is_U)
     {
       if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
 	return bidi::kind::NONE;
       /* Skip 4B so we can treat \u and \U the same below.  */
       p += 4;
+      *end += 4;
+    }
+  else if (p[0] == '{')
+    {
+      p++;
+      while (*p == '0')
+	p++;
+      if (p[0] != '2'
+	  || p[1] != '0'
+	  || !ISXDIGIT (p[2])
+	  || !ISXDIGIT (p[3])
+	  || p[4] != '}')
+	return bidi::kind::NONE;
+      *end = p + 5;
     }
 
   /* All code points we are looking for start with 20xx.  */
@@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p,
    If the kind is not NONE, write the location to *OUT.*/
 
 static bidi::kind
-get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
 	      location_t *out)
 {
-  bidi::kind result = get_bidi_ucn_1 (p, is_U);
+  const unsigned char *end;
+  bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
   if (result != bidi::kind::NONE)
     {
       const unsigned char *start = p - 2;
-      size_t num_bytes = 2 + (is_U ? 8 : 4);
+      size_t num_bytes = end - start;
       *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
     }
   return result;
--- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
+++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c	2022-08-16 12:18:42.235477632 +0200
@@ -0,0 +1,92 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do run } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
+const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
+const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
+const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
+const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
+const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
+const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
+const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
+const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
+#ifdef __cplusplus
+const char *j = "\u{34}\u{000000000000000003D}";
+#endif
+const char *k = "\x{34}\x{000000000000000003D}";
+const char *l = "\o{34}\o{000000000000000176}";
+
+#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
+    || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
+    || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
+    || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
+    || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
+    || u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
+    || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
+    || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
+    || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
+    || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
+    || '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
+#error Bad
+#endif
+#ifdef __cplusplus
+#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
+#error Bad
+#endif
+#endif
+
+int
+main ()
+{
+  if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
+      || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
+      || a[2] != a[0]
+      || a[3] != a[1]
+      || b[0] != U'\x1234' || b[0] != U'\x{001234}'
+      || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
+      || b[2] != b[0]
+      || c[0] != U'\x29c' || c[0] != U'\o{001234}'
+      || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
+      || c[2] != c[1])
+    __builtin_abort ();
+  if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
+      || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
+      || d[2] != d[0]
+      || e[0] != u'\x1234' || e[0] != u'\x{001234}'
+      || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
+      || e[2] != e[0]
+      || f[0] != u'\x29c' || f[0] != u'\o{001234}'
+      || f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
+      || f[2] != f[1])
+    __builtin_abort ();
+  if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
+      || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
+      || g[2] != g[0]
+      || h[0] != L'\x1234' || h[0] != L'\x{001234}'
+      || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
+      || h[2] != h[0]
+      || i[0] != L'\x29c' || i[0] != L'\o{001234}'
+      || i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
+      || i[2] != i[1])
+    __builtin_abort ();
+#ifdef __cplusplus
+  if (j[0] != '\u0034' || j[0] != '\u{034}'
+      || j[1] != '\U0000003D' || j[1] != '\u{000003d}')
+    __builtin_abort ();
+#endif
+  if (k[0] != '\x034' || k[0] != '\x{0034}'
+      || k[1] != '\x3D' || k[1] != '\x{3d}'
+      || l[0] != '\x1c' || l[0] != '\o{0034}'
+      || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
+++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c	2022-08-16 12:18:58.807260607 +0200
@@ -0,0 +1,18 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+int jalape\u{f1}o = 42;
+
+int
+caf\u{000e9} (void)
+{
+  return jalape\u00F1o;
+}
+
+int
+test (void)
+{
+  return caf\u00e9 ();
+}
--- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c.jj	2022-08-16 12:18:19.308777922 +0200
+++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c	2022-08-16 12:41:23.693648138 +0200
@@ -0,0 +1,33 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+#ifndef __cplusplus
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+const char32_t *a = U"\u{}";				/* { dg-error "empty delimited escape sequence" } */
+							/* { dg-error "is not a valid universal character" "" { target c } .-1 } */
+const char32_t *b = U"\u{12" "34}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *c = U"\u{0000ffffffff}";		/* { dg-error "is not a valid universal character" } */
+const char32_t *d = U"\u{010000edcb}";			/* { dg-error "is not a valid universal character" } */
+const char32_t *e = U"\u{02000000000000000000edcb}";	/* { dg-error "is not a valid universal character" } */
+const char32_t *f = U"\u{123ghij}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *g = U"\u{123.}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *h = U"\u{.}";				/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *i = U"\x{}";				/* { dg-error "empty delimited escape sequence" } */
+const char32_t *j = U"\x{12" "34}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *k = U"\x{0000ffffffff}";
+const char32_t *l = U"\x{010000edcb}";			/* { dg-warning "hex escape sequence out of range" } */
+const char32_t *m = U"\x{02000000000000000000edcb}";	/* { dg-warning "hex escape sequence out of range" } */
+const char32_t *n = U"\x{123ghij}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *o = U"\x{123.}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *p = U"\o{}";				/* { dg-error "empty delimited escape sequence" } */
+const char32_t *q = U"\o{12" "34}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
+const char32_t *r = U"\o{0000037777777777}";
+const char32_t *s = U"\o{040000166713}";		/* { dg-warning "octal escape sequence out of range" } */
+const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
+const char32_t *u = U"\o{1238}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
+const char32_t *v = U"\o{.}";				/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
--- gcc/testsuite/c-c++-common/Wbidi-chars-24.c.jj	2022-08-16 12:03:19.350561676 +0200
+++ gcc/testsuite/c-c++-common/Wbidi-chars-24.c	2022-08-16 12:06:46.381851525 +0200
@@ -0,0 +1,28 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
+/* Test nesting of bidi chars in various contexts.  */
+
+void
+g1 ()
+{
+  const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+  const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int A\u{202a}B\u{2069}C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u{00000202b}B\u{000000002069}c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
--- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
+++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c	2022-08-16 12:46:56.508291006 +0200
@@ -0,0 +1,10 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
+
+typedef __CHAR32_TYPE__ char32_t;
+
+const char32_t *a = U"\u{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
+const char32_t *b = U"\x{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
+const char32_t *c = U"\o{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
--- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
+++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c	2022-08-16 12:47:05.955167423 +0200
@@ -0,0 +1,10 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
+
+typedef __CHAR32_TYPE__ char32_t;
+
+const char32_t *a = U"\u{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
+const char32_t *b = U"\x{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
+const char32_t *c = U"\o{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
--- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C.jj	2022-08-16 12:46:43.368462901 +0200
+++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C	2022-08-16 12:49:21.532393786 +0200
@@ -0,0 +1,8 @@
+// P2290R3 - Delimited escape sequences
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target wchar }
+// { dg-options "-pedantic" }
+
+const char32_t *a = U"\u{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *b = U"\x{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *c = U"\o{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
--- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C.jj	2022-08-16 12:46:46.281424798 +0200
+++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C	2022-08-16 12:49:33.761233803 +0200
@@ -0,0 +1,8 @@
+// P2290R3 - Delimited escape sequences
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target wchar }
+// { dg-options "-pedantic-errors" }
+
+const char32_t *a = U"\u{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *b = U"\x{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *c = U"\o{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }


	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libcpp, v2: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
  2022-08-18  8:17       ` [PATCH] libcpp, v2: " Jakub Jelinek
@ 2022-08-19  0:34         ` Jason Merrill
  0 siblings, 0 replies; 6+ messages in thread
From: Jason Merrill @ 2022-08-19  0:34 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Marek Polacek, Joseph S. Myers, gcc-patches

On 8/18/22 01:17, Jakub Jelinek wrote:
> On Wed, Aug 17, 2022 at 10:22:03PM -0400, Jason Merrill wrote:
>> OK, a comment mentioning this should be sufficient.
> 
> Here is an updated patch with those changes in.
> So far successfully tested with
> GXX_TESTSUITE_STDS=98,11,14,17,20,2b make -j32 -k check-gcc check-g++ RUNTESTFLAGS="dg.exp='Wbidi* cpp/*' cpp.exp"
> ok if it passes full bootstrap/regtest tonight?

OK.

> 2022-08-18  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR c++/106645
> libcpp/
> 	* include/cpplib.h (struct cpp_options): Implement
> 	P2290R3 - Delimited escape sequences.  Add delimited_escape_seqs
> 	member.
> 	* init.cc (struct lang_flags): Likewise.
> 	(lang_defaults): Add delim column.
> 	(cpp_set_lang): Copy over delimited_escape_seqs.
> 	* charset.cc (extend_char_range): New function.
> 	(_cpp_valid_ucn): Use it.  Handle delimited escape sequences.
> 	(convert_hex): Likewise.
> 	(convert_oct): Likewise.
> 	(convert_ucn): Use extend_char_range.
> 	(convert_escape): Call convert_oct even for \o.
> 	(_cpp_interpret_identifier): Handle delimited escape sequences.
> 	* lex.cc (get_bidi_ucn_1): Likewise.  Add end argument, fill it in.
> 	(get_bidi_ucn): Adjust get_bidi_ucn_1 caller.  Use end argument to
> 	compute num_bytes.
> gcc/testsuite/
> 	* c-c++-common/cpp/delimited-escape-seq-1.c: New test.
> 	* c-c++-common/cpp/delimited-escape-seq-2.c: New test.
> 	* c-c++-common/cpp/delimited-escape-seq-3.c: New test.
> 	* c-c++-common/Wbidi-chars-24.c: New test.
> 	* gcc.dg/cpp/delimited-escape-seq-1.c: New test.
> 	* gcc.dg/cpp/delimited-escape-seq-2.c: New test.
> 	* g++.dg/cpp/delimited-escape-seq-1.C: New test.
> 	* g++.dg/cpp/delimited-escape-seq-2.C: New test.
> 
> --- libcpp/include/cpplib.h.jj	2022-08-10 09:06:53.268209449 +0200
> +++ libcpp/include/cpplib.h	2022-08-15 19:32:53.743213474 +0200
> @@ -519,6 +519,9 @@ struct cpp_options
>     /* Nonzero for C++23 size_t literals.  */
>     unsigned char size_t_literals;
>   
> +  /* Nonzero for C++23 delimited escape sequences.  */
> +  unsigned char delimited_escape_seqs;
> +
>     /* Holds the name of the target (execution) character set.  */
>     const char *narrow_charset;
>   
> --- libcpp/init.cc.jj	2022-08-10 09:06:53.268209449 +0200
> +++ libcpp/init.cc	2022-08-15 16:09:01.403020485 +0200
> @@ -96,34 +96,35 @@ struct lang_flags
>     char dfp_constants;
>     char size_t_literals;
>     char elifdef;
> +  char delimited_escape_seqs;
>   };
>   
>   static const struct lang_flags lang_defaults[] =
> -{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */
> -  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1 },
> -  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0 },
> -  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1 },
> -  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
> -  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0 },
> -  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0 },
> -  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0 },
> -  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0 },
> -  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
> -  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0 },
> -  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
> -  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0 },
> -  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
> -  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1 },
> -  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0 }
> +{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */
> +  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      0 },
> +  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
> +  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1,      0 },
> +  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0 },
> +  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
> +  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0 },
> +  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
> +  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
> +  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
> +  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      1 },
> +  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0 }
>   };
>   
>   /* Sets internal flags correctly for a given language.  */
> @@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_
>     CPP_OPTION (pfile, dfp_constants)		 = l->dfp_constants;
>     CPP_OPTION (pfile, size_t_literals)		 = l->size_t_literals;
>     CPP_OPTION (pfile, elifdef)			 = l->elifdef;
> +  CPP_OPTION (pfile, delimited_escape_seqs)	 = l->delimited_escape_seqs;
>   }
>   
>   /* Initialize library global state.  */
> --- libcpp/charset.cc.jj	2022-08-15 12:52:43.213902801 +0200
> +++ libcpp/charset.cc	2022-08-18 10:01:22.569112418 +0200
> @@ -1036,6 +1036,19 @@ ucn_valid_in_identifier (cpp_reader *pfi
>     return 1;
>   }
>   
> +/* Increment char_range->m_finish by a single character.  */
> +
> +static void
> +extend_char_range (source_range *char_range,
> +		   cpp_string_location_reader *loc_reader)
> +{
> +  if (loc_reader)
> +    {
> +      gcc_assert (char_range);
> +      char_range->m_finish = loc_reader->get_next ().m_finish;
> +    }
> +}
> +
>   /* [lex.charset]: The character designated by the universal character
>      name \UNNNNNNNN is that character whose character short name in
>      ISO/IEC 10646 is NNNNNNNN; the character designated by the
> @@ -1081,6 +1094,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>     unsigned int length;
>     const uchar *str = *pstr;
>     const uchar *base = str - 2;
> +  bool delimited = false;
>   
>     if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
>       cpp_error (pfile, CPP_DL_WARNING,
> @@ -1095,7 +1109,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>   	         (int) str[-1]);
>   
>     if (str[-1] == 'u')
> -    length = 4;
> +    {
> +      length = 4;
> +      if (str < limit && *str == '{')
> +	{
> +	  str++;
> +	  /* Magic value to indicate no digits seen.  */
> +	  length = 32;
> +	  delimited = true;
> +	  extend_char_range (char_range, loc_reader);
> +	}
> +    }
>     else if (str[-1] == 'U')
>       length = 8;
>     else
> @@ -1107,18 +1131,53 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>     result = 0;
>     do
>       {
> +      if (str == limit)
> +	break;
>         c = *str;
>         if (!ISXDIGIT (c))
>   	break;
>         str++;
> -      if (loc_reader)
> +      extend_char_range (char_range, loc_reader);
> +      if (delimited)
>   	{
> -	  gcc_assert (char_range);
> -	  char_range->m_finish = loc_reader->get_next ().m_finish;
> +	  if (!result)
> +	    /* Accept arbitrary number of leading zeros.
> +	       16 is another magic value, smaller than 32 above
> +	       and bigger than 8, so that upon encountering first
> +	       non-zero digit we can count 8 digits and after that
> +	       or in overflow bit and ensure length doesn't decrease
> +	       to 0, as delimited escape sequence doesn't have upper
> +	       bound on the number of hex digits.  */
> +	    length = 16;
> +	  else if (length == 16 - 8)
> +	    {
> +	      /* Make sure we detect overflows.  */
> +	      result |= 0x8000000;
> +	      ++length;
> +	    }
>   	}
> +
>         result = (result << 4) + hex_value (c);
>       }
> -  while (--length && str < limit);
> +  while (--length);
> +
> +  if (delimited
> +      && str < limit
> +      && *str == '}'
> +      && (length != 32 || !identifier_pos))
> +    {
> +      if (length == 32)
> +	cpp_error (pfile, CPP_DL_ERROR,
> +		   "empty delimited escape sequence");
> +      else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> +	       && CPP_OPTION (pfile, cpp_pedantic))
> +	cpp_error (pfile, CPP_DL_PEDWARN,
> +		   "delimited escape sequences are only valid in C++23");
> +      str++;
> +      length = 0;
> +      delimited = false;
> +      extend_char_range (char_range, loc_reader);
> +    }
>   
>     /* Partial UCNs are not valid in strings, but decompose into
>        multiple tokens in identifiers, so we can't give a helpful
> @@ -1132,9 +1191,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const
>     *pstr = str;
>     if (length)
>       {
> -      cpp_error (pfile, CPP_DL_ERROR,
> -		 "incomplete universal character name %.*s",
> -		 (int) (str - base), base);
> +      if (!delimited)
> +	cpp_error (pfile, CPP_DL_ERROR,
> +		   "incomplete universal character name %.*s",
> +		   (int) (str - base), base);
> +      else
> +	cpp_error (pfile, CPP_DL_ERROR,
> +		   "'\\u{' not terminated with '}' after %.*s",
> +		   (int) (str - base), base);
>         result = 1;
>       }
>     /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
> @@ -1212,9 +1276,8 @@ convert_ucn (cpp_reader *pfile, const uc
>   
>     from++;  /* Skip u/U.  */
>   
> -  if (loc_reader)
> -    /* The u/U is part of the spelling of this character.  */
> -    char_range.m_finish = loc_reader->get_next ().m_finish;
> +  /* The u/U is part of the spelling of this character.  */
> +  extend_char_range (&char_range, loc_reader);
>   
>     _cpp_valid_ucn (pfile, &from, limit, 0, &nst,
>   		  &ucn, &char_range, loc_reader);
> @@ -1392,6 +1455,8 @@ convert_hex (cpp_reader *pfile, const uc
>     int digits_found = 0;
>     size_t width = cvt.width;
>     size_t mask = width_to_mask (width);
> +  bool delimited = false;
> +  const uchar *base = from - 1;
>   
>     /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
>     gcc_assert ((loc_reader != NULL) == (ranges != NULL));
> @@ -1404,8 +1469,14 @@ convert_hex (cpp_reader *pfile, const uc
>     from++;
>   
>     /* The 'x' is part of the spelling of this character.  */
> -  if (loc_reader)
> -    char_range.m_finish = loc_reader->get_next ().m_finish;
> +  extend_char_range (&char_range, loc_reader);
> +
> +  if (from < limit && *from == '{')
> +    {
> +      delimited = true;
> +      from++;
> +      extend_char_range (&char_range, loc_reader);
> +    }
>   
>     while (from < limit)
>       {
> @@ -1413,19 +1484,42 @@ convert_hex (cpp_reader *pfile, const uc
>         if (! hex_p (c))
>   	break;
>         from++;
> -      if (loc_reader)
> -	char_range.m_finish = loc_reader->get_next ().m_finish;
> +      extend_char_range (&char_range, loc_reader);
>         overflow |= n ^ (n << 4 >> 4);
>         n = (n << 4) + hex_value (c);
>         digits_found = 1;
>       }
>   
> +  if (delimited && from < limit && *from == '}')
> +    {
> +      from++;
> +      if (!digits_found)
> +	{
> +	  cpp_error (pfile, CPP_DL_ERROR,
> +		     "empty delimited escape sequence");
> +	  return from;
> +	}
> +     else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> +	      && CPP_OPTION (pfile, cpp_pedantic))
> +	cpp_error (pfile, CPP_DL_PEDWARN,
> +		   "delimited escape sequences are only valid in C++23");
> +      delimited = false;
> +      extend_char_range (&char_range, loc_reader);
> +    }
> +
>     if (!digits_found)
>       {
>         cpp_error (pfile, CPP_DL_ERROR,
>   		 "\\x used with no following hex digits");
>         return from;
>       }
> +  else if (delimited)
> +    {
> +      cpp_error (pfile, CPP_DL_ERROR,
> +		 "'\\x{' not terminated with '}' after %.*s",
> +		 (int) (from - base), base);
> +      return from;
> +    }
>   
>     if (overflow | (n != (n & mask)))
>       {
> @@ -1459,25 +1553,71 @@ convert_oct (cpp_reader *pfile, const uc
>   	     cpp_substring_ranges *ranges)
>   {
>     size_t count = 0;
> -  cppchar_t c, n = 0;
> +  cppchar_t c, n = 0, overflow = 0;
>     size_t width = cvt.width;
>     size_t mask = width_to_mask (width);
> +  bool delimited = false;
> +  const uchar *base = from - 1;
>   
>     /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
>     gcc_assert ((loc_reader != NULL) == (ranges != NULL));
>   
> +  if (from < limit && *from == 'o')
> +    {
> +      from++;
> +      extend_char_range (&char_range, loc_reader);
> +      if (from == limit || *from != '{')
> +	cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
> +      else
> +	{
> +	  from++;
> +	  extend_char_range (&char_range, loc_reader);
> +	  delimited = true;
> +	}
> +    }
> +
>     while (from < limit && count++ < 3)
>       {
>         c = *from;
>         if (c < '0' || c > '7')
>   	break;
>         from++;
> -      if (loc_reader)
> -	char_range.m_finish = loc_reader->get_next ().m_finish;
> +      extend_char_range (&char_range, loc_reader);
> +      if (delimited)
> +	{
> +	  count = 2;
> +	  overflow |= n ^ (n << 3 >> 3);
> +	}
>         n = (n << 3) + c - '0';
>       }
>   
> -  if (n != (n & mask))
> +  if (delimited)
> +    {
> +      if (from < limit && *from == '}')
> +	{
> +	  from++;
> +	  if (count == 1)
> +	    {
> +	      cpp_error (pfile, CPP_DL_ERROR,
> +			 "empty delimited escape sequence");
> +	      return from;
> +	    }
> +	  else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> +		   && CPP_OPTION (pfile, cpp_pedantic))
> +	    cpp_error (pfile, CPP_DL_PEDWARN,
> +		       "delimited escape sequences are only valid in C++23");
> +	  extend_char_range (&char_range, loc_reader);
> +	}
> +      else
> +	{
> +	  cpp_error (pfile, CPP_DL_ERROR,
> +		     "'\\o{' not terminated with '}' after %.*s",
> +		     (int) (from - base), base);
> +	  return from;
> +	}
> +    }
> +
> +  if (overflow | (n != (n & mask)))
>       {
>         cpp_error (pfile, CPP_DL_PEDWARN,
>   		 "octal escape sequence out of range");
> @@ -1535,6 +1675,7 @@ convert_escape (cpp_reader *pfile, const
>   
>       case '0':  case '1':  case '2':  case '3':
>       case '4':  case '5':  case '6':  case '7':
> +    case 'o':
>         return convert_oct (pfile, from, limit, tbuf, cvt,
>   			  char_range, loc_reader, ranges);
>   
> @@ -2119,15 +2260,27 @@ _cpp_interpret_identifier (cpp_reader *p
>   	cppchar_t value = 0;
>   	size_t bufleft = len - (bufp - buf);
>   	int rval;
> +	bool delimited = false;
>   
>   	idp += 2;
> +	if (length == 4 && id[idp] == '{')
> +	  {
> +	    delimited = true;
> +	    idp++;
> +	  }
>   	while (length && idp < len && ISXDIGIT (id[idp]))
>   	  {
>   	    value = (value << 4) + hex_value (id[idp]);
>   	    idp++;
> -	    length--;
> +	    if (!delimited)
> +	      length--;
>   	  }
> -	idp--;
> +	if (!delimited)
> +	  idp--;
> +        /* else
> +	     assert (id[idp] == '}');
> +	   As the caller ensures it is a valid identifier, if it is
> +	   delimited escape sequence, it must be terminated by }.  */
>   
>   	/* Special case for EBCDIC: if the identifier contains
>   	   a '$' specified using a UCN, translate it to EBCDIC.  */
> --- libcpp/lex.cc.jj	2022-05-23 10:59:06.235591348 +0200
> +++ libcpp/lex.cc	2022-08-16 11:57:53.772823661 +0200
> @@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const
>   /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
>   
>   static bidi::kind
> -get_bidi_ucn_1 (const unsigned char *p, bool is_U)
> +get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
>   {
>     /* 6.4.3 Universal Character Names
>         \u hex-quad
>         \U hex-quad hex-quad
> +      \u { simple-hexadecimal-digit-sequence }
>        where \unnnn means \U0000nnnn.  */
>   
> +  *end = p + 4;
>     if (is_U)
>       {
>         if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
>   	return bidi::kind::NONE;
>         /* Skip 4B so we can treat \u and \U the same below.  */
>         p += 4;
> +      *end += 4;
> +    }
> +  else if (p[0] == '{')
> +    {
> +      p++;
> +      while (*p == '0')
> +	p++;
> +      if (p[0] != '2'
> +	  || p[1] != '0'
> +	  || !ISXDIGIT (p[2])
> +	  || !ISXDIGIT (p[3])
> +	  || p[4] != '}')
> +	return bidi::kind::NONE;
> +      *end = p + 5;
>       }
>   
>     /* All code points we are looking for start with 20xx.  */
> @@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p,
>      If the kind is not NONE, write the location to *OUT.*/
>   
>   static bidi::kind
> -get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
> +get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
>   	      location_t *out)
>   {
> -  bidi::kind result = get_bidi_ucn_1 (p, is_U);
> +  const unsigned char *end;
> +  bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
>     if (result != bidi::kind::NONE)
>       {
>         const unsigned char *start = p - 2;
> -      size_t num_bytes = 2 + (is_U ? 8 : 4);
> +      size_t num_bytes = end - start;
>         *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
>       }
>     return result;
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c	2022-08-16 12:18:42.235477632 +0200
> @@ -0,0 +1,92 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do run } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +#ifndef __cplusplus
> +#include <wchar.h>
> +typedef __CHAR16_TYPE__ char16_t;
> +typedef __CHAR32_TYPE__ char32_t;
> +#endif
> +
> +const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
> +const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
> +const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
> +const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
> +const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
> +const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
> +const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
> +const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
> +const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
> +#ifdef __cplusplus
> +const char *j = "\u{34}\u{000000000000000003D}";
> +#endif
> +const char *k = "\x{34}\x{000000000000000003D}";
> +const char *l = "\o{34}\o{000000000000000176}";
> +
> +#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
> +    || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
> +    || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
> +    || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
> +    || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
> +    || u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
> +    || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
> +    || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
> +    || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
> +    || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
> +    || '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
> +#error Bad
> +#endif
> +#ifdef __cplusplus
> +#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
> +#error Bad
> +#endif
> +#endif
> +
> +int
> +main ()
> +{
> +  if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
> +      || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
> +      || a[2] != a[0]
> +      || a[3] != a[1]
> +      || b[0] != U'\x1234' || b[0] != U'\x{001234}'
> +      || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
> +      || b[2] != b[0]
> +      || c[0] != U'\x29c' || c[0] != U'\o{001234}'
> +      || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
> +      || c[2] != c[1])
> +    __builtin_abort ();
> +  if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
> +      || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
> +      || d[2] != d[0]
> +      || e[0] != u'\x1234' || e[0] != u'\x{001234}'
> +      || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
> +      || e[2] != e[0]
> +      || f[0] != u'\x29c' || f[0] != u'\o{001234}'
> +      || f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
> +      || f[2] != f[1])
> +    __builtin_abort ();
> +  if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
> +      || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
> +      || g[2] != g[0]
> +      || h[0] != L'\x1234' || h[0] != L'\x{001234}'
> +      || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
> +      || h[2] != h[0]
> +      || i[0] != L'\x29c' || i[0] != L'\o{001234}'
> +      || i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
> +      || i[2] != i[1])
> +    __builtin_abort ();
> +#ifdef __cplusplus
> +  if (j[0] != '\u0034' || j[0] != '\u{034}'
> +      || j[1] != '\U0000003D' || j[1] != '\u{000003d}')
> +    __builtin_abort ();
> +#endif
> +  if (k[0] != '\x034' || k[0] != '\x{0034}'
> +      || k[1] != '\x3D' || k[1] != '\x{3d}'
> +      || l[0] != '\x1c' || l[0] != '\o{0034}'
> +      || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c	2022-08-16 12:18:58.807260607 +0200
> @@ -0,0 +1,18 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +int jalape\u{f1}o = 42;
> +
> +int
> +caf\u{000e9} (void)
> +{
> +  return jalape\u00F1o;
> +}
> +
> +int
> +test (void)
> +{
> +  return caf\u00e9 ();
> +}
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c.jj	2022-08-16 12:18:19.308777922 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c	2022-08-16 12:41:23.693648138 +0200
> @@ -0,0 +1,33 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +#ifndef __cplusplus
> +typedef __CHAR32_TYPE__ char32_t;
> +#endif
> +
> +const char32_t *a = U"\u{}";				/* { dg-error "empty delimited escape sequence" } */
> +							/* { dg-error "is not a valid universal character" "" { target c } .-1 } */
> +const char32_t *b = U"\u{12" "34}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *c = U"\u{0000ffffffff}";		/* { dg-error "is not a valid universal character" } */
> +const char32_t *d = U"\u{010000edcb}";			/* { dg-error "is not a valid universal character" } */
> +const char32_t *e = U"\u{02000000000000000000edcb}";	/* { dg-error "is not a valid universal character" } */
> +const char32_t *f = U"\u{123ghij}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *g = U"\u{123.}";			/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *h = U"\u{.}";				/* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *i = U"\x{}";				/* { dg-error "empty delimited escape sequence" } */
> +const char32_t *j = U"\x{12" "34}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *k = U"\x{0000ffffffff}";
> +const char32_t *l = U"\x{010000edcb}";			/* { dg-warning "hex escape sequence out of range" } */
> +const char32_t *m = U"\x{02000000000000000000edcb}";	/* { dg-warning "hex escape sequence out of range" } */
> +const char32_t *n = U"\x{123ghij}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *o = U"\x{123.}";			/* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *p = U"\o{}";				/* { dg-error "empty delimited escape sequence" } */
> +const char32_t *q = U"\o{12" "34}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> +const char32_t *r = U"\o{0000037777777777}";
> +const char32_t *s = U"\o{040000166713}";		/* { dg-warning "octal escape sequence out of range" } */
> +const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
> +const char32_t *u = U"\o{1238}";			/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> +const char32_t *v = U"\o{.}";				/* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> --- gcc/testsuite/c-c++-common/Wbidi-chars-24.c.jj	2022-08-16 12:03:19.350561676 +0200
> +++ gcc/testsuite/c-c++-common/Wbidi-chars-24.c	2022-08-16 12:06:46.381851525 +0200
> @@ -0,0 +1,28 @@
> +/* PR preprocessor/103026 */
> +/* { dg-do compile } */
> +/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
> +/* Test nesting of bidi chars in various contexts.  */
> +
> +void
> +g1 ()
> +{
> +  const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +  const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +}
> +
> +int A\u{202a}B\u{2069}C;
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +int a\u{00000202b}B\u{000000002069}c;
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c.jj	2022-08-16 10:47:38.693022740 +0200
> +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c	2022-08-16 12:46:56.508291006 +0200
> @@ -0,0 +1,10 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
> +
> +typedef __CHAR32_TYPE__ char32_t;
> +
> +const char32_t *a = U"\u{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
> +const char32_t *b = U"\x{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
> +const char32_t *c = U"\o{1234}";	/* { dg-warning "delimited escape sequences are only valid in" } */
> --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c.jj	2022-08-16 10:47:41.846981390 +0200
> +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c	2022-08-16 12:47:05.955167423 +0200
> @@ -0,0 +1,10 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
> +
> +typedef __CHAR32_TYPE__ char32_t;
> +
> +const char32_t *a = U"\u{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
> +const char32_t *b = U"\x{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
> +const char32_t *c = U"\o{1234}";	/* { dg-error "delimited escape sequences are only valid in" } */
> --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C.jj	2022-08-16 12:46:43.368462901 +0200
> +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C	2022-08-16 12:49:21.532393786 +0200
> @@ -0,0 +1,8 @@
> +// P2290R3 - Delimited escape sequences
> +// { dg-do compile { target c++11 } }
> +// { dg-require-effective-target wchar }
> +// { dg-options "-pedantic" }
> +
> +const char32_t *a = U"\u{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *b = U"\x{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *c = U"\o{1234}";	// { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C.jj	2022-08-16 12:46:46.281424798 +0200
> +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C	2022-08-16 12:49:33.761233803 +0200
> @@ -0,0 +1,8 @@
> +// P2290R3 - Delimited escape sequences
> +// { dg-do compile { target c++11 } }
> +// { dg-require-effective-target wchar }
> +// { dg-options "-pedantic-errors" }
> +
> +const char32_t *a = U"\u{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *b = U"\x{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *c = U"\o{1234}";	// { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> 
> 
> 	Jakub
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-08-19  0:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-17  7:17 [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645] Jakub Jelinek
2022-08-17 20:47 ` Jason Merrill
2022-08-17 21:19   ` Jakub Jelinek
2022-08-18  2:22     ` Jason Merrill
2022-08-18  8:17       ` [PATCH] libcpp, v2: " Jakub Jelinek
2022-08-19  0:34         ` Jason Merrill

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).