public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] c++: Reject UDLs in certain contexts [PR105300]
@ 2022-11-12 16:53 Marek Polacek
  2022-11-15 23:58 ` Jason Merrill
  0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-12 16:53 UTC (permalink / raw)
  To: GCC Patches, Jason Merrill

In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected.  This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

	PR c++/105300

gcc/c-family/ChangeLog:

	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.

gcc/cp/ChangeLog:

	* parser.cc (cp_parser_string_literal): Add a bool parameter.
	Give an error when UDLs are not permitted.
	(cp_parser_primary_expression): Adjust the call to
	cp_parser_string_literal.
	(cp_parser_linkage_specification): Likewise.
	(cp_parser_static_assert): Likewise.
	(cp_parser_operator): Likewise.
	(cp_parser_asm_definition): Likewise.
	(cp_parser_asm_specification_opt): Likewise.
	(cp_parser_asm_operand_list): Likewise.
	(cp_parser_asm_clobber_list): Likewise.
	(cp_parser_omp_context_selector): Likewise.
	(pragma_lex): Likewise.

gcc/testsuite/ChangeLog:

	* g++.dg/cpp0x/udlit-error1.C: New test.
---
 gcc/c-family/c-pragma.cc                  |  3 +
 gcc/cp/parser.cc                          | 69 ++++++++++++++---------
 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 +++++++
 3 files changed, 65 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C

diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 142a46441ac..49f405b605b 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
     }
   else if (token == CPP_STRING)
     message = x;
+  else if (token == CPP_STRING_USERDEF)
+    GCC_BAD ("string literal with user-defined suffix is invalid in this "
+	     "context");
   else
     GCC_BAD ("expected a string after %<#pragma message%>");
 
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index e4021835ed5..ae2798e2a33 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2226,7 +2226,7 @@ pop_unparsed_function_queues (cp_parser *parser)
 static cp_expr cp_parser_identifier
   (cp_parser *);
 static cp_expr cp_parser_string_literal
-  (cp_parser *, bool, bool, bool);
+  (cp_parser *, bool, bool, bool, bool);
 static cp_expr cp_parser_userdef_char_literal
   (cp_parser *);
 static tree cp_parser_userdef_string_literal
@@ -4402,7 +4402,8 @@ cp_parser_identifier (cp_parser* parser)
    TREE_STRING representing the combined, nul-terminated string
    constant.  If TRANSLATE is true, translate the string to the
    execution character set.  If WIDE_OK is true, a wide string is
-   invalid here.
+   valid here.  If UDL_OK is true, a string literal with user-defined
+   suffix can be used in this context.
 
    C++98 [lex.string] says that if a narrow string literal token is
    adjacent to a wide string literal token, the behavior is undefined.
@@ -4414,7 +4415,7 @@ cp_parser_identifier (cp_parser* parser)
    FUTURE: ObjC++ will need to handle @-strings here.  */
 static cp_expr
 cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
-			  bool lookup_udlit = true)
+			  bool udl_ok, bool lookup_udlit = true)
 {
   tree value;
   size_t count;
@@ -4439,6 +4440,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 
   if (cpp_userdef_string_p (tok->type))
     {
+      if (!udl_ok)
+	{
+	  error_at (loc, "string literal with user-defined suffix "
+		    "is invalid in this context");
+	  return error_mark_node;
+	}
       string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
       curr_type = cpp_userdef_string_remove_type (tok->type);
       curr_tok_is_userdef_p = true;
@@ -5655,7 +5662,7 @@ cp_parser_primary_expression (cp_parser *parser,
 	 argument to cp_parser_string_literal.  */
       return (cp_parser_string_literal (parser,
 					parser->translate_strings_p,
-					true)
+					/*wide_ok=*/true, /*udl_ok=*/true)
 	      .maybe_add_location_wrapper ());
 
     case CPP_OPEN_PAREN:
@@ -16161,15 +16168,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
 static void
 cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
 {
-  tree linkage;
-
   /* Look for the `extern' keyword.  */
   cp_token *extern_token
     = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
 
   /* Look for the string-literal.  */
   cp_token *string_token = cp_lexer_peek_token (parser->lexer);
-  linkage = cp_parser_string_literal (parser, false, false);
+  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+					   /*wide_ok=*/false, /*udl_ok=*/false);
 
   /* Transform the literal into an identifier.  If the literal is a
      wide-character string, or contains embedded NULs, then we can't
@@ -16300,8 +16306,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
 
       /* Parse the string-literal message.  */
       message = cp_parser_string_literal (parser,
-                                	  /*translate=*/false,
-                                	  /*wide_ok=*/true);
+					  /*translate=*/false, /*wide_ok=*/true,
+					  /*udl_ok=*/false);
 
       /* A `)' completes the static assertion.  */
       if (!parens.require_close (parser))
@@ -17349,7 +17355,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
     case CPP_STRING16_USERDEF:
     case CPP_STRING32_USERDEF:
       {
-	cp_expr str;
 	tree string_tree;
 	int sz, len;
 
@@ -17357,8 +17362,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
 	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
 
 	/* Consume the string.  */
-	str = cp_parser_string_literal (parser, /*translate=*/true,
-				      /*wide_ok=*/true, /*lookup_udlit=*/false);
+	cp_expr str = cp_parser_string_literal (parser, /*translate=*/true,
+						/*wide_ok=*/true,
+						/*udl_ok=*/true,
+						/*lookup_udlit=*/false);
 	if (str == error_mark_node)
 	  return error_mark_node;
 	else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -21975,7 +21982,6 @@ cp_parser_using_directive (cp_parser* parser)
 static void
 cp_parser_asm_definition (cp_parser* parser)
 {
-  tree string;
   tree outputs = NULL_TREE;
   tree inputs = NULL_TREE;
   tree clobbers = NULL_TREE;
@@ -22083,7 +22089,8 @@ cp_parser_asm_definition (cp_parser* parser)
   if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
     return;
   /* Look for the string.  */
-  string = cp_parser_string_literal (parser, false, false);
+  tree string = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/false, /*udl_ok=*/false);
   if (string == error_mark_node)
     {
       cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28485,11 +28492,8 @@ cp_parser_yield_expression (cp_parser* parser)
 static tree
 cp_parser_asm_specification_opt (cp_parser* parser)
 {
-  cp_token *token;
-  tree asm_specification;
-
   /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
+  cp_token *token = cp_lexer_peek_token (parser->lexer);
   /* If the next token isn't the `asm' keyword, then there's no
      asm-specification.  */
   if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28502,7 +28506,10 @@ cp_parser_asm_specification_opt (cp_parser* parser)
   parens.require_open (parser);
 
   /* Look for the string-literal.  */
-  asm_specification = cp_parser_string_literal (parser, false, false);
+  tree asm_specification = cp_parser_string_literal (parser,
+						     /*translate=*/false,
+						     /*wide_ok=*/false,
+						     /*udl_ok=*/false);
 
   /* Look for the `)'.  */
   parens.require_close (parser);
@@ -28535,8 +28542,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-      tree expression;
       tree name;
 
       if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28554,13 +28559,16 @@ cp_parser_asm_operand_list (cp_parser* parser)
       else
 	name = NULL_TREE;
       /* Look for the string-literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false,
+						      /*udl_ok=*/false);
 
       /* Look for the `('.  */
       matching_parens parens;
       parens.require_open (parser);
       /* Parse the expression.  */
-      expression = cp_parser_expression (parser);
+      tree expression = cp_parser_expression (parser);
       /* Look for the `)'.  */
       parens.require_close (parser);
 
@@ -28600,10 +28608,11 @@ cp_parser_asm_clobber_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-
       /* Look for the string literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false,
+						      /*udl_ok=*/false);
       /* Add it to the list.  */
       clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
       /* If the next token is not a `,', then the list is
@@ -45761,7 +45770,10 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
 		      cp_lexer_consume_token (parser->lexer);
 		    }
 		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
-		    value = cp_parser_string_literal (parser, false, false);
+		    value = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false,
+						      /*udl_ok=*/false);
 		  else
 		    {
 		      cp_parser_error (parser, "expected identifier or "
@@ -48783,7 +48795,8 @@ pragma_lex (tree *value, location_t *loc)
   if (ret == CPP_PRAGMA_EOL)
     ret = CPP_EOF;
   else if (ret == CPP_STRING)
-    *value = cp_parser_string_literal (the_parser, false, false);
+    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+				       /*wide_ok=*/false, /*udl_ok=*/false);
   else
     {
       if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x		  // { dg-error "include expects" }
+#line ""_x		  // { dg-error "not a positive integer" }
+#if __has_include(""_x)	  // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}

base-commit: f232715d15618e91c90eb210e23de10909590944
-- 
2.38.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-12 16:53 [PATCH] c++: Reject UDLs in certain contexts [PR105300] Marek Polacek
@ 2022-11-15 23:58 ` Jason Merrill
  2022-11-16  0:35   ` Marek Polacek
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-15 23:58 UTC (permalink / raw)
  To: Marek Polacek, GCC Patches

On 11/12/22 06:53, Marek Polacek wrote:
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected.  This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

Since the grammar has

user-defined-string-literal :
	string-literal ud-suffix

maybe we want to move the UDL handling out to a 
cp_parser_udl_string_literal that calls cp_parser_string_literal?

> 	PR c++/105300
> 
> gcc/c-family/ChangeLog:
> 
> 	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
> 
> gcc/cp/ChangeLog:
> 
> 	* parser.cc (cp_parser_string_literal): Add a bool parameter.
> 	Give an error when UDLs are not permitted.
> 	(cp_parser_primary_expression): Adjust the call to
> 	cp_parser_string_literal.
> 	(cp_parser_linkage_specification): Likewise.
> 	(cp_parser_static_assert): Likewise.
> 	(cp_parser_operator): Likewise.
> 	(cp_parser_asm_definition): Likewise.
> 	(cp_parser_asm_specification_opt): Likewise.
> 	(cp_parser_asm_operand_list): Likewise.
> 	(cp_parser_asm_clobber_list): Likewise.
> 	(cp_parser_omp_context_selector): Likewise.
> 	(pragma_lex): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/cpp0x/udlit-error1.C: New test.
> ---
>   gcc/c-family/c-pragma.cc                  |  3 +
>   gcc/cp/parser.cc                          | 69 ++++++++++++++---------
>   gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 +++++++
>   3 files changed, 65 insertions(+), 28 deletions(-)
>   create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> 
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 142a46441ac..49f405b605b 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
>       }
>     else if (token == CPP_STRING)
>       message = x;
> +  else if (token == CPP_STRING_USERDEF)
> +    GCC_BAD ("string literal with user-defined suffix is invalid in this "
> +	     "context");
>     else
>       GCC_BAD ("expected a string after %<#pragma message%>");
>   
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index e4021835ed5..ae2798e2a33 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2226,7 +2226,7 @@ pop_unparsed_function_queues (cp_parser *parser)
>   static cp_expr cp_parser_identifier
>     (cp_parser *);
>   static cp_expr cp_parser_string_literal
> -  (cp_parser *, bool, bool, bool);
> +  (cp_parser *, bool, bool, bool, bool);
>   static cp_expr cp_parser_userdef_char_literal
>     (cp_parser *);
>   static tree cp_parser_userdef_string_literal
> @@ -4402,7 +4402,8 @@ cp_parser_identifier (cp_parser* parser)
>      TREE_STRING representing the combined, nul-terminated string
>      constant.  If TRANSLATE is true, translate the string to the
>      execution character set.  If WIDE_OK is true, a wide string is
> -   invalid here.
> +   valid here.  If UDL_OK is true, a string literal with user-defined
> +   suffix can be used in this context.
>   
>      C++98 [lex.string] says that if a narrow string literal token is
>      adjacent to a wide string literal token, the behavior is undefined.
> @@ -4414,7 +4415,7 @@ cp_parser_identifier (cp_parser* parser)
>      FUTURE: ObjC++ will need to handle @-strings here.  */
>   static cp_expr
>   cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> -			  bool lookup_udlit = true)
> +			  bool udl_ok, bool lookup_udlit = true)
>   {
>     tree value;
>     size_t count;
> @@ -4439,6 +4440,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   
>     if (cpp_userdef_string_p (tok->type))
>       {
> +      if (!udl_ok)
> +	{
> +	  error_at (loc, "string literal with user-defined suffix "
> +		    "is invalid in this context");
> +	  return error_mark_node;
> +	}
>         string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>         curr_type = cpp_userdef_string_remove_type (tok->type);
>         curr_tok_is_userdef_p = true;
> @@ -5655,7 +5662,7 @@ cp_parser_primary_expression (cp_parser *parser,
>   	 argument to cp_parser_string_literal.  */
>         return (cp_parser_string_literal (parser,
>   					parser->translate_strings_p,
> -					true)
> +					/*wide_ok=*/true, /*udl_ok=*/true)
>   	      .maybe_add_location_wrapper ());
>   
>       case CPP_OPEN_PAREN:
> @@ -16161,15 +16168,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
>   static void
>   cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
>   {
> -  tree linkage;
> -
>     /* Look for the `extern' keyword.  */
>     cp_token *extern_token
>       = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>   
>     /* Look for the string-literal.  */
>     cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> -  linkage = cp_parser_string_literal (parser, false, false);
> +  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> +					   /*wide_ok=*/false, /*udl_ok=*/false);
>   
>     /* Transform the literal into an identifier.  If the literal is a
>        wide-character string, or contains embedded NULs, then we can't
> @@ -16300,8 +16306,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
>   
>         /* Parse the string-literal message.  */
>         message = cp_parser_string_literal (parser,
> -                                	  /*translate=*/false,
> -                                	  /*wide_ok=*/true);
> +					  /*translate=*/false, /*wide_ok=*/true,
> +					  /*udl_ok=*/false);
>   
>         /* A `)' completes the static assertion.  */
>         if (!parens.require_close (parser))
> @@ -17349,7 +17355,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>       case CPP_STRING16_USERDEF:
>       case CPP_STRING32_USERDEF:
>         {
> -	cp_expr str;
>   	tree string_tree;
>   	int sz, len;
>   
> @@ -17357,8 +17362,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>   	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>   
>   	/* Consume the string.  */
> -	str = cp_parser_string_literal (parser, /*translate=*/true,
> -				      /*wide_ok=*/true, /*lookup_udlit=*/false);
> +	cp_expr str = cp_parser_string_literal (parser, /*translate=*/true,
> +						/*wide_ok=*/true,
> +						/*udl_ok=*/true,
> +						/*lookup_udlit=*/false);
>   	if (str == error_mark_node)
>   	  return error_mark_node;
>   	else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -21975,7 +21982,6 @@ cp_parser_using_directive (cp_parser* parser)
>   static void
>   cp_parser_asm_definition (cp_parser* parser)
>   {
> -  tree string;
>     tree outputs = NULL_TREE;
>     tree inputs = NULL_TREE;
>     tree clobbers = NULL_TREE;
> @@ -22083,7 +22089,8 @@ cp_parser_asm_definition (cp_parser* parser)
>     if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
>       return;
>     /* Look for the string.  */
> -  string = cp_parser_string_literal (parser, false, false);
> +  tree string = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/false, /*udl_ok=*/false);
>     if (string == error_mark_node)
>       {
>         cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28485,11 +28492,8 @@ cp_parser_yield_expression (cp_parser* parser)
>   static tree
>   cp_parser_asm_specification_opt (cp_parser* parser)
>   {
> -  cp_token *token;
> -  tree asm_specification;
> -
>     /* Peek at the next token.  */
> -  token = cp_lexer_peek_token (parser->lexer);
> +  cp_token *token = cp_lexer_peek_token (parser->lexer);
>     /* If the next token isn't the `asm' keyword, then there's no
>        asm-specification.  */
>     if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28502,7 +28506,10 @@ cp_parser_asm_specification_opt (cp_parser* parser)
>     parens.require_open (parser);
>   
>     /* Look for the string-literal.  */
> -  asm_specification = cp_parser_string_literal (parser, false, false);
> +  tree asm_specification = cp_parser_string_literal (parser,
> +						     /*translate=*/false,
> +						     /*wide_ok=*/false,
> +						     /*udl_ok=*/false);
>   
>     /* Look for the `)'.  */
>     parens.require_close (parser);
> @@ -28535,8 +28542,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -      tree expression;
>         tree name;
>   
>         if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28554,13 +28559,16 @@ cp_parser_asm_operand_list (cp_parser* parser)
>         else
>   	name = NULL_TREE;
>         /* Look for the string-literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false,
> +						      /*udl_ok=*/false);
>   
>         /* Look for the `('.  */
>         matching_parens parens;
>         parens.require_open (parser);
>         /* Parse the expression.  */
> -      expression = cp_parser_expression (parser);
> +      tree expression = cp_parser_expression (parser);
>         /* Look for the `)'.  */
>         parens.require_close (parser);
>   
> @@ -28600,10 +28608,11 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -
>         /* Look for the string literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false,
> +						      /*udl_ok=*/false);
>         /* Add it to the list.  */
>         clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
>         /* If the next token is not a `,', then the list is
> @@ -45761,7 +45770,10 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
>   		      cp_lexer_consume_token (parser->lexer);
>   		    }
>   		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> -		    value = cp_parser_string_literal (parser, false, false);
> +		    value = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false,
> +						      /*udl_ok=*/false);
>   		  else
>   		    {
>   		      cp_parser_error (parser, "expected identifier or "
> @@ -48783,7 +48795,8 @@ pragma_lex (tree *value, location_t *loc)
>     if (ret == CPP_PRAGMA_EOL)
>       ret = CPP_EOF;
>     else if (ret == CPP_STRING)
> -    *value = cp_parser_string_literal (the_parser, false, false);
> +    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> +				       /*wide_ok=*/false, /*udl_ok=*/false);
>     else
>       {
>         if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x		  // { dg-error "include expects" }
> +#line ""_x		  // { dg-error "not a positive integer" }
> +#if __has_include(""_x)	  // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> +  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
> 
> base-commit: f232715d15618e91c90eb210e23de10909590944


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-15 23:58 ` Jason Merrill
@ 2022-11-16  0:35   ` Marek Polacek
  2022-11-16 13:22     ` Jason Merrill
  0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-16  0:35 UTC (permalink / raw)
  To: Jason Merrill; +Cc: GCC Patches

On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
> On 11/12/22 06:53, Marek Polacek wrote:
> > In this PR, we are crashing because we've encountered a UDL where a
> > string-literal is expected.  This patch makes the parser reject string
> > and character UDLs in all places where the grammar requires a
> > string-literal and not a user-defined-string-literal.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> Since the grammar has
> 
> user-defined-string-literal :
> 	string-literal ud-suffix
> 
> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
> that calls cp_parser_string_literal?

Umm, maybe, but the UDL handling code seems to be too entrenched in
cp_parser_string_literal and I don't think it's going to be easy to extract
it :/.

Marek


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-16  0:35   ` Marek Polacek
@ 2022-11-16 13:22     ` Jason Merrill
  2022-11-17  1:12       ` Marek Polacek
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-16 13:22 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches

On 11/15/22 19:35, Marek Polacek wrote:
> On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
>> On 11/12/22 06:53, Marek Polacek wrote:
>>> In this PR, we are crashing because we've encountered a UDL where a
>>> string-literal is expected.  This patch makes the parser reject string
>>> and character UDLs in all places where the grammar requires a
>>> string-literal and not a user-defined-string-literal.
>>>
>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>
>> Since the grammar has
>>
>> user-defined-string-literal :
>> 	string-literal ud-suffix
>>
>> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
>> that calls cp_parser_string_literal?
> 
> Umm, maybe, but the UDL handling code seems to be too entrenched in
> cp_parser_string_literal and I don't think it's going to be easy to extract
> it :/.

Fair enough; maybe a wrapper, then?

Jason


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-16 13:22     ` Jason Merrill
@ 2022-11-17  1:12       ` Marek Polacek
  2022-11-18  0:06         ` Jason Merrill
  0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-17  1:12 UTC (permalink / raw)
  To: Jason Merrill; +Cc: GCC Patches

On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
> On 11/15/22 19:35, Marek Polacek wrote:
> > On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
> > > On 11/12/22 06:53, Marek Polacek wrote:
> > > > In this PR, we are crashing because we've encountered a UDL where a
> > > > string-literal is expected.  This patch makes the parser reject string
> > > > and character UDLs in all places where the grammar requires a
> > > > string-literal and not a user-defined-string-literal.
> > > > 
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > > Since the grammar has
> > > 
> > > user-defined-string-literal :
> > > 	string-literal ud-suffix
> > > 
> > > maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
> > > that calls cp_parser_string_literal?
> > 
> > Umm, maybe, but the UDL handling code seems to be too entrenched in
> > cp_parser_string_literal and I don't think it's going to be easy to extract
> > it :/.
> 
> Fair enough; maybe a wrapper, then?

As in: rename the current cp_parser_string_literal to some internal name, add
a cp_parser_udl_string_literal wrapper that calls it with udl_ok=true, and
introduce a new cp_parser_string_literal wrapper that calls it with udl_ok=false?

One problem with cp_parser_udl_string_literal is that it's too similar to
cp_parser_userdef_string_literal, which would be confusing, I think.

Marek


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-17  1:12       ` Marek Polacek
@ 2022-11-18  0:06         ` Jason Merrill
  2022-11-18 23:52           ` [PATCH v2] " Marek Polacek
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-18  0:06 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches

On 11/16/22 20:12, Marek Polacek wrote:
> On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
>> On 11/15/22 19:35, Marek Polacek wrote:
>>> On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
>>>> On 11/12/22 06:53, Marek Polacek wrote:
>>>>> In this PR, we are crashing because we've encountered a UDL where a
>>>>> string-literal is expected.  This patch makes the parser reject string
>>>>> and character UDLs in all places where the grammar requires a
>>>>> string-literal and not a user-defined-string-literal.
>>>>>
>>>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>>>
>>>> Since the grammar has
>>>>
>>>> user-defined-string-literal :
>>>> 	string-literal ud-suffix
>>>>
>>>> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
>>>> that calls cp_parser_string_literal?
>>>
>>> Umm, maybe, but the UDL handling code seems to be too entrenched in
>>> cp_parser_string_literal and I don't think it's going to be easy to extract
>>> it :/.
>>
>> Fair enough; maybe a wrapper, then?
> 
> As in, have a cp_parser_udl_string_literal wrapper that calls
> cp_parser_string_literal with udl_ok=true, rename cp_parser_string_literal,
> introduce a new cp_parser_string_literal wrapper that passes udl_ok=false?

That's what I was thinking.  And the new cp_parser_string_literal could 
also omit the lookup_udlit parm.

> One problem with cp_parser_udl_string_literal is that it's too similar to
> cp_parser_userdef_string_literal, which would be confusing, I think.

True, probably better to use that name instead, and rename the current
one to something like finish_userdef_string_literal.

Jason


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-18  0:06         ` Jason Merrill
@ 2022-11-18 23:52           ` Marek Polacek
  2022-11-19  1:39             ` Jason Merrill
  0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-18 23:52 UTC (permalink / raw)
  To: Jason Merrill; +Cc: GCC Patches

On Thu, Nov 17, 2022 at 07:06:34PM -0500, Jason Merrill wrote:
> On 11/16/22 20:12, Marek Polacek wrote:
> > On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
> > > On 11/15/22 19:35, Marek Polacek wrote:
> > > > On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
> > > > > On 11/12/22 06:53, Marek Polacek wrote:
> > > > > > In this PR, we are crashing because we've encountered a UDL where a
> > > > > > string-literal is expected.  This patch makes the parser reject string
> > > > > > and character UDLs in all places where the grammar requires a
> > > > > > string-literal and not a user-defined-string-literal.
> > > > > > 
> > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > > 
> > > > > Since the grammar has
> > > > > 
> > > > > user-defined-string-literal :
> > > > > 	string-literal ud-suffix
> > > > > 
> > > > > maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
> > > > > that calls cp_parser_string_literal?
> > > > 
> > > > Umm, maybe, but the UDL handling code seems to be too entrenched in
> > > > cp_parser_string_literal and I don't think it's going to be easy to extract
> > > > it :/.
> > > 
> > > Fair enough; maybe a wrapper, then?
> > 
> > As in, have a cp_parser_udl_string_literal wrapper that calls
> > cp_parser_string_literal with udl_ok=true, rename cp_parser_string_literal,
> > introduce a new cp_parser_string_literal wrapper that passes udl_ok=false?
> 
> That's what I was thinking.  And the new cp_parser_string_literal could also
> omit the lookup_udlit parm.
> 
> > One problem with cp_parser_udl_string_literal is that it's too similar to
> > cp_parser_userdef_string_literal, which would be confusing, I think.
> 
> True, probably better to use that name instead, and rename the current one
> to something like finish_userdef_string_literal

Sounds good, here's the patch.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected.  This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.

I've introduced two new wrappers, cp_parser_string_literal and
cp_parser_userdef_string_literal; the existing cp_parser_string_literal
was renamed to cp_parser_string_literal_common and should not be called
directly.  The old cp_parser_userdef_string_literal was renamed to
finish_userdef_string_literal.

	PR c++/105300

gcc/c-family/ChangeLog:

	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.

gcc/cp/ChangeLog:

	* parser.cc: Remove unnecessary forward declarations.
	(cp_parser_string_literal): New wrapper.
	(cp_parser_string_literal_common): Renamed from
	cp_parser_string_literal.  Add a bool parameter.  Give an error when
	UDLs are not permitted.
	(cp_parser_userdef_string_literal): New wrapper.
	(finish_userdef_string_literal): Renamed from
	cp_parser_userdef_string_literal.
	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
	instead of cp_parser_string_literal.
	(cp_parser_linkage_specification): Move a variable declaration closer
	to its first use.
	(cp_parser_static_assert): Likewise.
	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
	cp_parser_string_literal.
	(cp_parser_asm_definition): Move a variable declaration closer to its
	first use.
	(cp_parser_asm_specification_opt): Move variable declarations closer to
	their first use.
	(cp_parser_asm_operand_list): Likewise.
	(cp_parser_asm_clobber_list): Likewise.

gcc/testsuite/ChangeLog:

	* g++.dg/cpp0x/udlit-error1.C: New test.
---
 gcc/c-family/c-pragma.cc                  |   3 +
 gcc/cp/parser.cc                          | 131 ++++++++++++++--------
 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
 3 files changed, 111 insertions(+), 44 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C

diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 142a46441ac..49f405b605b 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
     }
   else if (token == CPP_STRING)
     message = x;
+  else if (token == CPP_STRING_USERDEF)
+    GCC_BAD ("string literal with user-defined suffix is invalid in this "
+	     "context");
   else
     GCC_BAD ("expected a string after %<#pragma message%>");
 
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index c5929a6cc5f..e3bd94ffe11 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2223,16 +2223,8 @@ pop_unparsed_function_queues (cp_parser *parser)
 
 /* Lexical conventions [gram.lex]  */
 
-static cp_expr cp_parser_identifier
-  (cp_parser *);
-static cp_expr cp_parser_string_literal
-  (cp_parser *, bool, bool, bool);
-static cp_expr cp_parser_userdef_char_literal
-  (cp_parser *);
-static tree cp_parser_userdef_string_literal
+static tree finish_userdef_string_literal
   (tree);
-static cp_expr cp_parser_userdef_numeric_literal
-  (cp_parser *);
 
 /* Basic concepts [gram.basic]  */
 
@@ -4398,11 +4390,15 @@ cp_parser_identifier (cp_parser* parser)
     return error_mark_node;
 }
 
-/* Parse a sequence of adjacent string constants.  Returns a
+/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
+   Do not call this directly; use either of the above.
+
+   Parse a sequence of adjacent string constants.  Return a
    TREE_STRING representing the combined, nul-terminated string
    constant.  If TRANSLATE is true, translate the string to the
    execution character set.  If WIDE_OK is true, a wide string is
-   invalid here.
+   valid here.  If UDL_OK is true, a string literal with user-defined
+   suffix can be used in this context.
 
    C++98 [lex.string] says that if a narrow string literal token is
    adjacent to a wide string literal token, the behavior is undefined.
@@ -4412,9 +4408,11 @@ cp_parser_identifier (cp_parser* parser)
    This code is largely lifted from lex_string() in c-lex.cc.
 
    FUTURE: ObjC++ will need to handle @-strings here.  */
+
 static cp_expr
-cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
-			  bool lookup_udlit = true)
+cp_parser_string_literal_common (cp_parser *parser, bool translate,
+				 bool wide_ok, bool udl_ok,
+				 bool lookup_udlit)
 {
   tree value;
   size_t count;
@@ -4439,6 +4437,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 
   if (cpp_userdef_string_p (tok->type))
     {
+      if (!udl_ok)
+	{
+	  error_at (loc, "string literal with user-defined suffix "
+		    "is invalid in this context");
+	  return error_mark_node;
+	}
       string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
       curr_type = cpp_userdef_string_remove_type (tok->type);
       curr_tok_is_userdef_p = true;
@@ -4529,6 +4533,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  tok = cp_lexer_peek_token (parser->lexer);
 	  if (cpp_userdef_string_p (tok->type))
 	    {
+	      if (!udl_ok)
+		{
+		  error_at (loc, "string literal with user-defined suffix "
+			    "is invalid in this context");
+		  return error_mark_node;
+		}
 	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
 	      curr_type = cpp_userdef_string_remove_type (tok->type);
 	      curr_tok_is_userdef_p = true;
@@ -4598,7 +4608,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  tree literal = build_userdef_literal (suffix_id, value,
 						OT_NONE, NULL_TREE);
 	  if (lookup_udlit)
-	    value = cp_parser_userdef_string_literal (literal);
+	    value = finish_userdef_string_literal (literal);
 	  else
 	    value = literal;
 	}
@@ -4616,6 +4626,38 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
   return cp_expr (value, loc);
 }
 
+/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
+   representing the combined, nul-terminated string constant.  If
+   TRANSLATE is true, translate the string to the execution character set.
+   If WIDE_OK is true, a wide string is valid here.
+
+   This function issues an error if a user defined string literal is
+   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
+
+static inline cp_expr
+cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
+{
+  return cp_parser_string_literal_common (parser, translate, wide_ok,
+					  /*udl_ok=*/false,
+					  /*lookup_udlit=*/false);
+}
+
+/* Parse a string literal or user defined string literal.
+
+   user-defined-string-literal :
+     string-literal ud-suffix
+
+   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
+   a lookup for a suitable template function.  */
+
+static inline cp_expr
+cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
+				  bool wide_ok, bool lookup_udlit = true)
+{
+  return cp_parser_string_literal_common (parser, translate, wide_ok,
+					  /*udl_ok=*/true, lookup_udlit);
+}
+
 /* Look up a literal operator with the name and the exact arguments.  */
 
 static tree
@@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
    as arguments.  */
 
 static tree
-cp_parser_userdef_string_literal (tree literal)
+finish_userdef_string_literal (tree literal)
 {
   tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
   tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
@@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
     case CPP_UTF8STRING_USERDEF:
       /* ??? Should wide strings be allowed when parser->translate_strings_p
 	 is false (i.e. in attributes)?  If not, we can kill the third
-	 argument to cp_parser_string_literal.  */
-      return (cp_parser_string_literal (parser,
-					parser->translate_strings_p,
-					true)
+	 argument to cp_parser_{,userdef}string_literal.  */
+      return (cp_parser_userdef_string_literal (parser,
+						parser->translate_strings_p,
+						/*wide_ok=*/true)
 	      .maybe_add_location_wrapper ());
 
     case CPP_OPEN_PAREN:
@@ -16161,15 +16203,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
 static void
 cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
 {
-  tree linkage;
-
   /* Look for the `extern' keyword.  */
   cp_token *extern_token
     = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
 
   /* Look for the string-literal.  */
   cp_token *string_token = cp_lexer_peek_token (parser->lexer);
-  linkage = cp_parser_string_literal (parser, false, false);
+  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+					   /*wide_ok=*/false);
 
   /* Transform the literal into an identifier.  If the literal is a
      wide-character string, or contains embedded NULs, then we can't
@@ -16299,9 +16340,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
       cp_parser_require (parser, CPP_COMMA, RT_COMMA);
 
       /* Parse the string-literal message.  */
-      message = cp_parser_string_literal (parser,
-                                	  /*translate=*/false,
-                                	  /*wide_ok=*/true);
+      message = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/true);
 
       /* A `)' completes the static assertion.  */
       if (!parens.require_close (parser))
@@ -17349,7 +17389,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
     case CPP_STRING16_USERDEF:
     case CPP_STRING32_USERDEF:
       {
-	cp_expr str;
 	tree string_tree;
 	int sz, len;
 
@@ -17357,8 +17396,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
 	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
 
 	/* Consume the string.  */
-	str = cp_parser_string_literal (parser, /*translate=*/true,
-				      /*wide_ok=*/true, /*lookup_udlit=*/false);
+	cp_expr str = cp_parser_userdef_string_literal (parser,
+							/*translate=*/true,
+							/*wide_ok=*/true,
+							/*lookup_udlit=*/false);
 	if (str == error_mark_node)
 	  return error_mark_node;
 	else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -21975,7 +22016,6 @@ cp_parser_using_directive (cp_parser* parser)
 static void
 cp_parser_asm_definition (cp_parser* parser)
 {
-  tree string;
   tree outputs = NULL_TREE;
   tree inputs = NULL_TREE;
   tree clobbers = NULL_TREE;
@@ -22083,7 +22123,8 @@ cp_parser_asm_definition (cp_parser* parser)
   if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
     return;
   /* Look for the string.  */
-  string = cp_parser_string_literal (parser, false, false);
+  tree string = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/false);
   if (string == error_mark_node)
     {
       cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28485,11 +28526,8 @@ cp_parser_yield_expression (cp_parser* parser)
 static tree
 cp_parser_asm_specification_opt (cp_parser* parser)
 {
-  cp_token *token;
-  tree asm_specification;
-
   /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
+  cp_token *token = cp_lexer_peek_token (parser->lexer);
   /* If the next token isn't the `asm' keyword, then there's no
      asm-specification.  */
   if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28502,7 +28540,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
   parens.require_open (parser);
 
   /* Look for the string-literal.  */
-  asm_specification = cp_parser_string_literal (parser, false, false);
+  tree asm_specification = cp_parser_string_literal (parser,
+						     /*translate=*/false,
+						     /*wide_ok=*/false);
 
   /* Look for the `)'.  */
   parens.require_close (parser);
@@ -28535,8 +28575,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-      tree expression;
       tree name;
 
       if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28554,13 +28592,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
       else
 	name = NULL_TREE;
       /* Look for the string-literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
 
       /* Look for the `('.  */
       matching_parens parens;
       parens.require_open (parser);
       /* Parse the expression.  */
-      expression = cp_parser_expression (parser);
+      tree expression = cp_parser_expression (parser);
       /* Look for the `)'.  */
       parens.require_close (parser);
 
@@ -28600,10 +28640,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-
       /* Look for the string literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
       /* Add it to the list.  */
       clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
       /* If the next token is not a `,', then the list is
@@ -45763,7 +45803,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
 		      cp_lexer_consume_token (parser->lexer);
 		    }
 		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
-		    value = cp_parser_string_literal (parser, false, false);
+		    value = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
 		  else
 		    {
 		      cp_parser_error (parser, "expected identifier or "
@@ -48785,7 +48827,8 @@ pragma_lex (tree *value, location_t *loc)
   if (ret == CPP_PRAGMA_EOL)
     ret = CPP_EOF;
   else if (ret == CPP_STRING)
-    *value = cp_parser_string_literal (the_parser, false, false);
+    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+				       /*wide_ok=*/false);
   else
     {
       if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x		  // { dg-error "include expects" }
+#line ""_x		  // { dg-error "not a positive integer" }
+#if __has_include(""_x)	  // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}

base-commit: 1c4a7881c492790eaad9aec1bcc2c1370f41740f
-- 
2.38.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-18 23:52           ` [PATCH v2] " Marek Polacek
@ 2022-11-19  1:39             ` Jason Merrill
  2022-12-02 23:58               ` [PATCH v3] " Marek Polacek
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-19  1:39 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches

On 11/18/22 18:52, Marek Polacek wrote:
> On Thu, Nov 17, 2022 at 07:06:34PM -0500, Jason Merrill wrote:
>> On 11/16/22 20:12, Marek Polacek wrote:
>>> On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
>>>> On 11/15/22 19:35, Marek Polacek wrote:
>>>>> On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
>>>>>> On 11/12/22 06:53, Marek Polacek wrote:
>>>>>>> In this PR, we are crashing because we've encountered a UDL where a
>>>>>>> string-literal is expected.  This patch makes the parser reject string
>>>>>>> and character UDLs in all places where the grammar requires a
>>>>>>> string-literal and not a user-defined-string-literal.
>>>>>>>
>>>>>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>>>>>
>>>>>> Since the grammar has
>>>>>>
>>>>>> user-defined-string-literal :
>>>>>> 	string-literal ud-suffix
>>>>>>
>>>>>> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
>>>>>> that calls cp_parser_string_literal?
>>>>>
>>>>> Umm, maybe, but the UDL handling code seems to be too entrenched in
>>>>> cp_parser_string_literal and I don't think it's going to be easy to extract
>>>>> it :/.
>>>>
>>>> Fair enough; maybe a wrapper, then?
>>>
>>> As in, have a cp_parser_udl_string_literal wrapper that calls
>>> cp_parser_string_literal with udl_ok=true, rename cp_parser_string_literal,
>>> introduce a new cp_parser_string_literal wrapper that passes udl_ok=false?
>>
>> That's what I was thinking.  And the new cp_parser_string_literal could also
>> omit the lookup_udlit parm.
>>
>>> One problem with cp_parser_udl_string_literal is that it's too similar to
>>> cp_parser_userdef_string_literal, which would be confusing, I think.
>>
>> True, probably better to use that name instead, and rename the current one
>> to something like finish_userdef_string_literal
> 
> Sounds good, here's the patch.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected.  This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
> 
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly.  finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
> 
> 	PR c++/105300
> 
> gcc/c-family/ChangeLog:
> 
> 	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
> 
> gcc/cp/ChangeLog:
> 
> 	* parser.cc: Remove unnecessary forward declarations.
> 	(cp_parser_string_literal): New wrapper.
> 	(cp_parser_string_literal_common): Renamed from
> 	cp_parser_string_literal.  Add a bool parameter.  Give an error when
> 	UDLs are not permitted.
> 	(cp_parser_userdef_string_literal): New wrapper.
> 	(finish_userdef_string_literal): Renamed from
> 	cp_parser_userdef_string_literal.
> 	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> 	instead of cp_parser_string_literal.
> 	(cp_parser_linkage_specification): Move a variable declaration closer
> 	to its first use.
> 	(cp_parser_static_assert): Likewise.
> 	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> 	cp_parser_string_literal.
> 	(cp_parser_asm_definition): Move a variable declaration closer to its
> 	first use.
> 	(cp_parser_asm_specification_opt): Move variable declarations closer to
> 	their first use.
> 	(cp_parser_asm_operand_list): Likewise.
> 	(cp_parser_asm_clobber_list): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/cpp0x/udlit-error1.C: New test.
> ---
>   gcc/c-family/c-pragma.cc                  |   3 +
>   gcc/cp/parser.cc                          | 131 ++++++++++++++--------
>   gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
>   3 files changed, 111 insertions(+), 44 deletions(-)
>   create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> 
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 142a46441ac..49f405b605b 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
>       }
>     else if (token == CPP_STRING)
>       message = x;
> +  else if (token == CPP_STRING_USERDEF)
> +    GCC_BAD ("string literal with user-defined suffix is invalid in this "
> +	     "context");
>     else
>       GCC_BAD ("expected a string after %<#pragma message%>");
>   
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index c5929a6cc5f..e3bd94ffe11 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2223,16 +2223,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>   
>   /* Lexical conventions [gram.lex]  */
>   
> -static cp_expr cp_parser_identifier
> -  (cp_parser *);
> -static cp_expr cp_parser_string_literal
> -  (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> -  (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
>     (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> -  (cp_parser *);
>   
>   /* Basic concepts [gram.basic]  */
>   
> @@ -4398,11 +4390,15 @@ cp_parser_identifier (cp_parser* parser)
>       return error_mark_node;
>   }
>   
> -/* Parse a sequence of adjacent string constants.  Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> +   Do not call this directly; use either of the above.
> +
> +   Parse a sequence of adjacent string constants.  Return a
>      TREE_STRING representing the combined, nul-terminated string
>      constant.  If TRANSLATE is true, translate the string to the
>      execution character set.  If WIDE_OK is true, a wide string is
> -   invalid here.
> +   valid here.  If UDL_OK is true, a string literal with user-defined
> +   suffix can be used in this context.
>   
>      C++98 [lex.string] says that if a narrow string literal token is
>      adjacent to a wide string literal token, the behavior is undefined.
> @@ -4412,9 +4408,11 @@ cp_parser_identifier (cp_parser* parser)
>      This code is largely lifted from lex_string() in c-lex.cc.
>   
>      FUTURE: ObjC++ will need to handle @-strings here.  */
> +
>   static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> -			  bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> +				 bool wide_ok, bool udl_ok,
> +				 bool lookup_udlit)
>   {
>     tree value;
>     size_t count;
> @@ -4439,6 +4437,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   
>     if (cpp_userdef_string_p (tok->type))
>       {
> +      if (!udl_ok)
> +	{
> +	  error_at (loc, "string literal with user-defined suffix "
> +		    "is invalid in this context");
> +	  return error_mark_node;
> +	}
>         string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>         curr_type = cpp_userdef_string_remove_type (tok->type);
>         curr_tok_is_userdef_p = true;
> @@ -4529,6 +4533,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   	  tok = cp_lexer_peek_token (parser->lexer);
>   	  if (cpp_userdef_string_p (tok->type))
>   	    {
> +	      if (!udl_ok)
> +		{
> +		  error_at (loc, "string literal with user-defined suffix "
> +			    "is invalid in this context");
> +		  return error_mark_node;
> +		}
>   	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>   	      curr_type = cpp_userdef_string_remove_type (tok->type);
>   	      curr_tok_is_userdef_p = true;
> @@ -4598,7 +4608,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   	  tree literal = build_userdef_literal (suffix_id, value,
>   						OT_NONE, NULL_TREE);
>   	  if (lookup_udlit)
> -	    value = cp_parser_userdef_string_literal (literal);
> +	    value = finish_userdef_string_literal (literal);
>   	  else
>   	    value = literal;
>   	}
> @@ -4616,6 +4626,38 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>     return cp_expr (value, loc);
>   }
>   
> +/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
> +   representing the combined, nul-terminated string constant.  If
> +   TRANSLATE is true, translate the string to the execution character set.
> +   If WIDE_OK is true, a wide string is valid here.
> +
> +   This function issues an error if a user defined string literal is
> +   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> +					  /*udl_ok=*/false,
> +					  /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> +   user-defined-string-literal :
> +     string-literal ud-suffix
> +
> +   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
> +   a lookup for a suitable template function.  */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> +				  bool wide_ok, bool lookup_udlit = true)

I think this function doesn't need the translate and wide_ok parms; they 
can always be true.

> +{
> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> +					  /*udl_ok=*/true, lookup_udlit);
> +}
> +
>   /* Look up a literal operator with the name and the exact arguments.  */
>   
>   static tree
> @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>      as arguments.  */
>   
>   static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
>   {
>     tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>     tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
>       case CPP_UTF8STRING_USERDEF:
>         /* ??? Should wide strings be allowed when parser->translate_strings_p
>   	 is false (i.e. in attributes)?  If not, we can kill the third
> -	 argument to cp_parser_string_literal.  */

I think the answer to this old question is no: if we have an 
encoding-prefix, we should be translating.

> -      return (cp_parser_string_literal (parser,
> -					parser->translate_strings_p,
> -					true)
> +	 argument to cp_parser_{,userdef}string_literal.  */
> +      return (cp_parser_userdef_string_literal (parser,
> +						parser->translate_strings_p,
> +						/*wide_ok=*/true)

For CPP_*STRING* without _USERDEF, we should still call 
cp_parser_string_literal.

>   	      .maybe_add_location_wrapper ());
>   
>       case CPP_OPEN_PAREN:
> @@ -16161,15 +16203,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
>   static void
>   cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
>   {
> -  tree linkage;
> -
>     /* Look for the `extern' keyword.  */
>     cp_token *extern_token
>       = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>   
>     /* Look for the string-literal.  */
>     cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> -  linkage = cp_parser_string_literal (parser, false, false);
> +  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> +					   /*wide_ok=*/false);
>   
>     /* Transform the literal into an identifier.  If the literal is a
>        wide-character string, or contains embedded NULs, then we can't
> @@ -16299,9 +16340,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
>         cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>   
>         /* Parse the string-literal message.  */
> -      message = cp_parser_string_literal (parser,
> -                                	  /*translate=*/false,
> -                                	  /*wide_ok=*/true);
> +      message = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/true);
>   
>         /* A `)' completes the static assertion.  */
>         if (!parens.require_close (parser))
> @@ -17349,7 +17389,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>       case CPP_STRING16_USERDEF:
>       case CPP_STRING32_USERDEF:
>         {
> -	cp_expr str;
>   	tree string_tree;
>   	int sz, len;
>   
> @@ -17357,8 +17396,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>   	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>   
>   	/* Consume the string.  */
> -	str = cp_parser_string_literal (parser, /*translate=*/true,
> -				      /*wide_ok=*/true, /*lookup_udlit=*/false);
> +	cp_expr str = cp_parser_userdef_string_literal (parser,
> +							/*translate=*/true,
> +							/*wide_ok=*/true,
> +							/*lookup_udlit=*/false);
>   	if (str == error_mark_node)
>   	  return error_mark_node;
>   	else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -21975,7 +22016,6 @@ cp_parser_using_directive (cp_parser* parser)
>   static void
>   cp_parser_asm_definition (cp_parser* parser)
>   {
> -  tree string;
>     tree outputs = NULL_TREE;
>     tree inputs = NULL_TREE;
>     tree clobbers = NULL_TREE;
> @@ -22083,7 +22123,8 @@ cp_parser_asm_definition (cp_parser* parser)
>     if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
>       return;
>     /* Look for the string.  */
> -  string = cp_parser_string_literal (parser, false, false);
> +  tree string = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/false);
>     if (string == error_mark_node)
>       {
>         cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28485,11 +28526,8 @@ cp_parser_yield_expression (cp_parser* parser)
>   static tree
>   cp_parser_asm_specification_opt (cp_parser* parser)
>   {
> -  cp_token *token;
> -  tree asm_specification;
> -
>     /* Peek at the next token.  */
> -  token = cp_lexer_peek_token (parser->lexer);
> +  cp_token *token = cp_lexer_peek_token (parser->lexer);
>     /* If the next token isn't the `asm' keyword, then there's no
>        asm-specification.  */
>     if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28502,7 +28540,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
>     parens.require_open (parser);
>   
>     /* Look for the string-literal.  */
> -  asm_specification = cp_parser_string_literal (parser, false, false);
> +  tree asm_specification = cp_parser_string_literal (parser,
> +						     /*translate=*/false,
> +						     /*wide_ok=*/false);
>   
>     /* Look for the `)'.  */
>     parens.require_close (parser);
> @@ -28535,8 +28575,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -      tree expression;
>         tree name;
>   
>         if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28554,13 +28592,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
>         else
>   	name = NULL_TREE;
>         /* Look for the string-literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>   
>         /* Look for the `('.  */
>         matching_parens parens;
>         parens.require_open (parser);
>         /* Parse the expression.  */
> -      expression = cp_parser_expression (parser);
> +      tree expression = cp_parser_expression (parser);
>         /* Look for the `)'.  */
>         parens.require_close (parser);
>   
> @@ -28600,10 +28640,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -
>         /* Look for the string literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>         /* Add it to the list.  */
>         clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
>         /* If the next token is not a `,', then the list is
> @@ -45763,7 +45803,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
>   		      cp_lexer_consume_token (parser->lexer);
>   		    }
>   		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> -		    value = cp_parser_string_literal (parser, false, false);
> +		    value = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>   		  else
>   		    {
>   		      cp_parser_error (parser, "expected identifier or "
> @@ -48785,7 +48827,8 @@ pragma_lex (tree *value, location_t *loc)
>     if (ret == CPP_PRAGMA_EOL)
>       ret = CPP_EOF;
>     else if (ret == CPP_STRING)
> -    *value = cp_parser_string_literal (the_parser, false, false);
> +    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> +				       /*wide_ok=*/false);
>     else
>       {
>         if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x		  // { dg-error "include expects" }
> +#line ""_x		  // { dg-error "not a positive integer" }
> +#if __has_include(""_x)	  // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> +  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
> 
> base-commit: 1c4a7881c492790eaad9aec1bcc2c1370f41740f


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v3] c++: Reject UDLs in certain contexts [PR105300]
  2022-11-19  1:39             ` Jason Merrill
@ 2022-12-02 23:58               ` Marek Polacek
  2022-12-03 19:58                 ` Jason Merrill
  0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-12-02 23:58 UTC (permalink / raw)
  To: Jason Merrill; +Cc: GCC Patches

On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
> On 11/18/22 18:52, Marek Polacek wrote:
> > +/* Parse a string literal or user defined string literal.
> > +
> > +   user-defined-string-literal :
> > +     string-literal ud-suffix
> > +
> > +   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
> > +   a lookup for a suitable template function.  */
> > +
> > +static inline cp_expr
> > +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> > +				  bool wide_ok, bool lookup_udlit = true)
> 
> I think this function doesn't need the translate and wide_ok parms, they can
> always be true.

I've dropped the wide_ok one, but not the other, because...
 
> > +{
> > +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> > +					  /*udl_ok=*/true, lookup_udlit);
> > +}
> > +
> >   /* Look up a literal operator with the name and the exact arguments.  */
> >   static tree
> > @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> >      as arguments.  */
> >   static tree
> > -cp_parser_userdef_string_literal (tree literal)
> > +finish_userdef_string_literal (tree literal)
> >   {
> >     tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> >     tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> > @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> >       case CPP_UTF8STRING_USERDEF:
> >         /* ??? Should wide strings be allowed when parser->translate_strings_p
> >   	 is false (i.e. in attributes)?  If not, we can kill the third
> > -	 argument to cp_parser_string_literal.  */
> 
> I think the answer to this old question is no: if we have an
> encoding-prefix, we should be translating.

...I don't actually know how to resolve this.  wide_ok is always true here.
Should that change?  Or rather, should translate be false for CPP_STRING only?

> > -      return (cp_parser_string_literal (parser,
> > -					parser->translate_strings_p,
> > -					true)
> > +	 argument to cp_parser_{,userdef}string_literal.  */
> > +      return (cp_parser_userdef_string_literal (parser,
> > +						parser->translate_strings_p,
> > +						/*wide_ok=*/true)
> 
> For CPP_*STRING* without _USERDEF, we should still call
> cp_parser_string_literal.

It looks like we always have to call cp_parser_userdef_string_literal,
otherwise this would be rejected:

  std::string concat01 = "Hello, " "World!"_www;

Because first we see a CPP_STRING but the subsequent UDL shouldn't
be rejected.

So here's an updated version which drops the always-true parameter but
doesn't resolve the old question.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected.  This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.

I've introduced two new wrappers; the existing cp_parser_string_literal
was renamed to cp_parser_string_literal_common and should not be called
directly.  finish_userdef_string_literal is renamed from
cp_parser_userdef_string_literal.

	PR c++/105300

gcc/c-family/ChangeLog:

	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.

gcc/cp/ChangeLog:

	* parser.cc: Remove unnecessary forward declarations.
	(cp_parser_string_literal): New wrapper.
	(cp_parser_string_literal_common): Renamed from
	cp_parser_string_literal.  Add a bool parameter.  Give an error when
	UDLs are not permitted.
	(cp_parser_userdef_string_literal): New wrapper.
	(finish_userdef_string_literal): Renamed from
	cp_parser_userdef_string_literal.
	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
	instead of cp_parser_string_literal.
	(cp_parser_linkage_specification): Move a variable declaration closer
	to its first use.
	(cp_parser_static_assert): Likewise.
	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
	cp_parser_string_literal.
	(cp_parser_asm_definition): Move a variable declaration closer to its
	first use.
	(cp_parser_asm_specification_opt): Move variable declarations closer to
	their first use.
	(cp_parser_asm_operand_list): Likewise.
	(cp_parser_asm_clobber_list): Likewise.

gcc/testsuite/ChangeLog:

	* g++.dg/cpp0x/udlit-error1.C: New test.
---
 gcc/c-family/c-pragma.cc                  |   3 +
 gcc/cp/parser.cc                          | 128 ++++++++++++++--------
 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
 3 files changed, 109 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C

diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 142a46441ac..49f405b605b 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
     }
   else if (token == CPP_STRING)
     message = x;
+  else if (token == CPP_STRING_USERDEF)
+    GCC_BAD ("string literal with user-defined suffix is invalid in this "
+	     "context");
   else
     GCC_BAD ("expected a string after %<#pragma message%>");
 
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index e8a50904243..de3eff90871 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
 
 /* Lexical conventions [gram.lex]  */
 
-static cp_expr cp_parser_identifier
-  (cp_parser *);
-static cp_expr cp_parser_string_literal
-  (cp_parser *, bool, bool, bool);
-static cp_expr cp_parser_userdef_char_literal
-  (cp_parser *);
-static tree cp_parser_userdef_string_literal
+static tree finish_userdef_string_literal
   (tree);
-static cp_expr cp_parser_userdef_numeric_literal
-  (cp_parser *);
 
 /* Basic concepts [gram.basic]  */
 
@@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
     return error_mark_node;
 }
 
-/* Parse a sequence of adjacent string constants.  Returns a
+/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
+   Do not call this directly; use either of the above.
+
+   Parse a sequence of adjacent string constants.  Return a
    TREE_STRING representing the combined, nul-terminated string
    constant.  If TRANSLATE is true, translate the string to the
    execution character set.  If WIDE_OK is true, a wide string is
-   invalid here.
+   valid here.  If UDL_OK is true, a string literal with user-defined
+   suffix can be used in this context.
 
    C++98 [lex.string] says that if a narrow string literal token is
    adjacent to a wide string literal token, the behavior is undefined.
@@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
    This code is largely lifted from lex_string() in c-lex.cc.
 
    FUTURE: ObjC++ will need to handle @-strings here.  */
+
 static cp_expr
-cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
-			  bool lookup_udlit = true)
+cp_parser_string_literal_common (cp_parser *parser, bool translate,
+				 bool wide_ok, bool udl_ok,
+				 bool lookup_udlit)
 {
   tree value;
   size_t count;
@@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 
   if (cpp_userdef_string_p (tok->type))
     {
+      if (!udl_ok)
+	{
+	  error_at (loc, "string literal with user-defined suffix "
+		    "is invalid in this context");
+	  return error_mark_node;
+	}
       string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
       curr_type = cpp_userdef_string_remove_type (tok->type);
       curr_tok_is_userdef_p = true;
@@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  tok = cp_lexer_peek_token (parser->lexer);
 	  if (cpp_userdef_string_p (tok->type))
 	    {
+	      if (!udl_ok)
+		{
+		  error_at (loc, "string literal with user-defined suffix "
+			    "is invalid in this context");
+		  return error_mark_node;
+		}
 	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
 	      curr_type = cpp_userdef_string_remove_type (tok->type);
 	      curr_tok_is_userdef_p = true;
@@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  tree literal = build_userdef_literal (suffix_id, value,
 						OT_NONE, NULL_TREE);
 	  if (lookup_udlit)
-	    value = cp_parser_userdef_string_literal (literal);
+	    value = finish_userdef_string_literal (literal);
 	  else
 	    value = literal;
 	}
@@ -4626,6 +4636,39 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
   return cp_expr (value, loc);
 }
 
+/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
+   representing the combined, nul-terminated string constant.  If
+   TRANSLATE is true, translate the string to the execution character set.
+   If WIDE_OK is true, a wide string is valid here.
+
+   This function issues an error if a user defined string literal is
+   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
+
+static inline cp_expr
+cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
+{
+  return cp_parser_string_literal_common (parser, translate, wide_ok,
+					  /*udl_ok=*/false,
+					  /*lookup_udlit=*/false);
+}
+
+/* Parse a string literal or user defined string literal.
+
+   user-defined-string-literal :
+     string-literal ud-suffix
+
+   If TRANSLATE is true, translate the string to the execution character set.
+   If LOOKUP_UDLIT, perform a lookup for a suitable template function.  */
+
+static inline cp_expr
+cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
+				  bool lookup_udlit = true)
+{
+  return cp_parser_string_literal_common (parser, translate,
+					  /*wide_ok=*/true, /*udl_ok=*/true,
+					  lookup_udlit);
+}
+
 /* Look up a literal operator with the name and the exact arguments.  */
 
 static tree
@@ -4923,7 +4966,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
    as arguments.  */
 
 static tree
-cp_parser_userdef_string_literal (tree literal)
+finish_userdef_string_literal (tree literal)
 {
   tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
   tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
@@ -5663,9 +5706,8 @@ cp_parser_primary_expression (cp_parser *parser,
       /* ??? Should wide strings be allowed when parser->translate_strings_p
 	 is false (i.e. in attributes)?  If not, we can kill the third
 	 argument to cp_parser_string_literal.  */
-      return (cp_parser_string_literal (parser,
-					parser->translate_strings_p,
-					true)
+      return (cp_parser_userdef_string_literal (parser,
+						parser->translate_strings_p)
 	      .maybe_add_location_wrapper ());
 
     case CPP_OPEN_PAREN:
@@ -16219,15 +16261,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
 static void
 cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
 {
-  tree linkage;
-
   /* Look for the `extern' keyword.  */
   cp_token *extern_token
     = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
 
   /* Look for the string-literal.  */
   cp_token *string_token = cp_lexer_peek_token (parser->lexer);
-  linkage = cp_parser_string_literal (parser, false, false);
+  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+					   /*wide_ok=*/false);
 
   /* Transform the literal into an identifier.  If the literal is a
      wide-character string, or contains embedded NULs, then we can't
@@ -16357,9 +16398,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
       cp_parser_require (parser, CPP_COMMA, RT_COMMA);
 
       /* Parse the string-literal message.  */
-      message = cp_parser_string_literal (parser,
-                                	  /*translate=*/false,
-                                	  /*wide_ok=*/true);
+      message = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/true);
 
       /* A `)' completes the static assertion.  */
       if (!parens.require_close (parser))
@@ -17407,7 +17447,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
     case CPP_STRING16_USERDEF:
     case CPP_STRING32_USERDEF:
       {
-	cp_expr str;
 	tree string_tree;
 	int sz, len;
 
@@ -17415,8 +17454,9 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
 	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
 
 	/* Consume the string.  */
-	str = cp_parser_string_literal (parser, /*translate=*/true,
-				      /*wide_ok=*/true, /*lookup_udlit=*/false);
+	cp_expr str = cp_parser_userdef_string_literal (parser,
+							/*translate=*/true,
+							/*lookup_udlit=*/false);
 	if (str == error_mark_node)
 	  return error_mark_node;
 	else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -22033,7 +22073,6 @@ cp_parser_using_directive (cp_parser* parser)
 static void
 cp_parser_asm_definition (cp_parser* parser)
 {
-  tree string;
   tree outputs = NULL_TREE;
   tree inputs = NULL_TREE;
   tree clobbers = NULL_TREE;
@@ -22141,7 +22180,8 @@ cp_parser_asm_definition (cp_parser* parser)
   if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
     return;
   /* Look for the string.  */
-  string = cp_parser_string_literal (parser, false, false);
+  tree string = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/false);
   if (string == error_mark_node)
     {
       cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28604,11 +28644,8 @@ cp_parser_yield_expression (cp_parser* parser)
 static tree
 cp_parser_asm_specification_opt (cp_parser* parser)
 {
-  cp_token *token;
-  tree asm_specification;
-
   /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
+  cp_token *token = cp_lexer_peek_token (parser->lexer);
   /* If the next token isn't the `asm' keyword, then there's no
      asm-specification.  */
   if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28621,7 +28658,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
   parens.require_open (parser);
 
   /* Look for the string-literal.  */
-  asm_specification = cp_parser_string_literal (parser, false, false);
+  tree asm_specification = cp_parser_string_literal (parser,
+						     /*translate=*/false,
+						     /*wide_ok=*/false);
 
   /* Look for the `)'.  */
   parens.require_close (parser);
@@ -28654,8 +28693,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-      tree expression;
       tree name;
 
       if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28673,13 +28710,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
       else
 	name = NULL_TREE;
       /* Look for the string-literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
 
       /* Look for the `('.  */
       matching_parens parens;
       parens.require_open (parser);
       /* Parse the expression.  */
-      expression = cp_parser_expression (parser);
+      tree expression = cp_parser_expression (parser);
       /* Look for the `)'.  */
       parens.require_close (parser);
 
@@ -28719,10 +28758,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-
       /* Look for the string literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
       /* Add it to the list.  */
       clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
       /* If the next token is not a `,', then the list is
@@ -46294,7 +46333,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
 		      cp_lexer_consume_token (parser->lexer);
 		    }
 		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
-		    value = cp_parser_string_literal (parser, false, false);
+		    value = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
 		  else
 		    {
 		      cp_parser_error (parser, "expected identifier or "
@@ -49316,7 +49357,8 @@ pragma_lex (tree *value, location_t *loc)
   if (ret == CPP_PRAGMA_EOL)
     ret = CPP_EOF;
   else if (ret == CPP_STRING)
-    *value = cp_parser_string_literal (the_parser, false, false);
+    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+				       /*wide_ok=*/false);
   else
     {
       if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x		  // { dg-error "include expects" }
+#line ""_x		  // { dg-error "not a positive integer" }
+#if __has_include(""_x)	  // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}

base-commit: 36a4ee406b95ae24a59b8b3f8ebe29af6fd5261e
-- 
2.38.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] c++: Reject UDLs in certain contexts [PR105300]
  2022-12-02 23:58               ` [PATCH v3] " Marek Polacek
@ 2022-12-03 19:58                 ` Jason Merrill
  2023-01-13 23:22                   ` [PATCH v4] " Marek Polacek
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-12-03 19:58 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches

On 12/2/22 18:58, Marek Polacek wrote:
> On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
>> On 11/18/22 18:52, Marek Polacek wrote:
>>> +/* Parse a string literal or user defined string literal.
>>> +
>>> +   user-defined-string-literal :
>>> +     string-literal ud-suffix
>>> +
>>> +   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
>>> +   a lookup for a suitable template function.  */
>>> +
>>> +static inline cp_expr
>>> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
>>> +				  bool wide_ok, bool lookup_udlit = true)
>>
>> I think this function doesn't need the translate and wide_ok parms, they can
>> always be true.
> 
> I've dropped the wide_ok one, but not the other, because...
>   
>>> +{
>>> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
>>> +					  /*udl_ok=*/true, lookup_udlit);
>>> +}
>>> +
>>>    /* Look up a literal operator with the name and the exact arguments.  */
>>>    static tree
>>> @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>>>       as arguments.  */
>>>    static tree
>>> -cp_parser_userdef_string_literal (tree literal)
>>> +finish_userdef_string_literal (tree literal)
>>>    {
>>>      tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>>>      tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
>>> @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
>>>        case CPP_UTF8STRING_USERDEF:
>>>          /* ??? Should wide strings be allowed when parser->translate_strings_p
>>>    	 is false (i.e. in attributes)?  If not, we can kill the third
>>> -	 argument to cp_parser_string_literal.  */
>>
>> I think the answer to this old question is no: if we have an
>> encoding-prefix, we should be translating.
> 
> ...I don't actually know how to resolve this.  wide_ok is always true here.
> Should that change?  Or rather, should translate be false for CPP_STRING only?

The one current exception to my assertion above is static_assert, for 
which we currently allow encoding-prefixes but don't translate.  I think 
this is wrong, that we should translate the string.  But I'm not 
confident of that.

But to your question, yes: when translate is false, I think we also 
don't want to allow UDLs.  So _userdef can always pass true for 
translate.  And as below we should call it only when translate would be 
true.

Incidentally, it seems that we set translate off for all attributes, 
even ones that would take a normal expression argument where presumably 
we do want translation (and UDLs).  The whole business of different 
parsing for different attributes is a headache.  You don't need to deal 
with this now.

>>> -      return (cp_parser_string_literal (parser,
>>> -					parser->translate_strings_p,
>>> -					true)
>>> +	 argument to cp_parser_{,userdef}string_literal.  */
>>> +      return (cp_parser_userdef_string_literal (parser,
>>> +						parser->translate_strings_p,
>>> +						/*wide_ok=*/true)
>>
>> For CPP_*STRING* without _USERDEF, we should still call
>> cp_parser_string_literal.
> 
> It looks like we always have to call cp_parser_userdef_string_literal,
> otherwise this would be rejected:
> 
>    std::string concat01 = "Hello, " "World!"_www;
> 
> Because first we see a CPP_STRING but the subsequent UDL shouldn't
> be rejected.

Ah, I didn't notice the function was handling a sequence of 
string-literals.  So maybe we want to call _userdef here when 
translate_strings_p, and not when it's false.

> So here's an updated version which drops the always-true parameter but
> doesn't resolve the old question.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected.  This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
> 
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly.  finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
> 
> 	PR c++/105300
> 
> gcc/c-family/ChangeLog:
> 
> 	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
> 
> gcc/cp/ChangeLog:
> 
> 	* parser.cc: Remove unnecessary forward declarations.
> 	(cp_parser_string_literal): New wrapper.
> 	(cp_parser_string_literal_common): Renamed from
> 	cp_parser_string_literal.  Add a bool parameter.  Give an error when
> 	UDLs are not permitted.
> 	(cp_parser_userdef_string_literal): New wrapper.
> 	(finish_userdef_string_literal): Renamed from
> 	cp_parser_userdef_string_literal.
> 	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> 	instead of cp_parser_string_literal.
> 	(cp_parser_linkage_specification): Move a variable declaration closer
> 	to its first use.
> 	(cp_parser_static_assert): Likewise.
> 	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> 	cp_parser_string_literal.
> 	(cp_parser_asm_definition): Move a variable declaration closer to its
> 	first use.
> 	(cp_parser_asm_specification_opt): Move variable declarations closer to
> 	their first use.
> 	(cp_parser_asm_operand_list): Likewise.
> 	(cp_parser_asm_clobber_list): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/cpp0x/udlit-error1.C: New test.
> ---
>   gcc/c-family/c-pragma.cc                  |   3 +
>   gcc/cp/parser.cc                          | 128 ++++++++++++++--------
>   gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
>   3 files changed, 109 insertions(+), 43 deletions(-)
>   create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> 
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 142a46441ac..49f405b605b 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
>       }
>     else if (token == CPP_STRING)
>       message = x;
> +  else if (token == CPP_STRING_USERDEF)
> +    GCC_BAD ("string literal with user-defined suffix is invalid in this "
> +	     "context");
>     else
>       GCC_BAD ("expected a string after %<#pragma message%>");
>   
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index e8a50904243..de3eff90871 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>   
>   /* Lexical conventions [gram.lex]  */
>   
> -static cp_expr cp_parser_identifier
> -  (cp_parser *);
> -static cp_expr cp_parser_string_literal
> -  (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> -  (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
>     (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> -  (cp_parser *);
>   
>   /* Basic concepts [gram.basic]  */
>   
> @@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
>       return error_mark_node;
>   }
>   
> -/* Parse a sequence of adjacent string constants.  Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> +   Do not call this directly; use either of the above.
> +
> +   Parse a sequence of adjacent string constants.  Return a
>      TREE_STRING representing the combined, nul-terminated string
>      constant.  If TRANSLATE is true, translate the string to the
>      execution character set.  If WIDE_OK is true, a wide string is
> -   invalid here.
> +   valid here.  If UDL_OK is true, a string literal with user-defined
> +   suffix can be used in this context.
>   
>      C++98 [lex.string] says that if a narrow string literal token is
>      adjacent to a wide string literal token, the behavior is undefined.
> @@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
>      This code is largely lifted from lex_string() in c-lex.cc.
>   
>      FUTURE: ObjC++ will need to handle @-strings here.  */
> +
>   static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> -			  bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> +				 bool wide_ok, bool udl_ok,
> +				 bool lookup_udlit)
>   {
>     tree value;
>     size_t count;
> @@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   
>     if (cpp_userdef_string_p (tok->type))
>       {
> +      if (!udl_ok)
> +	{
> +	  error_at (loc, "string literal with user-defined suffix "
> +		    "is invalid in this context");
> +	  return error_mark_node;
> +	}
>         string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>         curr_type = cpp_userdef_string_remove_type (tok->type);
>         curr_tok_is_userdef_p = true;
> @@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   	  tok = cp_lexer_peek_token (parser->lexer);
>   	  if (cpp_userdef_string_p (tok->type))
>   	    {
> +	      if (!udl_ok)
> +		{
> +		  error_at (loc, "string literal with user-defined suffix "
> +			    "is invalid in this context");
> +		  return error_mark_node;
> +		}
>   	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>   	      curr_type = cpp_userdef_string_remove_type (tok->type);
>   	      curr_tok_is_userdef_p = true;
> @@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   	  tree literal = build_userdef_literal (suffix_id, value,
>   						OT_NONE, NULL_TREE);
>   	  if (lookup_udlit)
> -	    value = cp_parser_userdef_string_literal (literal);
> +	    value = finish_userdef_string_literal (literal);
>   	  else
>   	    value = literal;
>   	}
> @@ -4626,6 +4636,39 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>     return cp_expr (value, loc);
>   }
>   
> +/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
> +   representing the combined, nul-terminated string constant.  If
> +   TRANSLATE is true, translate the string to the execution character set.
> +   If WIDE_OK is true, a wide string is valid here.
> +
> +   This function issues an error if a user defined string literal is
> +   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> +					  /*udl_ok=*/false,
> +					  /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> +   user-defined-string-literal :
> +     string-literal ud-suffix
> +
> +   If TRANSLATE is true, translate the string to the execution character set.
> +   If LOOKUP_UDLIT, perform a lookup for a suitable template function.  */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> +				  bool lookup_udlit = true)
> +{
> +  return cp_parser_string_literal_common (parser, translate,
> +					  /*wide_ok=*/true, /*udl_ok=*/true,
> +					  lookup_udlit);
> +}
> +
>   /* Look up a literal operator with the name and the exact arguments.  */
>   
>   static tree
> @@ -4923,7 +4966,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>      as arguments.  */
>   
>   static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
>   {
>     tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>     tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5663,9 +5706,8 @@ cp_parser_primary_expression (cp_parser *parser,
>         /* ??? Should wide strings be allowed when parser->translate_strings_p
>   	 is false (i.e. in attributes)?  If not, we can kill the third
>   	 argument to cp_parser_string_literal.  */
> -      return (cp_parser_string_literal (parser,
> -					parser->translate_strings_p,
> -					true)
> +      return (cp_parser_userdef_string_literal (parser,
> +						parser->translate_strings_p)
>   	      .maybe_add_location_wrapper ());
>   
>       case CPP_OPEN_PAREN:
> @@ -16219,15 +16261,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
>   static void
>   cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
>   {
> -  tree linkage;
> -
>     /* Look for the `extern' keyword.  */
>     cp_token *extern_token
>       = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>   
>     /* Look for the string-literal.  */
>     cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> -  linkage = cp_parser_string_literal (parser, false, false);
> +  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> +					   /*wide_ok=*/false);
>   
>     /* Transform the literal into an identifier.  If the literal is a
>        wide-character string, or contains embedded NULs, then we can't
> @@ -16357,9 +16398,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
>         cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>   
>         /* Parse the string-literal message.  */
> -      message = cp_parser_string_literal (parser,
> -                                	  /*translate=*/false,
> -                                	  /*wide_ok=*/true);
> +      message = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/true);
>   
>         /* A `)' completes the static assertion.  */
>         if (!parens.require_close (parser))
> @@ -17407,7 +17447,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>       case CPP_STRING16_USERDEF:
>       case CPP_STRING32_USERDEF:
>         {
> -	cp_expr str;
>   	tree string_tree;
>   	int sz, len;
>   
> @@ -17415,8 +17454,9 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>   	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>   
>   	/* Consume the string.  */
> -	str = cp_parser_string_literal (parser, /*translate=*/true,
> -				      /*wide_ok=*/true, /*lookup_udlit=*/false);
> +	cp_expr str = cp_parser_userdef_string_literal (parser,
> +							/*translate=*/true,
> +							/*lookup_udlit=*/false);
>   	if (str == error_mark_node)
>   	  return error_mark_node;
>   	else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -22033,7 +22073,6 @@ cp_parser_using_directive (cp_parser* parser)
>   static void
>   cp_parser_asm_definition (cp_parser* parser)
>   {
> -  tree string;
>     tree outputs = NULL_TREE;
>     tree inputs = NULL_TREE;
>     tree clobbers = NULL_TREE;
> @@ -22141,7 +22180,8 @@ cp_parser_asm_definition (cp_parser* parser)
>     if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
>       return;
>     /* Look for the string.  */
> -  string = cp_parser_string_literal (parser, false, false);
> +  tree string = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/false);
>     if (string == error_mark_node)
>       {
>         cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28604,11 +28644,8 @@ cp_parser_yield_expression (cp_parser* parser)
>   static tree
>   cp_parser_asm_specification_opt (cp_parser* parser)
>   {
> -  cp_token *token;
> -  tree asm_specification;
> -
>     /* Peek at the next token.  */
> -  token = cp_lexer_peek_token (parser->lexer);
> +  cp_token *token = cp_lexer_peek_token (parser->lexer);
>     /* If the next token isn't the `asm' keyword, then there's no
>        asm-specification.  */
>     if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28621,7 +28658,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
>     parens.require_open (parser);
>   
>     /* Look for the string-literal.  */
> -  asm_specification = cp_parser_string_literal (parser, false, false);
> +  tree asm_specification = cp_parser_string_literal (parser,
> +						     /*translate=*/false,
> +						     /*wide_ok=*/false);
>   
>     /* Look for the `)'.  */
>     parens.require_close (parser);
> @@ -28654,8 +28693,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -      tree expression;
>         tree name;
>   
>         if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28673,13 +28710,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
>         else
>   	name = NULL_TREE;
>         /* Look for the string-literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>   
>         /* Look for the `('.  */
>         matching_parens parens;
>         parens.require_open (parser);
>         /* Parse the expression.  */
> -      expression = cp_parser_expression (parser);
> +      tree expression = cp_parser_expression (parser);
>         /* Look for the `)'.  */
>         parens.require_close (parser);
>   
> @@ -28719,10 +28758,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -
>         /* Look for the string literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>         /* Add it to the list.  */
>         clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
>         /* If the next token is not a `,', then the list is
> @@ -46294,7 +46333,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
>   		      cp_lexer_consume_token (parser->lexer);
>   		    }
>   		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> -		    value = cp_parser_string_literal (parser, false, false);
> +		    value = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>   		  else
>   		    {
>   		      cp_parser_error (parser, "expected identifier or "
> @@ -49316,7 +49357,8 @@ pragma_lex (tree *value, location_t *loc)
>     if (ret == CPP_PRAGMA_EOL)
>       ret = CPP_EOF;
>     else if (ret == CPP_STRING)
> -    *value = cp_parser_string_literal (the_parser, false, false);
> +    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> +				       /*wide_ok=*/false);
>     else
>       {
>         if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x		  // { dg-error "include expects" }
> +#line ""_x		  // { dg-error "not a positive integer" }
> +#if __has_include(""_x)	  // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> +  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
> 
> base-commit: 36a4ee406b95ae24a59b8b3f8ebe29af6fd5261e


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v4] c++: Reject UDLs in certain contexts [PR105300]
  2022-12-03 19:58                 ` Jason Merrill
@ 2023-01-13 23:22                   ` Marek Polacek
  2023-01-24 22:49                     ` Marek Polacek
  2023-01-25 19:36                     ` Jason Merrill
  0 siblings, 2 replies; 13+ messages in thread
From: Marek Polacek @ 2023-01-13 23:22 UTC (permalink / raw)
  To: Jason Merrill; +Cc: GCC Patches

On Sat, Dec 03, 2022 at 02:58:16PM -0500, Jason Merrill wrote:
> On 12/2/22 18:58, Marek Polacek wrote:
> > On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
> > > On 11/18/22 18:52, Marek Polacek wrote:
> > > > +/* Parse a string literal or user defined string literal.
> > > > +
> > > > +   user-defined-string-literal :
> > > > +     string-literal ud-suffix
> > > > +
> > > > +   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
> > > > +   a lookup for a suitable template function.  */
> > > > +
> > > > +static inline cp_expr
> > > > +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> > > > +				  bool wide_ok, bool lookup_udlit = true)
> > > 
> > > I think this function doesn't need the translate and wide_ok parms, they can
> > > always be true.
> > 
> > I've dropped the wide_ok one, but not the other, because...
> > > > +{
> > > > +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> > > > +					  /*udl_ok=*/true, lookup_udlit);
> > > > +}
> > > > +
> > > >    /* Look up a literal operator with the name and the exact arguments.  */
> > > >    static tree
> > > > @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> > > >       as arguments.  */
> > > >    static tree
> > > > -cp_parser_userdef_string_literal (tree literal)
> > > > +finish_userdef_string_literal (tree literal)
> > > >    {
> > > >      tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> > > >      tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> > > > @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> > > >        case CPP_UTF8STRING_USERDEF:
> > > >          /* ??? Should wide strings be allowed when parser->translate_strings_p
> > > >    	 is false (i.e. in attributes)?  If not, we can kill the third
> > > > -	 argument to cp_parser_string_literal.  */
> > > 
> > > I think the answer to this old question is no: if we have an
> > > encoding-prefix, we should be translating.
> > 
> > ...I don't actually know how to resolve this.  wide_ok is always true here.
> > Should that change?  Or rather, should translate be false for CPP_STRING only?

Sorry it's taken so long to get back to this.
 
> The one current exception to my assertion above is static_assert, for which
> we currently allow encoding-prefixes but don't translate.  I think this is
> wrong, that we should translate the string.  But I'm not confident of that.
> 
> But to your question, yes: when translate is false, I think we also don't
> want to allow UDLs.  So _userdef can always pass true for translate.  And as
> below we should call it only when translate would be true.

Done: _userdef no longer has the translate parameter and it's only called
when parser->translate_strings_p.
 
> Incidentally, it seems that we set translate off for all attributes, even
> ones that would take a normal expression argument where presumably we do
> want translation (and UDLs).  The whole business of different parsing for
> different attributes is a headache.  You don't need to deal with this now.
> 
> > > > -      return (cp_parser_string_literal (parser,
> > > > -					parser->translate_strings_p,
> > > > -					true)
> > > > +	 argument to cp_parser_{,userdef}string_literal.  */
> > > > +      return (cp_parser_userdef_string_literal (parser,
> > > > +						parser->translate_strings_p,
> > > > +						/*wide_ok=*/true)
> > > 
> > > For CPP_*STRING* without _USERDEF, we should still call
> > > cp_parser_string_literal.
> > 
> > It looks like we always have to call cp_parser_userdef_string_literal
> > otherwise this would be reejcted:
> > 
> >    std::string concat01 = "Hello, " "World!"_www;
> > 
> > Because first we see a CPP_STRING but the subsequent UDL shouldn't
> > be rejected.
> 
> Ah, I didn't notice the function was handling a sequence of string-literals.
> So maybe we want to call _userdef here when translate_strings_p, and not
> when it's false.

Resolved by the change above.  Thanks,

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected.  This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.

I've introduced two new wrappers; the existing cp_parser_string_literal
was renamed to cp_parser_string_literal_common and should not be called
directly.  finish_userdef_string_literal is renamed from
cp_parser_userdef_string_literal.

	PR c++/105300

gcc/c-family/ChangeLog:

	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.

gcc/cp/ChangeLog:

	* parser.cc: Remove unnecessary forward declarations.
	(cp_parser_string_literal): New wrapper.
	(cp_parser_string_literal_common): Renamed from
	cp_parser_string_literal.  Add a bool parameter.  Give an error when
	UDLs are not permitted.
	(cp_parser_userdef_string_literal): New wrapper.
	(finish_userdef_string_literal): Renamed from
	cp_parser_userdef_string_literal.
	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
	instead of cp_parser_string_literal.
	(cp_parser_linkage_specification): Move a variable declaration closer
	to its first use.
	(cp_parser_static_assert): Likewise.
	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
	cp_parser_string_literal.
	(cp_parser_asm_definition): Move a variable declaration closer to its
	first use.
	(cp_parser_asm_specification_opt): Move variable declarations closer to
	their first use.
	(cp_parser_asm_operand_list): Likewise.
	(cp_parser_asm_clobber_list): Likewise.

gcc/testsuite/ChangeLog:

	* g++.dg/cpp0x/udlit-error1.C: New test.
---
 gcc/c-family/c-pragma.cc                  |   3 +
 gcc/cp/parser.cc                          | 133 +++++++++++++++-------
 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
 3 files changed, 113 insertions(+), 44 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C

diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 91fabf0a513..bba9172e8a1 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
     }
   else if (token == CPP_STRING)
     message = x;
+  else if (token == CPP_STRING_USERDEF)
+    GCC_BAD ("string literal with user-defined suffix is invalid in this "
+	     "context");
   else
     GCC_BAD ("expected a string after %<#pragma message%>");
 
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 8b1658decba..4b366d6c64f 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
 
 /* Lexical conventions [gram.lex]  */
 
-static cp_expr cp_parser_identifier
-  (cp_parser *);
-static cp_expr cp_parser_string_literal
-  (cp_parser *, bool, bool, bool);
-static cp_expr cp_parser_userdef_char_literal
-  (cp_parser *);
-static tree cp_parser_userdef_string_literal
+static tree finish_userdef_string_literal
   (tree);
-static cp_expr cp_parser_userdef_numeric_literal
-  (cp_parser *);
 
 /* Basic concepts [gram.basic]  */
 
@@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
     return error_mark_node;
 }
 
-/* Parse a sequence of adjacent string constants.  Returns a
+/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
+   Do not call this directly; use either of the above.
+
+   Parse a sequence of adjacent string constants.  Return a
    TREE_STRING representing the combined, nul-terminated string
    constant.  If TRANSLATE is true, translate the string to the
    execution character set.  If WIDE_OK is true, a wide string is
-   invalid here.
+   valid here.  If UDL_OK is true, a string literal with user-defined
+   suffix can be used in this context.
 
    C++98 [lex.string] says that if a narrow string literal token is
    adjacent to a wide string literal token, the behavior is undefined.
@@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
    This code is largely lifted from lex_string() in c-lex.cc.
 
    FUTURE: ObjC++ will need to handle @-strings here.  */
+
 static cp_expr
-cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
-			  bool lookup_udlit = true)
+cp_parser_string_literal_common (cp_parser *parser, bool translate,
+				 bool wide_ok, bool udl_ok,
+				 bool lookup_udlit)
 {
   tree value;
   size_t count;
@@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 
   if (cpp_userdef_string_p (tok->type))
     {
+      if (!udl_ok)
+	{
+	  error_at (loc, "string literal with user-defined suffix "
+		    "is invalid in this context");
+	  return error_mark_node;
+	}
       string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
       curr_type = cpp_userdef_string_remove_type (tok->type);
       curr_tok_is_userdef_p = true;
@@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  tok = cp_lexer_peek_token (parser->lexer);
 	  if (cpp_userdef_string_p (tok->type))
 	    {
+	      if (!udl_ok)
+		{
+		  error_at (loc, "string literal with user-defined suffix "
+			    "is invalid in this context");
+		  return error_mark_node;
+		}
 	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
 	      curr_type = cpp_userdef_string_remove_type (tok->type);
 	      curr_tok_is_userdef_p = true;
@@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  tree literal = build_userdef_literal (suffix_id, value,
 						OT_NONE, NULL_TREE);
 	  if (lookup_udlit)
-	    value = cp_parser_userdef_string_literal (literal);
+	    value = finish_userdef_string_literal (literal);
 	  else
 	    value = literal;
 	}
@@ -4626,6 +4636,37 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
   return cp_expr (value, loc);
 }
 
+/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
+   representing the combined, nul-terminated string constant.  If
+   TRANSLATE is true, translate the string to the execution character set.
+   If WIDE_OK is true, a wide string is valid here.
+
+   This function issues an error if a user defined string literal is
+   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
+
+static inline cp_expr
+cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
+{
+  return cp_parser_string_literal_common (parser, translate, wide_ok,
+					  /*udl_ok=*/false,
+					  /*lookup_udlit=*/false);
+}
+
+/* Parse a string literal or user defined string literal.
+
+   user-defined-string-literal :
+     string-literal ud-suffix
+
+   If LOOKUP_UDLIT, perform a lookup for a suitable template function.  */
+
+static inline cp_expr
+cp_parser_userdef_string_literal (cp_parser *parser, bool lookup_udlit)
+{
+  return cp_parser_string_literal_common (parser, /*translate=*/true,
+					  /*wide_ok=*/true, /*udl_ok=*/true,
+					  lookup_udlit);
+}
+
 /* Look up a literal operator with the name and the exact arguments.  */
 
 static tree
@@ -4923,7 +4964,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
    as arguments.  */
 
 static tree
-cp_parser_userdef_string_literal (tree literal)
+finish_userdef_string_literal (tree literal)
 {
   tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
   tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
@@ -5663,10 +5704,15 @@ cp_parser_primary_expression (cp_parser *parser,
       /* ??? Should wide strings be allowed when parser->translate_strings_p
 	 is false (i.e. in attributes)?  If not, we can kill the third
 	 argument to cp_parser_string_literal.  */
-      return (cp_parser_string_literal (parser,
-					parser->translate_strings_p,
-					true)
-	      .maybe_add_location_wrapper ());
+      if (parser->translate_strings_p)
+	return (cp_parser_userdef_string_literal (parser,
+						  /*lookup_udlit=*/true)
+		.maybe_add_location_wrapper ());
+      else
+	return (cp_parser_string_literal (parser,
+					  /*translate=*/false,
+					  /*wide_ok=*/true)
+		.maybe_add_location_wrapper ());
 
     case CPP_OPEN_PAREN:
       /* If we see `( { ' then we are looking at the beginning of
@@ -16222,15 +16268,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
 static void
 cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
 {
-  tree linkage;
-
   /* Look for the `extern' keyword.  */
   cp_token *extern_token
     = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
 
   /* Look for the string-literal.  */
   cp_token *string_token = cp_lexer_peek_token (parser->lexer);
-  linkage = cp_parser_string_literal (parser, false, false);
+  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+					   /*wide_ok=*/false);
 
   /* Transform the literal into an identifier.  If the literal is a
      wide-character string, or contains embedded NULs, then we can't
@@ -16360,9 +16405,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
       cp_parser_require (parser, CPP_COMMA, RT_COMMA);
 
       /* Parse the string-literal message.  */
-      message = cp_parser_string_literal (parser,
-                                	  /*translate=*/false,
-                                	  /*wide_ok=*/true);
+      message = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/true);
 
       /* A `)' completes the static assertion.  */
       if (!parens.require_close (parser))
@@ -17410,7 +17454,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
     case CPP_STRING16_USERDEF:
     case CPP_STRING32_USERDEF:
       {
-	cp_expr str;
 	tree string_tree;
 	int sz, len;
 
@@ -17418,8 +17461,8 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
 	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
 
 	/* Consume the string.  */
-	str = cp_parser_string_literal (parser, /*translate=*/true,
-				      /*wide_ok=*/true, /*lookup_udlit=*/false);
+	cp_expr str = cp_parser_userdef_string_literal (parser,
+							/*lookup_udlit=*/false);
 	if (str == error_mark_node)
 	  return error_mark_node;
 	else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -22072,7 +22115,6 @@ cp_parser_using_directive (cp_parser* parser)
 static void
 cp_parser_asm_definition (cp_parser* parser)
 {
-  tree string;
   tree outputs = NULL_TREE;
   tree inputs = NULL_TREE;
   tree clobbers = NULL_TREE;
@@ -22180,7 +22222,8 @@ cp_parser_asm_definition (cp_parser* parser)
   if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
     return;
   /* Look for the string.  */
-  string = cp_parser_string_literal (parser, false, false);
+  tree string = cp_parser_string_literal (parser, /*translate=*/false,
+					  /*wide_ok=*/false);
   if (string == error_mark_node)
     {
       cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28655,11 +28698,8 @@ cp_parser_yield_expression (cp_parser* parser)
 static tree
 cp_parser_asm_specification_opt (cp_parser* parser)
 {
-  cp_token *token;
-  tree asm_specification;
-
   /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
+  cp_token *token = cp_lexer_peek_token (parser->lexer);
   /* If the next token isn't the `asm' keyword, then there's no
      asm-specification.  */
   if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28672,7 +28712,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
   parens.require_open (parser);
 
   /* Look for the string-literal.  */
-  asm_specification = cp_parser_string_literal (parser, false, false);
+  tree asm_specification = cp_parser_string_literal (parser,
+						     /*translate=*/false,
+						     /*wide_ok=*/false);
 
   /* Look for the `)'.  */
   parens.require_close (parser);
@@ -28705,8 +28747,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-      tree expression;
       tree name;
 
       if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28724,13 +28764,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
       else
 	name = NULL_TREE;
       /* Look for the string-literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
 
       /* Look for the `('.  */
       matching_parens parens;
       parens.require_open (parser);
       /* Parse the expression.  */
-      expression = cp_parser_expression (parser);
+      tree expression = cp_parser_expression (parser);
       /* Look for the `)'.  */
       parens.require_close (parser);
 
@@ -28770,10 +28812,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
 
   while (true)
     {
-      tree string_literal;
-
       /* Look for the string literal.  */
-      string_literal = cp_parser_string_literal (parser, false, false);
+      tree string_literal = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
       /* Add it to the list.  */
       clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
       /* If the next token is not a `,', then the list is
@@ -46345,7 +46387,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
 		      cp_lexer_consume_token (parser->lexer);
 		    }
 		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
-		    value = cp_parser_string_literal (parser, false, false);
+		    value = cp_parser_string_literal (parser,
+						      /*translate=*/false,
+						      /*wide_ok=*/false);
 		  else
 		    {
 		      cp_parser_error (parser, "expected identifier or "
@@ -49367,7 +49411,8 @@ pragma_lex (tree *value, location_t *loc)
   if (ret == CPP_PRAGMA_EOL)
     ret = CPP_EOF;
   else if (ret == CPP_STRING)
-    *value = cp_parser_string_literal (the_parser, false, false);
+    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+				       /*wide_ok=*/false);
   else
     {
       if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x		  // { dg-error "include expects" }
+#line ""_x		  // { dg-error "not a positive integer" }
+#if __has_include(""_x)	  // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}

base-commit: 6071e495e5802a8949d2b02df6aa31a5f40f2af9
-- 
2.39.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] c++: Reject UDLs in certain contexts [PR105300]
  2023-01-13 23:22                   ` [PATCH v4] " Marek Polacek
@ 2023-01-24 22:49                     ` Marek Polacek
  2023-01-25 19:36                     ` Jason Merrill
  1 sibling, 0 replies; 13+ messages in thread
From: Marek Polacek @ 2023-01-24 22:49 UTC (permalink / raw)
  To: Jason Merrill; +Cc: GCC Patches

Ping.

On Fri, Jan 13, 2023 at 06:22:38PM -0500, Marek Polacek wrote:
> On Sat, Dec 03, 2022 at 02:58:16PM -0500, Jason Merrill wrote:
> > On 12/2/22 18:58, Marek Polacek wrote:
> > > On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
> > > > On 11/18/22 18:52, Marek Polacek wrote:
> > > > > +/* Parse a string literal or user defined string literal.
> > > > > +
> > > > > +   user-defined-string-literal :
> > > > > +     string-literal ud-suffix
> > > > > +
> > > > > +   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
> > > > > +   a lookup for a suitable template function.  */
> > > > > +
> > > > > +static inline cp_expr
> > > > > +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> > > > > +				  bool wide_ok, bool lookup_udlit = true)
> > > > 
> > > > I think this function doesn't need the translate and wide_ok parms, they can
> > > > always be true.
> > > 
> > > I've dropped the wide_ok one, but not the other, because...
> > > > > +{
> > > > > +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> > > > > +					  /*udl_ok=*/true, lookup_udlit);
> > > > > +}
> > > > > +
> > > > >    /* Look up a literal operator with the name and the exact arguments.  */
> > > > >    static tree
> > > > > @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> > > > >       as arguments.  */
> > > > >    static tree
> > > > > -cp_parser_userdef_string_literal (tree literal)
> > > > > +finish_userdef_string_literal (tree literal)
> > > > >    {
> > > > >      tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> > > > >      tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> > > > > @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> > > > >        case CPP_UTF8STRING_USERDEF:
> > > > >          /* ??? Should wide strings be allowed when parser->translate_strings_p
> > > > >    	 is false (i.e. in attributes)?  If not, we can kill the third
> > > > > -	 argument to cp_parser_string_literal.  */
> > > > 
> > > > I think the answer to this old question is no: if we have an
> > > > encoding-prefix, we should be translating.
> > > 
> > > ...I don't actually know how to resolve this.  wide_ok is always true here.
> > > Should that change?  Or rather, should translate be false for CPP_STRING only?
> 
> Sorry it's taken so long to get back to this.
>  
> > The one current exception to my assertion above is static_assert, for which
> > we currently allow encoding-prefixes but don't translate.  I think this is
> > wrong, that we should translate the string.  But I'm not confident of that.
> > 
> > But to your question, yes: when translate is false, I think we also don't
> > want to allow UDLs.  So _userdef can always pass true for translate.  And as
> > below we should call it only when translate would be true.
> 
> Done: _userdef no longer has the translate parameter and it's only called
> when parser->translate_strings_p.
>  
> > Incidentally, it seems that we set translate off for all attributes, even
> > ones that would take a normal expression argument where presumably we do
> > want translation (and UDLs).  The whole business of different parsing for
> > different attributes is a headache.  You don't need to deal with this now.
> > 
> > > > > -      return (cp_parser_string_literal (parser,
> > > > > -					parser->translate_strings_p,
> > > > > -					true)
> > > > > +	 argument to cp_parser_{,userdef}string_literal.  */
> > > > > +      return (cp_parser_userdef_string_literal (parser,
> > > > > +						parser->translate_strings_p,
> > > > > +						/*wide_ok=*/true)
> > > > 
> > > > For CPP_*STRING* without _USERDEF, we should still call
> > > > cp_parser_string_literal.
> > > 
> > > It looks like we always have to call cp_parser_userdef_string_literal
> > > otherwise this would be rejected:
> > > 
> > >    std::string concat01 = "Hello, " "World!"_www;
> > > 
> > > Because first we see a CPP_STRING but the subsequent UDL shouldn't
> > > be rejected.
> > 
> > Ah, I didn't notice the function was handling a sequence of string-literals.
> > So maybe we want to call _userdef here when translate_strings_p, and not
> > when it's false.
> 
> Resolved by the change above.  Thanks,
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected.  This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
> 
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly.  finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
> 
> 	PR c++/105300
> 
> gcc/c-family/ChangeLog:
> 
> 	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
> 
> gcc/cp/ChangeLog:
> 
> 	* parser.cc: Remove unnecessary forward declarations.
> 	(cp_parser_string_literal): New wrapper.
> 	(cp_parser_string_literal_common): Renamed from
> 	cp_parser_string_literal.  Add a bool parameter.  Give an error when
> 	UDLs are not permitted.
> 	(cp_parser_userdef_string_literal): New wrapper.
> 	(finish_userdef_string_literal): Renamed from
> 	cp_parser_userdef_string_literal.
> 	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> 	instead of cp_parser_string_literal.
> 	(cp_parser_linkage_specification): Move a variable declaration closer
> 	to its first use.
> 	(cp_parser_static_assert): Likewise.
> 	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> 	cp_parser_string_literal.
> 	(cp_parser_asm_definition): Move a variable declaration closer to its
> 	first use.
> 	(cp_parser_asm_specification_opt): Move variable declarations closer to
> 	their first use.
> 	(cp_parser_asm_operand_list): Likewise.
> 	(cp_parser_asm_clobber_list): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/cpp0x/udlit-error1.C: New test.
> ---
>  gcc/c-family/c-pragma.cc                  |   3 +
>  gcc/cp/parser.cc                          | 133 +++++++++++++++-------
>  gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
>  3 files changed, 113 insertions(+), 44 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> 
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 91fabf0a513..bba9172e8a1 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
>      }
>    else if (token == CPP_STRING)
>      message = x;
> +  else if (token == CPP_STRING_USERDEF)
> +    GCC_BAD ("string literal with user-defined suffix is invalid in this "
> +	     "context");
>    else
>      GCC_BAD ("expected a string after %<#pragma message%>");
>  
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 8b1658decba..4b366d6c64f 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>  
>  /* Lexical conventions [gram.lex]  */
>  
> -static cp_expr cp_parser_identifier
> -  (cp_parser *);
> -static cp_expr cp_parser_string_literal
> -  (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> -  (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
>    (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> -  (cp_parser *);
>  
>  /* Basic concepts [gram.basic]  */
>  
> @@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
>      return error_mark_node;
>  }
>  
> -/* Parse a sequence of adjacent string constants.  Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> +   Do not call this directly; use either of the above.
> +
> +   Parse a sequence of adjacent string constants.  Return a
>     TREE_STRING representing the combined, nul-terminated string
>     constant.  If TRANSLATE is true, translate the string to the
>     execution character set.  If WIDE_OK is true, a wide string is
> -   invalid here.
> +   valid here.  If UDL_OK is true, a string literal with user-defined
> +   suffix can be used in this context.
>  
>     C++98 [lex.string] says that if a narrow string literal token is
>     adjacent to a wide string literal token, the behavior is undefined.
> @@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
>     This code is largely lifted from lex_string() in c-lex.cc.
>  
>     FUTURE: ObjC++ will need to handle @-strings here.  */
> +
>  static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> -			  bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> +				 bool wide_ok, bool udl_ok,
> +				 bool lookup_udlit)
>  {
>    tree value;
>    size_t count;
> @@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>  
>    if (cpp_userdef_string_p (tok->type))
>      {
> +      if (!udl_ok)
> +	{
> +	  error_at (loc, "string literal with user-defined suffix "
> +		    "is invalid in this context");
> +	  return error_mark_node;
> +	}
>        string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>        curr_type = cpp_userdef_string_remove_type (tok->type);
>        curr_tok_is_userdef_p = true;
> @@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>  	  tok = cp_lexer_peek_token (parser->lexer);
>  	  if (cpp_userdef_string_p (tok->type))
>  	    {
> +	      if (!udl_ok)
> +		{
> +		  error_at (loc, "string literal with user-defined suffix "
> +			    "is invalid in this context");
> +		  return error_mark_node;
> +		}
>  	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>  	      curr_type = cpp_userdef_string_remove_type (tok->type);
>  	      curr_tok_is_userdef_p = true;
> @@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>  	  tree literal = build_userdef_literal (suffix_id, value,
>  						OT_NONE, NULL_TREE);
>  	  if (lookup_udlit)
> -	    value = cp_parser_userdef_string_literal (literal);
> +	    value = finish_userdef_string_literal (literal);
>  	  else
>  	    value = literal;
>  	}
> @@ -4626,6 +4636,37 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>    return cp_expr (value, loc);
>  }
>  
> +/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
> +   representing the combined, nul-terminated string constant.  If
> +   TRANSLATE is true, translate the string to the execution character set.
> +   If WIDE_OK is true, a wide string is valid here.
> +
> +   This function issues an error if a user defined string literal is
> +   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> +					  /*udl_ok=*/false,
> +					  /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> +   user-defined-string-literal :
> +     string-literal ud-suffix
> +
> +   If LOOKUP_UDLIT, perform a lookup for a suitable template function.  */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool lookup_udlit)
> +{
> +  return cp_parser_string_literal_common (parser, /*translate=*/true,
> +					  /*wide_ok=*/true, /*udl_ok=*/true,
> +					  lookup_udlit);
> +}
> +
>  /* Look up a literal operator with the name and the exact arguments.  */
>  
>  static tree
> @@ -4923,7 +4964,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>     as arguments.  */
>  
>  static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
>  {
>    tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>    tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5663,10 +5704,15 @@ cp_parser_primary_expression (cp_parser *parser,
>        /* ??? Should wide strings be allowed when parser->translate_strings_p
>  	 is false (i.e. in attributes)?  If not, we can kill the third
>  	 argument to cp_parser_string_literal.  */
> -      return (cp_parser_string_literal (parser,
> -					parser->translate_strings_p,
> -					true)
> -	      .maybe_add_location_wrapper ());
> +      if (parser->translate_strings_p)
> +	return (cp_parser_userdef_string_literal (parser,
> +						  /*lookup_udlit=*/true)
> +		.maybe_add_location_wrapper ());
> +      else
> +	return (cp_parser_string_literal (parser,
> +					  /*translate=*/false,
> +					  /*wide_ok=*/true)
> +		.maybe_add_location_wrapper ());
>  
>      case CPP_OPEN_PAREN:
>        /* If we see `( { ' then we are looking at the beginning of
> @@ -16222,15 +16268,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
>  static void
>  cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
>  {
> -  tree linkage;
> -
>    /* Look for the `extern' keyword.  */
>    cp_token *extern_token
>      = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>  
>    /* Look for the string-literal.  */
>    cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> -  linkage = cp_parser_string_literal (parser, false, false);
> +  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> +					   /*wide_ok=*/false);
>  
>    /* Transform the literal into an identifier.  If the literal is a
>       wide-character string, or contains embedded NULs, then we can't
> @@ -16360,9 +16405,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
>        cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>  
>        /* Parse the string-literal message.  */
> -      message = cp_parser_string_literal (parser,
> -                                	  /*translate=*/false,
> -                                	  /*wide_ok=*/true);
> +      message = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/true);
>  
>        /* A `)' completes the static assertion.  */
>        if (!parens.require_close (parser))
> @@ -17410,7 +17454,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>      case CPP_STRING16_USERDEF:
>      case CPP_STRING32_USERDEF:
>        {
> -	cp_expr str;
>  	tree string_tree;
>  	int sz, len;
>  
> @@ -17418,8 +17461,8 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>  	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>  
>  	/* Consume the string.  */
> -	str = cp_parser_string_literal (parser, /*translate=*/true,
> -				      /*wide_ok=*/true, /*lookup_udlit=*/false);
> +	cp_expr str = cp_parser_userdef_string_literal (parser,
> +							/*lookup_udlit=*/false);
>  	if (str == error_mark_node)
>  	  return error_mark_node;
>  	else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -22072,7 +22115,6 @@ cp_parser_using_directive (cp_parser* parser)
>  static void
>  cp_parser_asm_definition (cp_parser* parser)
>  {
> -  tree string;
>    tree outputs = NULL_TREE;
>    tree inputs = NULL_TREE;
>    tree clobbers = NULL_TREE;
> @@ -22180,7 +22222,8 @@ cp_parser_asm_definition (cp_parser* parser)
>    if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
>      return;
>    /* Look for the string.  */
> -  string = cp_parser_string_literal (parser, false, false);
> +  tree string = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/false);
>    if (string == error_mark_node)
>      {
>        cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28655,11 +28698,8 @@ cp_parser_yield_expression (cp_parser* parser)
>  static tree
>  cp_parser_asm_specification_opt (cp_parser* parser)
>  {
> -  cp_token *token;
> -  tree asm_specification;
> -
>    /* Peek at the next token.  */
> -  token = cp_lexer_peek_token (parser->lexer);
> +  cp_token *token = cp_lexer_peek_token (parser->lexer);
>    /* If the next token isn't the `asm' keyword, then there's no
>       asm-specification.  */
>    if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28672,7 +28712,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
>    parens.require_open (parser);
>  
>    /* Look for the string-literal.  */
> -  asm_specification = cp_parser_string_literal (parser, false, false);
> +  tree asm_specification = cp_parser_string_literal (parser,
> +						     /*translate=*/false,
> +						     /*wide_ok=*/false);
>  
>    /* Look for the `)'.  */
>    parens.require_close (parser);
> @@ -28705,8 +28747,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>  
>    while (true)
>      {
> -      tree string_literal;
> -      tree expression;
>        tree name;
>  
>        if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28724,13 +28764,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
>        else
>  	name = NULL_TREE;
>        /* Look for the string-literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>  
>        /* Look for the `('.  */
>        matching_parens parens;
>        parens.require_open (parser);
>        /* Parse the expression.  */
> -      expression = cp_parser_expression (parser);
> +      tree expression = cp_parser_expression (parser);
>        /* Look for the `)'.  */
>        parens.require_close (parser);
>  
> @@ -28770,10 +28812,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>  
>    while (true)
>      {
> -      tree string_literal;
> -
>        /* Look for the string literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>        /* Add it to the list.  */
>        clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
>        /* If the next token is not a `,', then the list is
> @@ -46345,7 +46387,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
>  		      cp_lexer_consume_token (parser->lexer);
>  		    }
>  		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> -		    value = cp_parser_string_literal (parser, false, false);
> +		    value = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>  		  else
>  		    {
>  		      cp_parser_error (parser, "expected identifier or "
> @@ -49367,7 +49411,8 @@ pragma_lex (tree *value, location_t *loc)
>    if (ret == CPP_PRAGMA_EOL)
>      ret = CPP_EOF;
>    else if (ret == CPP_STRING)
> -    *value = cp_parser_string_literal (the_parser, false, false);
> +    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> +				       /*wide_ok=*/false);
>    else
>      {
>        if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x		  // { dg-error "include expects" }
> +#line ""_x		  // { dg-error "not a positive integer" }
> +#if __has_include(""_x)	  // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> +  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
> 
> base-commit: 6071e495e5802a8949d2b02df6aa31a5f40f2af9
> -- 
> 2.39.0
> 

Marek


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] c++: Reject UDLs in certain contexts [PR105300]
  2023-01-13 23:22                   ` [PATCH v4] " Marek Polacek
  2023-01-24 22:49                     ` Marek Polacek
@ 2023-01-25 19:36                     ` Jason Merrill
  1 sibling, 0 replies; 13+ messages in thread
From: Jason Merrill @ 2023-01-25 19:36 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches

On 1/13/23 18:22, Marek Polacek wrote:
> On Sat, Dec 03, 2022 at 02:58:16PM -0500, Jason Merrill wrote:
>> On 12/2/22 18:58, Marek Polacek wrote:
>>> On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
>>>> On 11/18/22 18:52, Marek Polacek wrote:
>>>>> +/* Parse a string literal or user defined string literal.
>>>>> +
>>>>> +   user-defined-string-literal :
>>>>> +     string-literal ud-suffix
>>>>> +
>>>>> +   Parameters as for cp_parser_string_literal.  If LOOKUP_UDLIT, perform
>>>>> +   a lookup for a suitable template function.  */
>>>>> +
>>>>> +static inline cp_expr
>>>>> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
>>>>> +				  bool wide_ok, bool lookup_udlit = true)
>>>>
>>>> I think this function doesn't need the translate and wide_ok parms, they can
>>>> always be true.
>>>
>>> I've dropped the wide_ok one, but not the other, because...
>>>>> +{
>>>>> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
>>>>> +					  /*udl_ok=*/true, lookup_udlit);
>>>>> +}
>>>>> +
>>>>>     /* Look up a literal operator with the name and the exact arguments.  */
>>>>>     static tree
>>>>> @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>>>>>        as arguments.  */
>>>>>     static tree
>>>>> -cp_parser_userdef_string_literal (tree literal)
>>>>> +finish_userdef_string_literal (tree literal)
>>>>>     {
>>>>>       tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>>>>>       tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
>>>>> @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
>>>>>         case CPP_UTF8STRING_USERDEF:
>>>>>           /* ??? Should wide strings be allowed when parser->translate_strings_p
>>>>>     	 is false (i.e. in attributes)?  If not, we can kill the third
>>>>> -	 argument to cp_parser_string_literal.  */
>>>>
>>>> I think the answer to this old question is no: if we have an
>>>> encoding-prefix, we should be translating.
>>>
>>> ...I don't actually know how to resolve this.  wide_ok is always true here.
>>> Should that change?  Or rather, should translate be false for CPP_STRING only?
> 
> Sorry it's taken so long to get back to this.
>   
>> The one current exception to my assertion above is static_assert, for which
>> we currently allow encoding-prefixes but don't translate.  I think this is
>> wrong, that we should translate the string.  But I'm not confident of that.
>>
>> But to your question, yes: when translate is false, I think we also don't
>> want to allow UDLs.  So _userdef can always pass true for translate.  And as
>> below we should call it only when translate would be true.
> 
> Done: _userdef no longer has the translate parameter and it's only called
> when parser->translate_strings_p.
>   
>> Incidentally, it seems that we set translate off for all attributes, even
>> ones that would take a normal expression argument where presumably we do
>> want translation (and UDLs).  The whole business of different parsing for
>> different attributes is a headache.  You don't need to deal with this now.
>>
>>>>> -      return (cp_parser_string_literal (parser,
>>>>> -					parser->translate_strings_p,
>>>>> -					true)
>>>>> +	 argument to cp_parser_{,userdef}string_literal.  */
>>>>> +      return (cp_parser_userdef_string_literal (parser,
>>>>> +						parser->translate_strings_p,
>>>>> +						/*wide_ok=*/true)
>>>>
>>>> For CPP_*STRING* without _USERDEF, we should still call
>>>> cp_parser_string_literal.
>>>
>>> It looks like we always have to call cp_parser_userdef_string_literal
>>> otherwise this would be rejected:
>>>
>>>     std::string concat01 = "Hello, " "World!"_www;
>>>
>>> Because first we see a CPP_STRING but the subsequent UDL shouldn't
>>> be rejected.
>>
>> Ah, I didn't notice the function was handling a sequence of string-literals.
>> So maybe we want to call _userdef here when translate_strings_p, and not
>> when it's false.
> 
> Resolved by the change above.  Thanks,
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

OK, thanks.

> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected.  This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
> 
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly.  finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
> 
> 	PR c++/105300
> 
> gcc/c-family/ChangeLog:
> 
> 	* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
> 
> gcc/cp/ChangeLog:
> 
> 	* parser.cc: Remove unnecessary forward declarations.
> 	(cp_parser_string_literal): New wrapper.
> 	(cp_parser_string_literal_common): Renamed from
> 	cp_parser_string_literal.  Add a bool parameter.  Give an error when
> 	UDLs are not permitted.
> 	(cp_parser_userdef_string_literal): New wrapper.
> 	(finish_userdef_string_literal): Renamed from
> 	cp_parser_userdef_string_literal.
> 	(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> 	instead of cp_parser_string_literal.
> 	(cp_parser_linkage_specification): Move a variable declaration closer
> 	to its first use.
> 	(cp_parser_static_assert): Likewise.
> 	(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> 	cp_parser_string_literal.
> 	(cp_parser_asm_definition): Move a variable declaration closer to its
> 	first use.
> 	(cp_parser_asm_specification_opt): Move variable declarations closer to
> 	their first use.
> 	(cp_parser_asm_operand_list): Likewise.
> 	(cp_parser_asm_clobber_list): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/cpp0x/udlit-error1.C: New test.
> ---
>   gcc/c-family/c-pragma.cc                  |   3 +
>   gcc/cp/parser.cc                          | 133 +++++++++++++++-------
>   gcc/testsuite/g++.dg/cpp0x/udlit-error1.C |  21 ++++
>   3 files changed, 113 insertions(+), 44 deletions(-)
>   create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> 
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 91fabf0a513..bba9172e8a1 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
>       }
>     else if (token == CPP_STRING)
>       message = x;
> +  else if (token == CPP_STRING_USERDEF)
> +    GCC_BAD ("string literal with user-defined suffix is invalid in this "
> +	     "context");
>     else
>       GCC_BAD ("expected a string after %<#pragma message%>");
>   
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 8b1658decba..4b366d6c64f 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>   
>   /* Lexical conventions [gram.lex]  */
>   
> -static cp_expr cp_parser_identifier
> -  (cp_parser *);
> -static cp_expr cp_parser_string_literal
> -  (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> -  (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
>     (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> -  (cp_parser *);
>   
>   /* Basic concepts [gram.basic]  */
>   
> @@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
>       return error_mark_node;
>   }
>   
> -/* Parse a sequence of adjacent string constants.  Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> +   Do not call this directly; use either of the above.
> +
> +   Parse a sequence of adjacent string constants.  Return a
>      TREE_STRING representing the combined, nul-terminated string
>      constant.  If TRANSLATE is true, translate the string to the
>      execution character set.  If WIDE_OK is true, a wide string is
> -   invalid here.
> +   valid here.  If UDL_OK is true, a string literal with user-defined
> +   suffix can be used in this context.
>   
>      C++98 [lex.string] says that if a narrow string literal token is
>      adjacent to a wide string literal token, the behavior is undefined.
> @@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
>      This code is largely lifted from lex_string() in c-lex.cc.
>   
>      FUTURE: ObjC++ will need to handle @-strings here.  */
> +
>   static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> -			  bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> +				 bool wide_ok, bool udl_ok,
> +				 bool lookup_udlit)
>   {
>     tree value;
>     size_t count;
> @@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   
>     if (cpp_userdef_string_p (tok->type))
>       {
> +      if (!udl_ok)
> +	{
> +	  error_at (loc, "string literal with user-defined suffix "
> +		    "is invalid in this context");
> +	  return error_mark_node;
> +	}
>         string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>         curr_type = cpp_userdef_string_remove_type (tok->type);
>         curr_tok_is_userdef_p = true;
> @@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   	  tok = cp_lexer_peek_token (parser->lexer);
>   	  if (cpp_userdef_string_p (tok->type))
>   	    {
> +	      if (!udl_ok)
> +		{
> +		  error_at (loc, "string literal with user-defined suffix "
> +			    "is invalid in this context");
> +		  return error_mark_node;
> +		}
>   	      string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
>   	      curr_type = cpp_userdef_string_remove_type (tok->type);
>   	      curr_tok_is_userdef_p = true;
> @@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>   	  tree literal = build_userdef_literal (suffix_id, value,
>   						OT_NONE, NULL_TREE);
>   	  if (lookup_udlit)
> -	    value = cp_parser_userdef_string_literal (literal);
> +	    value = finish_userdef_string_literal (literal);
>   	  else
>   	    value = literal;
>   	}
> @@ -4626,6 +4636,37 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>     return cp_expr (value, loc);
>   }
>   
> +/* Parse a sequence of adjacent string constants.  Return a TREE_STRING
> +   representing the combined, nul-terminated string constant.  If
> +   TRANSLATE is true, translate the string to the execution character set.
> +   If WIDE_OK is true, a wide string is valid here.
> +
> +   This function issues an error if a user defined string literal is
> +   encountered; use cp_parser_userdef_string_literal if UDLs are allowed.  */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> +  return cp_parser_string_literal_common (parser, translate, wide_ok,
> +					  /*udl_ok=*/false,
> +					  /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user-defined string literal.
> +
> +   user-defined-string-literal :
> +     string-literal ud-suffix
> +
> +   If LOOKUP_UDLIT, perform a lookup for a suitable template function.  */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool lookup_udlit)
> +{
> +  return cp_parser_string_literal_common (parser, /*translate=*/true,
> +					  /*wide_ok=*/true, /*udl_ok=*/true,
> +					  lookup_udlit);
> +}
> +
>   /* Look up a literal operator with the name and the exact arguments.  */
>   
>   static tree
> @@ -4923,7 +4964,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>      as arguments.  */
>   
>   static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
>   {
>     tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>     tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5663,10 +5704,15 @@ cp_parser_primary_expression (cp_parser *parser,
>         /* ??? Should wide strings be allowed when parser->translate_strings_p
>   	 is false (i.e. in attributes)?  If not, we can kill the third
>   	 argument to cp_parser_string_literal.  */
> -      return (cp_parser_string_literal (parser,
> -					parser->translate_strings_p,
> -					true)
> -	      .maybe_add_location_wrapper ());
> +      if (parser->translate_strings_p)
> +	return (cp_parser_userdef_string_literal (parser,
> +						  /*lookup_udlit=*/true)
> +		.maybe_add_location_wrapper ());
> +      else
> +	return (cp_parser_string_literal (parser,
> +					  /*translate=*/false,
> +					  /*wide_ok=*/true)
> +		.maybe_add_location_wrapper ());
>   
>       case CPP_OPEN_PAREN:
>         /* If we see `( { ' then we are looking at the beginning of
> @@ -16222,15 +16268,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
>   static void
>   cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
>   {
> -  tree linkage;
> -
>     /* Look for the `extern' keyword.  */
>     cp_token *extern_token
>       = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>   
>     /* Look for the string-literal.  */
>     cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> -  linkage = cp_parser_string_literal (parser, false, false);
> +  tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> +					   /*wide_ok=*/false);
>   
>     /* Transform the literal into an identifier.  If the literal is a
>        wide-character string, or contains embedded NULs, then we can't
> @@ -16360,9 +16405,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
>         cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>   
>         /* Parse the string-literal message.  */
> -      message = cp_parser_string_literal (parser,
> -                                	  /*translate=*/false,
> -                                	  /*wide_ok=*/true);
> +      message = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/true);
>   
>         /* A `)' completes the static assertion.  */
>         if (!parens.require_close (parser))
> @@ -17410,7 +17454,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>       case CPP_STRING16_USERDEF:
>       case CPP_STRING32_USERDEF:
>         {
> -	cp_expr str;
>   	tree string_tree;
>   	int sz, len;
>   
> @@ -17418,8 +17461,8 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
>   	  maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>   
>   	/* Consume the string.  */
> -	str = cp_parser_string_literal (parser, /*translate=*/true,
> -				      /*wide_ok=*/true, /*lookup_udlit=*/false);
> +	cp_expr str = cp_parser_userdef_string_literal (parser,
> +							/*lookup_udlit=*/false);
>   	if (str == error_mark_node)
>   	  return error_mark_node;
>   	else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -22072,7 +22115,6 @@ cp_parser_using_directive (cp_parser* parser)
>   static void
>   cp_parser_asm_definition (cp_parser* parser)
>   {
> -  tree string;
>     tree outputs = NULL_TREE;
>     tree inputs = NULL_TREE;
>     tree clobbers = NULL_TREE;
> @@ -22180,7 +22222,8 @@ cp_parser_asm_definition (cp_parser* parser)
>     if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
>       return;
>     /* Look for the string.  */
> -  string = cp_parser_string_literal (parser, false, false);
> +  tree string = cp_parser_string_literal (parser, /*translate=*/false,
> +					  /*wide_ok=*/false);
>     if (string == error_mark_node)
>       {
>         cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28655,11 +28698,8 @@ cp_parser_yield_expression (cp_parser* parser)
>   static tree
>   cp_parser_asm_specification_opt (cp_parser* parser)
>   {
> -  cp_token *token;
> -  tree asm_specification;
> -
>     /* Peek at the next token.  */
> -  token = cp_lexer_peek_token (parser->lexer);
> +  cp_token *token = cp_lexer_peek_token (parser->lexer);
>     /* If the next token isn't the `asm' keyword, then there's no
>        asm-specification.  */
>     if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28672,7 +28712,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
>     parens.require_open (parser);
>   
>     /* Look for the string-literal.  */
> -  asm_specification = cp_parser_string_literal (parser, false, false);
> +  tree asm_specification = cp_parser_string_literal (parser,
> +						     /*translate=*/false,
> +						     /*wide_ok=*/false);
>   
>     /* Look for the `)'.  */
>     parens.require_close (parser);
> @@ -28705,8 +28747,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -      tree expression;
>         tree name;
>   
>         if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28724,13 +28764,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
>         else
>   	name = NULL_TREE;
>         /* Look for the string-literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>   
>         /* Look for the `('.  */
>         matching_parens parens;
>         parens.require_open (parser);
>         /* Parse the expression.  */
> -      expression = cp_parser_expression (parser);
> +      tree expression = cp_parser_expression (parser);
>         /* Look for the `)'.  */
>         parens.require_close (parser);
>   
> @@ -28770,10 +28812,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>   
>     while (true)
>       {
> -      tree string_literal;
> -
>         /* Look for the string literal.  */
> -      string_literal = cp_parser_string_literal (parser, false, false);
> +      tree string_literal = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>         /* Add it to the list.  */
>         clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
>         /* If the next token is not a `,', then the list is
> @@ -46345,7 +46387,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
>   		      cp_lexer_consume_token (parser->lexer);
>   		    }
>   		  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> -		    value = cp_parser_string_literal (parser, false, false);
> +		    value = cp_parser_string_literal (parser,
> +						      /*translate=*/false,
> +						      /*wide_ok=*/false);
>   		  else
>   		    {
>   		      cp_parser_error (parser, "expected identifier or "
> @@ -49367,7 +49411,8 @@ pragma_lex (tree *value, location_t *loc)
>     if (ret == CPP_PRAGMA_EOL)
>       ret = CPP_EOF;
>     else if (ret == CPP_STRING)
> -    *value = cp_parser_string_literal (the_parser, false, false);
> +    *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> +				       /*wide_ok=*/false);
>     else
>       {
>         if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x		  // { dg-error "include expects" }
> +#line ""_x		  // { dg-error "not a positive integer" }
> +#if __has_include(""_x)	  // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x	  // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> +  asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
> 
> base-commit: 6071e495e5802a8949d2b02df6aa31a5f40f2af9


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2023-01-25 19:36 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-12 16:53 [PATCH] c++: Reject UDLs in certain contexts [PR105300] Marek Polacek
2022-11-15 23:58 ` Jason Merrill
2022-11-16  0:35   ` Marek Polacek
2022-11-16 13:22     ` Jason Merrill
2022-11-17  1:12       ` Marek Polacek
2022-11-18  0:06         ` Jason Merrill
2022-11-18 23:52           ` [PATCH v2] " Marek Polacek
2022-11-19  1:39             ` Jason Merrill
2022-12-02 23:58               ` [PATCH v3] " Marek Polacek
2022-12-03 19:58                 ` Jason Merrill
2023-01-13 23:22                   ` [PATCH v4] " Marek Polacek
2023-01-24 22:49                     ` Marek Polacek
2023-01-25 19:36                     ` Jason Merrill

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).