* [PATCH] c++: Reject UDLs in certain contexts [PR105300]
@ 2022-11-12 16:53 Marek Polacek
2022-11-15 23:58 ` Jason Merrill
0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-12 16:53 UTC (permalink / raw)
To: GCC Patches, Jason Merrill
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected. This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
PR c++/105300
gcc/c-family/ChangeLog:
* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
gcc/cp/ChangeLog:
* parser.cc (cp_parser_string_literal): Add a bool parameter.
Give an error when UDLs are not permitted.
(cp_parser_primary_expression): Adjust the call to
cp_parser_string_literal.
(cp_parser_linkage_specification): Likewise.
(cp_parser_static_assert): Likewise.
(cp_parser_operator): Likewise.
(cp_parser_asm_definition): Likewise.
(cp_parser_asm_specification_opt): Likewise.
(cp_parser_asm_operand_list): Likewise.
(cp_parser_asm_clobber_list): Likewise.
(cp_parser_omp_context_selector): Likewise.
(pragma_lex): Likewise.
gcc/testsuite/ChangeLog:
* g++.dg/cpp0x/udlit-error1.C: New test.
---
gcc/c-family/c-pragma.cc | 3 +
gcc/cp/parser.cc | 69 ++++++++++++++---------
gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 +++++++
3 files changed, 65 insertions(+), 28 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 142a46441ac..49f405b605b 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
}
else if (token == CPP_STRING)
message = x;
+ else if (token == CPP_STRING_USERDEF)
+ GCC_BAD ("string literal with user-defined suffix is invalid in this "
+ "context");
else
GCC_BAD ("expected a string after %<#pragma message%>");
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index e4021835ed5..ae2798e2a33 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2226,7 +2226,7 @@ pop_unparsed_function_queues (cp_parser *parser)
static cp_expr cp_parser_identifier
(cp_parser *);
static cp_expr cp_parser_string_literal
- (cp_parser *, bool, bool, bool);
+ (cp_parser *, bool, bool, bool, bool);
static cp_expr cp_parser_userdef_char_literal
(cp_parser *);
static tree cp_parser_userdef_string_literal
@@ -4402,7 +4402,8 @@ cp_parser_identifier (cp_parser* parser)
TREE_STRING representing the combined, nul-terminated string
constant. If TRANSLATE is true, translate the string to the
execution character set. If WIDE_OK is true, a wide string is
- invalid here.
+ valid here. If UDL_OK is true, a string literal with user-defined
+ suffix can be used in this context.
C++98 [lex.string] says that if a narrow string literal token is
adjacent to a wide string literal token, the behavior is undefined.
@@ -4414,7 +4415,7 @@ cp_parser_identifier (cp_parser* parser)
FUTURE: ObjC++ will need to handle @-strings here. */
static cp_expr
cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
- bool lookup_udlit = true)
+ bool udl_ok, bool lookup_udlit = true)
{
tree value;
size_t count;
@@ -4439,6 +4440,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -5655,7 +5662,7 @@ cp_parser_primary_expression (cp_parser *parser,
argument to cp_parser_string_literal. */
return (cp_parser_string_literal (parser,
parser->translate_strings_p,
- true)
+ /*wide_ok=*/true, /*udl_ok=*/true)
.maybe_add_location_wrapper ());
case CPP_OPEN_PAREN:
@@ -16161,15 +16168,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
static void
cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
{
- tree linkage;
-
/* Look for the `extern' keyword. */
cp_token *extern_token
= cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
/* Look for the string-literal. */
cp_token *string_token = cp_lexer_peek_token (parser->lexer);
- linkage = cp_parser_string_literal (parser, false, false);
+ tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false, /*udl_ok=*/false);
/* Transform the literal into an identifier. If the literal is a
wide-character string, or contains embedded NULs, then we can't
@@ -16300,8 +16306,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
/* Parse the string-literal message. */
message = cp_parser_string_literal (parser,
- /*translate=*/false,
- /*wide_ok=*/true);
+ /*translate=*/false, /*wide_ok=*/true,
+ /*udl_ok=*/false);
/* A `)' completes the static assertion. */
if (!parens.require_close (parser))
@@ -17349,7 +17355,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
case CPP_STRING16_USERDEF:
case CPP_STRING32_USERDEF:
{
- cp_expr str;
tree string_tree;
int sz, len;
@@ -17357,8 +17362,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
/* Consume the string. */
- str = cp_parser_string_literal (parser, /*translate=*/true,
- /*wide_ok=*/true, /*lookup_udlit=*/false);
+ cp_expr str = cp_parser_string_literal (parser, /*translate=*/true,
+ /*wide_ok=*/true,
+ /*udl_ok=*/true,
+ /*lookup_udlit=*/false);
if (str == error_mark_node)
return error_mark_node;
else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -21975,7 +21982,6 @@ cp_parser_using_directive (cp_parser* parser)
static void
cp_parser_asm_definition (cp_parser* parser)
{
- tree string;
tree outputs = NULL_TREE;
tree inputs = NULL_TREE;
tree clobbers = NULL_TREE;
@@ -22083,7 +22089,8 @@ cp_parser_asm_definition (cp_parser* parser)
if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
return;
/* Look for the string. */
- string = cp_parser_string_literal (parser, false, false);
+ tree string = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false, /*udl_ok=*/false);
if (string == error_mark_node)
{
cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28485,11 +28492,8 @@ cp_parser_yield_expression (cp_parser* parser)
static tree
cp_parser_asm_specification_opt (cp_parser* parser)
{
- cp_token *token;
- tree asm_specification;
-
/* Peek at the next token. */
- token = cp_lexer_peek_token (parser->lexer);
+ cp_token *token = cp_lexer_peek_token (parser->lexer);
/* If the next token isn't the `asm' keyword, then there's no
asm-specification. */
if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28502,7 +28506,10 @@ cp_parser_asm_specification_opt (cp_parser* parser)
parens.require_open (parser);
/* Look for the string-literal. */
- asm_specification = cp_parser_string_literal (parser, false, false);
+ tree asm_specification = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false,
+ /*udl_ok=*/false);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28535,8 +28542,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
while (true)
{
- tree string_literal;
- tree expression;
tree name;
if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28554,13 +28559,16 @@ cp_parser_asm_operand_list (cp_parser* parser)
else
name = NULL_TREE;
/* Look for the string-literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false,
+ /*udl_ok=*/false);
/* Look for the `('. */
matching_parens parens;
parens.require_open (parser);
/* Parse the expression. */
- expression = cp_parser_expression (parser);
+ tree expression = cp_parser_expression (parser);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28600,10 +28608,11 @@ cp_parser_asm_clobber_list (cp_parser* parser)
while (true)
{
- tree string_literal;
-
/* Look for the string literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false,
+ /*udl_ok=*/false);
/* Add it to the list. */
clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
/* If the next token is not a `,', then the list is
@@ -45761,7 +45770,10 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
cp_lexer_consume_token (parser->lexer);
}
else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
- value = cp_parser_string_literal (parser, false, false);
+ value = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false,
+ /*udl_ok=*/false);
else
{
cp_parser_error (parser, "expected identifier or "
@@ -48783,7 +48795,8 @@ pragma_lex (tree *value, location_t *loc)
if (ret == CPP_PRAGMA_EOL)
ret = CPP_EOF;
else if (ret == CPP_STRING)
- *value = cp_parser_string_literal (the_parser, false, false);
+ *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+ /*wide_ok=*/false, /*udl_ok=*/false);
else
{
if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x // { dg-error "include expects" }
+#line ""_x // { dg-error "not a positive integer" }
+#if __has_include(""_x) // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+ asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}
base-commit: f232715d15618e91c90eb210e23de10909590944
--
2.38.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
2022-11-12 16:53 [PATCH] c++: Reject UDLs in certain contexts [PR105300] Marek Polacek
@ 2022-11-15 23:58 ` Jason Merrill
2022-11-16 0:35 ` Marek Polacek
0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-15 23:58 UTC (permalink / raw)
To: Marek Polacek, GCC Patches
On 11/12/22 06:53, Marek Polacek wrote:
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected. This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
Since the grammar has
user-defined-string-literal :
string-literal ud-suffix
maybe we want to move the UDL handling out to a
cp_parser_udl_string_literal that calls cp_parser_string_literal?
> PR c++/105300
>
> gcc/c-family/ChangeLog:
>
> * c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
>
> gcc/cp/ChangeLog:
>
> * parser.cc (cp_parser_string_literal): Add a bool parameter.
> Give an error when UDLs are not permitted.
> (cp_parser_primary_expression): Adjust the call to
> cp_parser_string_literal.
> (cp_parser_linkage_specification): Likewise.
> (cp_parser_static_assert): Likewise.
> (cp_parser_operator): Likewise.
> (cp_parser_asm_definition): Likewise.
> (cp_parser_asm_specification_opt): Likewise.
> (cp_parser_asm_operand_list): Likewise.
> (cp_parser_asm_clobber_list): Likewise.
> (cp_parser_omp_context_selector): Likewise.
> (pragma_lex): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp0x/udlit-error1.C: New test.
> ---
> gcc/c-family/c-pragma.cc | 3 +
> gcc/cp/parser.cc | 69 ++++++++++++++---------
> gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 +++++++
> 3 files changed, 65 insertions(+), 28 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
>
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 142a46441ac..49f405b605b 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
> }
> else if (token == CPP_STRING)
> message = x;
> + else if (token == CPP_STRING_USERDEF)
> + GCC_BAD ("string literal with user-defined suffix is invalid in this "
> + "context");
> else
> GCC_BAD ("expected a string after %<#pragma message%>");
>
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index e4021835ed5..ae2798e2a33 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2226,7 +2226,7 @@ pop_unparsed_function_queues (cp_parser *parser)
> static cp_expr cp_parser_identifier
> (cp_parser *);
> static cp_expr cp_parser_string_literal
> - (cp_parser *, bool, bool, bool);
> + (cp_parser *, bool, bool, bool, bool);
> static cp_expr cp_parser_userdef_char_literal
> (cp_parser *);
> static tree cp_parser_userdef_string_literal
> @@ -4402,7 +4402,8 @@ cp_parser_identifier (cp_parser* parser)
> TREE_STRING representing the combined, nul-terminated string
> constant. If TRANSLATE is true, translate the string to the
> execution character set. If WIDE_OK is true, a wide string is
> - invalid here.
> + valid here. If UDL_OK is true, a string literal with user-defined
> + suffix can be used in this context.
>
> C++98 [lex.string] says that if a narrow string literal token is
> adjacent to a wide string literal token, the behavior is undefined.
> @@ -4414,7 +4415,7 @@ cp_parser_identifier (cp_parser* parser)
> FUTURE: ObjC++ will need to handle @-strings here. */
> static cp_expr
> cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> - bool lookup_udlit = true)
> + bool udl_ok, bool lookup_udlit = true)
> {
> tree value;
> size_t count;
> @@ -4439,6 +4440,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -5655,7 +5662,7 @@ cp_parser_primary_expression (cp_parser *parser,
> argument to cp_parser_string_literal. */
> return (cp_parser_string_literal (parser,
> parser->translate_strings_p,
> - true)
> + /*wide_ok=*/true, /*udl_ok=*/true)
> .maybe_add_location_wrapper ());
>
> case CPP_OPEN_PAREN:
> @@ -16161,15 +16168,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
> static void
> cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
> {
> - tree linkage;
> -
> /* Look for the `extern' keyword. */
> cp_token *extern_token
> = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>
> /* Look for the string-literal. */
> cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> - linkage = cp_parser_string_literal (parser, false, false);
> + tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false, /*udl_ok=*/false);
>
> /* Transform the literal into an identifier. If the literal is a
> wide-character string, or contains embedded NULs, then we can't
> @@ -16300,8 +16306,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
>
> /* Parse the string-literal message. */
> message = cp_parser_string_literal (parser,
> - /*translate=*/false,
> - /*wide_ok=*/true);
> + /*translate=*/false, /*wide_ok=*/true,
> + /*udl_ok=*/false);
>
> /* A `)' completes the static assertion. */
> if (!parens.require_close (parser))
> @@ -17349,7 +17355,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> case CPP_STRING16_USERDEF:
> case CPP_STRING32_USERDEF:
> {
> - cp_expr str;
> tree string_tree;
> int sz, len;
>
> @@ -17357,8 +17362,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>
> /* Consume the string. */
> - str = cp_parser_string_literal (parser, /*translate=*/true,
> - /*wide_ok=*/true, /*lookup_udlit=*/false);
> + cp_expr str = cp_parser_string_literal (parser, /*translate=*/true,
> + /*wide_ok=*/true,
> + /*udl_ok=*/true,
> + /*lookup_udlit=*/false);
> if (str == error_mark_node)
> return error_mark_node;
> else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -21975,7 +21982,6 @@ cp_parser_using_directive (cp_parser* parser)
> static void
> cp_parser_asm_definition (cp_parser* parser)
> {
> - tree string;
> tree outputs = NULL_TREE;
> tree inputs = NULL_TREE;
> tree clobbers = NULL_TREE;
> @@ -22083,7 +22089,8 @@ cp_parser_asm_definition (cp_parser* parser)
> if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
> return;
> /* Look for the string. */
> - string = cp_parser_string_literal (parser, false, false);
> + tree string = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false, /*udl_ok=*/false);
> if (string == error_mark_node)
> {
> cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28485,11 +28492,8 @@ cp_parser_yield_expression (cp_parser* parser)
> static tree
> cp_parser_asm_specification_opt (cp_parser* parser)
> {
> - cp_token *token;
> - tree asm_specification;
> -
> /* Peek at the next token. */
> - token = cp_lexer_peek_token (parser->lexer);
> + cp_token *token = cp_lexer_peek_token (parser->lexer);
> /* If the next token isn't the `asm' keyword, then there's no
> asm-specification. */
> if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28502,7 +28506,10 @@ cp_parser_asm_specification_opt (cp_parser* parser)
> parens.require_open (parser);
>
> /* Look for the string-literal. */
> - asm_specification = cp_parser_string_literal (parser, false, false);
> + tree asm_specification = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false,
> + /*udl_ok=*/false);
>
> /* Look for the `)'. */
> parens.require_close (parser);
> @@ -28535,8 +28542,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> - tree expression;
> tree name;
>
> if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28554,13 +28559,16 @@ cp_parser_asm_operand_list (cp_parser* parser)
> else
> name = NULL_TREE;
> /* Look for the string-literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false,
> + /*udl_ok=*/false);
>
> /* Look for the `('. */
> matching_parens parens;
> parens.require_open (parser);
> /* Parse the expression. */
> - expression = cp_parser_expression (parser);
> + tree expression = cp_parser_expression (parser);
> /* Look for the `)'. */
> parens.require_close (parser);
>
> @@ -28600,10 +28608,11 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> -
> /* Look for the string literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false,
> + /*udl_ok=*/false);
> /* Add it to the list. */
> clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
> /* If the next token is not a `,', then the list is
> @@ -45761,7 +45770,10 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
> cp_lexer_consume_token (parser->lexer);
> }
> else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> - value = cp_parser_string_literal (parser, false, false);
> + value = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false,
> + /*udl_ok=*/false);
> else
> {
> cp_parser_error (parser, "expected identifier or "
> @@ -48783,7 +48795,8 @@ pragma_lex (tree *value, location_t *loc)
> if (ret == CPP_PRAGMA_EOL)
> ret = CPP_EOF;
> else if (ret == CPP_STRING)
> - *value = cp_parser_string_literal (the_parser, false, false);
> + *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> + /*wide_ok=*/false, /*udl_ok=*/false);
> else
> {
> if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x // { dg-error "include expects" }
> +#line ""_x // { dg-error "not a positive integer" }
> +#if __has_include(""_x) // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> + asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
>
> base-commit: f232715d15618e91c90eb210e23de10909590944
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
2022-11-15 23:58 ` Jason Merrill
@ 2022-11-16 0:35 ` Marek Polacek
2022-11-16 13:22 ` Jason Merrill
0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-16 0:35 UTC (permalink / raw)
To: Jason Merrill; +Cc: GCC Patches
On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
> On 11/12/22 06:53, Marek Polacek wrote:
> > In this PR, we are crashing because we've encountered a UDL where a
> > string-literal is expected. This patch makes the parser reject string
> > and character UDLs in all places where the grammar requires a
> > string-literal and not a user-defined-string-literal.
> >
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>
> Since the grammar has
>
> user-defined-string-literal :
> string-literal ud-suffix
>
> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
> that calls cp_parser_string_literal?
Umm, maybe, but the UDL handling code seems to be too entrenched in
cp_parser_string_literal and I don't think it's going to be easy to extract
it :/.
Marek
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
2022-11-16 0:35 ` Marek Polacek
@ 2022-11-16 13:22 ` Jason Merrill
2022-11-17 1:12 ` Marek Polacek
0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-16 13:22 UTC (permalink / raw)
To: Marek Polacek; +Cc: GCC Patches
On 11/15/22 19:35, Marek Polacek wrote:
> On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
>> On 11/12/22 06:53, Marek Polacek wrote:
>>> In this PR, we are crashing because we've encountered a UDL where a
>>> string-literal is expected. This patch makes the parser reject string
>>> and character UDLs in all places where the grammar requires a
>>> string-literal and not a user-defined-string-literal.
>>>
>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>
>> Since the grammar has
>>
>> user-defined-string-literal :
>> string-literal ud-suffix
>>
>> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
>> that calls cp_parser_string_literal?
>
> Umm, maybe, but the UDL handling code seems to be too entrenched in
> cp_parser_string_literal and I don't think it's going to be easy to extract
> it :/.
Fair enough; maybe a wrapper, then?
Jason
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
2022-11-16 13:22 ` Jason Merrill
@ 2022-11-17 1:12 ` Marek Polacek
2022-11-18 0:06 ` Jason Merrill
0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-17 1:12 UTC (permalink / raw)
To: Jason Merrill; +Cc: GCC Patches
On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
> On 11/15/22 19:35, Marek Polacek wrote:
> > On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
> > > On 11/12/22 06:53, Marek Polacek wrote:
> > > > In this PR, we are crashing because we've encountered a UDL where a
> > > > string-literal is expected. This patch makes the parser reject string
> > > > and character UDLs in all places where the grammar requires a
> > > > string-literal and not a user-defined-string-literal.
> > > >
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > >
> > > Since the grammar has
> > >
> > > user-defined-string-literal :
> > > string-literal ud-suffix
> > >
> > > maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
> > > that calls cp_parser_string_literal?
> >
> > Umm, maybe, but the UDL handling code seems to be too entrenched in
> > cp_parser_string_literal and I don't think it's going to be easy to extract
> > it :/.
>
> Fair enough; maybe a wrapper, then?
As in, have a cp_parser_udl_string_literal wrapper that calls
cp_parser_string_literal with udl_ok=true, rename the current
cp_parser_string_literal, and introduce a new cp_parser_string_literal
wrapper that passes udl_ok=false?
One problem with cp_parser_udl_string_literal is that it's too similar to
cp_parser_userdef_string_literal, which would be confusing, I think.
Marek
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] c++: Reject UDLs in certain contexts [PR105300]
2022-11-17 1:12 ` Marek Polacek
@ 2022-11-18 0:06 ` Jason Merrill
2022-11-18 23:52 ` [PATCH v2] " Marek Polacek
0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-18 0:06 UTC (permalink / raw)
To: Marek Polacek; +Cc: GCC Patches
On 11/16/22 20:12, Marek Polacek wrote:
> On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
>> On 11/15/22 19:35, Marek Polacek wrote:
>>> On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
>>>> On 11/12/22 06:53, Marek Polacek wrote:
>>>>> In this PR, we are crashing because we've encountered a UDL where a
>>>>> string-literal is expected. This patch makes the parser reject string
>>>>> and character UDLs in all places where the grammar requires a
>>>>> string-literal and not a user-defined-string-literal.
>>>>>
>>>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>>>
>>>> Since the grammar has
>>>>
>>>> user-defined-string-literal :
>>>> string-literal ud-suffix
>>>>
>>>> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
>>>> that calls cp_parser_string_literal?
>>>
>>> Umm, maybe, but the UDL handling code seems to be too entrenched in
>>> cp_parser_string_literal and I don't think it's going to be easy to extract
>>> it :/.
>>
>> Fair enough; maybe a wrapper, then?
>
> As in, have a cp_parser_udl_string_literal wrapper that calls
> cp_parser_string_literal with udl_ok=true, rename cp_parser_string_literal,
> introduce a new cp_parser_string_literal wrapper that passes udl_ok=false?
That's what I was thinking. And the new cp_parser_string_literal could
also omit the lookup_udlit parm.
> One problem with cp_parser_udl_string_literal is that it's too similar to
> cp_parser_userdef_string_literal, which would be confusing, I think.
True, probably better to use that name instead, and rename the current
one to something like finish_userdef_string_literal.
Jason
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2] c++: Reject UDLs in certain contexts [PR105300]
2022-11-18 0:06 ` Jason Merrill
@ 2022-11-18 23:52 ` Marek Polacek
2022-11-19 1:39 ` Jason Merrill
0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-11-18 23:52 UTC (permalink / raw)
To: Jason Merrill; +Cc: GCC Patches
On Thu, Nov 17, 2022 at 07:06:34PM -0500, Jason Merrill wrote:
> On 11/16/22 20:12, Marek Polacek wrote:
> > On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
> > > On 11/15/22 19:35, Marek Polacek wrote:
> > > > On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
> > > > > On 11/12/22 06:53, Marek Polacek wrote:
> > > > > > In this PR, we are crashing because we've encountered a UDL where a
> > > > > > string-literal is expected. This patch makes the parser reject string
> > > > > > and character UDLs in all places where the grammar requires a
> > > > > > string-literal and not a user-defined-string-literal.
> > > > > >
> > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > >
> > > > > Since the grammar has
> > > > >
> > > > > user-defined-string-literal :
> > > > > string-literal ud-suffix
> > > > >
> > > > > maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
> > > > > that calls cp_parser_string_literal?
> > > >
> > > > Umm, maybe, but the UDL handling code seems to be too entrenched in
> > > > cp_parser_string_literal and I don't think it's going to be easy to extract
> > > > it :/.
> > >
> > > Fair enough; maybe a wrapper, then?
> >
> > As in, have a cp_parser_udl_string_literal wrapper that calls
> > cp_parser_string_literal with udl_ok=true, rename cp_parser_string_literal,
> > introduce a new cp_parser_string_literal wrapper that passes udl_ok=false?
>
> That's what I was thinking. And the new cp_parser_string_literal could also
> omit the lookup_udlit parm.
>
> > One problem with cp_parser_udl_string_literal is that it's too similar to
> > cp_parser_userdef_string_literal, which would be confusing, I think.
>
> True, probably better to use that name instead, and rename the current one
> to something like finish_userdef_string_literal
Sounds good, here's the patch.
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
-- >8 --
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected. This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.
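I've introduced two new wrappers: cp_parser_string_literal, which rejects
UDLs, and cp_parser_userdef_string_literal, which permits them.  The
existing cp_parser_string_literal has been renamed to
cp_parser_string_literal_common and should not be called directly.  The
former cp_parser_userdef_string_literal has been renamed to
finish_userdef_string_literal.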
PR c++/105300
gcc/c-family/ChangeLog:
* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
gcc/cp/ChangeLog:
* parser.cc: Remove unnecessary forward declarations.
(cp_parser_string_literal): New wrapper.
(cp_parser_string_literal_common): Renamed from
cp_parser_string_literal. Add a bool parameter. Give an error when
UDLs are not permitted.
(cp_parser_userdef_string_literal): New wrapper.
(finish_userdef_string_literal): Renamed from
cp_parser_userdef_string_literal.
(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
instead of cp_parser_string_literal.
(cp_parser_linkage_specification): Move a variable declaration closer
to its first use.
(cp_parser_static_assert): Likewise.
(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
cp_parser_string_literal.
(cp_parser_asm_definition): Move a variable declaration closer to its
first use.
(cp_parser_asm_specification_opt): Move variable declarations closer to
their first use.
(cp_parser_asm_operand_list): Likewise.
(cp_parser_asm_clobber_list): Likewise.
gcc/testsuite/ChangeLog:
* g++.dg/cpp0x/udlit-error1.C: New test.
---
gcc/c-family/c-pragma.cc | 3 +
gcc/cp/parser.cc | 131 ++++++++++++++--------
gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
3 files changed, 111 insertions(+), 44 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 142a46441ac..49f405b605b 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
}
else if (token == CPP_STRING)
message = x;
+ else if (token == CPP_STRING_USERDEF)
+ GCC_BAD ("string literal with user-defined suffix is invalid in this "
+ "context");
else
GCC_BAD ("expected a string after %<#pragma message%>");
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index c5929a6cc5f..e3bd94ffe11 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2223,16 +2223,8 @@ pop_unparsed_function_queues (cp_parser *parser)
/* Lexical conventions [gram.lex] */
-static cp_expr cp_parser_identifier
- (cp_parser *);
-static cp_expr cp_parser_string_literal
- (cp_parser *, bool, bool, bool);
-static cp_expr cp_parser_userdef_char_literal
- (cp_parser *);
-static tree cp_parser_userdef_string_literal
+static tree finish_userdef_string_literal
(tree);
-static cp_expr cp_parser_userdef_numeric_literal
- (cp_parser *);
/* Basic concepts [gram.basic] */
@@ -4398,11 +4390,15 @@ cp_parser_identifier (cp_parser* parser)
return error_mark_node;
}
-/* Parse a sequence of adjacent string constants. Returns a
+/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
+ Do not call this directly; use either of the above.
+
+ Parse a sequence of adjacent string constants. Return a
TREE_STRING representing the combined, nul-terminated string
constant. If TRANSLATE is true, translate the string to the
execution character set. If WIDE_OK is true, a wide string is
- invalid here.
+ valid here. If UDL_OK is true, a string literal with user-defined
+ suffix can be used in this context.
C++98 [lex.string] says that if a narrow string literal token is
adjacent to a wide string literal token, the behavior is undefined.
@@ -4412,9 +4408,11 @@ cp_parser_identifier (cp_parser* parser)
This code is largely lifted from lex_string() in c-lex.cc.
FUTURE: ObjC++ will need to handle @-strings here. */
+
static cp_expr
-cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
- bool lookup_udlit = true)
+cp_parser_string_literal_common (cp_parser *parser, bool translate,
+ bool wide_ok, bool udl_ok,
+ bool lookup_udlit)
{
tree value;
size_t count;
@@ -4439,6 +4437,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -4529,6 +4533,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
tok = cp_lexer_peek_token (parser->lexer);
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -4598,7 +4608,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
tree literal = build_userdef_literal (suffix_id, value,
OT_NONE, NULL_TREE);
if (lookup_udlit)
- value = cp_parser_userdef_string_literal (literal);
+ value = finish_userdef_string_literal (literal);
else
value = literal;
}
@@ -4616,6 +4626,38 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
return cp_expr (value, loc);
}
+/* Parse a sequence of adjacent string constants. Return a TREE_STRING
+ representing the combined, nul-terminated string constant. If
+ TRANSLATE is true, translate the string to the execution character set.
+ If WIDE_OK is true, a wide string is valid here.
+
+ This function issues an error if a user defined string literal is
+ encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
+
+static inline cp_expr
+cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
+{
+ return cp_parser_string_literal_common (parser, translate, wide_ok,
+ /*udl_ok=*/false,
+ /*lookup_udlit=*/false);
+}
+
+/* Parse a string literal or user defined string literal.
+
+ user-defined-string-literal :
+ string-literal ud-suffix
+
+ Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
+ a lookup for a suitable template function. */
+
+static inline cp_expr
+cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
+ bool wide_ok, bool lookup_udlit = true)
+{
+ return cp_parser_string_literal_common (parser, translate, wide_ok,
+ /*udl_ok=*/true, lookup_udlit);
+}
+
/* Look up a literal operator with the name and the exact arguments. */
static tree
@@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
as arguments. */
static tree
-cp_parser_userdef_string_literal (tree literal)
+finish_userdef_string_literal (tree literal)
{
tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
@@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
case CPP_UTF8STRING_USERDEF:
/* ??? Should wide strings be allowed when parser->translate_strings_p
is false (i.e. in attributes)? If not, we can kill the third
- argument to cp_parser_string_literal. */
- return (cp_parser_string_literal (parser,
- parser->translate_strings_p,
- true)
+ argument to cp_parser_{,userdef}string_literal. */
+ return (cp_parser_userdef_string_literal (parser,
+ parser->translate_strings_p,
+ /*wide_ok=*/true)
.maybe_add_location_wrapper ());
case CPP_OPEN_PAREN:
@@ -16161,15 +16203,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
static void
cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
{
- tree linkage;
-
/* Look for the `extern' keyword. */
cp_token *extern_token
= cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
/* Look for the string-literal. */
cp_token *string_token = cp_lexer_peek_token (parser->lexer);
- linkage = cp_parser_string_literal (parser, false, false);
+ tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false);
/* Transform the literal into an identifier. If the literal is a
wide-character string, or contains embedded NULs, then we can't
@@ -16299,9 +16340,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
cp_parser_require (parser, CPP_COMMA, RT_COMMA);
/* Parse the string-literal message. */
- message = cp_parser_string_literal (parser,
- /*translate=*/false,
- /*wide_ok=*/true);
+ message = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/true);
/* A `)' completes the static assertion. */
if (!parens.require_close (parser))
@@ -17349,7 +17389,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
case CPP_STRING16_USERDEF:
case CPP_STRING32_USERDEF:
{
- cp_expr str;
tree string_tree;
int sz, len;
@@ -17357,8 +17396,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
/* Consume the string. */
- str = cp_parser_string_literal (parser, /*translate=*/true,
- /*wide_ok=*/true, /*lookup_udlit=*/false);
+ cp_expr str = cp_parser_userdef_string_literal (parser,
+ /*translate=*/true,
+ /*wide_ok=*/true,
+ /*lookup_udlit=*/false);
if (str == error_mark_node)
return error_mark_node;
else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -21975,7 +22016,6 @@ cp_parser_using_directive (cp_parser* parser)
static void
cp_parser_asm_definition (cp_parser* parser)
{
- tree string;
tree outputs = NULL_TREE;
tree inputs = NULL_TREE;
tree clobbers = NULL_TREE;
@@ -22083,7 +22123,8 @@ cp_parser_asm_definition (cp_parser* parser)
if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
return;
/* Look for the string. */
- string = cp_parser_string_literal (parser, false, false);
+ tree string = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false);
if (string == error_mark_node)
{
cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28485,11 +28526,8 @@ cp_parser_yield_expression (cp_parser* parser)
static tree
cp_parser_asm_specification_opt (cp_parser* parser)
{
- cp_token *token;
- tree asm_specification;
-
/* Peek at the next token. */
- token = cp_lexer_peek_token (parser->lexer);
+ cp_token *token = cp_lexer_peek_token (parser->lexer);
/* If the next token isn't the `asm' keyword, then there's no
asm-specification. */
if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28502,7 +28540,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
parens.require_open (parser);
/* Look for the string-literal. */
- asm_specification = cp_parser_string_literal (parser, false, false);
+ tree asm_specification = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28535,8 +28575,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
while (true)
{
- tree string_literal;
- tree expression;
tree name;
if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28554,13 +28592,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
else
name = NULL_TREE;
/* Look for the string-literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Look for the `('. */
matching_parens parens;
parens.require_open (parser);
/* Parse the expression. */
- expression = cp_parser_expression (parser);
+ tree expression = cp_parser_expression (parser);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28600,10 +28640,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
while (true)
{
- tree string_literal;
-
/* Look for the string literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Add it to the list. */
clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
/* If the next token is not a `,', then the list is
@@ -45763,7 +45803,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
cp_lexer_consume_token (parser->lexer);
}
else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
- value = cp_parser_string_literal (parser, false, false);
+ value = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
else
{
cp_parser_error (parser, "expected identifier or "
@@ -48785,7 +48827,8 @@ pragma_lex (tree *value, location_t *loc)
if (ret == CPP_PRAGMA_EOL)
ret = CPP_EOF;
else if (ret == CPP_STRING)
- *value = cp_parser_string_literal (the_parser, false, false);
+ *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+ /*wide_ok=*/false);
else
{
if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x // { dg-error "include expects" }
+#line ""_x // { dg-error "not a positive integer" }
+#if __has_include(""_x) // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+ asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}
base-commit: 1c4a7881c492790eaad9aec1bcc2c1370f41740f
--
2.38.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2] c++: Reject UDLs in certain contexts [PR105300]
2022-11-18 23:52 ` [PATCH v2] " Marek Polacek
@ 2022-11-19 1:39 ` Jason Merrill
2022-12-02 23:58 ` [PATCH v3] " Marek Polacek
0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-11-19 1:39 UTC (permalink / raw)
To: Marek Polacek; +Cc: GCC Patches
On 11/18/22 18:52, Marek Polacek wrote:
> On Thu, Nov 17, 2022 at 07:06:34PM -0500, Jason Merrill wrote:
>> On 11/16/22 20:12, Marek Polacek wrote:
>>> On Wed, Nov 16, 2022 at 08:22:39AM -0500, Jason Merrill wrote:
>>>> On 11/15/22 19:35, Marek Polacek wrote:
>>>>> On Tue, Nov 15, 2022 at 06:58:39PM -0500, Jason Merrill wrote:
>>>>>> On 11/12/22 06:53, Marek Polacek wrote:
>>>>>>> In this PR, we are crashing because we've encountered a UDL where a
>>>>>>> string-literal is expected. This patch makes the parser reject string
>>>>>>> and character UDLs in all places where the grammar requires a
>>>>>>> string-literal and not a user-defined-string-literal.
>>>>>>>
>>>>>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>>>>>
>>>>>> Since the grammar has
>>>>>>
>>>>>> user-defined-string-literal :
>>>>>> string-literal ud-suffix
>>>>>>
>>>>>> maybe we want to move the UDL handling out to a cp_parser_udl_string_literal
>>>>>> that calls cp_parser_string_literal?
>>>>>
>>>>> Umm, maybe, but the UDL handling code seems to be too entrenched in
>>>>> cp_parser_string_literal and I don't think it's going to be easy to extract
>>>>> it :/.
>>>>
>>>> Fair enough; maybe a wrapper, then?
>>>
>>> As in, have a cp_parser_udl_string_literal wrapper that calls
>>> cp_parser_string_literal with udl_ok=true, rename cp_parser_string_literal,
>>> introduce a new cp_parser_string_literal wrapper that passes udl_ok=false?
>>
>> That's what I was thinking. And the new cp_parser_string_literal could also
>> omit the lookup_udlit parm.
>>
>>> One problem with cp_parser_udl_string_literal is that it's too similar to
>>> cp_parser_userdef_string_literal, which would be confusing, I think.
>>
>> True, probably better to use that name instead, and rename the current one
>> to something like finish_userdef_string_literal
>
> Sounds good, here's the patch.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected. This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
>
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly. finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
>
> PR c++/105300
>
> gcc/c-family/ChangeLog:
>
> * c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
>
> gcc/cp/ChangeLog:
>
> * parser.cc: Remove unnecessary forward declarations.
> (cp_parser_string_literal): New wrapper.
> (cp_parser_string_literal_common): Renamed from
> cp_parser_string_literal. Add a bool parameter. Give an error when
> UDLs are not permitted.
> (cp_parser_userdef_string_literal): New wrapper.
> (finish_userdef_string_literal): Renamed from
> cp_parser_userdef_string_literal.
> (cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> instead of cp_parser_string_literal.
> (cp_parser_linkage_specification): Move a variable declaration closer
> to its first use.
> (cp_parser_static_assert): Likewise.
> (cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> cp_parser_string_literal.
> (cp_parser_asm_definition): Move a variable declaration closer to its
> first use.
> (cp_parser_asm_specification_opt): Move variable declarations closer to
> their first use.
> (cp_parser_asm_operand_list): Likewise.
> (cp_parser_asm_clobber_list): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp0x/udlit-error1.C: New test.
> ---
> gcc/c-family/c-pragma.cc | 3 +
> gcc/cp/parser.cc | 131 ++++++++++++++--------
> gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
> 3 files changed, 111 insertions(+), 44 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
>
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 142a46441ac..49f405b605b 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
> }
> else if (token == CPP_STRING)
> message = x;
> + else if (token == CPP_STRING_USERDEF)
> + GCC_BAD ("string literal with user-defined suffix is invalid in this "
> + "context");
> else
> GCC_BAD ("expected a string after %<#pragma message%>");
>
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index c5929a6cc5f..e3bd94ffe11 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2223,16 +2223,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>
> /* Lexical conventions [gram.lex] */
>
> -static cp_expr cp_parser_identifier
> - (cp_parser *);
> -static cp_expr cp_parser_string_literal
> - (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> - (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
> (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> - (cp_parser *);
>
> /* Basic concepts [gram.basic] */
>
> @@ -4398,11 +4390,15 @@ cp_parser_identifier (cp_parser* parser)
> return error_mark_node;
> }
>
> -/* Parse a sequence of adjacent string constants. Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> + Do not call this directly; use either of the above.
> +
> + Parse a sequence of adjacent string constants. Return a
> TREE_STRING representing the combined, nul-terminated string
> constant. If TRANSLATE is true, translate the string to the
> execution character set. If WIDE_OK is true, a wide string is
> - invalid here.
> + valid here. If UDL_OK is true, a string literal with user-defined
> + suffix can be used in this context.
>
> C++98 [lex.string] says that if a narrow string literal token is
> adjacent to a wide string literal token, the behavior is undefined.
> @@ -4412,9 +4408,11 @@ cp_parser_identifier (cp_parser* parser)
> This code is largely lifted from lex_string() in c-lex.cc.
>
> FUTURE: ObjC++ will need to handle @-strings here. */
> +
> static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> - bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> + bool wide_ok, bool udl_ok,
> + bool lookup_udlit)
> {
> tree value;
> size_t count;
> @@ -4439,6 +4437,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4529,6 +4533,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tok = cp_lexer_peek_token (parser->lexer);
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4598,7 +4608,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tree literal = build_userdef_literal (suffix_id, value,
> OT_NONE, NULL_TREE);
> if (lookup_udlit)
> - value = cp_parser_userdef_string_literal (literal);
> + value = finish_userdef_string_literal (literal);
> else
> value = literal;
> }
> @@ -4616,6 +4626,38 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> return cp_expr (value, loc);
> }
>
> +/* Parse a sequence of adjacent string constants. Return a TREE_STRING
> + representing the combined, nul-terminated string constant. If
> + TRANSLATE is true, translate the string to the execution character set.
> + If WIDE_OK is true, a wide string is valid here.
> +
> + This function issues an error if a user defined string literal is
> + encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> + return cp_parser_string_literal_common (parser, translate, wide_ok,
> + /*udl_ok=*/false,
> + /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> + user-defined-string-literal :
> + string-literal ud-suffix
> +
> + Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
> + a lookup for a suitable template function. */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> + bool wide_ok, bool lookup_udlit = true)
I think this function doesn't need the translate and wide_ok parms, they
can always be true.
> +{
> + return cp_parser_string_literal_common (parser, translate, wide_ok,
> + /*udl_ok=*/true, lookup_udlit);
> +}
> +
> /* Look up a literal operator with the name and the exact arguments. */
>
> static tree
> @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> as arguments. */
>
> static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
> {
> tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> case CPP_UTF8STRING_USERDEF:
> /* ??? Should wide strings be allowed when parser->translate_strings_p
> is false (i.e. in attributes)? If not, we can kill the third
> - argument to cp_parser_string_literal. */
I think the answer to this old question is no: if we have an
encoding-prefix, we should be translating.
> - return (cp_parser_string_literal (parser,
> - parser->translate_strings_p,
> - true)
> + argument to cp_parser_{,userdef}string_literal. */
> + return (cp_parser_userdef_string_literal (parser,
> + parser->translate_strings_p,
> + /*wide_ok=*/true)
For CPP_*STRING* without _USERDEF, we should still call
cp_parser_string_literal.
> .maybe_add_location_wrapper ());
>
> case CPP_OPEN_PAREN:
> @@ -16161,15 +16203,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
> static void
> cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
> {
> - tree linkage;
> -
> /* Look for the `extern' keyword. */
> cp_token *extern_token
> = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>
> /* Look for the string-literal. */
> cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> - linkage = cp_parser_string_literal (parser, false, false);
> + tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Transform the literal into an identifier. If the literal is a
> wide-character string, or contains embedded NULs, then we can't
> @@ -16299,9 +16340,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
> cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>
> /* Parse the string-literal message. */
> - message = cp_parser_string_literal (parser,
> - /*translate=*/false,
> - /*wide_ok=*/true);
> + message = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/true);
>
> /* A `)' completes the static assertion. */
> if (!parens.require_close (parser))
> @@ -17349,7 +17389,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> case CPP_STRING16_USERDEF:
> case CPP_STRING32_USERDEF:
> {
> - cp_expr str;
> tree string_tree;
> int sz, len;
>
> @@ -17357,8 +17396,10 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>
> /* Consume the string. */
> - str = cp_parser_string_literal (parser, /*translate=*/true,
> - /*wide_ok=*/true, /*lookup_udlit=*/false);
> + cp_expr str = cp_parser_userdef_string_literal (parser,
> + /*translate=*/true,
> + /*wide_ok=*/true,
> + /*lookup_udlit=*/false);
> if (str == error_mark_node)
> return error_mark_node;
> else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -21975,7 +22016,6 @@ cp_parser_using_directive (cp_parser* parser)
> static void
> cp_parser_asm_definition (cp_parser* parser)
> {
> - tree string;
> tree outputs = NULL_TREE;
> tree inputs = NULL_TREE;
> tree clobbers = NULL_TREE;
> @@ -22083,7 +22123,8 @@ cp_parser_asm_definition (cp_parser* parser)
> if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
> return;
> /* Look for the string. */
> - string = cp_parser_string_literal (parser, false, false);
> + tree string = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
> if (string == error_mark_node)
> {
> cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28485,11 +28526,8 @@ cp_parser_yield_expression (cp_parser* parser)
> static tree
> cp_parser_asm_specification_opt (cp_parser* parser)
> {
> - cp_token *token;
> - tree asm_specification;
> -
> /* Peek at the next token. */
> - token = cp_lexer_peek_token (parser->lexer);
> + cp_token *token = cp_lexer_peek_token (parser->lexer);
> /* If the next token isn't the `asm' keyword, then there's no
> asm-specification. */
> if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28502,7 +28540,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
> parens.require_open (parser);
>
> /* Look for the string-literal. */
> - asm_specification = cp_parser_string_literal (parser, false, false);
> + tree asm_specification = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `)'. */
> parens.require_close (parser);
> @@ -28535,8 +28575,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> - tree expression;
> tree name;
>
> if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28554,13 +28592,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
> else
> name = NULL_TREE;
> /* Look for the string-literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `('. */
> matching_parens parens;
> parens.require_open (parser);
> /* Parse the expression. */
> - expression = cp_parser_expression (parser);
> + tree expression = cp_parser_expression (parser);
> /* Look for the `)'. */
> parens.require_close (parser);
>
> @@ -28600,10 +28640,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> -
> /* Look for the string literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> /* Add it to the list. */
> clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
> /* If the next token is not a `,', then the list is
> @@ -45763,7 +45803,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
> cp_lexer_consume_token (parser->lexer);
> }
> else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> - value = cp_parser_string_literal (parser, false, false);
> + value = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> cp_parser_error (parser, "expected identifier or "
> @@ -48785,7 +48827,8 @@ pragma_lex (tree *value, location_t *loc)
> if (ret == CPP_PRAGMA_EOL)
> ret = CPP_EOF;
> else if (ret == CPP_STRING)
> - *value = cp_parser_string_literal (the_parser, false, false);
> + *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x // { dg-error "include expects" }
> +#line ""_x // { dg-error "not a positive integer" }
> +#if __has_include(""_x) // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> + asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
>
> base-commit: 1c4a7881c492790eaad9aec1bcc2c1370f41740f
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v3] c++: Reject UDLs in certain contexts [PR105300]
2022-11-19 1:39 ` Jason Merrill
@ 2022-12-02 23:58 ` Marek Polacek
2022-12-03 19:58 ` Jason Merrill
0 siblings, 1 reply; 13+ messages in thread
From: Marek Polacek @ 2022-12-02 23:58 UTC (permalink / raw)
To: Jason Merrill; +Cc: GCC Patches
On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
> On 11/18/22 18:52, Marek Polacek wrote:
> > +/* Parse a string literal or user defined string literal.
> > +
> > + user-defined-string-literal :
> > + string-literal ud-suffix
> > +
> > + Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
> > + a lookup for a suitable template function. */
> > +
> > +static inline cp_expr
> > +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> > + bool wide_ok, bool lookup_udlit = true)
>
> I think this function doesn't need the translate and wide_ok parms, they can
> always be true.
I've dropped the wide_ok one, but not the other, because...
> > +{
> > + return cp_parser_string_literal_common (parser, translate, wide_ok,
> > + /*udl_ok=*/true, lookup_udlit);
> > +}
> > +
> > /* Look up a literal operator with the name and the exact arguments. */
> > static tree
> > @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> > as arguments. */
> > static tree
> > -cp_parser_userdef_string_literal (tree literal)
> > +finish_userdef_string_literal (tree literal)
> > {
> > tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> > tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> > @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> > case CPP_UTF8STRING_USERDEF:
> > /* ??? Should wide strings be allowed when parser->translate_strings_p
> > is false (i.e. in attributes)? If not, we can kill the third
> > - argument to cp_parser_string_literal. */
>
> I think the answer to this old question is no: if we have an
> encoding-prefix, we should be translating.
...I don't actually know how to resolve this. wide_ok is always true here.
Should that change? Or rather, should translate be false for CPP_STRING only?
> > - return (cp_parser_string_literal (parser,
> > - parser->translate_strings_p,
> > - true)
> > + argument to cp_parser_{,userdef}string_literal. */
> > + return (cp_parser_userdef_string_literal (parser,
> > + parser->translate_strings_p,
> > + /*wide_ok=*/true)
>
> For CPP_*STRING* without _USERDEF, we should still call
> cp_parser_string_literal.
It looks like we always have to call cp_parser_userdef_string_literal;
otherwise this would be rejected:
std::string concat01 = "Hello, " "World!"_www;
That is because we first see a CPP_STRING, but the subsequent UDL
shouldn't be rejected.
So here's an updated version which drops the always-true parameter but
doesn't resolve the old question.
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
-- >8 --
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected. This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.
I've introduced two new wrappers; the existing cp_parser_string_literal
was renamed to cp_parser_string_literal_common and should not be called
directly. finish_userdef_string_literal is renamed from
cp_parser_userdef_string_literal.
PR c++/105300
gcc/c-family/ChangeLog:
* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
gcc/cp/ChangeLog:
* parser.cc: Remove unnecessary forward declarations.
(cp_parser_string_literal): New wrapper.
(cp_parser_string_literal_common): Renamed from
cp_parser_string_literal. Add a bool parameter. Give an error when
UDLs are not permitted.
(cp_parser_userdef_string_literal): New wrapper.
(finish_userdef_string_literal): Renamed from
cp_parser_userdef_string_literal.
(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
instead of cp_parser_string_literal.
(cp_parser_linkage_specification): Move a variable declaration closer
to its first use.
(cp_parser_static_assert): Likewise.
(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
cp_parser_string_literal.
(cp_parser_asm_definition): Move a variable declaration closer to its
first use.
(cp_parser_asm_specification_opt): Move variable declarations closer to
their first use.
(cp_parser_asm_operand_list): Likewise.
(cp_parser_asm_clobber_list): Likewise.
gcc/testsuite/ChangeLog:
* g++.dg/cpp0x/udlit-error1.C: New test.
---
gcc/c-family/c-pragma.cc | 3 +
gcc/cp/parser.cc | 128 ++++++++++++++--------
gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
3 files changed, 109 insertions(+), 43 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 142a46441ac..49f405b605b 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
}
else if (token == CPP_STRING)
message = x;
+ else if (token == CPP_STRING_USERDEF)
+ GCC_BAD ("string literal with user-defined suffix is invalid in this "
+ "context");
else
GCC_BAD ("expected a string after %<#pragma message%>");
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index e8a50904243..de3eff90871 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
/* Lexical conventions [gram.lex] */
-static cp_expr cp_parser_identifier
- (cp_parser *);
-static cp_expr cp_parser_string_literal
- (cp_parser *, bool, bool, bool);
-static cp_expr cp_parser_userdef_char_literal
- (cp_parser *);
-static tree cp_parser_userdef_string_literal
+static tree finish_userdef_string_literal
(tree);
-static cp_expr cp_parser_userdef_numeric_literal
- (cp_parser *);
/* Basic concepts [gram.basic] */
@@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
return error_mark_node;
}
-/* Parse a sequence of adjacent string constants. Returns a
+/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
+ Do not call this directly; use either of the above.
+
+ Parse a sequence of adjacent string constants. Return a
TREE_STRING representing the combined, nul-terminated string
constant. If TRANSLATE is true, translate the string to the
execution character set. If WIDE_OK is true, a wide string is
- invalid here.
+ valid here. If UDL_OK is true, a string literal with user-defined
+ suffix can be used in this context.
C++98 [lex.string] says that if a narrow string literal token is
adjacent to a wide string literal token, the behavior is undefined.
@@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
This code is largely lifted from lex_string() in c-lex.cc.
FUTURE: ObjC++ will need to handle @-strings here. */
+
static cp_expr
-cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
- bool lookup_udlit = true)
+cp_parser_string_literal_common (cp_parser *parser, bool translate,
+ bool wide_ok, bool udl_ok,
+ bool lookup_udlit)
{
tree value;
size_t count;
@@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
tok = cp_lexer_peek_token (parser->lexer);
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
tree literal = build_userdef_literal (suffix_id, value,
OT_NONE, NULL_TREE);
if (lookup_udlit)
- value = cp_parser_userdef_string_literal (literal);
+ value = finish_userdef_string_literal (literal);
else
value = literal;
}
@@ -4626,6 +4636,39 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
return cp_expr (value, loc);
}
+/* Parse a sequence of adjacent string constants. Return a TREE_STRING
+ representing the combined, nul-terminated string constant. If
+ TRANSLATE is true, translate the string to the execution character set.
+ If WIDE_OK is true, a wide string is valid here.
+
+ This function issues an error if a user defined string literal is
+ encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
+
+static inline cp_expr
+cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
+{
+ return cp_parser_string_literal_common (parser, translate, wide_ok,
+ /*udl_ok=*/false,
+ /*lookup_udlit=*/false);
+}
+
+/* Parse a string literal or user defined string literal.
+
+ user-defined-string-literal :
+ string-literal ud-suffix
+
+ If TRANSLATE is true, translate the string to the execution character set.
+ If LOOKUP_UDLIT, perform a lookup for a suitable template function. */
+
+static inline cp_expr
+cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
+ bool lookup_udlit = true)
+{
+ return cp_parser_string_literal_common (parser, translate,
+ /*wide_ok=*/true, /*udl_ok=*/true,
+ lookup_udlit);
+}
+
/* Look up a literal operator with the name and the exact arguments. */
static tree
@@ -4923,7 +4966,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
as arguments. */
static tree
-cp_parser_userdef_string_literal (tree literal)
+finish_userdef_string_literal (tree literal)
{
tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
@@ -5663,9 +5706,8 @@ cp_parser_primary_expression (cp_parser *parser,
/* ??? Should wide strings be allowed when parser->translate_strings_p
is false (i.e. in attributes)? If not, we can kill the third
argument to cp_parser_string_literal. */
- return (cp_parser_string_literal (parser,
- parser->translate_strings_p,
- true)
+ return (cp_parser_userdef_string_literal (parser,
+ parser->translate_strings_p)
.maybe_add_location_wrapper ());
case CPP_OPEN_PAREN:
@@ -16219,15 +16261,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
static void
cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
{
- tree linkage;
-
/* Look for the `extern' keyword. */
cp_token *extern_token
= cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
/* Look for the string-literal. */
cp_token *string_token = cp_lexer_peek_token (parser->lexer);
- linkage = cp_parser_string_literal (parser, false, false);
+ tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false);
/* Transform the literal into an identifier. If the literal is a
wide-character string, or contains embedded NULs, then we can't
@@ -16357,9 +16398,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
cp_parser_require (parser, CPP_COMMA, RT_COMMA);
/* Parse the string-literal message. */
- message = cp_parser_string_literal (parser,
- /*translate=*/false,
- /*wide_ok=*/true);
+ message = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/true);
/* A `)' completes the static assertion. */
if (!parens.require_close (parser))
@@ -17407,7 +17447,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
case CPP_STRING16_USERDEF:
case CPP_STRING32_USERDEF:
{
- cp_expr str;
tree string_tree;
int sz, len;
@@ -17415,8 +17454,9 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
/* Consume the string. */
- str = cp_parser_string_literal (parser, /*translate=*/true,
- /*wide_ok=*/true, /*lookup_udlit=*/false);
+ cp_expr str = cp_parser_userdef_string_literal (parser,
+ /*translate=*/true,
+ /*lookup_udlit=*/false);
if (str == error_mark_node)
return error_mark_node;
else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -22033,7 +22073,6 @@ cp_parser_using_directive (cp_parser* parser)
static void
cp_parser_asm_definition (cp_parser* parser)
{
- tree string;
tree outputs = NULL_TREE;
tree inputs = NULL_TREE;
tree clobbers = NULL_TREE;
@@ -22141,7 +22180,8 @@ cp_parser_asm_definition (cp_parser* parser)
if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
return;
/* Look for the string. */
- string = cp_parser_string_literal (parser, false, false);
+ tree string = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false);
if (string == error_mark_node)
{
cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28604,11 +28644,8 @@ cp_parser_yield_expression (cp_parser* parser)
static tree
cp_parser_asm_specification_opt (cp_parser* parser)
{
- cp_token *token;
- tree asm_specification;
-
/* Peek at the next token. */
- token = cp_lexer_peek_token (parser->lexer);
+ cp_token *token = cp_lexer_peek_token (parser->lexer);
/* If the next token isn't the `asm' keyword, then there's no
asm-specification. */
if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28621,7 +28658,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
parens.require_open (parser);
/* Look for the string-literal. */
- asm_specification = cp_parser_string_literal (parser, false, false);
+ tree asm_specification = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28654,8 +28693,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
while (true)
{
- tree string_literal;
- tree expression;
tree name;
if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28673,13 +28710,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
else
name = NULL_TREE;
/* Look for the string-literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Look for the `('. */
matching_parens parens;
parens.require_open (parser);
/* Parse the expression. */
- expression = cp_parser_expression (parser);
+ tree expression = cp_parser_expression (parser);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28719,10 +28758,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
while (true)
{
- tree string_literal;
-
/* Look for the string literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Add it to the list. */
clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
/* If the next token is not a `,', then the list is
@@ -46294,7 +46333,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
cp_lexer_consume_token (parser->lexer);
}
else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
- value = cp_parser_string_literal (parser, false, false);
+ value = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
else
{
cp_parser_error (parser, "expected identifier or "
@@ -49316,7 +49357,8 @@ pragma_lex (tree *value, location_t *loc)
if (ret == CPP_PRAGMA_EOL)
ret = CPP_EOF;
else if (ret == CPP_STRING)
- *value = cp_parser_string_literal (the_parser, false, false);
+ *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+ /*wide_ok=*/false);
else
{
if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x // { dg-error "include expects" }
+#line ""_x // { dg-error "not a positive integer" }
+#if __has_include(""_x) // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+ asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}
base-commit: 36a4ee406b95ae24a59b8b3f8ebe29af6fd5261e
--
2.38.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] c++: Reject UDLs in certain contexts [PR105300]
2022-12-02 23:58 ` [PATCH v3] " Marek Polacek
@ 2022-12-03 19:58 ` Jason Merrill
2023-01-13 23:22 ` [PATCH v4] " Marek Polacek
0 siblings, 1 reply; 13+ messages in thread
From: Jason Merrill @ 2022-12-03 19:58 UTC (permalink / raw)
To: Marek Polacek; +Cc: GCC Patches
On 12/2/22 18:58, Marek Polacek wrote:
> On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
>> On 11/18/22 18:52, Marek Polacek wrote:
>>> +/* Parse a string literal or user defined string literal.
>>> +
>>> + user-defined-string-literal :
>>> + string-literal ud-suffix
>>> +
>>> + Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
>>> + a lookup for a suitable template function. */
>>> +
>>> +static inline cp_expr
>>> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
>>> + bool wide_ok, bool lookup_udlit = true)
>>
>> I think this function doesn't need the translate and wide_ok parms, they can
>> always be true.
>
> I've dropped the wide_ok one, but not the other, because...
>
>>> +{
>>> + return cp_parser_string_literal_common (parser, translate, wide_ok,
>>> + /*udl_ok=*/true, lookup_udlit);
>>> +}
>>> +
>>> /* Look up a literal operator with the name and the exact arguments. */
>>> static tree
>>> @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>>> as arguments. */
>>> static tree
>>> -cp_parser_userdef_string_literal (tree literal)
>>> +finish_userdef_string_literal (tree literal)
>>> {
>>> tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>>> tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
>>> @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
>>> case CPP_UTF8STRING_USERDEF:
>>> /* ??? Should wide strings be allowed when parser->translate_strings_p
>>> is false (i.e. in attributes)? If not, we can kill the third
>>> - argument to cp_parser_string_literal. */
>>
>> I think the answer to this old question is no: if we have an
>> encoding-prefix, we should be translating.
>
> ...I don't actually know how to resolve this. wide_ok is always true here.
> Should that change? Or rather, should translate be false for CPP_STRING only?
The one current exception to my assertion above is static_assert, for
which we currently allow encoding-prefixes but don't translate. I think
this is wrong, that we should translate the string. But I'm not
confident of that.
But to your question, yes: when translate is false, I think we also
don't want to allow UDLs. So _userdef can always pass true for
translate. And as below we should call it only when translate would be
true.
Incidentally, it seems that we set translate off for all attributes,
even ones that would take a normal expression argument where presumably
we do want translation (and UDLs). The whole business of different
parsing for different attributes is a headache. You don't need to deal
with this now.
>>> - return (cp_parser_string_literal (parser,
>>> - parser->translate_strings_p,
>>> - true)
>>> + argument to cp_parser_{,userdef}string_literal. */
>>> + return (cp_parser_userdef_string_literal (parser,
>>> + parser->translate_strings_p,
>>> + /*wide_ok=*/true)
>>
>> For CPP_*STRING* without _USERDEF, we should still call
>> cp_parser_string_literal.
>
> It looks like we always have to call cp_parser_userdef_string_literal
> otherwise this would be rejected:
>
> std::string concat01 = "Hello, " "World!"_www;
>
> Because first we see a CPP_STRING but the subsequent UDL shouldn't
> be rejected.
Ah, I didn't notice the function was handling a sequence of
string-literals. So maybe we want to call _userdef here when
translate_strings_p is true, and not when it's false.
> So here's an updated version which drops the always-true parameter but
> doesn't resolve the old question.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected. This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
>
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly. finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
>
> PR c++/105300
>
> gcc/c-family/ChangeLog:
>
> * c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
>
> gcc/cp/ChangeLog:
>
> * parser.cc: Remove unnecessary forward declarations.
> (cp_parser_string_literal): New wrapper.
> (cp_parser_string_literal_common): Renamed from
> cp_parser_string_literal. Add a bool parameter. Give an error when
> UDLs are not permitted.
> (cp_parser_userdef_string_literal): New wrapper.
> (finish_userdef_string_literal): Renamed from
> cp_parser_userdef_string_literal.
> (cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> instead of cp_parser_string_literal.
> (cp_parser_linkage_specification): Move a variable declaration closer
> to its first use.
> (cp_parser_static_assert): Likewise.
> (cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> cp_parser_string_literal.
> (cp_parser_asm_definition): Move a variable declaration closer to its
> first use.
> (cp_parser_asm_specification_opt): Move variable declarations closer to
> their first use.
> (cp_parser_asm_operand_list): Likewise.
> (cp_parser_asm_clobber_list): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp0x/udlit-error1.C: New test.
> ---
> gcc/c-family/c-pragma.cc | 3 +
> gcc/cp/parser.cc | 128 ++++++++++++++--------
> gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
> 3 files changed, 109 insertions(+), 43 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
>
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 142a46441ac..49f405b605b 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
> }
> else if (token == CPP_STRING)
> message = x;
> + else if (token == CPP_STRING_USERDEF)
> + GCC_BAD ("string literal with user-defined suffix is invalid in this "
> + "context");
> else
> GCC_BAD ("expected a string after %<#pragma message%>");
>
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index e8a50904243..de3eff90871 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>
> /* Lexical conventions [gram.lex] */
>
> -static cp_expr cp_parser_identifier
> - (cp_parser *);
> -static cp_expr cp_parser_string_literal
> - (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> - (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
> (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> - (cp_parser *);
>
> /* Basic concepts [gram.basic] */
>
> @@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
> return error_mark_node;
> }
>
> -/* Parse a sequence of adjacent string constants. Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> + Do not call this directly; use either of the above.
> +
> + Parse a sequence of adjacent string constants. Return a
> TREE_STRING representing the combined, nul-terminated string
> constant. If TRANSLATE is true, translate the string to the
> execution character set. If WIDE_OK is true, a wide string is
> - invalid here.
> + valid here. If UDL_OK is true, a string literal with user-defined
> + suffix can be used in this context.
>
> C++98 [lex.string] says that if a narrow string literal token is
> adjacent to a wide string literal token, the behavior is undefined.
> @@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
> This code is largely lifted from lex_string() in c-lex.cc.
>
> FUTURE: ObjC++ will need to handle @-strings here. */
> +
> static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> - bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> + bool wide_ok, bool udl_ok,
> + bool lookup_udlit)
> {
> tree value;
> size_t count;
> @@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tok = cp_lexer_peek_token (parser->lexer);
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tree literal = build_userdef_literal (suffix_id, value,
> OT_NONE, NULL_TREE);
> if (lookup_udlit)
> - value = cp_parser_userdef_string_literal (literal);
> + value = finish_userdef_string_literal (literal);
> else
> value = literal;
> }
> @@ -4626,6 +4636,39 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> return cp_expr (value, loc);
> }
>
> +/* Parse a sequence of adjacent string constants. Return a TREE_STRING
> + representing the combined, nul-terminated string constant. If
> + TRANSLATE is true, translate the string to the execution character set.
> + If WIDE_OK is true, a wide string is valid here.
> +
> + This function issues an error if a user defined string literal is
> + encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> + return cp_parser_string_literal_common (parser, translate, wide_ok,
> + /*udl_ok=*/false,
> + /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> + user-defined-string-literal :
> + string-literal ud-suffix
> +
> + If TRANSLATE is true, translate the string to the execution character set.
> + If LOOKUP_UDLIT, perform a lookup for a suitable template function. */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> + bool lookup_udlit = true)
> +{
> + return cp_parser_string_literal_common (parser, translate,
> + /*wide_ok=*/true, /*udl_ok=*/true,
> + lookup_udlit);
> +}
> +
> /* Look up a literal operator with the name and the exact arguments. */
>
> static tree
> @@ -4923,7 +4966,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> as arguments. */
>
> static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
> {
> tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5663,9 +5706,8 @@ cp_parser_primary_expression (cp_parser *parser,
> /* ??? Should wide strings be allowed when parser->translate_strings_p
> is false (i.e. in attributes)? If not, we can kill the third
> argument to cp_parser_string_literal. */
> - return (cp_parser_string_literal (parser,
> - parser->translate_strings_p,
> - true)
> + return (cp_parser_userdef_string_literal (parser,
> + parser->translate_strings_p)
> .maybe_add_location_wrapper ());
>
> case CPP_OPEN_PAREN:
> @@ -16219,15 +16261,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
> static void
> cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
> {
> - tree linkage;
> -
> /* Look for the `extern' keyword. */
> cp_token *extern_token
> = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>
> /* Look for the string-literal. */
> cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> - linkage = cp_parser_string_literal (parser, false, false);
> + tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Transform the literal into an identifier. If the literal is a
> wide-character string, or contains embedded NULs, then we can't
> @@ -16357,9 +16398,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
> cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>
> /* Parse the string-literal message. */
> - message = cp_parser_string_literal (parser,
> - /*translate=*/false,
> - /*wide_ok=*/true);
> + message = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/true);
>
> /* A `)' completes the static assertion. */
> if (!parens.require_close (parser))
> @@ -17407,7 +17447,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> case CPP_STRING16_USERDEF:
> case CPP_STRING32_USERDEF:
> {
> - cp_expr str;
> tree string_tree;
> int sz, len;
>
> @@ -17415,8 +17454,9 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>
> /* Consume the string. */
> - str = cp_parser_string_literal (parser, /*translate=*/true,
> - /*wide_ok=*/true, /*lookup_udlit=*/false);
> + cp_expr str = cp_parser_userdef_string_literal (parser,
> + /*translate=*/true,
> + /*lookup_udlit=*/false);
> if (str == error_mark_node)
> return error_mark_node;
> else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -22033,7 +22073,6 @@ cp_parser_using_directive (cp_parser* parser)
> static void
> cp_parser_asm_definition (cp_parser* parser)
> {
> - tree string;
> tree outputs = NULL_TREE;
> tree inputs = NULL_TREE;
> tree clobbers = NULL_TREE;
> @@ -22141,7 +22180,8 @@ cp_parser_asm_definition (cp_parser* parser)
> if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
> return;
> /* Look for the string. */
> - string = cp_parser_string_literal (parser, false, false);
> + tree string = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
> if (string == error_mark_node)
> {
> cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28604,11 +28644,8 @@ cp_parser_yield_expression (cp_parser* parser)
> static tree
> cp_parser_asm_specification_opt (cp_parser* parser)
> {
> - cp_token *token;
> - tree asm_specification;
> -
> /* Peek at the next token. */
> - token = cp_lexer_peek_token (parser->lexer);
> + cp_token *token = cp_lexer_peek_token (parser->lexer);
> /* If the next token isn't the `asm' keyword, then there's no
> asm-specification. */
> if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28621,7 +28658,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
> parens.require_open (parser);
>
> /* Look for the string-literal. */
> - asm_specification = cp_parser_string_literal (parser, false, false);
> + tree asm_specification = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `)'. */
> parens.require_close (parser);
> @@ -28654,8 +28693,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> - tree expression;
> tree name;
>
> if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28673,13 +28710,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
> else
> name = NULL_TREE;
> /* Look for the string-literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `('. */
> matching_parens parens;
> parens.require_open (parser);
> /* Parse the expression. */
> - expression = cp_parser_expression (parser);
> + tree expression = cp_parser_expression (parser);
> /* Look for the `)'. */
> parens.require_close (parser);
>
> @@ -28719,10 +28758,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> -
> /* Look for the string literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> /* Add it to the list. */
> clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
> /* If the next token is not a `,', then the list is
> @@ -46294,7 +46333,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
> cp_lexer_consume_token (parser->lexer);
> }
> else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> - value = cp_parser_string_literal (parser, false, false);
> + value = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> cp_parser_error (parser, "expected identifier or "
> @@ -49316,7 +49357,8 @@ pragma_lex (tree *value, location_t *loc)
> if (ret == CPP_PRAGMA_EOL)
> ret = CPP_EOF;
> else if (ret == CPP_STRING)
> - *value = cp_parser_string_literal (the_parser, false, false);
> + *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x // { dg-error "include expects" }
> +#line ""_x // { dg-error "not a positive integer" }
> +#if __has_include(""_x) // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> + asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
>
> base-commit: 36a4ee406b95ae24a59b8b3f8ebe29af6fd5261e
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4] c++: Reject UDLs in certain contexts [PR105300]
2022-12-03 19:58 ` Jason Merrill
@ 2023-01-13 23:22 ` Marek Polacek
2023-01-24 22:49 ` Marek Polacek
2023-01-25 19:36 ` Jason Merrill
0 siblings, 2 replies; 13+ messages in thread
From: Marek Polacek @ 2023-01-13 23:22 UTC (permalink / raw)
To: Jason Merrill; +Cc: GCC Patches
On Sat, Dec 03, 2022 at 02:58:16PM -0500, Jason Merrill wrote:
> On 12/2/22 18:58, Marek Polacek wrote:
> > On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
> > > On 11/18/22 18:52, Marek Polacek wrote:
> > > > +/* Parse a string literal or user defined string literal.
> > > > +
> > > > + user-defined-string-literal :
> > > > + string-literal ud-suffix
> > > > +
> > > > + Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
> > > > + a lookup for a suitable template function. */
> > > > +
> > > > +static inline cp_expr
> > > > +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> > > > + bool wide_ok, bool lookup_udlit = true)
> > >
> > > I think this function doesn't need the translate and wide_ok parms, they can
> > > always be true.
> >
> > I've dropped the wide_ok one, but not the other, because...
> > > > +{
> > > > + return cp_parser_string_literal_common (parser, translate, wide_ok,
> > > > + /*udl_ok=*/true, lookup_udlit);
> > > > +}
> > > > +
> > > > /* Look up a literal operator with the name and the exact arguments. */
> > > > static tree
> > > > @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> > > > as arguments. */
> > > > static tree
> > > > -cp_parser_userdef_string_literal (tree literal)
> > > > +finish_userdef_string_literal (tree literal)
> > > > {
> > > > tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> > > > tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> > > > @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> > > > case CPP_UTF8STRING_USERDEF:
> > > > /* ??? Should wide strings be allowed when parser->translate_strings_p
> > > > is false (i.e. in attributes)? If not, we can kill the third
> > > > - argument to cp_parser_string_literal. */
> > >
> > > I think the answer to this old question is no: if we have an
> > > encoding-prefix, we should be translating.
> >
> > ...I don't actually know how to resolve this. wide_ok is always true here.
> > Should that change? Or rather, should translate be false for CPP_STRING only?
Sorry it's taken so long to get back to this.
> The one current exception to my assertion above is static_assert, for which
> we currently allow encoding-prefixes but don't translate. I think this is
> wrong, that we should translate the string. But I'm not confident of that.
>
> But to your question, yes: when translate is false, I think we also don't
> want to allow UDLs. So _userdef can always pass true for translate. And as
> below we should call it only when translate would be true.
Done: _userdef no longer has the translate parameter and it's only called
when parser->translate_strings_p.
> Incidentally, it seems that we set translate off for all attributes, even
> ones that would take a normal expression argument where presumably we do
> want translation (and UDLs). The whole business of different parsing for
> different attributes is a headache. You don't need to deal with this now.
>
> > > > - return (cp_parser_string_literal (parser,
> > > > - parser->translate_strings_p,
> > > > - true)
> > > > + argument to cp_parser_{,userdef}string_literal. */
> > > > + return (cp_parser_userdef_string_literal (parser,
> > > > + parser->translate_strings_p,
> > > > + /*wide_ok=*/true)
> > >
> > > For CPP_*STRING* without _USERDEF, we should still call
> > > cp_parser_string_literal.
> >
> > It looks like we always have to call cp_parser_userdef_string_literal
> > > otherwise this would be rejected:
> >
> > std::string concat01 = "Hello, " "World!"_www;
> >
> > Because first we see a CPP_STRING but the subsequent UDL shouldn't
> > be rejected.
>
> Ah, I didn't notice the function was handling a sequence of string-literals.
> So maybe we want to call _userdef here when translate_strings_p, and not
> when it's false.
Resolved by the change above. Thanks,
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
-- >8 --
In this PR, we are crashing because we've encountered a UDL where a
string-literal is expected. This patch makes the parser reject string
and character UDLs in all places where the grammar requires a
string-literal and not a user-defined-string-literal.
I've introduced two new wrappers; the existing cp_parser_string_literal
was renamed to cp_parser_string_literal_common and should not be called
directly. finish_userdef_string_literal is renamed from
cp_parser_userdef_string_literal.
PR c++/105300
gcc/c-family/ChangeLog:
* c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
gcc/cp/ChangeLog:
* parser.cc: Remove unnecessary forward declarations.
(cp_parser_string_literal): New wrapper.
(cp_parser_string_literal_common): Renamed from
cp_parser_string_literal. Add a bool parameter. Give an error when
UDLs are not permitted.
(cp_parser_userdef_string_literal): New wrapper.
(finish_userdef_string_literal): Renamed from
cp_parser_userdef_string_literal.
(cp_parser_primary_expression): Call cp_parser_userdef_string_literal
instead of cp_parser_string_literal.
(cp_parser_linkage_specification): Move a variable declaration closer
to its first use.
(cp_parser_static_assert): Likewise.
(cp_parser_operator): Call cp_parser_userdef_string_literal instead of
cp_parser_string_literal.
(cp_parser_asm_definition): Move a variable declaration closer to its
first use.
(cp_parser_asm_specification_opt): Move variable declarations closer to
their first use.
(cp_parser_asm_operand_list): Likewise.
(cp_parser_asm_clobber_list): Likewise.
gcc/testsuite/ChangeLog:
* g++.dg/cpp0x/udlit-error1.C: New test.
---
gcc/c-family/c-pragma.cc | 3 +
gcc/cp/parser.cc | 133 +++++++++++++++-------
gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
3 files changed, 113 insertions(+), 44 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 91fabf0a513..bba9172e8a1 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
}
else if (token == CPP_STRING)
message = x;
+ else if (token == CPP_STRING_USERDEF)
+ GCC_BAD ("string literal with user-defined suffix is invalid in this "
+ "context");
else
GCC_BAD ("expected a string after %<#pragma message%>");
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 8b1658decba..4b366d6c64f 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
/* Lexical conventions [gram.lex] */
-static cp_expr cp_parser_identifier
- (cp_parser *);
-static cp_expr cp_parser_string_literal
- (cp_parser *, bool, bool, bool);
-static cp_expr cp_parser_userdef_char_literal
- (cp_parser *);
-static tree cp_parser_userdef_string_literal
+static tree finish_userdef_string_literal
(tree);
-static cp_expr cp_parser_userdef_numeric_literal
- (cp_parser *);
/* Basic concepts [gram.basic] */
@@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
return error_mark_node;
}
-/* Parse a sequence of adjacent string constants. Returns a
+/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
+ Do not call this directly; use either of the above.
+
+ Parse a sequence of adjacent string constants. Return a
TREE_STRING representing the combined, nul-terminated string
constant. If TRANSLATE is true, translate the string to the
execution character set. If WIDE_OK is true, a wide string is
- invalid here.
+ valid here. If UDL_OK is true, a string literal with user-defined
+ suffix can be used in this context.
C++98 [lex.string] says that if a narrow string literal token is
adjacent to a wide string literal token, the behavior is undefined.
@@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
This code is largely lifted from lex_string() in c-lex.cc.
FUTURE: ObjC++ will need to handle @-strings here. */
+
static cp_expr
-cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
- bool lookup_udlit = true)
+cp_parser_string_literal_common (cp_parser *parser, bool translate,
+ bool wide_ok, bool udl_ok,
+ bool lookup_udlit)
{
tree value;
size_t count;
@@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
tok = cp_lexer_peek_token (parser->lexer);
if (cpp_userdef_string_p (tok->type))
{
+ if (!udl_ok)
+ {
+ error_at (loc, "string literal with user-defined suffix "
+ "is invalid in this context");
+ return error_mark_node;
+ }
string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
curr_type = cpp_userdef_string_remove_type (tok->type);
curr_tok_is_userdef_p = true;
@@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
tree literal = build_userdef_literal (suffix_id, value,
OT_NONE, NULL_TREE);
if (lookup_udlit)
- value = cp_parser_userdef_string_literal (literal);
+ value = finish_userdef_string_literal (literal);
else
value = literal;
}
@@ -4626,6 +4636,37 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
return cp_expr (value, loc);
}
+/* Parse a sequence of adjacent string constants. Return a TREE_STRING
+ representing the combined, nul-terminated string constant. If
+ TRANSLATE is true, translate the string to the execution character set.
+ If WIDE_OK is true, a wide string is valid here.
+
+ This function issues an error if a user defined string literal is
+ encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
+
+static inline cp_expr
+cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
+{
+ return cp_parser_string_literal_common (parser, translate, wide_ok,
+ /*udl_ok=*/false,
+ /*lookup_udlit=*/false);
+}
+
+/* Parse a string literal or user defined string literal.
+
+ user-defined-string-literal :
+ string-literal ud-suffix
+
+ If LOOKUP_UDLIT, perform a lookup for a suitable template function. */
+
+static inline cp_expr
+cp_parser_userdef_string_literal (cp_parser *parser, bool lookup_udlit)
+{
+ return cp_parser_string_literal_common (parser, /*translate=*/true,
+ /*wide_ok=*/true, /*udl_ok=*/true,
+ lookup_udlit);
+}
+
/* Look up a literal operator with the name and the exact arguments. */
static tree
@@ -4923,7 +4964,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
as arguments. */
static tree
-cp_parser_userdef_string_literal (tree literal)
+finish_userdef_string_literal (tree literal)
{
tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
@@ -5663,10 +5704,15 @@ cp_parser_primary_expression (cp_parser *parser,
/* ??? Should wide strings be allowed when parser->translate_strings_p
is false (i.e. in attributes)? If not, we can kill the third
argument to cp_parser_string_literal. */
- return (cp_parser_string_literal (parser,
- parser->translate_strings_p,
- true)
- .maybe_add_location_wrapper ());
+ if (parser->translate_strings_p)
+ return (cp_parser_userdef_string_literal (parser,
+ /*lookup_udlit=*/true)
+ .maybe_add_location_wrapper ());
+ else
+ return (cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/true)
+ .maybe_add_location_wrapper ());
case CPP_OPEN_PAREN:
/* If we see `( { ' then we are looking at the beginning of
@@ -16222,15 +16268,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
static void
cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
{
- tree linkage;
-
/* Look for the `extern' keyword. */
cp_token *extern_token
= cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
/* Look for the string-literal. */
cp_token *string_token = cp_lexer_peek_token (parser->lexer);
- linkage = cp_parser_string_literal (parser, false, false);
+ tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false);
/* Transform the literal into an identifier. If the literal is a
wide-character string, or contains embedded NULs, then we can't
@@ -16360,9 +16405,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
cp_parser_require (parser, CPP_COMMA, RT_COMMA);
/* Parse the string-literal message. */
- message = cp_parser_string_literal (parser,
- /*translate=*/false,
- /*wide_ok=*/true);
+ message = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/true);
/* A `)' completes the static assertion. */
if (!parens.require_close (parser))
@@ -17410,7 +17454,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
case CPP_STRING16_USERDEF:
case CPP_STRING32_USERDEF:
{
- cp_expr str;
tree string_tree;
int sz, len;
@@ -17418,8 +17461,8 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
/* Consume the string. */
- str = cp_parser_string_literal (parser, /*translate=*/true,
- /*wide_ok=*/true, /*lookup_udlit=*/false);
+ cp_expr str = cp_parser_userdef_string_literal (parser,
+ /*lookup_udlit=*/false);
if (str == error_mark_node)
return error_mark_node;
else if (TREE_CODE (str) == USERDEF_LITERAL)
@@ -22072,7 +22115,6 @@ cp_parser_using_directive (cp_parser* parser)
static void
cp_parser_asm_definition (cp_parser* parser)
{
- tree string;
tree outputs = NULL_TREE;
tree inputs = NULL_TREE;
tree clobbers = NULL_TREE;
@@ -22180,7 +22222,8 @@ cp_parser_asm_definition (cp_parser* parser)
if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
return;
/* Look for the string. */
- string = cp_parser_string_literal (parser, false, false);
+ tree string = cp_parser_string_literal (parser, /*translate=*/false,
+ /*wide_ok=*/false);
if (string == error_mark_node)
{
cp_parser_skip_to_closing_parenthesis (parser, true, false,
@@ -28655,11 +28698,8 @@ cp_parser_yield_expression (cp_parser* parser)
static tree
cp_parser_asm_specification_opt (cp_parser* parser)
{
- cp_token *token;
- tree asm_specification;
-
/* Peek at the next token. */
- token = cp_lexer_peek_token (parser->lexer);
+ cp_token *token = cp_lexer_peek_token (parser->lexer);
/* If the next token isn't the `asm' keyword, then there's no
asm-specification. */
if (!cp_parser_is_keyword (token, RID_ASM))
@@ -28672,7 +28712,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
parens.require_open (parser);
/* Look for the string-literal. */
- asm_specification = cp_parser_string_literal (parser, false, false);
+ tree asm_specification = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28705,8 +28747,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
while (true)
{
- tree string_literal;
- tree expression;
tree name;
if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
@@ -28724,13 +28764,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
else
name = NULL_TREE;
/* Look for the string-literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Look for the `('. */
matching_parens parens;
parens.require_open (parser);
/* Parse the expression. */
- expression = cp_parser_expression (parser);
+ tree expression = cp_parser_expression (parser);
/* Look for the `)'. */
parens.require_close (parser);
@@ -28770,10 +28812,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
while (true)
{
- tree string_literal;
-
/* Look for the string literal. */
- string_literal = cp_parser_string_literal (parser, false, false);
+ tree string_literal = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
/* Add it to the list. */
clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
/* If the next token is not a `,', then the list is
@@ -46345,7 +46387,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
cp_lexer_consume_token (parser->lexer);
}
else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
- value = cp_parser_string_literal (parser, false, false);
+ value = cp_parser_string_literal (parser,
+ /*translate=*/false,
+ /*wide_ok=*/false);
else
{
cp_parser_error (parser, "expected identifier or "
@@ -49367,7 +49411,8 @@ pragma_lex (tree *value, location_t *loc)
if (ret == CPP_PRAGMA_EOL)
ret = CPP_EOF;
else if (ret == CPP_STRING)
- *value = cp_parser_string_literal (the_parser, false, false);
+ *value = cp_parser_string_literal (the_parser, /*translate=*/false,
+ /*wide_ok=*/false);
else
{
if (ret == CPP_KEYWORD)
diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
new file mode 100644
index 00000000000..66e300e350f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
@@ -0,0 +1,21 @@
+// PR c++/105300
+// { dg-do compile { target c++11 } }
+
+void operator""_x(const char *, decltype(sizeof(0)));
+
+#include ""_x // { dg-error "include expects" }
+#line ""_x // { dg-error "not a positive integer" }
+#if __has_include(""_x) // { dg-error "requires a header-name" }
+#endif
+
+#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
+
+extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
+static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
+
+[[deprecated("oof"_x)]]
+void
+lol () // { dg-error "not a string" }
+{
+ asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
+}
base-commit: 6071e495e5802a8949d2b02df6aa31a5f40f2af9
--
2.39.0
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v4] c++: Reject UDLs in certain contexts [PR105300]
2023-01-13 23:22 ` [PATCH v4] " Marek Polacek
@ 2023-01-24 22:49 ` Marek Polacek
2023-01-25 19:36 ` Jason Merrill
1 sibling, 0 replies; 13+ messages in thread
From: Marek Polacek @ 2023-01-24 22:49 UTC (permalink / raw)
To: Jason Merrill; +Cc: GCC Patches
Ping.
On Fri, Jan 13, 2023 at 06:22:38PM -0500, Marek Polacek wrote:
> On Sat, Dec 03, 2022 at 02:58:16PM -0500, Jason Merrill wrote:
> > On 12/2/22 18:58, Marek Polacek wrote:
> > > On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
> > > > On 11/18/22 18:52, Marek Polacek wrote:
> > > > > +/* Parse a string literal or user defined string literal.
> > > > > +
> > > > > + user-defined-string-literal :
> > > > > + string-literal ud-suffix
> > > > > +
> > > > > + Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
> > > > > + a lookup for a suitable template function. */
> > > > > +
> > > > > +static inline cp_expr
> > > > > +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
> > > > > + bool wide_ok, bool lookup_udlit = true)
> > > >
> > > > I think this function doesn't need the translate and wide_ok parms, they can
> > > > always be true.
> > >
> > > I've dropped the wide_ok one, but not the other, because...
> > > > > +{
> > > > > + return cp_parser_string_literal_common (parser, translate, wide_ok,
> > > > > + /*udl_ok=*/true, lookup_udlit);
> > > > > +}
> > > > > +
> > > > > /* Look up a literal operator with the name and the exact arguments. */
> > > > > static tree
> > > > > @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> > > > > as arguments. */
> > > > > static tree
> > > > > -cp_parser_userdef_string_literal (tree literal)
> > > > > +finish_userdef_string_literal (tree literal)
> > > > > {
> > > > > tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> > > > > tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> > > > > @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
> > > > > case CPP_UTF8STRING_USERDEF:
> > > > > /* ??? Should wide strings be allowed when parser->translate_strings_p
> > > > > is false (i.e. in attributes)? If not, we can kill the third
> > > > > - argument to cp_parser_string_literal. */
> > > >
> > > > I think the answer to this old question is no: if we have an
> > > > encoding-prefix, we should be translating.
> > >
> > > ...I don't actually know how to resolve this. wide_ok is always true here.
> > > Should that change? Or rather, should translate be false for CPP_STRING only?
>
> Sorry it's taken so long to get back to this.
>
> > The one current exception to my assertion above is static_assert, for which
> > we currently allow encoding-prefixes but don't translate. I think this is
> > wrong, that we should translate the string. But I'm not confident of that.
> >
> > But to your question, yes: when translate is false, I think we also don't
> > want to allow UDLs. So _userdef can always pass true for translate. And as
> > below we should call it only when translate would be true.
>
> > Done: _userdef no longer has the translate parameter and it's only called
> when parser->translate_strings_p.
>
> > Incidentally, it seems that we set translate off for all attributes, even
> > ones that would take a normal expression argument where presumably we do
> > want translation (and UDLs). The whole business of different parsing for
> > different attributes is a headache. You don't need to deal with this now.
> >
> > > > > - return (cp_parser_string_literal (parser,
> > > > > - parser->translate_strings_p,
> > > > > - true)
> > > > > + argument to cp_parser_{,userdef}string_literal. */
> > > > > + return (cp_parser_userdef_string_literal (parser,
> > > > > + parser->translate_strings_p,
> > > > > + /*wide_ok=*/true)
> > > >
> > > > For CPP_*STRING* without _USERDEF, we should still call
> > > > cp_parser_string_literal.
> > >
> > > It looks like we always have to call cp_parser_userdef_string_literal
> > > > otherwise this would be rejected:
> > >
> > > std::string concat01 = "Hello, " "World!"_www;
> > >
> > > Because first we see a CPP_STRING but the subsequent UDL shouldn't
> > > be rejected.
> >
> > Ah, I didn't notice the function was handling a sequence of string-literals.
> > So maybe we want to call _userdef here when translate_strings_p, and not
> > when it's false.
>
> Resolved by the change above. Thanks,
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected. This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
>
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly. finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
>
> PR c++/105300
>
> gcc/c-family/ChangeLog:
>
> * c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
>
> gcc/cp/ChangeLog:
>
> * parser.cc: Remove unnecessary forward declarations.
> (cp_parser_string_literal): New wrapper.
> (cp_parser_string_literal_common): Renamed from
> cp_parser_string_literal. Add a bool parameter. Give an error when
> UDLs are not permitted.
> (cp_parser_userdef_string_literal): New wrapper.
> (finish_userdef_string_literal): Renamed from
> cp_parser_userdef_string_literal.
> (cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> instead of cp_parser_string_literal.
> (cp_parser_linkage_specification): Move a variable declaration closer
> to its first use.
> (cp_parser_static_assert): Likewise.
> (cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> cp_parser_string_literal.
> (cp_parser_asm_definition): Move a variable declaration closer to its
> first use.
> (cp_parser_asm_specification_opt): Move variable declarations closer to
> their first use.
> (cp_parser_asm_operand_list): Likewise.
> (cp_parser_asm_clobber_list): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp0x/udlit-error1.C: New test.
> ---
> gcc/c-family/c-pragma.cc | 3 +
> gcc/cp/parser.cc | 133 +++++++++++++++-------
> gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
> 3 files changed, 113 insertions(+), 44 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
>
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 91fabf0a513..bba9172e8a1 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
> }
> else if (token == CPP_STRING)
> message = x;
> + else if (token == CPP_STRING_USERDEF)
> + GCC_BAD ("string literal with user-defined suffix is invalid in this "
> + "context");
> else
> GCC_BAD ("expected a string after %<#pragma message%>");
>
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 8b1658decba..4b366d6c64f 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>
> /* Lexical conventions [gram.lex] */
>
> -static cp_expr cp_parser_identifier
> - (cp_parser *);
> -static cp_expr cp_parser_string_literal
> - (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> - (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
> (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> - (cp_parser *);
>
> /* Basic concepts [gram.basic] */
>
> @@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
> return error_mark_node;
> }
>
> -/* Parse a sequence of adjacent string constants. Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> + Do not call this directly; use either of the above.
> +
> + Parse a sequence of adjacent string constants. Return a
> TREE_STRING representing the combined, nul-terminated string
> constant. If TRANSLATE is true, translate the string to the
> execution character set. If WIDE_OK is true, a wide string is
> - invalid here.
> + valid here. If UDL_OK is true, a string literal with user-defined
> + suffix can be used in this context.
>
> C++98 [lex.string] says that if a narrow string literal token is
> adjacent to a wide string literal token, the behavior is undefined.
> @@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
> This code is largely lifted from lex_string() in c-lex.cc.
>
> FUTURE: ObjC++ will need to handle @-strings here. */
> +
> static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> - bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> + bool wide_ok, bool udl_ok,
> + bool lookup_udlit)
> {
> tree value;
> size_t count;
> @@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tok = cp_lexer_peek_token (parser->lexer);
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tree literal = build_userdef_literal (suffix_id, value,
> OT_NONE, NULL_TREE);
> if (lookup_udlit)
> - value = cp_parser_userdef_string_literal (literal);
> + value = finish_userdef_string_literal (literal);
> else
> value = literal;
> }
> @@ -4626,6 +4636,37 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> return cp_expr (value, loc);
> }
>
> +/* Parse a sequence of adjacent string constants. Return a TREE_STRING
> + representing the combined, nul-terminated string constant. If
> + TRANSLATE is true, translate the string to the execution character set.
> + If WIDE_OK is true, a wide string is valid here.
> +
> + This function issues an error if a user defined string literal is
> + encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> + return cp_parser_string_literal_common (parser, translate, wide_ok,
> + /*udl_ok=*/false,
> + /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> + user-defined-string-literal :
> + string-literal ud-suffix
> +
> + If LOOKUP_UDLIT, perform a lookup for a suitable template function. */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool lookup_udlit)
> +{
> + return cp_parser_string_literal_common (parser, /*translate=*/true,
> + /*wide_ok=*/true, /*udl_ok=*/true,
> + lookup_udlit);
> +}
> +
> /* Look up a literal operator with the name and the exact arguments. */
>
> static tree
> @@ -4923,7 +4964,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> as arguments. */
>
> static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
> {
> tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5663,10 +5704,15 @@ cp_parser_primary_expression (cp_parser *parser,
> /* ??? Should wide strings be allowed when parser->translate_strings_p
> is false (i.e. in attributes)? If not, we can kill the third
> argument to cp_parser_string_literal. */
> - return (cp_parser_string_literal (parser,
> - parser->translate_strings_p,
> - true)
> - .maybe_add_location_wrapper ());
> + if (parser->translate_strings_p)
> + return (cp_parser_userdef_string_literal (parser,
> + /*lookup_udlit=*/true)
> + .maybe_add_location_wrapper ());
> + else
> + return (cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/true)
> + .maybe_add_location_wrapper ());
>
> case CPP_OPEN_PAREN:
> /* If we see `( { ' then we are looking at the beginning of
> @@ -16222,15 +16268,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
> static void
> cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
> {
> - tree linkage;
> -
> /* Look for the `extern' keyword. */
> cp_token *extern_token
> = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>
> /* Look for the string-literal. */
> cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> - linkage = cp_parser_string_literal (parser, false, false);
> + tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Transform the literal into an identifier. If the literal is a
> wide-character string, or contains embedded NULs, then we can't
> @@ -16360,9 +16405,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
> cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>
> /* Parse the string-literal message. */
> - message = cp_parser_string_literal (parser,
> - /*translate=*/false,
> - /*wide_ok=*/true);
> + message = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/true);
>
> /* A `)' completes the static assertion. */
> if (!parens.require_close (parser))
> @@ -17410,7 +17454,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> case CPP_STRING16_USERDEF:
> case CPP_STRING32_USERDEF:
> {
> - cp_expr str;
> tree string_tree;
> int sz, len;
>
> @@ -17418,8 +17461,8 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>
> /* Consume the string. */
> - str = cp_parser_string_literal (parser, /*translate=*/true,
> - /*wide_ok=*/true, /*lookup_udlit=*/false);
> + cp_expr str = cp_parser_userdef_string_literal (parser,
> + /*lookup_udlit=*/false);
> if (str == error_mark_node)
> return error_mark_node;
> else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -22072,7 +22115,6 @@ cp_parser_using_directive (cp_parser* parser)
> static void
> cp_parser_asm_definition (cp_parser* parser)
> {
> - tree string;
> tree outputs = NULL_TREE;
> tree inputs = NULL_TREE;
> tree clobbers = NULL_TREE;
> @@ -22180,7 +22222,8 @@ cp_parser_asm_definition (cp_parser* parser)
> if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
> return;
> /* Look for the string. */
> - string = cp_parser_string_literal (parser, false, false);
> + tree string = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
> if (string == error_mark_node)
> {
> cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28655,11 +28698,8 @@ cp_parser_yield_expression (cp_parser* parser)
> static tree
> cp_parser_asm_specification_opt (cp_parser* parser)
> {
> - cp_token *token;
> - tree asm_specification;
> -
> /* Peek at the next token. */
> - token = cp_lexer_peek_token (parser->lexer);
> + cp_token *token = cp_lexer_peek_token (parser->lexer);
> /* If the next token isn't the `asm' keyword, then there's no
> asm-specification. */
> if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28672,7 +28712,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
> parens.require_open (parser);
>
> /* Look for the string-literal. */
> - asm_specification = cp_parser_string_literal (parser, false, false);
> + tree asm_specification = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `)'. */
> parens.require_close (parser);
> @@ -28705,8 +28747,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> - tree expression;
> tree name;
>
> if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28724,13 +28764,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
> else
> name = NULL_TREE;
> /* Look for the string-literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `('. */
> matching_parens parens;
> parens.require_open (parser);
> /* Parse the expression. */
> - expression = cp_parser_expression (parser);
> + tree expression = cp_parser_expression (parser);
> /* Look for the `)'. */
> parens.require_close (parser);
>
> @@ -28770,10 +28812,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> -
> /* Look for the string literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> /* Add it to the list. */
> clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
> /* If the next token is not a `,', then the list is
> @@ -46345,7 +46387,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
> cp_lexer_consume_token (parser->lexer);
> }
> else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> - value = cp_parser_string_literal (parser, false, false);
> + value = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> cp_parser_error (parser, "expected identifier or "
> @@ -49367,7 +49411,8 @@ pragma_lex (tree *value, location_t *loc)
> if (ret == CPP_PRAGMA_EOL)
> ret = CPP_EOF;
> else if (ret == CPP_STRING)
> - *value = cp_parser_string_literal (the_parser, false, false);
> + *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x // { dg-error "include expects" }
> +#line ""_x // { dg-error "not a positive integer" }
> +#if __has_include(""_x) // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> + asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
>
> base-commit: 6071e495e5802a8949d2b02df6aa31a5f40f2af9
> --
> 2.39.0
>
Marek
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v4] c++: Reject UDLs in certain contexts [PR105300]
2023-01-13 23:22 ` [PATCH v4] " Marek Polacek
2023-01-24 22:49 ` Marek Polacek
@ 2023-01-25 19:36 ` Jason Merrill
1 sibling, 0 replies; 13+ messages in thread
From: Jason Merrill @ 2023-01-25 19:36 UTC (permalink / raw)
To: Marek Polacek; +Cc: GCC Patches
On 1/13/23 18:22, Marek Polacek wrote:
> On Sat, Dec 03, 2022 at 02:58:16PM -0500, Jason Merrill wrote:
>> On 12/2/22 18:58, Marek Polacek wrote:
>>> On Fri, Nov 18, 2022 at 08:39:10PM -0500, Jason Merrill wrote:
>>>> On 11/18/22 18:52, Marek Polacek wrote:
>>>>> +/* Parse a string literal or user defined string literal.
>>>>> +
>>>>> + user-defined-string-literal :
>>>>> + string-literal ud-suffix
>>>>> +
>>>>> + Parameters as for cp_parser_string_literal. If LOOKUP_UDLIT, perform
>>>>> + a lookup for a suitable template function. */
>>>>> +
>>>>> +static inline cp_expr
>>>>> +cp_parser_userdef_string_literal (cp_parser *parser, bool translate,
>>>>> + bool wide_ok, bool lookup_udlit = true)
>>>>
>>>> I think this function doesn't need the translate and wide_ok parms, they can
>>>> always be true.
>>>
>>> I've dropped the wide_ok one, but not the other, because...
>>>>> +{
>>>>> + return cp_parser_string_literal_common (parser, translate, wide_ok,
>>>>> + /*udl_ok=*/true, lookup_udlit);
>>>>> +}
>>>>> +
>>>>> /* Look up a literal operator with the name and the exact arguments. */
>>>>> static tree
>>>>> @@ -4913,7 +4955,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
>>>>> as arguments. */
>>>>> static tree
>>>>> -cp_parser_userdef_string_literal (tree literal)
>>>>> +finish_userdef_string_literal (tree literal)
>>>>> {
>>>>> tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
>>>>> tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
>>>>> @@ -5652,10 +5694,10 @@ cp_parser_primary_expression (cp_parser *parser,
>>>>> case CPP_UTF8STRING_USERDEF:
>>>>> /* ??? Should wide strings be allowed when parser->translate_strings_p
>>>>> is false (i.e. in attributes)? If not, we can kill the third
>>>>> - argument to cp_parser_string_literal. */
>>>>
>>>> I think the answer to this old question is no: if we have an
>>>> encoding-prefix, we should be translating.
>>>
>>> ...I don't actually know how to resolve this. wide_ok is always true here.
>>> Should that change? Or rather, should translate be false for CPP_STRING only?
>
> Sorry it's taken so long to get back to this.
>
>> The one current exception to my assertion above is static_assert, for which
>> we currently allow encoding-prefixes but don't translate. I think this is
>> wrong, that we should translate the string. But I'm not confident of that.
>>
>> But to your question, yes: when translate is false, I think we also don't
>> want to allow UDLs. So _userdef can always pass true for translate. And as
>> below we should call it only when translate would be true.
>
> Done: _userdef no longer has the translate parameter and it's only called
> when parser->translate_strings_p.
>
>> Incidentally, it seems that we set translate off for all attributes, even
>> ones that would take a normal expression argument where presumably we do
>> want translation (and UDLs). The whole business of different parsing for
>> different attributes is a headache. You don't need to deal with this now.
>>
>>>>> - return (cp_parser_string_literal (parser,
>>>>> - parser->translate_strings_p,
>>>>> - true)
>>>>> + argument to cp_parser_{,userdef}string_literal. */
>>>>> + return (cp_parser_userdef_string_literal (parser,
>>>>> + parser->translate_strings_p,
>>>>> + /*wide_ok=*/true)
>>>>
>>>> For CPP_*STRING* without _USERDEF, we should still call
>>>> cp_parser_string_literal.
>>>
>>> It looks like we always have to call cp_parser_userdef_string_literal
>>> otherwise this would be rejected:
>>>
>>> std::string concat01 = "Hello, " "World!"_www;
>>>
>>> Because first we see a CPP_STRING but the subsequent UDL shouldn't
>>> be rejected.
>>
>> Ah, I didn't notice the function was handling a sequence of string-literals.
>> So maybe we want to call _userdef here when translate_strings_p, and not
>> when it's false.
>
> Resolved by the change above. Thanks,
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
OK, thanks.
> -- >8 --
> In this PR, we are crashing because we've encountered a UDL where a
> string-literal is expected. This patch makes the parser reject string
> and character UDLs in all places where the grammar requires a
> string-literal and not a user-defined-string-literal.
>
> I've introduced two new wrappers; the existing cp_parser_string_literal
> was renamed to cp_parser_string_literal_common and should not be called
> directly. finish_userdef_string_literal is renamed from
> cp_parser_userdef_string_literal.
>
> PR c++/105300
>
> gcc/c-family/ChangeLog:
>
> * c-pragma.cc (handle_pragma_message): Warn for CPP_STRING_USERDEF.
>
> gcc/cp/ChangeLog:
>
> * parser.cc: Remove unnecessary forward declarations.
> (cp_parser_string_literal): New wrapper.
> (cp_parser_string_literal_common): Renamed from
> cp_parser_string_literal. Add a bool parameter. Give an error when
> UDLs are not permitted.
> (cp_parser_userdef_string_literal): New wrapper.
> (finish_userdef_string_literal): Renamed from
> cp_parser_userdef_string_literal.
> (cp_parser_primary_expression): Call cp_parser_userdef_string_literal
> instead of cp_parser_string_literal.
> (cp_parser_linkage_specification): Move a variable declaration closer
> to its first use.
> (cp_parser_static_assert): Likewise.
> (cp_parser_operator): Call cp_parser_userdef_string_literal instead of
> cp_parser_string_literal.
> (cp_parser_asm_definition): Move a variable declaration closer to its
> first use.
> (cp_parser_asm_specification_opt): Move variable declarations closer to
> their first use.
> (cp_parser_asm_operand_list): Likewise.
> (cp_parser_asm_clobber_list): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp0x/udlit-error1.C: New test.
> ---
> gcc/c-family/c-pragma.cc | 3 +
> gcc/cp/parser.cc | 133 +++++++++++++++-------
> gcc/testsuite/g++.dg/cpp0x/udlit-error1.C | 21 ++++
> 3 files changed, 113 insertions(+), 44 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
>
> diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
> index 91fabf0a513..bba9172e8a1 100644
> --- a/gcc/c-family/c-pragma.cc
> +++ b/gcc/c-family/c-pragma.cc
> @@ -1390,6 +1390,9 @@ handle_pragma_message (cpp_reader *)
> }
> else if (token == CPP_STRING)
> message = x;
> + else if (token == CPP_STRING_USERDEF)
> + GCC_BAD ("string literal with user-defined suffix is invalid in this "
> + "context");
> else
> GCC_BAD ("expected a string after %<#pragma message%>");
>
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 8b1658decba..4b366d6c64f 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2227,16 +2227,8 @@ pop_unparsed_function_queues (cp_parser *parser)
>
> /* Lexical conventions [gram.lex] */
>
> -static cp_expr cp_parser_identifier
> - (cp_parser *);
> -static cp_expr cp_parser_string_literal
> - (cp_parser *, bool, bool, bool);
> -static cp_expr cp_parser_userdef_char_literal
> - (cp_parser *);
> -static tree cp_parser_userdef_string_literal
> +static tree finish_userdef_string_literal
> (tree);
> -static cp_expr cp_parser_userdef_numeric_literal
> - (cp_parser *);
>
> /* Basic concepts [gram.basic] */
>
> @@ -4408,11 +4400,15 @@ cp_parser_identifier (cp_parser* parser)
> return error_mark_node;
> }
>
> -/* Parse a sequence of adjacent string constants. Returns a
> +/* Worker for cp_parser_string_literal and cp_parser_userdef_string_literal.
> + Do not call this directly; use either of the above.
> +
> + Parse a sequence of adjacent string constants. Return a
> TREE_STRING representing the combined, nul-terminated string
> constant. If TRANSLATE is true, translate the string to the
> execution character set. If WIDE_OK is true, a wide string is
> - invalid here.
> + valid here. If UDL_OK is true, a string literal with user-defined
> + suffix can be used in this context.
>
> C++98 [lex.string] says that if a narrow string literal token is
> adjacent to a wide string literal token, the behavior is undefined.
> @@ -4422,9 +4418,11 @@ cp_parser_identifier (cp_parser* parser)
> This code is largely lifted from lex_string() in c-lex.cc.
>
> FUTURE: ObjC++ will need to handle @-strings here. */
> +
> static cp_expr
> -cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> - bool lookup_udlit = true)
> +cp_parser_string_literal_common (cp_parser *parser, bool translate,
> + bool wide_ok, bool udl_ok,
> + bool lookup_udlit)
> {
> tree value;
> size_t count;
> @@ -4449,6 +4447,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
>
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4539,6 +4543,12 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tok = cp_lexer_peek_token (parser->lexer);
> if (cpp_userdef_string_p (tok->type))
> {
> + if (!udl_ok)
> + {
> + error_at (loc, "string literal with user-defined suffix "
> + "is invalid in this context");
> + return error_mark_node;
> + }
> string_tree = USERDEF_LITERAL_VALUE (tok->u.value);
> curr_type = cpp_userdef_string_remove_type (tok->type);
> curr_tok_is_userdef_p = true;
> @@ -4608,7 +4618,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> tree literal = build_userdef_literal (suffix_id, value,
> OT_NONE, NULL_TREE);
> if (lookup_udlit)
> - value = cp_parser_userdef_string_literal (literal);
> + value = finish_userdef_string_literal (literal);
> else
> value = literal;
> }
> @@ -4626,6 +4636,37 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
> return cp_expr (value, loc);
> }
>
> +/* Parse a sequence of adjacent string constants. Return a TREE_STRING
> + representing the combined, nul-terminated string constant. If
> + TRANSLATE is true, translate the string to the execution character set.
> + If WIDE_OK is true, a wide string is valid here.
> +
> + This function issues an error if a user defined string literal is
> + encountered; use cp_parser_userdef_string_literal if UDLs are allowed. */
> +
> +static inline cp_expr
> +cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
> +{
> + return cp_parser_string_literal_common (parser, translate, wide_ok,
> + /*udl_ok=*/false,
> + /*lookup_udlit=*/false);
> +}
> +
> +/* Parse a string literal or user defined string literal.
> +
> + user-defined-string-literal :
> + string-literal ud-suffix
> +
> + If LOOKUP_UDLIT, perform a lookup for a suitable template function. */
> +
> +static inline cp_expr
> +cp_parser_userdef_string_literal (cp_parser *parser, bool lookup_udlit)
> +{
> + return cp_parser_string_literal_common (parser, /*translate=*/true,
> + /*wide_ok=*/true, /*udl_ok=*/true,
> + lookup_udlit);
> +}
> +
> /* Look up a literal operator with the name and the exact arguments. */
>
> static tree
> @@ -4923,7 +4964,7 @@ cp_parser_userdef_numeric_literal (cp_parser *parser)
> as arguments. */
>
> static tree
> -cp_parser_userdef_string_literal (tree literal)
> +finish_userdef_string_literal (tree literal)
> {
> tree suffix_id = USERDEF_LITERAL_SUFFIX_ID (literal);
> tree name = cp_literal_operator_id (IDENTIFIER_POINTER (suffix_id));
> @@ -5663,10 +5704,15 @@ cp_parser_primary_expression (cp_parser *parser,
> /* ??? Should wide strings be allowed when parser->translate_strings_p
> is false (i.e. in attributes)? If not, we can kill the third
> argument to cp_parser_string_literal. */
> - return (cp_parser_string_literal (parser,
> - parser->translate_strings_p,
> - true)
> - .maybe_add_location_wrapper ());
> + if (parser->translate_strings_p)
> + return (cp_parser_userdef_string_literal (parser,
> + /*lookup_udlit=*/true)
> + .maybe_add_location_wrapper ());
> + else
> + return (cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/true)
> + .maybe_add_location_wrapper ());
>
> case CPP_OPEN_PAREN:
> /* If we see `( { ' then we are looking at the beginning of
> @@ -16222,15 +16268,14 @@ cp_parser_function_specifier_opt (cp_parser* parser,
> static void
> cp_parser_linkage_specification (cp_parser* parser, tree prefix_attr)
> {
> - tree linkage;
> -
> /* Look for the `extern' keyword. */
> cp_token *extern_token
> = cp_parser_require_keyword (parser, RID_EXTERN, RT_EXTERN);
>
> /* Look for the string-literal. */
> cp_token *string_token = cp_lexer_peek_token (parser->lexer);
> - linkage = cp_parser_string_literal (parser, false, false);
> + tree linkage = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Transform the literal into an identifier. If the literal is a
> wide-character string, or contains embedded NULs, then we can't
> @@ -16360,9 +16405,8 @@ cp_parser_static_assert(cp_parser *parser, bool member_p)
> cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>
> /* Parse the string-literal message. */
> - message = cp_parser_string_literal (parser,
> - /*translate=*/false,
> - /*wide_ok=*/true);
> + message = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/true);
>
> /* A `)' completes the static assertion. */
> if (!parens.require_close (parser))
> @@ -17410,7 +17454,6 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> case CPP_STRING16_USERDEF:
> case CPP_STRING32_USERDEF:
> {
> - cp_expr str;
> tree string_tree;
> int sz, len;
>
> @@ -17418,8 +17461,8 @@ cp_parser_operator (cp_parser* parser, location_t start_loc)
> maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
>
> /* Consume the string. */
> - str = cp_parser_string_literal (parser, /*translate=*/true,
> - /*wide_ok=*/true, /*lookup_udlit=*/false);
> + cp_expr str = cp_parser_userdef_string_literal (parser,
> + /*lookup_udlit=*/false);
> if (str == error_mark_node)
> return error_mark_node;
> else if (TREE_CODE (str) == USERDEF_LITERAL)
> @@ -22072,7 +22115,6 @@ cp_parser_using_directive (cp_parser* parser)
> static void
> cp_parser_asm_definition (cp_parser* parser)
> {
> - tree string;
> tree outputs = NULL_TREE;
> tree inputs = NULL_TREE;
> tree clobbers = NULL_TREE;
> @@ -22180,7 +22222,8 @@ cp_parser_asm_definition (cp_parser* parser)
> if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
> return;
> /* Look for the string. */
> - string = cp_parser_string_literal (parser, false, false);
> + tree string = cp_parser_string_literal (parser, /*translate=*/false,
> + /*wide_ok=*/false);
> if (string == error_mark_node)
> {
> cp_parser_skip_to_closing_parenthesis (parser, true, false,
> @@ -28655,11 +28698,8 @@ cp_parser_yield_expression (cp_parser* parser)
> static tree
> cp_parser_asm_specification_opt (cp_parser* parser)
> {
> - cp_token *token;
> - tree asm_specification;
> -
> /* Peek at the next token. */
> - token = cp_lexer_peek_token (parser->lexer);
> + cp_token *token = cp_lexer_peek_token (parser->lexer);
> /* If the next token isn't the `asm' keyword, then there's no
> asm-specification. */
> if (!cp_parser_is_keyword (token, RID_ASM))
> @@ -28672,7 +28712,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
> parens.require_open (parser);
>
> /* Look for the string-literal. */
> - asm_specification = cp_parser_string_literal (parser, false, false);
> + tree asm_specification = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `)'. */
> parens.require_close (parser);
> @@ -28705,8 +28747,6 @@ cp_parser_asm_operand_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> - tree expression;
> tree name;
>
> if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
> @@ -28724,13 +28764,15 @@ cp_parser_asm_operand_list (cp_parser* parser)
> else
> name = NULL_TREE;
> /* Look for the string-literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
>
> /* Look for the `('. */
> matching_parens parens;
> parens.require_open (parser);
> /* Parse the expression. */
> - expression = cp_parser_expression (parser);
> + tree expression = cp_parser_expression (parser);
> /* Look for the `)'. */
> parens.require_close (parser);
>
> @@ -28770,10 +28812,10 @@ cp_parser_asm_clobber_list (cp_parser* parser)
>
> while (true)
> {
> - tree string_literal;
> -
> /* Look for the string literal. */
> - string_literal = cp_parser_string_literal (parser, false, false);
> + tree string_literal = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> /* Add it to the list. */
> clobbers = tree_cons (NULL_TREE, string_literal, clobbers);
> /* If the next token is not a `,', then the list is
> @@ -46345,7 +46387,9 @@ cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
> cp_lexer_consume_token (parser->lexer);
> }
> else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
> - value = cp_parser_string_literal (parser, false, false);
> + value = cp_parser_string_literal (parser,
> + /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> cp_parser_error (parser, "expected identifier or "
> @@ -49367,7 +49411,8 @@ pragma_lex (tree *value, location_t *loc)
> if (ret == CPP_PRAGMA_EOL)
> ret = CPP_EOF;
> else if (ret == CPP_STRING)
> - *value = cp_parser_string_literal (the_parser, false, false);
> + *value = cp_parser_string_literal (the_parser, /*translate=*/false,
> + /*wide_ok=*/false);
> else
> {
> if (ret == CPP_KEYWORD)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> new file mode 100644
> index 00000000000..66e300e350f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/udlit-error1.C
> @@ -0,0 +1,21 @@
> +// PR c++/105300
> +// { dg-do compile { target c++11 } }
> +
> +void operator""_x(const char *, decltype(sizeof(0)));
> +
> +#include ""_x // { dg-error "include expects" }
> +#line ""_x // { dg-error "not a positive integer" }
> +#if __has_include(""_x) // { dg-error "requires a header-name" }
> +#endif
> +
> +#pragma message "hi"_x // { dg-warning "string literal with user-defined suffix is invalid in this context" }
> +
> +extern "C"_x { void g(); } // { dg-error "before user-defined string literal" }
> +static_assert(true, "foo"_x); // { dg-error "string literal with user-defined suffix is invalid in this context|expected" }
> +
> +[[deprecated("oof"_x)]]
> +void
> +lol () // { dg-error "not a string" }
> +{
> + asm (""_x); // { dg-error "string literal with user-defined suffix is invalid in this context" }
> +}
>
> base-commit: 6071e495e5802a8949d2b02df6aa31a5f40f2af9
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2023-01-25 19:36 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-12 16:53 [PATCH] c++: Reject UDLs in certain contexts [PR105300] Marek Polacek
2022-11-15 23:58 ` Jason Merrill
2022-11-16 0:35 ` Marek Polacek
2022-11-16 13:22 ` Jason Merrill
2022-11-17 1:12 ` Marek Polacek
2022-11-18 0:06 ` Jason Merrill
2022-11-18 23:52 ` [PATCH v2] " Marek Polacek
2022-11-19 1:39 ` Jason Merrill
2022-12-02 23:58 ` [PATCH v3] " Marek Polacek
2022-12-03 19:58 ` Jason Merrill
2023-01-13 23:22 ` [PATCH v4] " Marek Polacek
2023-01-24 22:49 ` Marek Polacek
2023-01-25 19:36 ` Jason Merrill
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).