From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1643) id 753B638877DE; Wed, 8 Jun 2022 12:22:23 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 753B638877DE Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Thomas Schwinge To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/rust/master] parser: Add better restrictions around semicolons in statements X-Act-Checkin: gcc X-Git-Author: Arthur Cohen X-Git-Refname: refs/heads/devel/rust/master X-Git-Oldrev: cc6e405912c83aee41efd3015d9157cdbe9134fe X-Git-Newrev: ef5638186202daac03feed7a20eb975991965403 Message-Id: <20220608122223.753B638877DE@sourceware.org> Date: Wed, 8 Jun 2022 12:22:23 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 08 Jun 2022 12:22:23 -0000 https://gcc.gnu.org/g:ef5638186202daac03feed7a20eb975991965403 commit ef5638186202daac03feed7a20eb975991965403 Author: Arthur Cohen Date: Mon Mar 21 17:27:05 2022 +0100 parser: Add better restrictions around semicolons in statements When parsing macro invocations, rustc does not actually consume the statement's trailing semicolon. Let's take the following example: ```rust macro_rules! one_stmt { ($s:stmt) => {}; } macro_rules! one_or_more_stmt { ($($s:stmt)*) => {}; } one_stmt!(let a = 1); one_stmt!(let b = 2;); // error one_or_more_stmt!(;); // valid one_or_more_stmt!(let a = 15;); // valid, two statements! one_or_more_stmt!(let a = 15 let b = 13); // valid, two statements again ``` A semicolon can count as a valid empty statement, but cannot be part of a statement (in macro invocations). This commit adds more restrictions that allow the parser to not always expect a semicolon token after the statement. Furthermore, this fixes a test that was previously accepted by the compiler but not by rustc. Diff: --- gcc/rust/expand/rust-macro-expand.cc | 16 +++++++++++----- gcc/rust/parse/rust-parse-impl.h | 34 +++++++++++++--------------------- gcc/rust/parse/rust-parse.h | 16 +++++++++------- gcc/testsuite/rust/compile/macro18.rs | 2 +- gcc/testsuite/rust/compile/macro32.rs | 19 +++++++++++++++++++ 5 files changed, 53 insertions(+), 34 deletions(-) diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index 6b26f98dede..8294008ec4e 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -477,9 +477,12 @@ MacroExpander::match_fragment (Parser &parser, parser.parse_visibility (); break; - case AST::MacroFragSpec::STMT: - parser.parse_stmt (/* allow_no_semi */ true); - break; + case AST::MacroFragSpec::STMT: { + auto restrictions = ParseRestrictions (); + restrictions.consume_semi = false; + parser.parse_stmt (restrictions); + break; + } case AST::MacroFragSpec::LIFETIME: parser.parse_lifetime_params (); @@ -887,11 +890,14 @@ transcribe_many_trait_impl_items (Parser &parser, static std::vector transcribe_many_stmts (Parser &parser, TokenId &delimiter) { + auto restrictions = ParseRestrictions (); + restrictions.consume_semi = false; + // FIXME: This is invalid! It needs to also handle cases where the macro // transcriber is an expression, but since the macro call is followed by // a semicolon, it's a valid ExprStmt - return parse_many (parser, delimiter, [&parser] () { - auto stmt = parser.parse_stmt (/* allow_no_semi */ true); + return parse_many (parser, delimiter, [&parser, restrictions] () { + auto stmt = parser.parse_stmt (restrictions); return AST::SingleASTNode (std::move (stmt)); }); } diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index 644e7898a70..a34f3d4d6eb 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -6091,7 +6091,7 @@ Parser::parse_named_function_param ( // Parses a statement (will further disambiguate any statement). template std::unique_ptr -Parser::parse_stmt (bool allow_no_semi) +Parser::parse_stmt (ParseRestrictions restrictions) { // quick exit for empty statement // FIXME: Can we have empty statements without semicolons? Just nothing? @@ -6116,7 +6116,7 @@ Parser::parse_stmt (bool allow_no_semi) { case LET: // let statement - return parse_let_stmt (std::move (outer_attrs), allow_no_semi); + return parse_let_stmt (std::move (outer_attrs), restrictions); case PUB: case MOD: case EXTERN_TOK: @@ -6171,7 +6171,7 @@ Parser::parse_stmt (bool allow_no_semi) // TODO: find out how to disable gcc "implicit fallthrough" warning default: // fallback: expression statement - return parse_expr_stmt (std::move (outer_attrs), allow_no_semi); + return parse_expr_stmt (std::move (outer_attrs), restrictions); break; } } @@ -6180,7 +6180,7 @@ Parser::parse_stmt (bool allow_no_semi) template std::unique_ptr Parser::parse_let_stmt (AST::AttrVec outer_attrs, - bool allow_no_semi) + ParseRestrictions restrictions) { Location locus = lexer.peek_token ()->get_locus (); skip_token (LET); @@ -6235,13 +6235,9 @@ Parser::parse_let_stmt (AST::AttrVec outer_attrs, } } - if (!maybe_skip_token (SEMICOLON) && !allow_no_semi) - { - // skip after somewhere + if (restrictions.consume_semi) + if (!skip_token (SEMICOLON)) return nullptr; - /* TODO: how wise is it to ditch a mostly-valid let statement just - * because a semicolon is missing? */ - } return std::unique_ptr ( new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type), @@ -7076,7 +7072,7 @@ Parser::parse_method () template std::unique_ptr Parser::parse_expr_stmt (AST::AttrVec outer_attrs, - bool allow_no_semi) + ParseRestrictions restrictions) { /* potential thoughts - define new virtual method "has_block()" on expr. parse * expr and then determine whether semicolon is needed as a result of this @@ -7116,7 +7112,7 @@ Parser::parse_expr_stmt (AST::AttrVec outer_attrs, else { return parse_expr_stmt_without_block (std::move (outer_attrs), - allow_no_semi); + restrictions); } } case UNSAFE: { @@ -7130,7 +7126,7 @@ Parser::parse_expr_stmt (AST::AttrVec outer_attrs, else { return parse_expr_stmt_without_block (std::move (outer_attrs), - allow_no_semi); + restrictions); } } default: @@ -7139,7 +7135,7 @@ Parser::parse_expr_stmt (AST::AttrVec outer_attrs, * initial tokens in order to prevent more syntactical errors at parse * time. */ return parse_expr_stmt_without_block (std::move (outer_attrs), - allow_no_semi); + restrictions); } } @@ -7255,7 +7251,7 @@ Parser::parse_expr_stmt_with_block ( template std::unique_ptr Parser::parse_expr_stmt_without_block ( - AST::AttrVec outer_attrs, bool allow_no_semi) + AST::AttrVec outer_attrs, ParseRestrictions restrictions) { /* TODO: maybe move more logic for expr without block in here for better error * handling */ @@ -7264,7 +7260,6 @@ Parser::parse_expr_stmt_without_block ( std::unique_ptr expr = nullptr; Location locus = lexer.peek_token ()->get_locus (); - auto restrictions = ParseRestrictions (); restrictions.expr_can_be_stmt = true; expr = parse_expr_without_block (std::move (outer_attrs), restrictions); @@ -7279,12 +7274,9 @@ Parser::parse_expr_stmt_without_block ( return nullptr; } - // skip semicolon at end that is required - if (!maybe_skip_token (SEMICOLON) && !allow_no_semi) - { - // skip somewhere? + if (restrictions.consume_semi) + if (!skip_token (SEMICOLON)) return nullptr; - } return std::unique_ptr ( new AST::ExprStmtWithoutBlock (std::move (expr), locus)); diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index 9a31fb6d625..c00dc11d6bb 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -81,6 +81,7 @@ struct ParseRestrictions bool entered_from_unary = false; bool expr_can_be_null = false; bool expr_can_be_stmt = false; + bool consume_semi = true; }; // Parser implementation for gccrs. @@ -129,11 +130,9 @@ public: * | LetStatement * | ExpressionStatement * | MacroInvocationSemi - * - * @param allow_no_semi Allow the parser to not parse a semicolon after - * the statement without erroring out */ - std::unique_ptr parse_stmt (bool allow_no_semi = false); + std::unique_ptr parse_stmt (ParseRestrictions restrictions + = ParseRestrictions ()); std::unique_ptr parse_type (); std::unique_ptr parse_external_item (); std::unique_ptr parse_trait_item (); @@ -616,14 +615,17 @@ private: * semicolon to follow it */ std::unique_ptr parse_let_stmt (AST::AttrVec outer_attrs, - bool allow_no_semi = false); + ParseRestrictions restrictions + = ParseRestrictions ()); std::unique_ptr parse_expr_stmt (AST::AttrVec outer_attrs, - bool allow_no_semi = false); + ParseRestrictions restrictions + = ParseRestrictions ()); std::unique_ptr parse_expr_stmt_with_block (AST::AttrVec outer_attrs); std::unique_ptr parse_expr_stmt_without_block (AST::AttrVec outer_attrs, - bool allow_no_semi = false); + ParseRestrictions restrictions + = ParseRestrictions ()); ExprOrStmt parse_stmt_or_expr_without_block (); ExprOrStmt parse_stmt_or_expr_with_block (AST::AttrVec outer_attrs); ExprOrStmt parse_macro_invocation_maybe_semi (AST::AttrVec outer_attrs); diff --git a/gcc/testsuite/rust/compile/macro18.rs b/gcc/testsuite/rust/compile/macro18.rs index c297107d6f3..5418725b619 100644 --- a/gcc/testsuite/rust/compile/macro18.rs +++ b/gcc/testsuite/rust/compile/macro18.rs @@ -7,7 +7,7 @@ macro_rules! take_stmt { } fn main() -> i32 { - take_stmt!(let complete = 15;); + take_stmt!(let complete = 15;); // { dg-error "Failed to match any rule within macro" } take_stmt!(let lacking = 14); 0 diff --git a/gcc/testsuite/rust/compile/macro32.rs b/gcc/testsuite/rust/compile/macro32.rs new file mode 100644 index 00000000000..d1d6305e6bd --- /dev/null +++ b/gcc/testsuite/rust/compile/macro32.rs @@ -0,0 +1,19 @@ +macro_rules! s { + ($s:stmt) => {{}}; +} + +macro_rules! multi_s { + ($($s:stmt)+) => {{}}; +} + +fn main() -> i32 { + s!(let a = 15); + s!(;); // Empty statement + s!(let a = 15;); // { dg-error "Failed to match any rule within macro" } + multi_s!(let a = 15;); + // ^ this actually gets parsed as two statements - one LetStmt and one + // empty statement. This is the same behavior as rustc, which you can + // see using a count!() macro + + 32 +}