public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/rust/master] parser: Add better restrictions around semicolons in statements
@ 2022-06-08 12:22 Thomas Schwinge
  0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2022-06-08 12:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ef5638186202daac03feed7a20eb975991965403

commit ef5638186202daac03feed7a20eb975991965403
Author: Arthur Cohen <arthur.cohen@embecosm.com>
Date:   Mon Mar 21 17:27:05 2022 +0100

    parser: Add better restrictions around semicolons in statements
    
    When parsing macro invocations, rustc does not actually consume the
    statement's trailing semicolon.
    
    Let's take the following example:
    ```rust
    macro_rules! one_stmt {
        ($s:stmt) => {};
    }
    
    macro_rules! one_or_more_stmt {
        ($($s:stmt)*) => {};
    }
    
    one_stmt!(let a = 1);
    one_stmt!(let b = 2;); // error
    
    one_or_more_stmt!(;); // valid
    one_or_more_stmt!(let a = 15;); // valid, two statements!
    one_or_more_stmt!(let a = 15 let b = 13); // valid, two statements again
    ```
    
    A semicolon can count as a valid empty statement, but cannot be part of
    a statement (in macro invocations). This commit adds more restrictions
    that allow the parser to not always expect a semicolon token after the
    statement. Furthermore, this fixes a test that was previously accepted
    by the compiler but not by rustc.

Diff:
---
 gcc/rust/expand/rust-macro-expand.cc  | 16 +++++++++++-----
 gcc/rust/parse/rust-parse-impl.h      | 34 +++++++++++++---------------------
 gcc/rust/parse/rust-parse.h           | 16 +++++++++-------
 gcc/testsuite/rust/compile/macro18.rs |  2 +-
 gcc/testsuite/rust/compile/macro32.rs | 19 +++++++++++++++++++
 5 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc
index 6b26f98dede..8294008ec4e 100644
--- a/gcc/rust/expand/rust-macro-expand.cc
+++ b/gcc/rust/expand/rust-macro-expand.cc
@@ -477,9 +477,12 @@ MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
       parser.parse_visibility ();
       break;
 
-    case AST::MacroFragSpec::STMT:
-      parser.parse_stmt (/* allow_no_semi */ true);
-      break;
+      case AST::MacroFragSpec::STMT: {
+	auto restrictions = ParseRestrictions ();
+	restrictions.consume_semi = false;
+	parser.parse_stmt (restrictions);
+	break;
+      }
 
     case AST::MacroFragSpec::LIFETIME:
       parser.parse_lifetime_params ();
@@ -887,11 +890,14 @@ transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser,
 static std::vector<AST::SingleASTNode>
 transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 {
+  auto restrictions = ParseRestrictions ();
+  restrictions.consume_semi = false;
+
   // FIXME: This is invalid! It needs to also handle cases where the macro
   // transcriber is an expression, but since the macro call is followed by
   // a semicolon, it's a valid ExprStmt
-  return parse_many (parser, delimiter, [&parser] () {
-    auto stmt = parser.parse_stmt (/* allow_no_semi */ true);
+  return parse_many (parser, delimiter, [&parser, restrictions] () {
+    auto stmt = parser.parse_stmt (restrictions);
     return AST::SingleASTNode (std::move (stmt));
   });
 }
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index 644e7898a70..a34f3d4d6eb 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -6091,7 +6091,7 @@ Parser<ManagedTokenSource>::parse_named_function_param (
 // Parses a statement (will further disambiguate any statement).
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::Stmt>
-Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
+Parser<ManagedTokenSource>::parse_stmt (ParseRestrictions restrictions)
 {
   // quick exit for empty statement
   // FIXME: Can we have empty statements without semicolons? Just nothing?
@@ -6116,7 +6116,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
     {
     case LET:
       // let statement
-      return parse_let_stmt (std::move (outer_attrs), allow_no_semi);
+      return parse_let_stmt (std::move (outer_attrs), restrictions);
     case PUB:
     case MOD:
     case EXTERN_TOK:
@@ -6171,7 +6171,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
       // TODO: find out how to disable gcc "implicit fallthrough" warning
     default:
       // fallback: expression statement
-      return parse_expr_stmt (std::move (outer_attrs), allow_no_semi);
+      return parse_expr_stmt (std::move (outer_attrs), restrictions);
       break;
     }
 }
@@ -6180,7 +6180,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::LetStmt>
 Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
-					    bool allow_no_semi)
+					    ParseRestrictions restrictions)
 {
   Location locus = lexer.peek_token ()->get_locus ();
   skip_token (LET);
@@ -6235,13 +6235,9 @@ Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
 	}
     }
 
-  if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
-    {
-      // skip after somewhere
+  if (restrictions.consume_semi)
+    if (!skip_token (SEMICOLON))
       return nullptr;
-      /* TODO: how wise is it to ditch a mostly-valid let statement just
-       * because a semicolon is missing? */
-    }
 
   return std::unique_ptr<AST::LetStmt> (
     new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type),
@@ -7076,7 +7072,7 @@ Parser<ManagedTokenSource>::parse_method ()
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::ExprStmt>
 Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
-					     bool allow_no_semi)
+					     ParseRestrictions restrictions)
 {
   /* potential thoughts - define new virtual method "has_block()" on expr. parse
    * expr and then determine whether semicolon is needed as a result of this
@@ -7116,7 +7112,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
 	else
 	  {
 	    return parse_expr_stmt_without_block (std::move (outer_attrs),
-						  allow_no_semi);
+						  restrictions);
 	  }
       }
       case UNSAFE: {
@@ -7130,7 +7126,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
 	else
 	  {
 	    return parse_expr_stmt_without_block (std::move (outer_attrs),
-						  allow_no_semi);
+						  restrictions);
 	  }
       }
     default:
@@ -7139,7 +7135,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
        * initial tokens in order to prevent more syntactical errors at parse
        * time. */
       return parse_expr_stmt_without_block (std::move (outer_attrs),
-					    allow_no_semi);
+					    restrictions);
     }
 }
 
@@ -7255,7 +7251,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt_with_block (
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::ExprStmtWithoutBlock>
 Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
-  AST::AttrVec outer_attrs, bool allow_no_semi)
+  AST::AttrVec outer_attrs, ParseRestrictions restrictions)
 {
   /* TODO: maybe move more logic for expr without block in here for better error
    * handling */
@@ -7264,7 +7260,6 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
   std::unique_ptr<AST::ExprWithoutBlock> expr = nullptr;
   Location locus = lexer.peek_token ()->get_locus ();
 
-  auto restrictions = ParseRestrictions ();
   restrictions.expr_can_be_stmt = true;
 
   expr = parse_expr_without_block (std::move (outer_attrs), restrictions);
@@ -7279,12 +7274,9 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
       return nullptr;
     }
 
-  // skip semicolon at end that is required
-  if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
-    {
-      // skip somewhere?
+  if (restrictions.consume_semi)
+    if (!skip_token (SEMICOLON))
       return nullptr;
-    }
 
   return std::unique_ptr<AST::ExprStmtWithoutBlock> (
     new AST::ExprStmtWithoutBlock (std::move (expr), locus));
diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h
index 9a31fb6d625..c00dc11d6bb 100644
--- a/gcc/rust/parse/rust-parse.h
+++ b/gcc/rust/parse/rust-parse.h
@@ -81,6 +81,7 @@ struct ParseRestrictions
   bool entered_from_unary = false;
   bool expr_can_be_null = false;
   bool expr_can_be_stmt = false;
+  bool consume_semi = true;
 };
 
 // Parser implementation for gccrs.
@@ -129,11 +130,9 @@ public:
    *    | LetStatement
    *    | ExpressionStatement
    *    | MacroInvocationSemi
-   *
-   * @param allow_no_semi Allow the parser to not parse a semicolon after
-   * 		the statement without erroring out
    */
-  std::unique_ptr<AST::Stmt> parse_stmt (bool allow_no_semi = false);
+  std::unique_ptr<AST::Stmt> parse_stmt (ParseRestrictions restrictions
+					 = ParseRestrictions ());
   std::unique_ptr<AST::Type> parse_type ();
   std::unique_ptr<AST::ExternalItem> parse_external_item ();
   std::unique_ptr<AST::TraitItem> parse_trait_item ();
@@ -616,14 +615,17 @@ private:
    * 		semicolon to follow it
    */
   std::unique_ptr<AST::LetStmt> parse_let_stmt (AST::AttrVec outer_attrs,
-						bool allow_no_semi = false);
+						ParseRestrictions restrictions
+						= ParseRestrictions ());
   std::unique_ptr<AST::ExprStmt> parse_expr_stmt (AST::AttrVec outer_attrs,
-						  bool allow_no_semi = false);
+						  ParseRestrictions restrictions
+						  = ParseRestrictions ());
   std::unique_ptr<AST::ExprStmtWithBlock>
   parse_expr_stmt_with_block (AST::AttrVec outer_attrs);
   std::unique_ptr<AST::ExprStmtWithoutBlock>
   parse_expr_stmt_without_block (AST::AttrVec outer_attrs,
-				 bool allow_no_semi = false);
+				 ParseRestrictions restrictions
+				 = ParseRestrictions ());
   ExprOrStmt parse_stmt_or_expr_without_block ();
   ExprOrStmt parse_stmt_or_expr_with_block (AST::AttrVec outer_attrs);
   ExprOrStmt parse_macro_invocation_maybe_semi (AST::AttrVec outer_attrs);
diff --git a/gcc/testsuite/rust/compile/macro18.rs b/gcc/testsuite/rust/compile/macro18.rs
index c297107d6f3..5418725b619 100644
--- a/gcc/testsuite/rust/compile/macro18.rs
+++ b/gcc/testsuite/rust/compile/macro18.rs
@@ -7,7 +7,7 @@ macro_rules! take_stmt {
 }
 
 fn main() -> i32 {
-    take_stmt!(let complete = 15;);
+    take_stmt!(let complete = 15;); // { dg-error "Failed to match any rule within macro" }
     take_stmt!(let lacking = 14);
 
     0
diff --git a/gcc/testsuite/rust/compile/macro32.rs b/gcc/testsuite/rust/compile/macro32.rs
new file mode 100644
index 00000000000..d1d6305e6bd
--- /dev/null
+++ b/gcc/testsuite/rust/compile/macro32.rs
@@ -0,0 +1,19 @@
+macro_rules! s {
+    ($s:stmt) => {{}};
+}
+
+macro_rules! multi_s {
+    ($($s:stmt)+) => {{}};
+}
+
+fn main() -> i32 {
+    s!(let a = 15);
+    s!(;); // Empty statement
+    s!(let a = 15;); // { dg-error "Failed to match any rule within macro" }
+    multi_s!(let a = 15;);
+    // ^ this actually gets parsed as two statements - one LetStmt and one
+    // empty statement. This is the same behavior as rustc, which you can
+    // see using a count!() macro
+
+    32
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-06-08 12:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-08 12:22 [gcc/devel/rust/master] parser: Add better restrictions around semicolons in statements Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).