public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/rust/master] macros: Add base functions to check for follow-set ambiguities
@ 2022-06-08 12:22 Thomas Schwinge
  0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2022-06-08 12:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:35ca685200830626e5abd623f65a850649beace2

commit 35ca685200830626e5abd623f65a850649beace2
Author: Arthur Cohen <arthur.cohen@embecosm.com>
Date:   Fri Mar 18 16:20:47 2022 +0100

    macros: Add base functions to check for follow-set ambiguities
    
    Rust does not allow for all macro fragments to be followed by any kind
    of tokens: We must check tokens following those fragments that might
    contain restrictions and make sure that they are allowed, conforming to
    the Macro Follow-Set Ambiguity specification
    
    Co-authored-by: philberty <philip.herron@embecosm.com>
    
    macro-frag-spec: Transform enum into a class
    
    This allows us to add methods on the fragment specifier, which are
    needed to make sure that follow-set ambiguities are respected
    
    tests: Add tests for forbidden follow-up tokens
    
    This also fix a test that was previously accepted but invalid: rustc
    also rejected it

Diff:
---
 gcc/rust/ast/rust-ast-full-test.cc             |  41 +-----
 gcc/rust/ast/rust-macro.h                      | 181 +++++++++++++++++--------
 gcc/rust/expand/rust-macro-expand.cc           |   2 +-
 gcc/rust/parse/rust-parse-impl.h               |  13 +-
 gcc/rust/parse/rust-parse.cc                   | 141 +++++++++++++++++++
 gcc/rust/parse/rust-parse.h                    |  12 ++
 gcc/testsuite/rust/compile/macro27.rs          |   8 ++
 gcc/testsuite/rust/compile/macro28.rs          |   8 ++
 gcc/testsuite/rust/compile/macro29.rs          |   8 ++
 gcc/testsuite/rust/compile/macro30.rs          |   8 ++
 gcc/testsuite/rust/compile/macro31.rs          |   8 ++
 gcc/testsuite/rust/execute/torture/macros20.rs |   6 +-
 12 files changed, 336 insertions(+), 100 deletions(-)

diff --git a/gcc/rust/ast/rust-ast-full-test.cc b/gcc/rust/ast/rust-ast-full-test.cc
index 826d41af658..d15c09cfa2d 100644
--- a/gcc/rust/ast/rust-ast-full-test.cc
+++ b/gcc/rust/ast/rust-ast-full-test.cc
@@ -76,45 +76,6 @@ get_string_in_delims (std::string str_input, DelimType delim_type)
   gcc_unreachable ();
 }
 
-// Converts a frag spec enum item to a string form.
-std::string
-frag_spec_to_str (MacroFragSpec frag_spec)
-{
-  switch (frag_spec)
-    {
-    case BLOCK:
-      return "block";
-    case EXPR:
-      return "expr";
-    case IDENT:
-      return "ident";
-    case ITEM:
-      return "item";
-    case LIFETIME:
-      return "lifetime";
-    case LITERAL:
-      return "literal";
-    case META:
-      return "meta";
-    case PAT:
-      return "pat";
-    case PATH:
-      return "path";
-    case STMT:
-      return "stmt";
-    case TT:
-      return "tt";
-    case TY:
-      return "ty";
-    case VIS:
-      return "vis";
-    case INVALID:
-      return "INVALID_FRAG_SPEC";
-    default:
-      return "ERROR_MARK_STRING - unknown frag spec";
-    }
-}
-
 enum AttrMode
 {
   OUTER,
@@ -2396,7 +2357,7 @@ LifetimeParam::as_string () const
 std::string
 MacroMatchFragment::as_string () const
 {
-  return "$" + ident + ": " + frag_spec_to_str (frag_spec);
+  return "$" + ident + ": " + frag_spec.as_string ();
 }
 
 std::string
diff --git a/gcc/rust/ast/rust-macro.h b/gcc/rust/ast/rust-macro.h
index 5ecd5d72611..478d3ab2f11 100644
--- a/gcc/rust/ast/rust-macro.h
+++ b/gcc/rust/ast/rust-macro.h
@@ -28,60 +28,128 @@ namespace AST {
 // Decls as definitions moved to rust-ast.h
 class MacroItem;
 
-enum MacroFragSpec
+class MacroFragSpec
 {
-  BLOCK,
-  EXPR,
-  IDENT,
-  ITEM,
-  LIFETIME,
-  LITERAL,
-  META,
-  PAT,
-  PATH,
-  STMT,
-  TT,
-  TY,
-  VIS,
-  INVALID // not really a specifier, but used to mark invalid one passed in
-};
+public:
+  enum Kind
+  {
+    BLOCK,
+    EXPR,
+    IDENT,
+    ITEM,
+    LIFETIME,
+    LITERAL,
+    META,
+    PAT,
+    PATH,
+    STMT,
+    TT,
+    TY,
+    VIS,
+    INVALID // not really a specifier, but used to mark invalid one passed in
+  };
 
-inline MacroFragSpec
-get_frag_spec_from_str (std::string str)
-{
-  if (str == "block")
-    return BLOCK;
-  else if (str == "expr")
-    return EXPR;
-  else if (str == "ident")
-    return IDENT;
-  else if (str == "item")
-    return ITEM;
-  else if (str == "lifetime")
-    return LIFETIME;
-  else if (str == "literal")
-    return LITERAL;
-  else if (str == "meta")
-    return META;
-  else if (str == "pat")
-    return PAT;
-  else if (str == "path")
-    return PATH;
-  else if (str == "stmt")
-    return STMT;
-  else if (str == "tt")
-    return TT;
-  else if (str == "ty")
-    return TY;
-  else if (str == "vis")
-    return VIS;
-  else
-    {
-      // error_at("invalid string '%s' used as fragment specifier",
-      // str->c_str());
-      return INVALID;
-    }
-}
+  MacroFragSpec (Kind kind) : kind (kind) {}
+
+  static MacroFragSpec get_frag_spec_from_str (const std::string &str)
+  {
+    if (str == "block")
+      return MacroFragSpec (BLOCK);
+    else if (str == "expr")
+      return MacroFragSpec (EXPR);
+    else if (str == "ident")
+      return MacroFragSpec (IDENT);
+    else if (str == "item")
+      return MacroFragSpec (ITEM);
+    else if (str == "lifetime")
+      return MacroFragSpec (LIFETIME);
+    else if (str == "literal")
+      return MacroFragSpec (LITERAL);
+    else if (str == "meta")
+      return MacroFragSpec (META);
+    else if (str == "pat" || str == "pat_param")
+      return MacroFragSpec (PAT);
+    else if (str == "path")
+      return MacroFragSpec (PATH);
+    else if (str == "stmt")
+      return MacroFragSpec (STMT);
+    else if (str == "tt")
+      return MacroFragSpec (TT);
+    else if (str == "ty")
+      return MacroFragSpec (TY);
+    else if (str == "vis")
+      return MacroFragSpec (VIS);
+    else
+      {
+	// error_at("invalid string '%s' used as fragment specifier",
+	// str->c_str()));
+	return MacroFragSpec (INVALID);
+      }
+  }
+
+  Kind get_kind () const { return kind; }
+  bool is_error () const { return kind == Kind::INVALID; }
+
+  // Converts a frag spec enum item to a string form.
+  std::string as_string () const
+  {
+    switch (kind)
+      {
+      case BLOCK:
+	return "block";
+      case EXPR:
+	return "expr";
+      case IDENT:
+	return "ident";
+      case ITEM:
+	return "item";
+      case LIFETIME:
+	return "lifetime";
+      case LITERAL:
+	return "literal";
+      case META:
+	return "meta";
+      case PAT:
+	return "pat";
+      case PATH:
+	return "path";
+      case STMT:
+	return "stmt";
+      case TT:
+	return "tt";
+      case TY:
+	return "ty";
+      case VIS:
+	return "vis";
+      case INVALID:
+	return "INVALID_FRAG_SPEC";
+      default:
+	return "ERROR_MARK_STRING - unknown frag spec";
+      }
+  }
+
+  bool has_follow_set_restrictions ()
+  {
+    switch (kind)
+      {
+      case EXPR:
+      case STMT:
+	// FIXME: Add the following cases once we can handle them properly
+	// in `is_match_compatible()`
+	// case PAT:
+	// // case PAT_PARAM: FIXME: Doesn't <metavar>:pat_param exist?
+	// case PATH:
+	// case TY:
+	// case VIS:
+	return true;
+      default:
+	return false;
+      }
+  }
+
+private:
+  Kind kind;
+};
 
 // A macro match that has an identifier and fragment spec
 class MacroMatchFragment : public MacroMatch
@@ -96,12 +164,17 @@ public:
   {}
 
   // Returns whether macro match fragment is in an error state.
-  bool is_error () const { return frag_spec == INVALID; }
+  bool is_error () const
+  {
+    return frag_spec.get_kind () == MacroFragSpec::INVALID;
+  }
 
   // Creates an error state macro match fragment.
   static MacroMatchFragment create_error (Location locus)
   {
-    return MacroMatchFragment (std::string (""), INVALID, locus);
+    return MacroMatchFragment (std::string (""),
+			       MacroFragSpec (MacroFragSpec::Kind::INVALID),
+			       locus);
   }
 
   std::string as_string () const override;
diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc
index 6b26f98dede..35bd84a23ae 100644
--- a/gcc/rust/expand/rust-macro-expand.cc
+++ b/gcc/rust/expand/rust-macro-expand.cc
@@ -439,7 +439,7 @@ bool
 MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
 			       AST::MacroMatchFragment &fragment)
 {
-  switch (fragment.get_frag_spec ())
+  switch (fragment.get_frag_spec ().get_kind ())
     {
     case AST::MacroFragSpec::EXPR:
       parser.parse_expr ();
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index 644e7898a70..1d1b624e19b 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "rust-diagnostics.h"
 #include "util/rust-make-unique.h"
+#include <algorithm>
 
 namespace Rust {
 // Left binding powers of operations.
@@ -1767,6 +1768,13 @@ Parser<ManagedTokenSource>::parse_macro_matcher ()
 	  return AST::MacroMatcher::create_error (t->get_locus ());
 	}
 
+      if (matches.size () > 0)
+	{
+	  auto &last_match = matches.back ();
+	  if (!is_match_compatible (*last_match, *match))
+	    return AST::MacroMatcher::create_error (match->get_match_locus ());
+	}
+
       matches.push_back (std::move (match));
 
       // DEBUG
@@ -1955,8 +1963,9 @@ Parser<ManagedTokenSource>::parse_macro_match_fragment ()
   if (t == nullptr)
     return nullptr;
 
-  AST::MacroFragSpec frag = AST::get_frag_spec_from_str (t->get_str ());
-  if (frag == AST::INVALID)
+  AST::MacroFragSpec frag
+    = AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ());
+  if (frag.is_error ())
     {
       Error error (t->get_locus (),
 		   "invalid fragment specifier %qs in fragment macro match",
diff --git a/gcc/rust/parse/rust-parse.cc b/gcc/rust/parse/rust-parse.cc
index f995e4b5db2..16ed4a0763b 100644
--- a/gcc/rust/parse/rust-parse.cc
+++ b/gcc/rust/parse/rust-parse.cc
@@ -92,4 +92,145 @@ extract_module_path (const AST::AttrVec &inner_attrs,
 
   return path;
 }
+
+static bool
+peculiar_fragment_match_compatible (AST::MacroMatchFragment &last_match,
+				    AST::MacroMatch &match)
+{
+  static std::unordered_map<AST::MacroFragSpec::Kind, std::vector<TokenId>>
+    follow_set = {
+      {AST::MacroFragSpec::EXPR, {MATCH_ARROW, COMMA, SEMICOLON}},
+      {AST::MacroFragSpec::STMT, {MATCH_ARROW, COMMA, SEMICOLON}},
+    };
+
+  Location error_locus = match.get_match_locus ();
+
+  // There are two behaviors to handle here: If the follow-up match is a token,
+  // we want to check if it is allowed.
+  // If it is a fragment, repetition or matcher then we know that it will be
+  // an error.
+  // For repetitions and matchers we want to extract a proper location to report
+  // the error.
+  switch (match.get_macro_match_type ())
+    {
+      case AST::MacroMatch::Tok: {
+	auto tok = static_cast<AST::Token *> (&match);
+	auto &allowed_toks
+	  = follow_set[last_match.get_frag_spec ().get_kind ()];
+	auto is_valid = std::find (allowed_toks.begin (), allowed_toks.end (),
+				   tok->get_id ())
+			!= allowed_toks.end ();
+	if (!is_valid)
+	  // FIXME: Add hint about allowed fragments
+	  rust_error_at (tok->get_match_locus (),
+			 "token %<%s%> is not allowed after %<%s%> fragment",
+			 tok->get_str ().c_str (),
+			 last_match.get_frag_spec ().as_string ().c_str ());
+	return is_valid;
+      }
+      break;
+      case AST::MacroMatch::Repetition: {
+	auto repetition = static_cast<AST::MacroMatchRepetition *> (&match);
+	auto &matches = repetition->get_matches ();
+	if (!matches.empty ())
+	  error_locus = matches.front ()->get_match_locus ();
+	break;
+      }
+      case AST::MacroMatch::Matcher: {
+	auto matcher = static_cast<AST::MacroMatcher *> (&match);
+	auto &matches = matcher->get_matches ();
+	if (!matches.empty ())
+	  error_locus = matches.front ()->get_match_locus ();
+	break;
+      }
+    default:
+      break;
+    }
+
+  rust_error_at (error_locus, "fragment not allowed after %<%s%> fragment",
+		 last_match.get_frag_spec ().as_string ().c_str ());
+
+  return false;
+}
+
+/**
+ * Avoid UB by calling .front() and .back() on empty containers...
+ */
+
+template <typename T>
+static T *
+get_back_ptr (std::vector<std::unique_ptr<T>> &values)
+{
+  if (values.empty ())
+    return nullptr;
+
+  return values.back ().get ();
+}
+
+template <typename T>
+static T *
+get_front_ptr (std::vector<std::unique_ptr<T>> &values)
+{
+  if (values.empty ())
+    return nullptr;
+
+  return values.front ().get ();
+}
+
+bool
+is_match_compatible (AST::MacroMatch &last_match, AST::MacroMatch &match)
+{
+  AST::MacroMatch *new_last = nullptr;
+
+  // We want to "extract" the concerning matches. In cases such as matchers and
+  // repetitions, we actually store multiple matchers, but are only concerned
+  // about the follow-set ambiguities of certain elements.
+  // There are some cases where we can short-circuit the algorithm: There will
+  // never be restrictions on token literals, or on certain fragments which do
+  // not have a set of follow-restrictions.
+
+  switch (last_match.get_macro_match_type ())
+    {
+      // This is our main stop condition: When we are finally looking at the
+      // last match (or its actual last component), and it is a fragment, it
+      // may contain some follow up restrictions.
+      case AST::MacroMatch::Fragment: {
+	auto fragment = static_cast<AST::MacroMatchFragment *> (&last_match);
+	if (fragment->get_frag_spec ().has_follow_set_restrictions ())
+	  return peculiar_fragment_match_compatible (*fragment, match);
+	else
+	  return true;
+      }
+      case AST::MacroMatch::Repetition: {
+	// A repetition on the left hand side means we want to make sure the
+	// last match of the repetition is compatible with the new match
+	auto repetition
+	  = static_cast<AST::MacroMatchRepetition *> (&last_match);
+	new_last = get_back_ptr (repetition->get_matches ());
+	// If there are no matches in the matcher, then it can be followed by
+	// anything
+	if (!new_last)
+	  return true;
+	break;
+      }
+      case AST::MacroMatch::Matcher: {
+	// Likewise for another matcher
+	auto matcher = static_cast<AST::MacroMatcher *> (&last_match);
+	new_last = get_back_ptr (matcher->get_matches ());
+	// If there are no matches in the matcher, then it can be followed by
+	// anything
+	if (!new_last)
+	  return true;
+	break;
+      }
+    case AST::MacroMatch::Tok:
+      return true;
+    }
+
+  rust_assert (new_last);
+
+  // We check recursively until we find a terminating condition
+  // FIXME: Does expansion depth/limit matter here?
+  return is_match_compatible (*new_last, match);
+}
 } // namespace Rust
diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h
index 9a31fb6d625..c86d194c159 100644
--- a/gcc/rust/parse/rust-parse.h
+++ b/gcc/rust/parse/rust-parse.h
@@ -702,6 +702,18 @@ private:
 std::string
 extract_module_path (const AST::AttrVec &inner_attrs,
 		     const AST::AttrVec &outer_attrs, const std::string &name);
+
+/**
+ * Check if a MacroMatch is allowed to follow the last parsed MacroMatch.
+ *
+ * @param last_match Last matcher parsed before the current match
+ * @param match Current matcher to check
+ *
+ * @return true if the follow-up is valid, false otherwise
+ */
+bool
+is_match_compatible (AST::MacroMatch &last_match,
+		     AST::MacroMatch &current_match);
 } // namespace Rust
 
 // as now template, include implementations of all methods
diff --git a/gcc/testsuite/rust/compile/macro27.rs b/gcc/testsuite/rust/compile/macro27.rs
new file mode 100644
index 00000000000..c477d979ec6
--- /dev/null
+++ b/gcc/testsuite/rust/compile/macro27.rs
@@ -0,0 +1,8 @@
+macro_rules! m {
+    ($a:expr tok) => {
+        // { dg-error "token .tok. is not allowed after .expr. fragment" "" { target *-*-* } .-1 }
+        // { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
+        // { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
+        $a
+    };
+}
diff --git a/gcc/testsuite/rust/compile/macro28.rs b/gcc/testsuite/rust/compile/macro28.rs
new file mode 100644
index 00000000000..ff557da4f2c
--- /dev/null
+++ b/gcc/testsuite/rust/compile/macro28.rs
@@ -0,0 +1,8 @@
+macro_rules! m {
+    ($a:expr $(tok $es:expr)*) => {
+        // { dg-error "fragment not allowed after .expr. fragment" "" { target *-*-* } .-1 }
+        // { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
+        // { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
+        $a
+    };
+}
diff --git a/gcc/testsuite/rust/compile/macro29.rs b/gcc/testsuite/rust/compile/macro29.rs
new file mode 100644
index 00000000000..b26ddb13b41
--- /dev/null
+++ b/gcc/testsuite/rust/compile/macro29.rs
@@ -0,0 +1,8 @@
+macro_rules! m {
+    ($($es:expr)* tok) => {
+        // { dg-error "token .tok. is not allowed after .expr. fragment" "" { target *-*-* } .-1 }
+        // { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
+        // { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
+        $a
+    };
+}
diff --git a/gcc/testsuite/rust/compile/macro30.rs b/gcc/testsuite/rust/compile/macro30.rs
new file mode 100644
index 00000000000..da4645a434b
--- /dev/null
+++ b/gcc/testsuite/rust/compile/macro30.rs
@@ -0,0 +1,8 @@
+macro_rules! m {
+    ($e:expr $f:expr) => {
+        // { dg-error "fragment not allowed after .expr. fragment" "" { target *-*-* } .-1 }
+        // { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
+        // { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
+        $e
+    };
+}
diff --git a/gcc/testsuite/rust/compile/macro31.rs b/gcc/testsuite/rust/compile/macro31.rs
new file mode 100644
index 00000000000..28d84ac43f1
--- /dev/null
+++ b/gcc/testsuite/rust/compile/macro31.rs
@@ -0,0 +1,8 @@
+macro_rules! m {
+    ($($e:expr)* $($f:expr)*) => {
+        // { dg-error "fragment not allowed after .expr. fragment" "" { target *-*-* } .-1 }
+        // { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
+        // { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
+        $e
+    };
+}
diff --git a/gcc/testsuite/rust/execute/torture/macros20.rs b/gcc/testsuite/rust/execute/torture/macros20.rs
index fc116d089c4..97317a0879e 100644
--- a/gcc/testsuite/rust/execute/torture/macros20.rs
+++ b/gcc/testsuite/rust/execute/torture/macros20.rs
@@ -1,6 +1,6 @@
 macro_rules! add {
-    ($e:expr big_tok $($es:expr) big_tok *) => {
-        $e + add!($($es) big_tok *)
+    ($e:expr , $($es:expr) , *) => {
+        $e + add!($($es) , *)
     };
     ($e:expr) => {
         $e
@@ -8,7 +8,7 @@ macro_rules! add {
 }
 
 fn main() -> i32 {
-    let a = add!(15 big_tok 2 big_tok 9); // 26
+    let a = add!(15, 2, 9); // 26
 
     a - 26
 }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-06-08 12:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-08 12:22 [gcc/devel/rust/master] macros: Add base functions to check for follow-set ambiguities Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).