public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/rust/master] lexer: Add ability to lex strings directly
@ 2022-06-08 12:13 Thomas Schwinge
  0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2022-06-08 12:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ede68b7ba64ace5fbcb900b37c7e8572a2ddefb5

commit ede68b7ba64ace5fbcb900b37c7e8572a2ddefb5
Author: Arthur Cohen <arthur.cohen@embecosm.com>
Date:   Sat Feb 26 11:10:38 2022 +0100

    lexer: Add ability to lex strings directly
    
    By allowing us to parse strings directly instead of always requiring a
    filename, we are now able to reuse the parser and lexer in various
    places of the compiler. This is useful for -frust-cfg, but may also come
    in handy for other compiler mechanics such as the include!() builtin
    macro, where we do not actually want location info but just a stream of
    tokens.

Diff:
---
 gcc/rust/lex/rust-lex.cc         |  9 +++-
 gcc/rust/lex/rust-lex.h          | 22 +++++++++
 gcc/rust/rust-session-manager.cc | 97 +++++++++++++++++++++-------------------
 gcc/rust/rust-session-manager.h  |  2 +-
 gcc/rust/util/rust-hir-map.cc    |  5 ++-
 5 files changed, 84 insertions(+), 51 deletions(-)

diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index c23088fab06..f620e15e1d3 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -128,7 +128,8 @@ Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap)
     token_queue (TokenSource (this))
 {
   // inform line_table that file is being entered and is in line 1
-  line_map->start_file (filename, current_line);
+  if (linemap)
+    line_map->start_file (filename, current_line);
 }
 
 Lexer::~Lexer ()
@@ -152,7 +153,11 @@ Lexer::~Lexer ()
 Location
 Lexer::get_current_location ()
 {
-  return line_map->get_location (current_column);
+  if (line_map)
+    return line_map->get_location (current_column);
+  else
+    // If we have no linemap, we're lexing something without proper locations
+    return Location ();
 }
 
 int
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 0ae07fe2cd1..b0d7494f063 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -23,6 +23,7 @@
 #include "rust-buffered-queue.h"
 #include "rust-token.h"
 
+#include <cstdio>
 #include <utility>
 #include <tuple>
 
@@ -49,6 +50,13 @@ public:
       file = fopen (filename, "r");
   }
 
+  /**
+   * Create a RAIIFile from an existing instance of FILE*
+   */
+  RAIIFile (FILE *raw, const char *filename = nullptr)
+    : file (raw), filename (filename)
+  {}
+
   RAIIFile (const RAIIFile &other) = delete;
   RAIIFile &operator= (const RAIIFile &other) = delete;
 
@@ -57,6 +65,7 @@ public:
   {
     other.file = nullptr;
   }
+
   RAIIFile &operator= (RAIIFile &&other)
   {
     close ();
@@ -132,6 +141,19 @@ public:
   Lexer (const char *filename, RAIIFile input, Linemap *linemap);
   ~Lexer ();
 
+  /**
+   * Lex the contents of a string instead of a file
+   */
+  static Lexer lex_string (std::string &input)
+  {
+    // fmemopen () wants a non-const buffer, hence the non-const reference
+    // parameter. We open the buffer in read mode ("r"), so the string is
+    // only *read* here and never modified.
+    auto string_file = fmemopen (&input[0], input.length (), "r");
+
+    return Lexer (nullptr, RAIIFile (string_file), nullptr);
+  }
+
   // don't allow copy semantics (for now, at least)
   Lexer (const Lexer &other) = delete;
   Lexer &operator= (const Lexer &other) = delete;
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index cd2c5902481..54a64435a60 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -366,9 +366,11 @@ Session::handle_option (
       Compile::Mangler::set_mangling (flag_rust_mangling);
       break;
 
-    case OPT_frust_cfg_:
-      ret = handle_cfg_option (std::string (arg));
-      break;
+      case OPT_frust_cfg_: {
+	auto string_arg = std::string (arg);
+	ret = handle_cfg_option (string_arg);
+	break;
+      }
 
     default:
       break;
@@ -378,7 +380,7 @@ Session::handle_option (
 }
 
 bool
-Session::handle_cfg_option (const std::string &input)
+Session::handle_cfg_option (std::string &input)
 {
   std::string key;
   std::string value;
@@ -402,8 +404,8 @@ Session::handle_cfg_option (const std::string &input)
   return true;
 }
 
-/* Enables a certain dump depending on the name passed in. Returns true if name
- * is valid, false otherwise. */
+/* Enables a certain dump depending on the name passed in. Returns true if
+ * name is valid, false otherwise. */
 bool
 Session::enable_dump (std::string arg)
 {
@@ -532,8 +534,8 @@ Session::parse_file (const char *filename)
    * line into crate root)
    *  - injection (some lint checks or dummy, register builtin macros, crate
    * injection)
-   *  - expansion (expands all macros, maybe build test harness, AST validation,
-   * maybe macro crate)
+   *  - expansion (expands all macros, maybe build test harness, AST
+   * validation, maybe macro crate)
    *  - resolution (name resolution, type resolution, maybe feature checking,
    * maybe buffered lints)
    *  TODO not done */
@@ -603,8 +605,8 @@ Session::parse_file (const char *filename)
   if (saw_errors ())
     return;
 
-  // scan unused has to be done after type resolution since methods are resolved
-  // at that point
+  // scan unused has to be done after type resolution since methods are
+  // resolved at that point
   Resolver::ScanUnused::Scan ();
 
   if (saw_errors ())
@@ -638,11 +640,11 @@ Session::debug_dump_load_crates (Parser<Lexer> &parser)
 
   /* TODO: search through inner attrs and see whether any of those attr paths
    * contain "no_core", "no_std", "compiler_builtins". If so/not, save certain
-   * crate names. In these names, insert items at beginning of crate items. This
-   * is crate injection. Also, inject prelude use decl at beginning (first name
-   * is assumed to be prelude - prelude is a use decl automatically generated to
-   * enable using Option and Copy without qualifying it or importing it via
-   * 'use' manually) */
+   * crate names. In these names, insert items at beginning of crate items.
+   * This is crate injection. Also, inject prelude use decl at beginning
+   * (first name is assumed to be prelude - prelude is a use decl
+   * automatically generated to enable using Option and Copy without
+   * qualifying it or importing it via 'use' manually) */
 
   std::vector<std::string> crate_names;
   for (const auto &item : crate.items)
@@ -695,8 +697,8 @@ Session::injection (AST::Crate &crate)
 
   // register builtin macros
   /* In rustc, builtin macros are divided into 3 categories depending on use -
-   * "bang" macros, "attr" macros, and "derive" macros. I think the meanings of
-   * these categories should be fairly obvious to anyone who has used rust.
+   * "bang" macros, "attr" macros, and "derive" macros. I think the meanings
+   * of these categories should be fairly obvious to anyone who has used rust.
    * Builtin macro list by category: Bang
    *      - asm
    *      - assert
@@ -739,8 +741,8 @@ Session::injection (AST::Crate &crate)
    * rustc also has a "quote" macro that is defined differently and is
    * supposedly not stable so eh. */
   /* TODO: actually implement injection of these macros. In particular, derive
-   * macros, cfg, and test should be prioritised since they seem to be used the
-   * most. */
+   * macros, cfg, and test should be prioritised since they seem to be used
+   * the most. */
 
   // crate injection
   std::vector<std::string> names;
@@ -804,11 +806,11 @@ Session::injection (AST::Crate &crate)
   crate.items.insert (crate.items.begin (), std::move (use_decl));
 
   /* TODO: potentially add checking attribute crate type? I can't figure out
-   * what this does currently comment says "Unconditionally collect crate types
-   * from attributes to make them used", which presumably refers to checking the
-   * linkage info by "crate_type". It also seems to ensure that an invalid crate
-   * type is not specified, so maybe just do that. Valid crate types: bin lib
-   * dylib staticlib cdylib rlib proc-macro */
+   * what this does currently comment says "Unconditionally collect crate
+   * types from attributes to make them used", which presumably refers to
+   * checking the linkage info by "crate_type". It also seems to ensure that
+   * an invalid crate type is not specified, so maybe just do that. Valid
+   * crate types: bin lib dylib staticlib cdylib rlib proc-macro */
 
   rust_debug ("finished injection");
 }
@@ -818,8 +820,8 @@ Session::expansion (AST::Crate &crate)
 {
   rust_debug ("started expansion");
 
-  /* rustc has a modification to windows PATH temporarily here, which may end up
-   * being required */
+  /* rustc has a modification to windows PATH temporarily here, which may end
+   * up being required */
 
   // create macro expansion config?
   // if not, would at least have to configure recursion_limit
@@ -1036,10 +1038,10 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
  * [types/values] or absolute paths)
  *  - HIR lower (convert modified AST to simpler HIR [both expressions and
  * module tree])
- *  - resolve type aliases (replace any usages of type aliases with actual type
- * [except associated types])
- *  - resolve bind (iterate HIR tree and set binding annotations on all concrete
- * types [avoids path lookups later])
+ *  - resolve type aliases (replace any usages of type aliases with actual
+ * type [except associated types])
+ *  - resolve bind (iterate HIR tree and set binding annotations on all
+ * concrete types [avoids path lookups later])
  *  - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...]
  * for all types
  *  - sort impls (small pass - sort impls into groups)
@@ -1059,8 +1061,8 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
  * function calls)
  *  - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of
  * 'v'])
- *  - expand HIR erasedtype (replace all erased types 'impl Trait' with the true
- * type)
+ *  - expand HIR erasedtype (replace all erased types 'impl Trait' with the
+ * true type)
  *  - typecheck expressions (validate - double check that previous passes
  * haven't broke type system rules)
  *  - lower MIR (convert HIR exprs into a control-flow graph [MIR])
@@ -1071,15 +1073,16 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
  *  - MIR optimise (perform various simple optimisations on the MIR - constant
  * propagation, dead code elimination, borrow elimination, some inlining)
  *  - MIR validate PO (re-validate the MIR)
- *  - MIR validate full (optionally: perform expensive state-tracking validation
- * on MIR)
- *  - trans enumerate (enumerate all items needed for code generation, primarily
- * types used for generics)
- *  - trans auto impls (create magic trait impls as enumerated in previous pass)
+ *  - MIR validate full (optionally: perform expensive state-tracking
+ * validation on MIR)
+ *  - trans enumerate (enumerate all items needed for code generation,
+ * primarily types used for generics)
+ *  - trans auto impls (create magic trait impls as enumerated in previous
+ * pass)
  *  - trans monomorph (generate monomorphised copies of all functions [with
  * generics replaced with real types])
- *  - MIR optimise inline (run optimisation again, this time with full type info
- * [primarily for inlining])
+ *  - MIR optimise inline (run optimisation again, this time with full type
+ * info [primarily for inlining])
  *  - HIR serialise (write out HIR dump [module tree and generic/inline MIR])
  *  - trans codegen (generate final output file: emit C source file and call C
  * compiler) */
@@ -1087,8 +1090,8 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
 /* rustc compile pipeline (basic, in way less detail):
  *  - parse input (parse .rs to AST)
  *  - name resolution, macro expansion, and configuration (process AST
- * recursively, resolving paths, expanding macros, processing #[cfg] nodes [i.e.
- * maybe stripping stuff from AST])
+ * recursively, resolving paths, expanding macros, processing #[cfg] nodes
+ * [i.e. maybe stripping stuff from AST])
  *  - lower to HIR
  *  - type check and other analyses (e.g. privacy checking)
  *  - lower to MIR and post-processing (and do stuff like borrow checking)
@@ -1100,14 +1103,14 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
  *  - register plugins (attributes injection, set various options, register
  * lints, load plugins)
  *  - expansion/configure and expand (initial 'cfg' processing, 'loading
- * compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis of
- * a test harness if required, injection of any std lib dependency and prelude,
- * and name resolution) - actually documented inline
+ * compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis
+ * of a test harness if required, injection of any std lib dependency and
+ * prelude, and name resolution) - actually documented inline
  *      - seeming pierced-together order: pre-AST expansion lint checks,
  * registering builtin macros, crate injection, then expand all macros, then
- * maybe build test harness, AST validation, maybe create a macro crate (if not
- * rustdoc), name resolution, complete gated feature checking, add all buffered
- * lints
+ * maybe build test harness, AST validation, maybe create a macro crate (if
+ * not rustdoc), name resolution, complete gated feature checking, add all
+ * buffered lints
  *  - create global context (lower to HIR)
  *  - analysis on global context (HIR optimisations? create MIR?)
  *  - code generation
diff --git a/gcc/rust/rust-session-manager.h b/gcc/rust/rust-session-manager.h
index ea0523bef1b..99d16287973 100644
--- a/gcc/rust/rust-session-manager.h
+++ b/gcc/rust/rust-session-manager.h
@@ -288,7 +288,7 @@ private:
   void expansion (AST::Crate &crate);
 
   // handle cfg_option
-  bool handle_cfg_option (const std::string &data);
+  bool handle_cfg_option (std::string &data);
 };
 } // namespace Rust
 
diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc
index 1c0e8fcb9ca..da0db00e1e7 100644
--- a/gcc/rust/util/rust-hir-map.cc
+++ b/gcc/rust/util/rust-hir-map.cc
@@ -139,7 +139,10 @@ NodeId
 Mappings::get_next_node_id (CrateNum crateNum)
 {
   auto it = nodeIdIter.find (crateNum);
-  rust_assert (it != nodeIdIter.end ());
+  // We're probably *not* parsing actual rust code... but mostly reusing
+  // the parser in another way. Return 0
+  if (it == nodeIdIter.end ())
+    return 0;
 
   auto id = it->second + 1;
   nodeIdIter[crateNum] = id;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-06-08 12:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-08 12:13 [gcc/devel/rust/master] lexer: Add ability to lex strings directly Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).