From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1643) id 6B67F3816EEC; Wed, 8 Jun 2022 12:13:45 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6B67F3816EEC Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Thomas Schwinge To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/rust/master] lexer: Add ability to lex strings directly X-Act-Checkin: gcc X-Git-Author: Arthur Cohen X-Git-Refname: refs/heads/devel/rust/master X-Git-Oldrev: bf92a1012264f2544e73a7a8dd0ac1e473c7f658 X-Git-Newrev: ede68b7ba64ace5fbcb900b37c7e8572a2ddefb5 Message-Id: <20220608121345.6B67F3816EEC@sourceware.org> Date: Wed, 8 Jun 2022 12:13:45 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 08 Jun 2022 12:13:45 -0000 https://gcc.gnu.org/g:ede68b7ba64ace5fbcb900b37c7e8572a2ddefb5 commit ede68b7ba64ace5fbcb900b37c7e8572a2ddefb5 Author: Arthur Cohen Date: Sat Feb 26 11:10:38 2022 +0100 lexer: Add ability to lex strings directly By allowing us to parse strings directly instead of necessarily a filename, we are now able to reuse the parser and lexer in various places of the compiler. This is useful for -frust-cfg, but may also come in handy for other compiler mechanics such as the include!() builtin macro, where we do not actually want location info but just a stream of tokens. Diff: --- gcc/rust/lex/rust-lex.cc | 9 +++- gcc/rust/lex/rust-lex.h | 22 +++++++++ gcc/rust/rust-session-manager.cc | 97 +++++++++++++++++++++------------------- gcc/rust/rust-session-manager.h | 2 +- gcc/rust/util/rust-hir-map.cc | 5 ++- 5 files changed, 84 insertions(+), 51 deletions(-) diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index c23088fab06..f620e15e1d3 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -128,7 +128,8 @@ Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap) token_queue (TokenSource (this)) { // inform line_table that file is being entered and is in line 1 - line_map->start_file (filename, current_line); + if (linemap) + line_map->start_file (filename, current_line); } Lexer::~Lexer () @@ -152,7 +153,11 @@ Lexer::~Lexer () Location Lexer::get_current_location () { - return line_map->get_location (current_column); + if (line_map) + return line_map->get_location (current_column); + else + // If we have no linemap, we're lexing something without proper locations + return Location (); } int diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index 0ae07fe2cd1..b0d7494f063 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -23,6 +23,7 @@ #include "rust-buffered-queue.h" #include "rust-token.h" +#include #include #include @@ -49,6 +50,13 @@ public: file = fopen (filename, "r"); } + /** + * Create a RAIIFile from an existing instance of FILE* + */ + RAIIFile (FILE *raw, const char *filename = nullptr) + : file (raw), filename (filename) + {} + RAIIFile (const RAIIFile &other) = delete; RAIIFile &operator= (const RAIIFile &other) = delete; @@ -57,6 +65,7 @@ public: { other.file = nullptr; } + RAIIFile &operator= (RAIIFile &&other) { close (); @@ -132,6 +141,19 @@ public: Lexer (const char *filename, RAIIFile input, Linemap *linemap); ~Lexer (); + /** + * Lex the contents of a string instead of a file + */ + static Lexer lex_string (std::string &input) + { + // We can perform this ugly cast to a non-const char* since we're only + // *reading* the string. This would not be valid if we were doing any + // modification to it. + auto string_file = fmemopen (&input[0], input.length (), "r"); + + return Lexer (nullptr, RAIIFile (string_file), nullptr); + } + // don't allow copy semantics (for now, at least) Lexer (const Lexer &other) = delete; Lexer &operator= (const Lexer &other) = delete; diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index cd2c5902481..54a64435a60 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -366,9 +366,11 @@ Session::handle_option ( Compile::Mangler::set_mangling (flag_rust_mangling); break; - case OPT_frust_cfg_: - ret = handle_cfg_option (std::string (arg)); - break; + case OPT_frust_cfg_: { + auto string_arg = std::string (arg); + ret = handle_cfg_option (string_arg); + break; + } default: break; @@ -378,7 +380,7 @@ Session::handle_option ( } bool -Session::handle_cfg_option (const std::string &input) +Session::handle_cfg_option (std::string &input) { std::string key; std::string value; @@ -402,8 +404,8 @@ Session::handle_cfg_option (const std::string &input) return true; } -/* Enables a certain dump depending on the name passed in. Returns true if name - * is valid, false otherwise. */ +/* Enables a certain dump depending on the name passed in. Returns true if + * name is valid, false otherwise. */ bool Session::enable_dump (std::string arg) { @@ -532,8 +534,8 @@ Session::parse_file (const char *filename) * line into crate root) * - injection (some lint checks or dummy, register builtin macros, crate * injection) - * - expansion (expands all macros, maybe build test harness, AST validation, - * maybe macro crate) + * - expansion (expands all macros, maybe build test harness, AST + * validation, maybe macro crate) * - resolution (name resolution, type resolution, maybe feature checking, * maybe buffered lints) * TODO not done */ @@ -603,8 +605,8 @@ Session::parse_file (const char *filename) if (saw_errors ()) return; - // scan unused has to be done after type resolution since methods are resolved - // at that point + // scan unused has to be done after type resolution since methods are + // resolved at that point Resolver::ScanUnused::Scan (); if (saw_errors ()) @@ -638,11 +640,11 @@ Session::debug_dump_load_crates (Parser &parser) /* TODO: search through inner attrs and see whether any of those attr paths * contain "no_core", "no_std", "compiler_builtins". If so/not, save certain - * crate names. In these names, insert items at beginning of crate items. This - * is crate injection. Also, inject prelude use decl at beginning (first name - * is assumed to be prelude - prelude is a use decl automatically generated to - * enable using Option and Copy without qualifying it or importing it via - * 'use' manually) */ + * crate names. In these names, insert items at beginning of crate items. + * This is crate injection. Also, inject prelude use decl at beginning + * (first name is assumed to be prelude - prelude is a use decl + * automatically generated to enable using Option and Copy without + * qualifying it or importing it via 'use' manually) */ std::vector crate_names; for (const auto &item : crate.items) @@ -695,8 +697,8 @@ Session::injection (AST::Crate &crate) // register builtin macros /* In rustc, builtin macros are divided into 3 categories depending on use - - * "bang" macros, "attr" macros, and "derive" macros. I think the meanings of - * these categories should be fairly obvious to anyone who has used rust. + * "bang" macros, "attr" macros, and "derive" macros. I think the meanings + * of these categories should be fairly obvious to anyone who has used rust. * Builtin macro list by category: Bang * - asm * - assert @@ -739,8 +741,8 @@ Session::injection (AST::Crate &crate) * rustc also has a "quote" macro that is defined differently and is * supposedly not stable so eh. */ /* TODO: actually implement injection of these macros. In particular, derive - * macros, cfg, and test should be prioritised since they seem to be used the - * most. */ + * macros, cfg, and test should be prioritised since they seem to be used + * the most. */ // crate injection std::vector names; @@ -804,11 +806,11 @@ Session::injection (AST::Crate &crate) crate.items.insert (crate.items.begin (), std::move (use_decl)); /* TODO: potentially add checking attribute crate type? I can't figure out - * what this does currently comment says "Unconditionally collect crate types - * from attributes to make them used", which presumably refers to checking the - * linkage info by "crate_type". It also seems to ensure that an invalid crate - * type is not specified, so maybe just do that. Valid crate types: bin lib - * dylib staticlib cdylib rlib proc-macro */ + * what this does currently comment says "Unconditionally collect crate + * types from attributes to make them used", which presumably refers to + * checking the linkage info by "crate_type". It also seems to ensure that + * an invalid crate type is not specified, so maybe just do that. Valid + * crate types: bin lib dylib staticlib cdylib rlib proc-macro */ rust_debug ("finished injection"); } @@ -818,8 +820,8 @@ Session::expansion (AST::Crate &crate) { rust_debug ("started expansion"); - /* rustc has a modification to windows PATH temporarily here, which may end up - * being required */ + /* rustc has a modification to windows PATH temporarily here, which may end + * up being required */ // create macro expansion config? // if not, would at least have to configure recursion_limit @@ -1036,10 +1038,10 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature) * [types/values] or absolute paths) * - HIR lower (convert modified AST to simpler HIR [both expressions and * module tree]) - * - resolve type aliases (replace any usages of type aliases with actual type - * [except associated types]) - * - resolve bind (iterate HIR tree and set binding annotations on all concrete - * types [avoids path lookups later]) + * - resolve type aliases (replace any usages of type aliases with actual + * type [except associated types]) + * - resolve bind (iterate HIR tree and set binding annotations on all + * concrete types [avoids path lookups later]) * - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...] * for all types * - sort impls (small pass - sort impls into groups) @@ -1059,8 +1061,8 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature) * function calls) * - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of * 'v']) - * - expand HIR erasedtype (replace all erased types 'impl Trait' with the true - * type) + * - expand HIR erasedtype (replace all erased types 'impl Trait' with the + * true type) * - typecheck expressions (validate - double check that previous passes * haven't broke type system rules) * - lower MIR (convert HIR exprs into a control-flow graph [MIR]) @@ -1071,15 +1073,16 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature) * - MIR optimise (perform various simple optimisations on the MIR - constant * propagation, dead code elimination, borrow elimination, some inlining) * - MIR validate PO (re-validate the MIR) - * - MIR validate full (optionally: perform expensive state-tracking validation - * on MIR) - * - trans enumerate (enumerate all items needed for code generation, primarily - * types used for generics) - * - trans auto impls (create magic trait impls as enumerated in previous pass) + * - MIR validate full (optionally: perform expensive state-tracking + * validation on MIR) + * - trans enumerate (enumerate all items needed for code generation, + * primarily types used for generics) + * - trans auto impls (create magic trait impls as enumerated in previous + * pass) * - trans monomorph (generate monomorphised copies of all functions [with * generics replaced with real types]) - * - MIR optimise inline (run optimisation again, this time with full type info - * [primarily for inlining]) + * - MIR optimise inline (run optimisation again, this time with full type + * info [primarily for inlining]) * - HIR serialise (write out HIR dump [module tree and generic/inline MIR]) * - trans codegen (generate final output file: emit C source file and call C * compiler) */ @@ -1087,8 +1090,8 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature) /* rustc compile pipeline (basic, in way less detail): * - parse input (parse .rs to AST) * - name resolution, macro expansion, and configuration (process AST - * recursively, resolving paths, expanding macros, processing #[cfg] nodes [i.e. - * maybe stripping stuff from AST]) + * recursively, resolving paths, expanding macros, processing #[cfg] nodes + * [i.e. maybe stripping stuff from AST]) * - lower to HIR * - type check and other analyses (e.g. privacy checking) * - lower to MIR and post-processing (and do stuff like borrow checking) @@ -1100,14 +1103,14 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature) * - register plugins (attributes injection, set various options, register * lints, load plugins) * - expansion/configure and expand (initial 'cfg' processing, 'loading - * compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis of - * a test harness if required, injection of any std lib dependency and prelude, - * and name resolution) - actually documented inline + * compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis + * of a test harness if required, injection of any std lib dependency and + * prelude, and name resolution) - actually documented inline * - seeming pierced-together order: pre-AST expansion lint checks, * registering builtin macros, crate injection, then expand all macros, then - * maybe build test harness, AST validation, maybe create a macro crate (if not - * rustdoc), name resolution, complete gated feature checking, add all buffered - * lints + * maybe build test harness, AST validation, maybe create a macro crate (if + * not rustdoc), name resolution, complete gated feature checking, add all + * buffered lints * - create global context (lower to HIR) * - analysis on global context (HIR optimisations? create MIR?) * - code generation diff --git a/gcc/rust/rust-session-manager.h b/gcc/rust/rust-session-manager.h index ea0523bef1b..99d16287973 100644 --- a/gcc/rust/rust-session-manager.h +++ b/gcc/rust/rust-session-manager.h @@ -288,7 +288,7 @@ private: void expansion (AST::Crate &crate); // handle cfg_option - bool handle_cfg_option (const std::string &data); + bool handle_cfg_option (std::string &data); }; } // namespace Rust diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc index 1c0e8fcb9ca..da0db00e1e7 100644 --- a/gcc/rust/util/rust-hir-map.cc +++ b/gcc/rust/util/rust-hir-map.cc @@ -139,7 +139,10 @@ NodeId Mappings::get_next_node_id (CrateNum crateNum) { auto it = nodeIdIter.find (crateNum); - rust_assert (it != nodeIdIter.end ()); + // We're probably *not* parsing actual rust code... but mostly reusing + // the parser in another way. Return 0 + if (it == nodeIdIter.end ()) + return 0; auto id = it->second + 1; nodeIdIter[crateNum] = id;