From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1643) id B1B4F3858D32; Thu, 1 Dec 2022 08:12:16 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B1B4F3858D32 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1669882336; bh=WkoEi1VoqsobfqmCliTa0SlPP56iBetwYkCWzQtoqpw=; h=From:To:Subject:Date:From; b=ZFCwbjfctogLDkHck0uHpvxSJpKc2gWI3RImtwGoNs0iuhoD88LmOsw7DVInJ71hv HE0lxHNzHP6RCF5H0rJg9qCPNqz6obOKZT1QeFjYfxURGhqfG5z7NGPNmAIEf9bO6b ngM5CRNfkIWjgFSWeV5+mvM4aBct/6aBfN9sUKe8= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Thomas Schwinge To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/rust/master] Improve lexer dump X-Act-Checkin: gcc X-Git-Author: Raiki Tamura X-Git-Refname: refs/heads/devel/rust/master X-Git-Oldrev: 716ae8d024dcddd5000f65fa5c7c0dbd9f03c869 X-Git-Newrev: b43c5d4fd82c220419f9234588fed8131d416fff Message-Id: <20221201081216.B1B4F3858D32@sourceware.org> Date: Thu, 1 Dec 2022 08:12:16 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b43c5d4fd82c220419f9234588fed8131d416fff commit b43c5d4fd82c220419f9234588fed8131d416fff Author: Raiki Tamura Date: Wed Nov 16 17:15:24 2022 +0900 Improve lexer dump Diff: --- gcc/rust/lex/rust-lex.cc | 47 +++++++++++++++++++++++++++++++++++++--- gcc/rust/lex/rust-lex.h | 12 ++++++++-- gcc/rust/parse/rust-parse-impl.h | 41 ----------------------------------- gcc/rust/parse/rust-parse.h | 2 -- gcc/rust/rust-session-manager.cc | 41 +++++++++++++++-------------------- gcc/rust/util/rust-optional.h | 1 + 6 files changed, 72 insertions(+), 72 deletions(-) diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 692af5d8fb8..ea17ecc731f 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -118,13 +118,15 @@ is_non_decimal_int_literal_separator (char character) Lexer::Lexer (const std::string &input) : input (RAIIFile::create_error ()), current_line (1), current_column (1), - line_map (nullptr), raw_input_source (new BufferInputSource (input, 0)), + line_map (nullptr), dump_lex_out (Optional::none ()), + raw_input_source (new BufferInputSource (input, 0)), input_queue{*raw_input_source}, token_queue (TokenSource (this)) {} -Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap) +Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap, + Optional dump_lex_opt) : input (std::move (file_input)), current_line (1), current_column (1), - line_map (linemap), + line_map (linemap), dump_lex_out (dump_lex_opt), raw_input_source (new FileInputSource (input.get_raw ())), input_queue{*raw_input_source}, token_queue (TokenSource (this)) { @@ -186,6 +188,45 @@ Lexer::skip_input () skip_input (0); } +void +Lexer::skip_token (int n) +{ + // dump tokens if dump-lex option is enabled + if (dump_lex_out.is_some ()) + dump_and_skip (n); + else + token_queue.skip (n); +} + +void +Lexer::dump_and_skip (int n) +{ + std::ofstream &out = dump_lex_out.get (); + bool found_eof = false; + const_TokenPtr tok; + for (int i = 0; i < n + 1; i++) + { + if (!found_eof) + { + tok = peek_token (); + found_eof |= tok->get_id () == Rust::END_OF_FILE; + + Location loc = tok->get_locus (); + + out << "token_id_to_str (); + out << (tok->has_str () ? (std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : "") + << " "; + out << get_line_map ()->to_string (loc) << " "; + } + + token_queue.skip (0); + } +} + void Lexer::replace_current_token (TokenPtr replacement) { diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index 27120d1546d..c05e2678c3d 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -22,6 +22,7 @@ #include "rust-linemap.h" #include "rust-buffered-queue.h" #include "rust-token.h" +#include "rust-optional.h" namespace Rust { // Simple wrapper for FILE* that simplifies destruction. @@ -139,7 +140,9 @@ private: public: // Construct lexer with input file and filename provided - Lexer (const char *filename, RAIIFile input, Linemap *linemap); + Lexer (const char *filename, RAIIFile input, Linemap *linemap, + Optional dump_lex_opt + = Optional::none ()); // Lex the contents of a string instead of a file Lexer (const std::string &input); @@ -161,10 +164,13 @@ public: const_TokenPtr peek_token () { return peek_token (0); } // Advances current token to n + 1 tokens ahead of current position. - void skip_token (int n) { token_queue.skip (n); } + void skip_token (int n); // Skips the current token. void skip_token () { skip_token (0); } + // Dumps and advances by n + 1 tokens. + void dump_and_skip (int n); + // Replaces the current token with a specified token. void replace_current_token (TokenPtr replacement); // FIXME: don't use anymore @@ -197,6 +203,8 @@ private: * allocating new linemap */ static const int max_column_hint = 80; + Optional dump_lex_out; + // Input source wrapper thing. class InputSource { diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index 54f3c5c25a6..0346ce68a23 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -14887,47 +14887,6 @@ Parser::done_end () return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); } -// Dumps lexer output to stderr. -template -void -Parser::debug_dump_lex_output (std::ostream &out) -{ - /* TODO: a better implementation of "lexer dump" (as in dump what was - * actually tokenised) would actually be to "write" a token to a file every - * time skip_token() here was called. This would reflect the parser - * modifications to the token stream, such as fixing the template angle - * brackets. */ - - const_TokenPtr tok = lexer.peek_token (); - - while (true) - { - if (tok->get_id () == Rust::END_OF_FILE) - break; - - bool has_text = tok->get_id () == Rust::IDENTIFIER - || tok->get_id () == Rust::INT_LITERAL - || tok->get_id () == Rust::FLOAT_LITERAL - || tok->get_id () == Rust::STRING_LITERAL - || tok->get_id () == Rust::CHAR_LITERAL - || tok->get_id () == Rust::BYTE_STRING_LITERAL - || tok->get_id () == Rust::BYTE_CHAR_LITERAL; - - Location loc = tok->get_locus (); - - out << "token_id_to_str (); - out << has_text ? (std::string (", text=") + tok->get_str () - + std::string (", typehint=") - + std::string (tok->get_type_hint_str ())) - : ""; - out << lexer.get_line_map ()->to_string (loc); - - lexer.skip_token (); - tok = lexer.peek_token (); - } -} - // Parses crate and dumps AST to stderr, recursively. template void diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index e4c5a2c5c9f..8449181b12f 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -671,8 +671,6 @@ public: // Main entry point for parser. std::unique_ptr parse_crate (); - // Dumps all lexer output. - void debug_dump_lex_output (std::ostream &out); void debug_dump_ast_output (AST::Crate &crate, std::ostream &out); // Returns whether any parsing errors have occurred. diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 524171fdfd3..513bf50cb88 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -439,7 +439,22 @@ Session::compile_crate (const char *filename) // parse file here /* create lexer and parser - these are file-specific and so aren't instance * variables */ - Lexer lex (filename, std::move (file_wrap), linemap); + Optional dump_lex_opt = Optional::none (); + std::ofstream dump_lex_stream; + if (options.dump_option_enabled (CompileOptions::LEXER_DUMP)) + { + dump_lex_stream.open (kLexDumpFile); + if (dump_lex_stream.fail ()) + { + rust_error_at (Linemap::unknown_location (), + "cannot open %s:%m; ignored", kLexDumpFile); + } + auto stream = Optional::some (dump_lex_stream); + dump_lex_opt = std::move (stream); + } + + Lexer lex (filename, std::move (file_wrap), linemap, dump_lex_opt); + Parser parser (lex); // generate crate from parser @@ -448,11 +463,7 @@ Session::compile_crate (const char *filename) // handle crate name handle_crate_name (*ast_crate.get ()); - // dump options - if (options.dump_option_enabled (CompileOptions::LEXER_DUMP)) - { - dump_lex (parser); - } + // dump options except lexer dump if (options.dump_option_enabled (CompileOptions::PARSER_AST_DUMP)) { dump_ast (parser, *ast_crate.get ()); @@ -819,24 +830,6 @@ Session::expansion (AST::Crate &crate) rust_debug ("finished expansion"); } -void -Session::dump_lex (Parser &parser) const -{ - std::ofstream out; - out.open (kLexDumpFile); - if (out.fail ()) - { - rust_error_at (Linemap::unknown_location (), "cannot open %s:%m; ignored", - kLexDumpFile); - return; - } - - // TODO: rewrite lexer dump or something so that it allows for the crate - // to already be parsed - parser.debug_dump_lex_output (out); - out.close (); -} - void Session::dump_ast (Parser &parser, AST::Crate &crate) const { diff --git a/gcc/rust/util/rust-optional.h b/gcc/rust/util/rust-optional.h index 56465400250..0d87a9c31d1 100644 --- a/gcc/rust/util/rust-optional.h +++ b/gcc/rust/util/rust-optional.h @@ -194,6 +194,7 @@ private: public: Optional (const Optional &other) = default; Optional (Optional &&other) = default; + Optional &operator= (Optional &&other) = default; static Optional some (T &value) {