From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wr1-x42a.google.com (mail-wr1-x42a.google.com [IPv6:2a00:1450:4864:20::42a]) by sourceware.org (Postfix) with ESMTPS id 543203886C46 for ; Tue, 21 Feb 2023 12:04:37 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 543203886C46 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=embecosm.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=embecosm.com Received: by mail-wr1-x42a.google.com with SMTP id l25so3760477wrb.3 for ; Tue, 21 Feb 2023 04:04:37 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=embecosm.com; s=google; h=content-transfer-encoding:mime-version:reply-to:references :in-reply-to:message-id:date:subject:cc:to:from:from:to:cc:subject :date:message-id:reply-to; bh=IxHBAILdar9Cv3XQf/HDDc7bXPwJFtHB6rG28jATgZQ=; b=YbTTHaKAuxGNxfZp7RSvb/8IBuqrAPnXV9bMNZsNM5Z0VSkyFLZm7Rncyk0H7nffHz VQFnSBNmq79DlxzHj/0vcw0SghvLR6xdpJspGwjauNJKoMDaAj08SLkrvjUCWdbhUHlk nCWhRJa03UKxUxpI1TsnlO9ywiW8GEs7d3xy/aXwF+VFVYZgV1F/w2k1wTA6/e7PU9Sh Lu4MZYm2ebCzlFzSPqNoRhs/KH6gygVWPJOTZc4Cq7jGSgXt501SJzOUkhyEmEuwOeBw OTDrexRaQr6qmku9kvhzekS5IB/fSc9EciwnjbCppf+bO08Ezgy3CjkaQmjaIdYnhk2x dJwg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:reply-to:references :in-reply-to:message-id:date:subject:cc:to:from:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; bh=IxHBAILdar9Cv3XQf/HDDc7bXPwJFtHB6rG28jATgZQ=; b=xa+m0XRd8wrvKUTRoYzVwDaPxKSLOLmOWjMhGTj9gbjTyCaxIt810cF8H120NVTFsq RkiqH514je/Txe7hocLs/60ZIEPeKx1K7UmpY5VGF23VS1x79r8NCPP051a38hrja31s t9Sexyk7kAlzFOOh6N2LcofW8eXfSXFT9gxHcoLUpLhqCClnKEgSA5mY1Y5DMTSLbpol 5Xo/PIG/A9dK7yhKCW8MmuHo5sdo3DJFDhr1NgBLGqBZ4hPi/YxpxiUeUHrXNUjFgfkp JHwE4N+rP+mORbtUD3wo7x8tRTX0jxQt13zs2w2bO9yq1JNbHuNpxwHqAJtnakOT3exl ZztQ== X-Gm-Message-State: AO0yUKXIoznMvFyu1gdwGE1kWQjzADXDFPOH7UELanScGQUAvE4zQCeY 
AP37Wf4y+jmOVTdkjon/ZzU9ZBB99yiCDz5ooQ== X-Google-Smtp-Source: AK7set/j0L/yk6tn22E1yVXyQnIzIJoVC9z7mOdbOOldk8rTEtHoDq0CsWprJghV7ZsurNYSQMzzvQ== X-Received: by 2002:adf:f348:0:b0:298:4baf:ac8a with SMTP id e8-20020adff348000000b002984bafac8amr2757565wrp.44.1676981076841; Tue, 21 Feb 2023 04:04:36 -0800 (PST) Received: from platypus.localdomain ([62.23.166.218]) by smtp.gmail.com with ESMTPSA id c15-20020adffb4f000000b002c55b0e6ef1sm5013811wrs.4.2023.02.21.04.04.36 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 21 Feb 2023 04:04:36 -0800 (PST) From: arthur.cohen@embecosm.com To: gcc-patches@gcc.gnu.org Cc: gcc-rust@gcc.gnu.org, Raiki Tamura Subject: [committed 092/103] gccrs: Improve lexer dump Date: Tue, 21 Feb 2023 13:02:22 +0100 Message-Id: <20230221120230.596966-93-arthur.cohen@embecosm.com> X-Mailer: git-send-email 2.39.1 In-Reply-To: <20230221120230.596966-1-arthur.cohen@embecosm.com> References: <20230221120230.596966-1-arthur.cohen@embecosm.com> Reply-To: arthur.cohen@embecosm.com MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-14.7 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: Raiki Tamura gcc/rust/ChangeLog: * lex/rust-lex.cc (Lexer::Lexer): Add `dump_lex` boolean flag. (Lexer::skip_token): Dump tokens if flag is enabled. (Lexer::dump_and_skip): New function. * lex/rust-lex.h: Include optional.h and declare functions. * parse/rust-parse-impl.h (Parser::debug_dump_lex_output): Remove old unused function. * parse/rust-parse.h: Likewise. * rust-session-manager.cc (Session::compile_crate): Pass lexer dump option to lexer. (Session::dump_lex): New function. * util/rust-optional.h: Add missing constructor. 
Signed-off-by: Raiki Tamura --- gcc/rust/lex/rust-lex.cc | 47 ++++++++++++++++++++++++++++++-- gcc/rust/lex/rust-lex.h | 12 ++++++-- gcc/rust/parse/rust-parse-impl.h | 41 ---------------------------- gcc/rust/parse/rust-parse.h | 2 -- gcc/rust/rust-session-manager.cc | 41 ++++++++++++---------------- gcc/rust/util/rust-optional.h | 1 + 6 files changed, 72 insertions(+), 72 deletions(-) diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 3467a3160ed..53c7aecd25b 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -118,13 +118,15 @@ is_non_decimal_int_literal_separator (char character) Lexer::Lexer (const std::string &input) : input (RAIIFile::create_error ()), current_line (1), current_column (1), - line_map (nullptr), raw_input_source (new BufferInputSource (input, 0)), + line_map (nullptr), dump_lex_out (Optional<std::ofstream &>::none ()), + raw_input_source (new BufferInputSource (input, 0)), input_queue{*raw_input_source}, token_queue (TokenSource (this)) {} -Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap) +Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap, + Optional<std::ofstream &> dump_lex_opt) : input (std::move (file_input)), current_line (1), current_column (1), - line_map (linemap), + line_map (linemap), dump_lex_out (dump_lex_opt), raw_input_source (new FileInputSource (input.get_raw ())), input_queue{*raw_input_source}, token_queue (TokenSource (this)) { @@ -186,6 +188,45 @@ Lexer::skip_input () skip_input (0); } +void +Lexer::skip_token (int n) +{ + // dump tokens if dump-lex option is enabled + if (dump_lex_out.is_some ()) + dump_and_skip (n); + else + token_queue.skip (n); +} + +void +Lexer::dump_and_skip (int n) +{ + std::ofstream &out = dump_lex_out.get (); + bool found_eof = false; + const_TokenPtr tok; + for (int i = 0; i < n + 1; i++) + { + if (!found_eof) + { + tok = peek_token (); + found_eof |= tok->get_id () == Rust::END_OF_FILE; + + Location loc = tok->get_locus (); + + out << 
"<id="; + out << tok->token_id_to_str (); + out << (tok->has_str () ? (std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : "") + << " "; + out << get_line_map ()->to_string (loc) << " "; + } + + token_queue.skip (0); + } +} + void Lexer::replace_current_token (TokenPtr replacement) { diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index 6e8c5999f51..a170e91f2cc 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -22,6 +22,7 @@ #include "rust-linemap.h" #include "rust-buffered-queue.h" #include "rust-token.h" +#include "rust-optional.h" namespace Rust { // Simple wrapper for FILE* that simplifies destruction. @@ -139,7 +140,9 @@ private: public: // Construct lexer with input file and filename provided - Lexer (const char *filename, RAIIFile input, Linemap *linemap); + Lexer (const char *filename, RAIIFile input, Linemap *linemap, + Optional<std::ofstream &> dump_lex_opt + = Optional<std::ofstream &>::none ()); // Lex the contents of a string instead of a file Lexer (const std::string &input); @@ -161,10 +164,13 @@ public: const_TokenPtr peek_token () { return peek_token (0); } // Advances current token to n + 1 tokens ahead of current position. - void skip_token (int n) { token_queue.skip (n); } + void skip_token (int n); // Skips the current token. void skip_token () { skip_token (0); } + // Dumps and advances by n + 1 tokens. + void dump_and_skip (int n); + // Replaces the current token with a specified token. void replace_current_token (TokenPtr replacement); // FIXME: don't use anymore @@ -197,6 +203,8 @@ private: * allocating new linemap */ static const int max_column_hint = 80; + Optional<std::ofstream &> dump_lex_out; + // Input source wrapper thing. 
class InputSource { diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index ee0282bdab3..cbd40efcc9b 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -14897,47 +14897,6 @@ Parser::done_end () return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); } -// Dumps lexer output to stderr. -template -void -Parser::debug_dump_lex_output (std::ostream &out) -{ - /* TODO: a better implementation of "lexer dump" (as in dump what was - * actually tokenised) would actually be to "write" a token to a file every - * time skip_token() here was called. This would reflect the parser - * modifications to the token stream, such as fixing the template angle - * brackets. */ - - const_TokenPtr tok = lexer.peek_token (); - - while (true) - { - if (tok->get_id () == Rust::END_OF_FILE) - break; - - bool has_text = tok->get_id () == Rust::IDENTIFIER - || tok->get_id () == Rust::INT_LITERAL - || tok->get_id () == Rust::FLOAT_LITERAL - || tok->get_id () == Rust::STRING_LITERAL - || tok->get_id () == Rust::CHAR_LITERAL - || tok->get_id () == Rust::BYTE_STRING_LITERAL - || tok->get_id () == Rust::BYTE_CHAR_LITERAL; - - Location loc = tok->get_locus (); - - out << "<id="; - out << tok->token_id_to_str (); - out << has_text ? (std::string (", text=") + tok->get_str () - + std::string (", typehint=") - + std::string (tok->get_type_hint_str ())) - : ""; - out << lexer.get_line_map ()->to_string (loc); - - lexer.skip_token (); - tok = lexer.peek_token (); - } -} - // Parses crate and dumps AST to stderr, recursively. template void diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index e4c5a2c5c9f..8449181b12f 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -671,8 +671,6 @@ public: // Main entry point for parser. std::unique_ptr parse_crate (); - // Dumps all lexer output. 
- void debug_dump_lex_output (std::ostream &out); void debug_dump_ast_output (AST::Crate &crate, std::ostream &out); // Returns whether any parsing errors have occurred. diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 6f51bd2e5a1..732aabe1f26 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -455,7 +455,22 @@ Session::compile_crate (const char *filename) // parse file here /* create lexer and parser - these are file-specific and so aren't instance * variables */ - Lexer lex (filename, std::move (file_wrap), linemap); + Optional<std::ofstream &> dump_lex_opt = Optional<std::ofstream &>::none (); + std::ofstream dump_lex_stream; + if (options.dump_option_enabled (CompileOptions::LEXER_DUMP)) + { + dump_lex_stream.open (kLexDumpFile); + if (dump_lex_stream.fail ()) + { + rust_error_at (Linemap::unknown_location (), + "cannot open %s:%m; ignored", kLexDumpFile); + } + auto stream = Optional<std::ofstream &>::some (dump_lex_stream); + dump_lex_opt = std::move (stream); + } + + Lexer lex (filename, std::move (file_wrap), linemap, dump_lex_opt); + Parser parser (lex); // generate crate from parser @@ -464,11 +479,7 @@ Session::compile_crate (const char *filename) // handle crate name handle_crate_name (*ast_crate.get ()); - // dump options - if (options.dump_option_enabled (CompileOptions::LEXER_DUMP)) - { - dump_lex (parser); - } + // dump options except lexer dump if (options.dump_option_enabled (CompileOptions::PARSER_AST_DUMP)) { dump_ast (parser, *ast_crate.get ()); @@ -835,24 +846,6 @@ Session::expansion (AST::Crate &crate) rust_debug ("finished expansion"); } -void -Session::dump_lex (Parser &parser) const -{ - std::ofstream out; - out.open (kLexDumpFile); - if (out.fail ()) - { - rust_error_at (Linemap::unknown_location (), "cannot open %s:%m; ignored", - kLexDumpFile); - return; - } - - // TODO: rewrite lexer dump or something so that it allows for the crate - // to already be parsed - parser.debug_dump_lex_output (out); - out.close (); 
-} - void Session::dump_ast (Parser &parser, AST::Crate &crate) const { diff --git a/gcc/rust/util/rust-optional.h b/gcc/rust/util/rust-optional.h index eba3a7886ac..d7349820b38 100644 --- a/gcc/rust/util/rust-optional.h +++ b/gcc/rust/util/rust-optional.h @@ -194,6 +194,7 @@ private: public: Optional (const Optional &other) = default; Optional (Optional &&other) = default; + Optional &operator= (Optional &&other) = default; static Optional some (T &value) { -- 2.39.1