public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-6245] gccrs: Improve lexer dump
@ 2023-02-21 12:03 Arthur Cohen
  0 siblings, 0 replies; only message in thread
From: Arthur Cohen @ 2023-02-21 12:03 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0ef795c3278e35cdd274f22bb0ab9783a983af57

commit r13-6245-g0ef795c3278e35cdd274f22bb0ab9783a983af57
Author: Raiki Tamura <tamaron1203@gmail.com>
Date:   Wed Nov 16 17:15:24 2022 +0900

    gccrs: Improve lexer dump
    
    gcc/rust/ChangeLog:
    
            * lex/rust-lex.cc (Lexer::Lexer): Add optional `dump_lex_opt` output
            stream parameter.
            (Lexer::skip_token): Dump tokens if a dump stream is set.
            (Lexer::dump_and_skip): New function.
            * lex/rust-lex.h: Include rust-optional.h and declare functions.
            * parse/rust-parse-impl.h (Parser::debug_dump_lex_output): Remove old
            unused function.
            * parse/rust-parse.h: Likewise.
            * rust-session-manager.cc (Session::compile_crate): Pass lexer dump
            option to lexer.
            (Session::dump_lex): Remove function.
            * util/rust-optional.h: Add missing move assignment operator.
    
    Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>

Diff:
---
 gcc/rust/lex/rust-lex.cc         | 47 +++++++++++++++++++++++++++++++++++++---
 gcc/rust/lex/rust-lex.h          | 12 ++++++++--
 gcc/rust/parse/rust-parse-impl.h | 41 -----------------------------------
 gcc/rust/parse/rust-parse.h      |  2 --
 gcc/rust/rust-session-manager.cc | 41 +++++++++++++++--------------------
 gcc/rust/util/rust-optional.h    |  1 +
 6 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 3467a3160ed..53c7aecd25b 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -118,13 +118,15 @@ is_non_decimal_int_literal_separator (char character)
 
 Lexer::Lexer (const std::string &input)
   : input (RAIIFile::create_error ()), current_line (1), current_column (1),
-    line_map (nullptr), raw_input_source (new BufferInputSource (input, 0)),
+    line_map (nullptr), dump_lex_out (Optional<std::ofstream &>::none ()),
+    raw_input_source (new BufferInputSource (input, 0)),
     input_queue{*raw_input_source}, token_queue (TokenSource (this))
 {}
 
-Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap)
+Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap,
+	      Optional<std::ofstream &> dump_lex_opt)
   : input (std::move (file_input)), current_line (1), current_column (1),
-    line_map (linemap),
+    line_map (linemap), dump_lex_out (dump_lex_opt),
     raw_input_source (new FileInputSource (input.get_raw ())),
     input_queue{*raw_input_source}, token_queue (TokenSource (this))
 {
@@ -186,6 +188,45 @@ Lexer::skip_input ()
   skip_input (0);
 }
 
+void
+Lexer::skip_token (int n)
+{
+  // dump tokens if dump-lex option is enabled
+  if (dump_lex_out.is_some ())
+    dump_and_skip (n);
+  else
+    token_queue.skip (n);
+}
+
+void
+Lexer::dump_and_skip (int n)
+{
+  std::ofstream &out = dump_lex_out.get ();
+  bool found_eof = false;
+  const_TokenPtr tok;
+  for (int i = 0; i < n + 1; i++)
+    {
+      if (!found_eof)
+	{
+	  tok = peek_token ();
+	  found_eof |= tok->get_id () == Rust::END_OF_FILE;
+
+	  Location loc = tok->get_locus ();
+
+	  out << "<id=";
+	  out << tok->token_id_to_str ();
+	  out << (tok->has_str () ? (std::string (", text=") + tok->get_str ()
+				     + std::string (", typehint=")
+				     + std::string (tok->get_type_hint_str ()))
+				  : "")
+	      << " ";
+	  out << get_line_map ()->to_string (loc) << " ";
+	}
+
+      token_queue.skip (0);
+    }
+}
+
 void
 Lexer::replace_current_token (TokenPtr replacement)
 {
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 6e8c5999f51..a170e91f2cc 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -22,6 +22,7 @@
 #include "rust-linemap.h"
 #include "rust-buffered-queue.h"
 #include "rust-token.h"
+#include "rust-optional.h"
 
 namespace Rust {
 // Simple wrapper for FILE* that simplifies destruction.
@@ -139,7 +140,9 @@ private:
 
 public:
   // Construct lexer with input file and filename provided
-  Lexer (const char *filename, RAIIFile input, Linemap *linemap);
+  Lexer (const char *filename, RAIIFile input, Linemap *linemap,
+	 Optional<std::ofstream &> dump_lex_opt
+	 = Optional<std::ofstream &>::none ());
 
   // Lex the contents of a string instead of a file
   Lexer (const std::string &input);
@@ -161,10 +164,13 @@ public:
   const_TokenPtr peek_token () { return peek_token (0); }
 
   // Advances current token to n + 1 tokens ahead of current position.
-  void skip_token (int n) { token_queue.skip (n); }
+  void skip_token (int n);
   // Skips the current token.
   void skip_token () { skip_token (0); }
 
+  // Dumps and advances by n + 1 tokens.
+  void dump_and_skip (int n);
+
   // Replaces the current token with a specified token.
   void replace_current_token (TokenPtr replacement);
   // FIXME: don't use anymore
@@ -197,6 +203,8 @@ private:
    * allocating new linemap */
   static const int max_column_hint = 80;
 
+  Optional<std::ofstream &> dump_lex_out;
+
   // Input source wrapper thing.
   class InputSource
   {
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index ee0282bdab3..cbd40efcc9b 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -14897,47 +14897,6 @@ Parser<ManagedTokenSource>::done_end ()
   return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE);
 }
 
-// Dumps lexer output to stderr.
-template <typename ManagedTokenSource>
-void
-Parser<ManagedTokenSource>::debug_dump_lex_output (std::ostream &out)
-{
-  /* TODO: a better implementation of "lexer dump" (as in dump what was
-   * actually tokenised) would actually be to "write" a token to a file every
-   * time skip_token() here was called. This would reflect the parser
-   * modifications to the token stream, such as fixing the template angle
-   * brackets. */
-
-  const_TokenPtr tok = lexer.peek_token ();
-
-  while (true)
-    {
-      if (tok->get_id () == Rust::END_OF_FILE)
-	break;
-
-      bool has_text = tok->get_id () == Rust::IDENTIFIER
-		      || tok->get_id () == Rust::INT_LITERAL
-		      || tok->get_id () == Rust::FLOAT_LITERAL
-		      || tok->get_id () == Rust::STRING_LITERAL
-		      || tok->get_id () == Rust::CHAR_LITERAL
-		      || tok->get_id () == Rust::BYTE_STRING_LITERAL
-		      || tok->get_id () == Rust::BYTE_CHAR_LITERAL;
-
-      Location loc = tok->get_locus ();
-
-      out << "<id=";
-      out << tok->token_id_to_str ();
-      out << has_text ? (std::string (", text=") + tok->get_str ()
-			 + std::string (", typehint=")
-			 + std::string (tok->get_type_hint_str ()))
-		      : "";
-      out << lexer.get_line_map ()->to_string (loc);
-
-      lexer.skip_token ();
-      tok = lexer.peek_token ();
-    }
-}
-
 // Parses crate and dumps AST to stderr, recursively.
 template <typename ManagedTokenSource>
 void
diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h
index e4c5a2c5c9f..8449181b12f 100644
--- a/gcc/rust/parse/rust-parse.h
+++ b/gcc/rust/parse/rust-parse.h
@@ -671,8 +671,6 @@ public:
   // Main entry point for parser.
   std::unique_ptr<AST::Crate> parse_crate ();
 
-  // Dumps all lexer output.
-  void debug_dump_lex_output (std::ostream &out);
   void debug_dump_ast_output (AST::Crate &crate, std::ostream &out);
 
   // Returns whether any parsing errors have occurred.
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index 6f51bd2e5a1..732aabe1f26 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -455,7 +455,22 @@ Session::compile_crate (const char *filename)
   // parse file here
   /* create lexer and parser - these are file-specific and so aren't instance
    * variables */
-  Lexer lex (filename, std::move (file_wrap), linemap);
+  Optional<std::ofstream &> dump_lex_opt = Optional<std::ofstream &>::none ();
+  std::ofstream dump_lex_stream;
+  if (options.dump_option_enabled (CompileOptions::LEXER_DUMP))
+    {
+      dump_lex_stream.open (kLexDumpFile);
+      if (dump_lex_stream.fail ())
+	{
+	  rust_error_at (Linemap::unknown_location (),
+			 "cannot open %s:%m; ignored", kLexDumpFile);
+	}
+      auto stream = Optional<std::ofstream &>::some (dump_lex_stream);
+      dump_lex_opt = std::move (stream);
+    }
+
+  Lexer lex (filename, std::move (file_wrap), linemap, dump_lex_opt);
+
   Parser<Lexer> parser (lex);
 
   // generate crate from parser
@@ -464,11 +479,7 @@ Session::compile_crate (const char *filename)
   // handle crate name
   handle_crate_name (*ast_crate.get ());
 
-  // dump options
-  if (options.dump_option_enabled (CompileOptions::LEXER_DUMP))
-    {
-      dump_lex (parser);
-    }
+  // dump options except lexer dump
   if (options.dump_option_enabled (CompileOptions::PARSER_AST_DUMP))
     {
       dump_ast (parser, *ast_crate.get ());
@@ -835,24 +846,6 @@ Session::expansion (AST::Crate &crate)
   rust_debug ("finished expansion");
 }
 
-void
-Session::dump_lex (Parser<Lexer> &parser) const
-{
-  std::ofstream out;
-  out.open (kLexDumpFile);
-  if (out.fail ())
-    {
-      rust_error_at (Linemap::unknown_location (), "cannot open %s:%m; ignored",
-		     kLexDumpFile);
-      return;
-    }
-
-  // TODO: rewrite lexer dump or something so that it allows for the crate
-  // to already be parsed
-  parser.debug_dump_lex_output (out);
-  out.close ();
-}
-
 void
 Session::dump_ast (Parser<Lexer> &parser, AST::Crate &crate) const
 {
diff --git a/gcc/rust/util/rust-optional.h b/gcc/rust/util/rust-optional.h
index eba3a7886ac..d7349820b38 100644
--- a/gcc/rust/util/rust-optional.h
+++ b/gcc/rust/util/rust-optional.h
@@ -194,6 +194,7 @@ private:
 public:
   Optional (const Optional &other) = default;
   Optional (Optional &&other) = default;
+  Optional &operator= (Optional &&other) = default;
 
   static Optional<T &> some (T &value)
   {

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-21 12:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-21 12:03 [gcc r13-6245] gccrs: Improve lexer dump Arthur Cohen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).