public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-2893] c-family: Implement pragma_lex () for preprocess-only mode
@ 2023-08-01  1:57 Lewis Hyatt
  0 siblings, 0 replies; only message in thread
From: Lewis Hyatt @ 2023-08-01  1:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e664ea960a200aac88ffc3c7fb9fe55ea4df2011

commit r14-2893-ge664ea960a200aac88ffc3c7fb9fe55ea4df2011
Author: Lewis Hyatt <lhyatt@gmail.com>
Date:   Fri Jun 30 18:23:24 2023 -0400

    c-family: Implement pragma_lex () for preprocess-only mode
    
    In order to support processing #pragma in preprocess-only mode (-E or
    -save-temps for gcc/g++), we need a way to obtain the #pragma tokens from
    libcpp. In full compilation modes, this is accomplished by calling
    pragma_lex (), which is a symbol that must be exported by the frontend, and
    which is currently implemented for C and C++. Neither of those frontends
    initializes its parser machinery in preprocess-only mode, and consequently
    pragma_lex () does not work in this case.
    
    Address that by adding a new function c_init_preprocess () for the frontends
    to implement, which arranges for pragma_lex () to work in preprocess-only
    mode, and adjusting pragma_lex () accordingly.
    
    In preprocess-only mode, the preprocessor is accustomed to controlling the
    interaction with libcpp, and it only knows about tokens that it has called
    into libcpp itself to obtain. Since it still needs to see the tokens
    obtained by pragma_lex () so that they can be streamed to the output, also
    adjust c_lex_with_flags () and related functions in c-family/c-lex.cc to
    inform the preprocessor about any tokens it won't be aware of.
    
    Currently, there is one place where we are already supporting #pragma in
    preprocess-only mode, namely the handling of `#pragma GCC diagnostic'.  That
    was done by directly interfacing with libcpp, rather than making use of
    pragma_lex (). Now that pragma_lex () works, that code is no longer
    necessary; remove it.
    
    gcc/c-family/ChangeLog:
    
            * c-common.h (c_init_preprocess): Declare new function.
            * c-opts.cc (c_common_init): Call it.
            * c-lex.cc (cb_def_pragma): Add a comment.
            (get_token): New function wrapping cpp_get_token.
            (c_lex_with_flags): Use the new wrapper function to support
            obtaining tokens in preprocess_only mode.
            (lex_string): Likewise.
            * c-pragma.cc (pragma_diagnostic_lex_normal): Rename to...
            (pragma_diagnostic_lex): ...this.
            (pragma_diagnostic_lex_pp): Remove.
            (handle_pragma_diagnostic_impl): Call pragma_diagnostic_lex () in
            all modes.
            (c_pp_invoke_early_pragma_handler): Adapt to support pragma_lex ()
            usage.
            * c-pragma.h (pragma_lex_discard_to_eol): Declare.
    
    gcc/c/ChangeLog:
    
            * c-parser.cc (pragma_lex_discard_to_eol): New function.
            (c_init_preprocess): New function.
    
    gcc/cp/ChangeLog:
    
            * parser.cc (c_init_preprocess): New function.
            (maybe_read_tokens_for_pragma_lex): New function.
            (pragma_lex): Support preprocess-only mode.
            (pragma_lex_discard_to_eol): New function.

Diff:
---
 gcc/c-family/c-common.h  |  3 +++
 gcc/c-family/c-lex.cc    | 38 +++++++++++++++++++++++++++-----
 gcc/c-family/c-opts.cc   |  1 +
 gcc/c-family/c-pragma.cc | 56 +++++++-----------------------------------------
 gcc/c-family/c-pragma.h  |  2 ++
 gcc/c/c-parser.cc        | 21 ++++++++++++++++++
 gcc/cp/parser.cc         | 45 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 113 insertions(+), 53 deletions(-)

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index b5ef5ff6b2c..78fc5248ba6 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -990,6 +990,9 @@ extern void c_parse_file (void);
 
 extern void c_parse_final_cleanups (void);
 
+/* This initializes for preprocess-only mode.  */
+extern void c_init_preprocess (void);
+
 /* These macros provide convenient access to the various _STMT nodes.  */
 
 /* Nonzero if a given STATEMENT_LIST represents the outermost binding
diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc
index dcd061c7cb1..d8aa2907c51 100644
--- a/gcc/c-family/c-lex.cc
+++ b/gcc/c-family/c-lex.cc
@@ -249,6 +249,10 @@ cb_def_pragma (cpp_reader *pfile, location_t loc)
       location_t fe_loc = loc;
 
       space = name = (const unsigned char *) "";
+
+      /* N.B.  It's fine to call cpp_get_token () directly here (rather than our
+	 local wrapper get_token ()), because this callback is not used with
+	 flag_preprocess_only==true.  */
       s = cpp_get_token (pfile);
       if (s->type != CPP_EOF)
 	{
@@ -284,8 +288,32 @@ cb_undef (cpp_reader *pfile, location_t loc, cpp_hashnode *node)
 			 (const char *) NODE_NAME (node));
 }
 
+/* Wrapper around cpp_get_token_with_location to stream the token to the
+   preprocessor so it can output it.  This is necessary with
+   flag_preprocess_only if we are obtaining tokens here instead of from the loop
+   in c-ppoutput.cc, such as while processing a #pragma.  */
+
+static const cpp_token *
+get_token (cpp_reader *pfile, location_t *loc = nullptr)
+{
+  if (flag_preprocess_only)
+    {
+      location_t x;
+      if (!loc)
+	loc = &x;
+      const auto tok = cpp_get_token_with_location (pfile, loc);
+      c_pp_stream_token (pfile, tok, *loc);
+      return tok;
+    }
+  else
+    return cpp_get_token_with_location (pfile, loc);
+}
+
 /* Wrapper around cpp_get_token to skip CPP_PADDING tokens
-   and not consume CPP_EOF.  */
+   and not consume CPP_EOF.  This does not perform the optional
+   streaming in preprocess_only mode, so is suitable to be used
+   when processing builtin expansions such as c_common_has_attribute.  */
+
 static const cpp_token *
 get_token_no_padding (cpp_reader *pfile)
 {
@@ -492,7 +520,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
 
   timevar_push (TV_CPP);
  retry:
-  tok = cpp_get_token_with_location (parse_in, loc);
+  tok = get_token (parse_in, loc);
   type = tok->type;
 
  retry_after_at:
@@ -566,7 +594,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
 	  location_t newloc;
 
 	retry_at:
-	  tok = cpp_get_token_with_location (parse_in, &newloc);
+	  tok = get_token (parse_in, &newloc);
 	  type = tok->type;
 	  switch (type)
 	    {
@@ -716,7 +744,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
 	{
 	  do
 	    {
-	      tok = cpp_get_token_with_location (parse_in, loc);
+	      tok = get_token (parse_in, loc);
 	      type = tok->type;
 	    }
 	  while (type == CPP_PADDING || type == CPP_COMMENT);
@@ -1308,7 +1336,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
   bool objc_at_sign_was_seen = false;
 
  retry:
-  tok = cpp_get_token (parse_in);
+  tok = get_token (parse_in);
   switch (tok->type)
     {
     case CPP_PADDING:
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index af19140e382..4961af63de8 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1232,6 +1232,7 @@ c_common_init (void)
   if (flag_preprocess_only)
     {
       c_finish_options ();
+      c_init_preprocess ();
       preprocess_file (parse_in);
       return false;
     }
diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 0d2b333cebb..73d59df3bf4 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -840,11 +840,11 @@ public:
 
 };
 
-/* When compiling normally, use pragma_lex () to obtain the needed tokens.
-   This will call into either the C or C++ frontends as appropriate.  */
+/* This will call into either the C or C++ frontends as appropriate to get
+   tokens from libcpp for the pragma.  */
 
 static void
-pragma_diagnostic_lex_normal (pragma_diagnostic_data *result)
+pragma_diagnostic_lex (pragma_diagnostic_data *result)
 {
   result->clear ();
   tree x;
@@ -866,46 +866,6 @@ pragma_diagnostic_lex_normal (pragma_diagnostic_data *result)
   result->valid = true;
 }
 
-/* When preprocessing only, pragma_lex () is not available, so obtain the
-   tokens directly from libcpp.  We also need to inform the token streamer
-   about all tokens we lex ourselves here, so it outputs them too; this is
-   done by calling c_pp_stream_token () for each.
-
-   ???  If we need to support more pragmas in the future, maybe initialize
-   this_parser with the pragma tokens and call pragma_lex () instead?  */
-
-static void
-pragma_diagnostic_lex_pp (pragma_diagnostic_data *result)
-{
-  result->clear ();
-
-  auto tok = cpp_get_token_with_location (parse_in, &result->loc_kind);
-  c_pp_stream_token (parse_in, tok, result->loc_kind);
-  if (!(tok->type == CPP_NAME || tok->type == CPP_KEYWORD))
-    return;
-  const unsigned char *const kind_u = cpp_token_as_text (parse_in, tok);
-  result->set_kind ((const char *)kind_u);
-  if (result->pd_kind == pragma_diagnostic_data::PK_INVALID)
-    return;
-
-  if (result->needs_option ())
-    {
-      tok = cpp_get_token_with_location (parse_in, &result->loc_option);
-      c_pp_stream_token (parse_in, tok, result->loc_option);
-      if (tok->type != CPP_STRING)
-	return;
-      cpp_string str;
-      if (!cpp_interpret_string_notranslate (parse_in, &tok->val.str, 1, &str,
-					     CPP_STRING)
-	  || !str.len)
-	return;
-      result->option_str = (const char *)str.text;
-      result->own_option_str = true;
-    }
-
-  result->valid = true;
-}
-
 /* Handle #pragma GCC diagnostic.  Early mode is used by frontends (such as C++)
    that do not process the deferred pragma while they are consuming tokens; they
    can use early mode to make sure diagnostics affecting the preprocessor itself
@@ -916,10 +876,7 @@ handle_pragma_diagnostic_impl ()
   static const bool want_diagnostics = (is_pp || !early);
 
   pragma_diagnostic_data data;
-  if (is_pp)
-    pragma_diagnostic_lex_pp (&data);
-  else
-    pragma_diagnostic_lex_normal (&data);
+  pragma_diagnostic_lex (&data);
 
   if (!data.kind_str)
     {
@@ -1808,7 +1765,10 @@ c_pp_invoke_early_pragma_handler (unsigned int id)
 {
   const auto data = &registered_pp_pragmas[id - PRAGMA_FIRST_EXTERNAL];
   if (data->early_handler)
-    data->early_handler (parse_in);
+    {
+      data->early_handler (parse_in);
+      pragma_lex_discard_to_eol ();
+    }
 }
 
 /* Set up front-end pragmas.  */
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index 9cc95ab3ee3..198fa7723e5 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -263,7 +263,9 @@ extern tree maybe_apply_renaming_pragma (tree, tree);
 extern void maybe_apply_pragma_scalar_storage_order (tree);
 extern void add_to_renaming_pragma_list (tree, tree);
 
+/* These are to be implemented in each frontend that needs them.  */
 extern enum cpp_ttype pragma_lex (tree *, location_t *loc = NULL);
+extern void pragma_lex_discard_to_eol ();
 
 /* Flags for use with c_lex_with_flags.  The values here were picked
    so that 0 means to translate and join strings.  */
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 80920b31f83..cf82b0306d1 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -13376,6 +13376,18 @@ pragma_lex (tree *value, location_t *loc)
   return ret;
 }
 
+void
+pragma_lex_discard_to_eol ()
+{
+  cpp_ttype type;
+  do
+    {
+      type = c_parser_peek_token (the_parser)->type;
+      gcc_assert (type != CPP_EOF);
+      c_parser_consume_token (the_parser);
+    } while (type != CPP_PRAGMA_EOL);
+}
+
 static void
 c_parser_pragma_pch_preprocess (c_parser *parser)
 {
@@ -24761,6 +24773,15 @@ c_parse_file (void)
   the_parser = NULL;
 }
 
+void
+c_init_preprocess (void)
+{
+  /* Create a parser for use by pragma_lex during preprocessing.  */
+  the_parser = ggc_alloc<c_parser> ();
+  memset (the_parser, 0, sizeof (c_parser));
+  the_parser->tokens = &the_parser->tokens_buf[0];
+}
+
 /* Parse the body of a function declaration marked with "__RTL".
 
    The RTL parser works on the level of characters read from a
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index b1d2e141e35..2e245865ea7 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -765,6 +765,15 @@ cp_lexer_new_main (void)
   return lexer;
 }
 
+/* Create a lexer and parser to be used during preprocess-only mode.
+   This will be filled with tokens to parse when needed by pragma_lex ().  */
+void
+c_init_preprocess ()
+{
+  gcc_assert (!the_parser);
+  the_parser = cp_parser_new (cp_lexer_alloc ());
+}
+
 /* Create a new lexer whose token stream is primed with the tokens in
    CACHE.  When these tokens are exhausted, no new tokens will be read.  */
 
@@ -49694,11 +49703,37 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context, bool *if_p)
   return ret;
 }
 
+/* Helper for pragma_lex in preprocess-only mode; in this mode, we have not
+   populated the lexer with any tokens (the tokens rather being read by
+   c-ppoutput.cc's machinery), so we need to read enough tokens now to handle
+   a pragma.  */
+static void
+maybe_read_tokens_for_pragma_lex ()
+{
+  const auto lexer = the_parser->lexer;
+  if (!lexer->buffer->is_empty ())
+    return;
+
+  /* Read the rest of the tokens comprising the pragma line.  */
+  cp_token *tok;
+  do
+    {
+      tok = vec_safe_push (lexer->buffer, cp_token ());
+      cp_lexer_get_preprocessor_token (C_LEX_STRING_NO_JOIN, tok);
+      gcc_assert (tok->type != CPP_EOF);
+    } while (tok->type != CPP_PRAGMA_EOL);
+  lexer->next_token = lexer->buffer->address ();
+  lexer->last_token = lexer->next_token + lexer->buffer->length () - 1;
+}
+
 /* The interface the pragma parsers have to the lexer.  */
 
 enum cpp_ttype
 pragma_lex (tree *value, location_t *loc)
 {
+  if (flag_preprocess_only)
+    maybe_read_tokens_for_pragma_lex ();
+
   cp_token *tok = cp_lexer_peek_token (the_parser->lexer);
   enum cpp_ttype ret = tok->type;
 
@@ -49721,6 +49756,16 @@ pragma_lex (tree *value, location_t *loc)
   return ret;
 }
 
+void
+pragma_lex_discard_to_eol ()
+{
+  /* We have already read all the tokens, so we just need to discard
+     them here.  */
+  const auto lexer = the_parser->lexer;
+  lexer->next_token = lexer->last_token;
+  lexer->buffer->truncate (0);
+}
+
 \f
 /* External interface.  */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-01  1:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-01  1:57 [gcc r14-2893] c-family: Implement pragma_lex () for preprocess-only mode Lewis Hyatt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).