From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wm1-x32e.google.com (mail-wm1-x32e.google.com [IPv6:2a00:1450:4864:20::32e]) by sourceware.org (Postfix) with ESMTPS id 8C1DC389EC63 for ; Tue, 31 Jan 2023 13:23:19 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 8C1DC389EC63 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=embecosm.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=embecosm.com Received: by mail-wm1-x32e.google.com with SMTP id bg13-20020a05600c3c8d00b003d9712b29d2so12448853wmb.2 for ; Tue, 31 Jan 2023 05:23:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=embecosm.com; s=google; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:from:to:cc:subject:date:message-id:reply-to; bh=e7U2EYWr8CvxwCv9Pv0DYC8KwLU+0TIZbEsKBqCSbgM=; b=EuHt/ZbwOgShW7YNgotIwifOqhZanwHMhizfOXGUQ+ohRv28ix05q1MlXm3IrvX+Pr c6I8HqTTufjcd341AJ1O8iremOyh6S86XCQxPJFzKk1FLJN9pMzhQxva2fwgEHh23Wqt edej6KD4Rfxh6Hl6v/3W5eHzeV8Itg+Z8/ah2M+DoGte0a+ffnSQwHVrz2wDQ4yGSo42 jve7N1cKYtzWFgdVos6w6MqnR4fFdXuzvgqXlcWgWic8MtpInmZ3Wj64OVHmnpGkzgSG XraAGLFYBirAFBpEPmYc82OwHPUzhXBFQsPTBSFFi7GMBL6ETbfQZeUOCJuHPfyKYmin Jn2Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=e7U2EYWr8CvxwCv9Pv0DYC8KwLU+0TIZbEsKBqCSbgM=; b=3RgM8Gef2/taLq68igbUMUXcHM7gC6qCh0IuMolOdo1y2Ox/aYQ+2mpUpjqw2rEuQo A13loYA/ISBsXk6EC4XRXEb3IrHmC0Nug/ADWtAVMSWKbQTKBQc0/eyZzo7vFVoE1Vw5 Czv4NFhNf9949INBTTMV8rsDTpmvWT+/ShNQIrnzrEai2pT6ONMPoLa32DcI3SjxPcQC C6p90BEOS7WVrQmcW+bH7X4vD10tWVRmo91clna/fKP6S8ODHXIHQuZDBHSDsZShPnKu JWsVxvwjhPLsw248FhypLEP+KzGA1kIa7DbnfuwxsMk2ShmI9Faq9jBRtJQfGOFr+GX2 3biA== X-Gm-Message-State: AFqh2krRkGkTPyslzuPoAm6TVW0UPTgFFLWki/pS4KfzMLAAtazKAPy0 z0MHoAd6ovLHEnbDop6ZGkdv X-Google-Smtp-Source: 
AMrXdXtBOXnf5Eo+1bD6ruxOVELQoLh+gmZOCH5sykzlWgHQmdTcjUvnUAFhp6ckpTx22/QBq+fN6Q== X-Received: by 2002:a05:600c:1e1f:b0:3db:e13:750c with SMTP id ay31-20020a05600c1e1f00b003db0e13750cmr52217004wmb.29.1675171398332; Tue, 31 Jan 2023 05:23:18 -0800 (PST) Received: from platypus.lan ([2001:861:5e4c:3bb0:6424:328a:1734:3249]) by smtp.gmail.com with ESMTPSA id n17-20020a1c7211000000b003dc3f07c876sm13264685wmc.46.2023.01.31.05.23.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 31 Jan 2023 05:23:17 -0800 (PST) From: Arthur Cohen To: gcc-patches@gcc.gnu.org Cc: gcc-rust@gcc.gnu.org, liushuyu Subject: [COMMITTED] gccrs: expand: eager evaluate macros inside builtin macros Date: Tue, 31 Jan 2023 14:27:03 +0100 Message-Id: <20230131132703.663677-1-arthur.cohen@embecosm.com> X-Mailer: git-send-email 2.39.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-15.3 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: liushuyu gcc/rust/ChangeLog: * ast/rust-ast.h (class MacroInvocData): Store expander as member of the class. (class Expr): Add `is_literal` virtual method * ast/rust-expr.h: Override `is_literal` for `LiteralExpr`s. * expand/rust-macro-builtins.cc (try_expand_macro_expression): New function. (try_extract_string_literal_from_fragment): Likewise. (try_expand_single_string_literal): Likewise. (try_expand_many_expr): Likewise. (parse_single_string_literal): Add macro expander as argument. (MacroBuiltin::include_bytes): Pass expander as argument to `parse_single_string_literal`. (MacroBuiltin::include_str): Likewise. (MacroBuiltin::compile_error): Likewise. (MacroBuiltin::include): Likewise. (MacroBuiltin::concat): Likewise and add better error handling. (MacroBuiltin::env): Likewise. 
* expand/rust-macro-expand.cc (MacroExpander::expand_invoc): Expand invocations recursively. gcc/testsuite/ChangeLog: * rust/compile/builtin_macro_concat.rs: Fix test error messages. * rust/compile/builtin_macro_env.rs: Likewise. Signed-off-by: Zixing Liu Tested on x86_64-pc-linux-gnu, committed on master. --- gcc/rust/ast/rust-ast.h | 12 + gcc/rust/ast/rust-expr.h | 2 + gcc/rust/expand/rust-macro-builtins.cc | 206 ++++++++++++++---- gcc/rust/expand/rust-macro-expand.cc | 1 + .../rust/compile/builtin_macro_concat.rs | 8 +- .../rust/compile/builtin_macro_env.rs | 4 +- 6 files changed, 182 insertions(+), 51 deletions(-) diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index 9e1b8b11373..ccabccd6aff 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -31,6 +31,7 @@ namespace Rust { typedef std::string Identifier; typedef int TupleIndex; struct Session; +struct MacroExpander; namespace AST { // foward decl: ast visitor @@ -951,6 +952,8 @@ public: virtual Location get_locus () const = 0; + virtual bool is_literal () const { return false; } + // HACK: strictly not needed, but faster than full downcast clone virtual bool is_expr_without_block () const = 0; @@ -1471,6 +1474,7 @@ private: // One way of parsing the macro. Probably not applicable for all macros. 
std::vector<std::unique_ptr<MetaItemInner> > parsed_items; bool parsed_to_meta_item = false; + MacroExpander *expander = nullptr; public: std::string as_string () const; @@ -1495,6 +1499,7 @@ public: path = other.path; token_tree = other.token_tree; parsed_to_meta_item = other.parsed_to_meta_item; + expander = other.expander; parsed_items.reserve (other.parsed_items.size ()); for (const auto &e : other.parsed_items) @@ -1523,6 +1528,13 @@ public: SimplePath &get_path () { return path; } const SimplePath &get_path () const { return path; } + void set_expander (MacroExpander *new_expander) { expander = new_expander; } + MacroExpander *get_expander () + { + rust_assert (expander); + return expander; + } + void set_meta_item_output (std::vector<std::unique_ptr<MetaItemInner> > new_items) { diff --git a/gcc/rust/ast/rust-expr.h b/gcc/rust/ast/rust-expr.h index 1966a590c94..c764f9c4c66 100644 --- a/gcc/rust/ast/rust-expr.h +++ b/gcc/rust/ast/rust-expr.h @@ -67,6 +67,8 @@ public: Location get_locus () const override final { return locus; } + bool is_literal () const override final { return true; } + Literal get_literal () const { return literal; } void accept_vis (ASTVisitor &vis) override; diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc index f5e3e188423..606f33c65bc 100644 --- a/gcc/rust/expand/rust-macro-builtins.cc +++ b/gcc/rust/expand/rust-macro-builtins.cc @@ -17,12 +17,14 @@ // <http://www.gnu.org/licenses/>.
#include "rust-macro-builtins.h" +#include "rust-ast.h" #include "rust-diagnostics.h" #include "rust-expr.h" #include "rust-session-manager.h" #include "rust-macro-invoc-lexer.h" #include "rust-lex.h" #include "rust-parse.h" +#include "rust-attribute-visitor.h" namespace Rust { namespace { @@ -61,13 +63,119 @@ macro_end_token (AST::DelimTokenTree &invoc_token_tree, return last_token_id; } +/* Expand and extract an expression from the macro */ + +static inline AST::ASTFragment +try_expand_macro_expression (AST::Expr *expr, MacroExpander *expander) +{ + rust_assert (expander); + + auto vis = Rust::AttrVisitor (*expander); + expr->accept_vis (vis); + return expander->take_expanded_fragment (vis); +} + +/* Expand and then extract a string literal from the macro */ + +static std::unique_ptr<AST::LiteralExpr> +try_extract_string_literal_from_fragment (const Location &parent_locus, + std::unique_ptr<AST::Expr> &node) +{ + auto maybe_lit = static_cast<AST::LiteralExpr *> (node.get ()); + if (!node || !node->is_literal () + || maybe_lit->get_lit_type () != AST::Literal::STRING) + { + rust_error_at (parent_locus, "argument must be a string literal"); + if (node) + rust_inform (node->get_locus (), "expanded from here"); + return nullptr; + } + return std::unique_ptr<AST::LiteralExpr> ( + static_cast<AST::LiteralExpr *> (node->clone_expr ().release ())); +} + +static std::unique_ptr<AST::LiteralExpr> +try_expand_single_string_literal (AST::Expr *input_expr, + const Location &invoc_locus, + MacroExpander *expander) +{ + auto nodes = try_expand_macro_expression (input_expr, expander); + if (nodes.is_error () || nodes.is_expression_fragment ()) + { + rust_error_at (input_expr->get_locus (), + "argument must be a string literal"); + return nullptr; + } + auto expr = nodes.take_expression_fragment (); + return try_extract_string_literal_from_fragment (input_expr->get_locus (), + expr); +} + +static std::vector<std::unique_ptr<AST::Expr>> +try_expand_many_expr (Parser<MacroInvocLexer> &parser, + const Location &invoc_locus, const TokenId last_token_id, + MacroExpander *expander, bool &has_error) +{ + auto restrictions =
Rust::ParseRestrictions (); + // stop parsing when encountered a braces/brackets + restrictions.expr_can_be_null = true; + // we can't use std::optional, so... + auto result = std::vector<std::unique_ptr<AST::Expr>> (); + auto empty_expr = std::vector<std::unique_ptr<AST::Expr>> (); + + auto first_token = parser.peek_current_token ()->get_id (); + if (first_token == COMMA) + { + rust_error_at (parser.peek_current_token ()->get_locus (), + "expected expression, found %<,%>"); + has_error = true; + return empty_expr; + } + + while (parser.peek_current_token ()->get_id () != last_token_id + && parser.peek_current_token ()->get_id () != END_OF_FILE) + { + auto expr = parser.parse_expr (AST::AttrVec (), restrictions); + // something must be so wrong that the expression could not be parsed + rust_assert (expr); + auto nodes = try_expand_macro_expression (expr.get (), expander); + if (nodes.is_error ()) + { + // not macro + result.push_back (std::move (expr)); + } + else if (!nodes.is_expression_fragment ()) + { + rust_error_at (expr->get_locus (), "expected expression"); + has_error = true; + return empty_expr; + } + else + { + result.push_back (nodes.take_expression_fragment ()); + } + + auto next_token = parser.peek_current_token (); + if (!parser.skip_token (COMMA) && next_token->get_id () != last_token_id) + { + rust_error_at (next_token->get_locus (), "expected token: %<,%>"); + // TODO: is this recoverable? to avoid crashing the parser in the next + // fragment we have to exit early here + has_error = true; + return empty_expr; + } + } + + return result; +} + /* Parse a single string literal from the given delimited token tree, and return the LiteralExpr for it. Allow for an optional trailing comma, but otherwise enforce that these are the only tokens.
*/ std::unique_ptr<AST::LiteralExpr> parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree, - Location invoc_locus) + Location invoc_locus, MacroExpander *expander) { MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); Parser<MacroInvocLexer> parser (lex); @@ -89,7 +197,13 @@ parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree, else if (parser.peek_current_token ()->get_id () == last_token_id) rust_error_at (invoc_locus, "macro takes 1 argument"); else - rust_error_at (invoc_locus, "argument must be a string literal"); + { + // when the expression does not seem to be a string literal, we then try + // to parse/expand it as macro to see if it expands to a string literal + auto expr = parser.parse_expr (); + lit_expr + = try_expand_single_string_literal (expr.get (), invoc_locus, expander); + } parser.skip_token (last_token_id); @@ -188,7 +302,8 @@ MacroBuiltin::include_bytes (Location invoc_locus, AST::MacroInvocData &invoc) /* Get target filename from the macro invocation, which is treated as a path relative to the include!-ing file (currently being compiled). */ auto lit_expr - = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus); + = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus, + invoc.get_expander ()); if (lit_expr == nullptr) return AST::ASTFragment::create_error (); @@ -230,7 +345,8 @@ MacroBuiltin::include_str (Location invoc_locus, AST::MacroInvocData &invoc) /* Get target filename from the macro invocation, which is treated as a path relative to the include!-ing file (currently being compiled).
*/ auto lit_expr - = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus); + = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus, + invoc.get_expander ()); if (lit_expr == nullptr) return AST::ASTFragment::create_error (); @@ -252,7 +368,8 @@ AST::ASTFragment MacroBuiltin::compile_error (Location invoc_locus, AST::MacroInvocData &invoc) { auto lit_expr - = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus); + = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus, + invoc.get_expander ()); if (lit_expr == nullptr) return AST::ASTFragment::create_error (); @@ -278,23 +395,30 @@ MacroBuiltin::concat (Location invoc_locus, AST::MacroInvocData &invoc) auto last_token_id = macro_end_token (invoc_token_tree, parser); /* NOTE: concat! could accept no argument, so we don't have any checks here */ - while (parser.peek_current_token ()->get_id () != last_token_id) + auto expanded_expr = try_expand_many_expr (parser, invoc_locus, last_token_id, + invoc.get_expander (), has_error); + for (auto &expr : expanded_expr) { - auto lit_expr = parser.parse_literal_expr (); - if (lit_expr) + if (!expr->is_literal ()) { - str += lit_expr->as_string (); + has_error = true; + rust_error_at (expr->get_locus (), "expected a literal"); + // diagnostics copied from rustc + rust_inform (expr->get_locus (), + "only literals (like %<\"foo\"%>, %<42%> and " + "%<3.14%>) can be passed to %<concat!()%>"); + continue; } - else + auto *literal = static_cast<AST::LiteralExpr *> (expr.get ()); + if (literal->get_lit_type () == AST::Literal::BYTE + || literal->get_lit_type () == AST::Literal::BYTE_STRING) { - auto current_token = parser.peek_current_token (); - rust_error_at (current_token->get_locus (), - "argument must be a constant literal"); has_error = true; - // Just crash if the current token can't be skipped - rust_assert (parser.skip_token (current_token->get_id ())); + rust_error_at (expr->get_locus (), + "cannot concatenate a byte string literal"); +
continue; } - parser.maybe_skip_token (COMMA); + str += literal->as_string (); } parser.skip_token (last_token_id); @@ -317,45 +441,36 @@ MacroBuiltin::env (Location invoc_locus, AST::MacroInvocData &invoc) Parser<MacroInvocLexer> parser (lex); auto last_token_id = macro_end_token (invoc_token_tree, parser); + std::unique_ptr<AST::LiteralExpr> error_expr = nullptr; + std::unique_ptr<AST::LiteralExpr> lit_expr = nullptr; + bool has_error = false; - if (parser.peek_current_token ()->get_id () != STRING_LITERAL) + auto expanded_expr = try_expand_many_expr (parser, invoc_locus, last_token_id, + invoc.get_expander (), has_error); + if (has_error) + return AST::ASTFragment::create_error (); + if (expanded_expr.size () < 1 || expanded_expr.size () > 2) { - if (parser.peek_current_token ()->get_id () == last_token_id) - rust_error_at (invoc_locus, "env! takes 1 or 2 arguments"); - else - rust_error_at (parser.peek_current_token ()->get_locus (), - "argument must be a string literal"); + rust_error_at (invoc_locus, "env! takes 1 or 2 arguments"); return AST::ASTFragment::create_error (); } - - auto lit_expr = parser.parse_literal_expr (); - auto comma_skipped = parser.maybe_skip_token (COMMA); - - std::unique_ptr<AST::LiteralExpr> error_expr = nullptr; - - if (parser.peek_current_token ()->get_id () != last_token_id) + if (expanded_expr.size () > 0) { - if (!comma_skipped) + if (!(lit_expr + = try_extract_string_literal_from_fragment (invoc_locus, + expanded_expr[0]))) { - rust_error_at (parser.peek_current_token ()->get_locus (), - "expected token: %<,%>"); return AST::ASTFragment::create_error (); } - if (parser.peek_current_token ()->get_id () != STRING_LITERAL) + } + if (expanded_expr.size () > 1) + { + if (!(error_expr + = try_extract_string_literal_from_fragment (invoc_locus, + expanded_expr[1]))) { - rust_error_at (parser.peek_current_token ()->get_locus (), - "argument must be a string literal"); return AST::ASTFragment::create_error (); } - - error_expr = parser.parse_literal_expr (); - parser.maybe_skip_token (COMMA); - } - - if
(parser.peek_current_token ()->get_id () != last_token_id) - { - rust_error_at (invoc_locus, "env! takes 1 or 2 arguments"); - return AST::ASTFragment::create_error (); } parser.skip_token (last_token_id); @@ -421,7 +536,8 @@ MacroBuiltin::include (Location invoc_locus, AST::MacroInvocData &invoc) /* Get target filename from the macro invocation, which is treated as a path relative to the include!-ing file (currently being compiled). */ auto lit_expr - = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus); + = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus, + invoc.get_expander ()); if (lit_expr == nullptr) return AST::ASTFragment::create_error (); diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index c68faba86ad..fbf33209c5f 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -163,6 +163,7 @@ MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon) rust_assert (ok); auto fragment = AST::ASTFragment::create_error (); + invoc_data.set_expander (this); if (rules_def->is_builtin ()) fragment diff --git a/gcc/testsuite/rust/compile/builtin_macro_concat.rs b/gcc/testsuite/rust/compile/builtin_macro_concat.rs index 9b878af764d..3b8eb2dcda7 100644 --- a/gcc/testsuite/rust/compile/builtin_macro_concat.rs +++ b/gcc/testsuite/rust/compile/builtin_macro_concat.rs @@ -6,12 +6,12 @@ macro_rules! concat { fn main() { let not_literal = "identifier"; concat!(); - concat! (,); // { dg-error "argument must be a constant literal" } - concat!(not_literal); // { dg-error "argument must be a constant literal" } + concat! (,); // { dg-error "expected expression, found .,." } + concat!(not_literal); // { dg-error "expected a literal" } concat!("message"); concat!("message",); concat!("message", 1, true, false, 1.0, 10usize, 2000u64); concat!("message", 1, true, false, 1.0, 10usize, 2000u64,); - concat! 
("m", not_literal); // { dg-error "argument must be a constant literal" } - concat!(not_literal invalid 'm' !!,); // { dg-error "argument must be a constant literal" } + concat! ("m", not_literal); // { dg-error "expected a literal" } + concat!(not_literal invalid 'm' !!,); // { dg-error "expected token: .,." } } diff --git a/gcc/testsuite/rust/compile/builtin_macro_env.rs b/gcc/testsuite/rust/compile/builtin_macro_env.rs index 289e6db2cf1..337f0ae4316 100644 --- a/gcc/testsuite/rust/compile/builtin_macro_env.rs +++ b/gcc/testsuite/rust/compile/builtin_macro_env.rs @@ -7,13 +7,13 @@ fn main () { let message = "error message"; env! (message); // { dg-error "argument must be a string literal" "" } env! (); // { dg-error "env! takes 1 or 2 arguments" "" } - env! (,); // { dg-error "argument must be a string literal" "" } + env! (,); // { dg-error "expected expression, found .,." "" } env! (1); // { dg-error "argument must be a string literal" "" } env! ("NOT_DEFINED"); // { dg-error "environment variable 'NOT_DEFINED' not defined" "" } env! ("NOT_DEFINED",); // { dg-error "environment variable 'NOT_DEFINED' not defined" "" } env! ("NOT_DEFINED", 1); // { dg-error "argument must be a string literal" "" } env! ("NOT_DEFINED", "two", "three"); // { dg-error "env! takes 1 or 2 arguments" "" } - env! ("NOT_DEFINED" "expected error message"); // { dg-error "expected token: ','" "" } + env! ("NOT_DEFINED" "expected error message"); // { dg-error "expected token: .,." "" } env! ("NOT_DEFINED", "expected error message"); // { dg-error "expected error message" "" } env! ("NOT_DEFINED", "expected error message",); // { dg-error "expected error message" "" } env! (1, "two"); // { dg-error "argument must be a string literal" "" } -- 2.39.1