From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wr1-x430.google.com (mail-wr1-x430.google.com [IPv6:2a00:1450:4864:20::430]) by sourceware.org (Postfix) with ESMTPS id A63C0385277F; Wed, 24 Aug 2022 12:00:44 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org A63C0385277F Authentication-Results: sourceware.org; dmarc=pass (p=quarantine dis=none) header.from=googlemail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=googlemail.com Received: by mail-wr1-x430.google.com with SMTP id bs25so20504009wrb.2; Wed, 24 Aug 2022 05:00:44 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=googlemail.com; s=20210112; h=content-transfer-encoding:mime-version:reply-to:references :in-reply-to:message-id:date:subject:cc:to:from:from:to:cc; bh=u+P/lET5eck5YWq9+lbetr5nISOUb8hr9dlhdBdnjIc=; b=JGmu/Bc3Y1tKXfoy9qeZJ7morBtB9Kx9WVyZufQQxILbZVw8FigH/2w0OchkJuXDKD UavtiXS1TuTtIoxPSO/DXBAh+1NfSPK/40lfcY9TKI6ocz2M2fyd+HeUCD37XzzEp71q 5y8alOT51gTITyf8ROh/n5Y6XLjO0+ukSMjpZ2KcQPUFnZdwdmZOcaB/qQ5dCry5cyIU 5Qdd+dhPbP+NloMZ1JkIyIKvjCFpBKOu8wE5NUrPEtYAiuY++mBy3ro0Mf0KVTFs5dw0 zzYqeVcbWtPbSfkeA5nq45sxuyyxLVCs+XhzreePWX54jpmnPFbb1XB7ujGmB6+MtHeb FaqQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:reply-to:references :in-reply-to:message-id:date:subject:cc:to:from:x-gm-message-state :from:to:cc; bh=u+P/lET5eck5YWq9+lbetr5nISOUb8hr9dlhdBdnjIc=; b=bWmS3kOw/dxXlzyj7avurX3s25Irex/7Zh5Q3Wj1NXko1kKuL0EqUeqJfumEUDHffU yJ+Wu4AO9Zp1lfZzfb/8eb5QFT62E8TFk6GTx+Lq4MG+wubreQ3ufC6s07GDcE/d3tCT UI5gQpEPNKqujGsCjY+1AMnsnDwhFgb1lcXKyUvu9owIPmCvfyFfEIbn6jnyAmT0RpQI GoiF/7oplXJntZ7VlQv6XccWX8EQKLVFjMT5mOD4z1VWtWGskxk40BNsv9X8FJpxxlRJ imVNekOYFUxLaqx4ld7IGtocdeJEE1oh4njnM0271kaDPWf5n3cOKKq86Ho9KveJTisL 43Fg== X-Gm-Message-State: ACgBeo1XfdUQHqSYJelmnzD4QtMnu79AOjmVgcu4TkOg/nbBeXWcptMZ v3DlIynkZFcD4OP6omHW21q4WbpQgWM= X-Google-Smtp-Source: 
AA6agR6uWBeLkvl4KoQ2YZdENEebZuddaYwI4MKI14NWV93UPlM83k9/f446W8EqtaoYIKsP78mfFA== X-Received: by 2002:adf:f804:0:b0:225:1d9c:c40e with SMTP id s4-20020adff804000000b002251d9cc40emr15703552wrp.486.1661342441062; Wed, 24 Aug 2022 05:00:41 -0700 (PDT) Received: from localhost.localdomain ([86.14.124.218]) by smtp.gmail.com with ESMTPSA id cc19-20020a5d5c13000000b0022571d43d32sm1697676wrb.21.2022.08.24.05.00.39 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 24 Aug 2022 05:00:40 -0700 (PDT) From: herron.philip@googlemail.com X-Google-Original-From: philip.herron@embecosm.com To: gcc-patches@gcc.gnu.org Cc: gcc-rust@gcc.gnu.org, The Other , Philip Herron Subject: [PATCH Rust front-end v2 10/37] gccrs: Add Parser for Rust front-end Date: Wed, 24 Aug 2022 12:59:29 +0100 Message-Id: <20220824115956.737931-11-philip.herron@embecosm.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220824115956.737931-1-philip.herron@embecosm.com> References: <20220824115956.737931-1-philip.herron@embecosm.com> Reply-To: philip.herron@embecosm.com MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-11.4 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: The Other This is a Pratt-style parser for Rust implementing all of the AST. rust-parse-impl.h is the implementation of the parser, which is a template so we can give it any ManagedTokenSource and avoid virtual calls. The downside is that it takes time to compile when used. 
see: https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing Co-authored-by: Philip Herron Co-authored-by: Arthur Cohen (lexer); + + auto token = parser.peek_current_token (); + if (token->get_id () != IDENTIFIER) + { + return false; + } + + key = token->get_str (); + + rust_assert (parser.skip_token (IDENTIFIER)); + token = parser.peek_current_token (); + + switch (token->get_id ()) + { + case END_OF_FILE: + // we're done parsing, we had a valid key, return happily + return true; + case EQUAL: + // We have an equal sign: Skip the token and parse an identifier + { + rust_assert (parser.skip_token (EQUAL)); + + auto value_expr = parser.parse_literal_expr (); + // We had an equal sign but no value, error out + if (!value_expr) + return false; + + if (value_expr->get_lit_type () != AST::Literal::LitType::STRING) + return false; + + value = value_expr->get_literal ().as_string (); + return true; + } + default: + return false; + } +} +} // namespace Rust + +#if CHECKING_P + +namespace selftest { + +void +rust_cfg_parser_test (void) +{ + std::string key; + std::string value; + + auto input = std::string ("key_no_value"); + + ASSERT_TRUE (Rust::parse_cfg_option (input, key, value)); + ASSERT_EQ (key, "key_no_value"); + ASSERT_TRUE (value.empty ()); + + input = std::string ("k=\"v\""); + + ASSERT_TRUE (Rust::parse_cfg_option (input, key, value)); + ASSERT_EQ (key, "k"); + ASSERT_EQ (value, "v"); + + // values should be between double quotes + input = std::string ("k=v"); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + // No value is an error if there is an equal sign + input = std::string ("k="); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + // No key is an error + input = std::string ("="); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + input = std::string ("=value"); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + // values that are not string literals are an error + input = std::string 
("key=b\"a\""); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + input = std::string ("key='v'"); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + input = std::string ("key=155"); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + input = std::string ("key=3.14"); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); + + // kebab case is not valid for an identifier + input = std::string ("key-no-value"); + ASSERT_FALSE (Rust::parse_cfg_option (input, key, value)); +} +} // namespace selftest + +#endif // CHECKING_P diff --git a/gcc/rust/parse/rust-cfg-parser.h b/gcc/rust/parse/rust-cfg-parser.h new file mode 100644 index 00000000000..895e058057b --- /dev/null +++ b/gcc/rust/parse/rust-cfg-parser.h @@ -0,0 +1,54 @@ +/* This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef RUST_CFG_PARSER_H +#define RUST_CFG_PARSER_H + +#include "config.h" +#include "rust-system.h" +#include "coretypes.h" + +namespace Rust { +/** + * Parse a `key` or `key="value"` pair given to the `-frust-cfg` compiler + * option. + * + * The format is as follows: + * + * -frust-cfg= + * + * cfg_input: identifier | identifier '=' '"' identifier '"' + * + * @param input User input given to the -frust-cfg option + * @param key String in which to store the parsed `key`. 
+ * @param value String in which to store the parsed `value` if it exists + * + * @return false if the given input was invalid, true otherwise + */ +bool +parse_cfg_option (std::string &input, std::string &key, std::string &value); +} // namespace Rust + +#if CHECKING_P + +namespace selftest { +extern void +rust_cfg_parser_test (void); +} // namespace selftest + +#endif // CHECKING_P + +#endif // RUST_CFG_PARSER_H diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h new file mode 100644 index 00000000000..d925aca05e9 --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl.h @@ -0,0 +1,14927 @@ +// Copyright (C) 2020-2022 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before + * Parser was a template). Separated from rust-parse.h for readability. */ + +/* DO NOT INCLUDE ANYWHERE - this is automatically included with rust-parse.h + * This is also the reason why there are no include guards. */ + +#define INCLUDE_ALGORITHM +#include "rust-diagnostics.h" +#include "rust-make-unique.h" + +namespace Rust { +// Left binding powers of operations. 
+enum binding_powers +{ + // Highest priority + LBP_HIGHEST = 100, + + LBP_PATH = 95, + + LBP_METHOD_CALL = 90, + + LBP_FIELD_EXPR = 85, + + LBP_FUNCTION_CALL = 80, + LBP_ARRAY_REF = LBP_FUNCTION_CALL, + + LBP_QUESTION_MARK = 75, // unary postfix - counts as left + + LBP_UNARY_PLUS = 70, // Used only when the null denotation is + + LBP_UNARY_MINUS = LBP_UNARY_PLUS, // Used only when the null denotation is - + LBP_UNARY_ASTERISK = LBP_UNARY_PLUS, // deref operator - unary prefix + LBP_UNARY_EXCLAM = LBP_UNARY_PLUS, + LBP_UNARY_AMP = LBP_UNARY_PLUS, + LBP_UNARY_AMP_MUT = LBP_UNARY_PLUS, + + LBP_AS = 65, + + LBP_MUL = 60, + LBP_DIV = LBP_MUL, + LBP_MOD = LBP_MUL, + + LBP_PLUS = 55, + LBP_MINUS = LBP_PLUS, + + LBP_L_SHIFT = 50, + LBP_R_SHIFT = LBP_L_SHIFT, + + LBP_AMP = 45, + + LBP_CARET = 40, + + LBP_PIPE = 35, + + LBP_EQUAL = 30, + LBP_NOT_EQUAL = LBP_EQUAL, + LBP_SMALLER_THAN = LBP_EQUAL, + LBP_SMALLER_EQUAL = LBP_EQUAL, + LBP_GREATER_THAN = LBP_EQUAL, + LBP_GREATER_EQUAL = LBP_EQUAL, + + LBP_LOGICAL_AND = 25, + + LBP_LOGICAL_OR = 20, + + LBP_DOT_DOT = 15, + LBP_DOT_DOT_EQ = LBP_DOT_DOT, + + // TODO: note all these assig operators are RIGHT associative! + LBP_ASSIG = 10, + LBP_PLUS_ASSIG = LBP_ASSIG, + LBP_MINUS_ASSIG = LBP_ASSIG, + LBP_MULT_ASSIG = LBP_ASSIG, + LBP_DIV_ASSIG = LBP_ASSIG, + LBP_MOD_ASSIG = LBP_ASSIG, + LBP_AMP_ASSIG = LBP_ASSIG, + LBP_PIPE_ASSIG = LBP_ASSIG, + LBP_CARET_ASSIG = LBP_ASSIG, + LBP_L_SHIFT_ASSIG = LBP_ASSIG, + LBP_R_SHIFT_ASSIG = LBP_ASSIG, + + // return, break, and closures as lowest priority? 
+ LBP_RETURN = 5, + LBP_BREAK = LBP_RETURN, + LBP_CLOSURE = LBP_RETURN, // unary prefix operators + +#if 0 + // rust precedences + PREC_CLOSURE = -40, // used for closures + PREC_JUMP = -30, // used for break, continue, return, and yield + PREC_RANGE = -10, // used for range (although weird comment in rustc about this) + PREC_BINOP = FROM_ASSOC_OP, + // used for binary operators mentioned below - also cast, colon (type), assign, assign_op + PREC_PREFIX = 50, // used for box, address_of, let, unary (again, weird comment on let) + PREC_POSTFIX = 60, // used for await, call, method call, field, index, try, inline asm, macro invocation + PREC_PAREN = 99, // used for array, repeat, tuple, literal, path, paren, if, while, for, 'loop', match, block, try block, async, struct + PREC_FORCE_PAREN = 100, +#endif + + // lowest priority + LBP_LOWEST = 0 +}; + +/* Returns whether the token can start a type (i.e. there is a valid type + * beginning with the token). */ +inline bool +can_tok_start_type (TokenId id) +{ + switch (id) + { + case EXCLAM: + case LEFT_SQUARE: + case LEFT_ANGLE: + case UNDERSCORE: + case ASTERISK: + case AMP: + case LIFETIME: + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: + case LEFT_PAREN: + case FOR: + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + case IMPL: + case DYN: + case QUESTION_MARK: + return true; + default: + return false; + } +} + +/* Returns whether the token id is (or is likely to be) a right angle bracket. + * i.e. '>', '>>', '>=' and '>>=' tokens. */ +inline bool +is_right_angle_tok (TokenId id) +{ + switch (id) + { + case RIGHT_ANGLE: + case RIGHT_SHIFT: + case GREATER_OR_EQUAL: + case RIGHT_SHIFT_EQ: + return true; + default: + return false; + } +} + +/* HACK-y special handling for skipping a right angle token at the end of + * generic arguments. 
+ * Currently, this replaces the "current token" with one that is identical + * except has the leading '>' removed (e.g. '>>' becomes '>'). This is bad + * for several reasons - it modifies the token stream to something that + * actually doesn't make syntactic sense, it may not work if the token + * has already been skipped, etc. It was done because it would not + * actually require inserting new items into the token stream (which I + * thought would take more work to not mess up) and because I wasn't sure + * if the "already seen right angle" flag in the parser would work + * correctly. + * Those two other approaches listed are in my opinion actually better + * long-term - insertion is probably best as it reflects syntactically + * what occurs. On the other hand, I need to do a code audit to make sure + * that insertion doesn't mess anything up. So that's a FIXME. */ +template +bool +Parser::skip_generics_right_angle () +{ + /* OK, new great idea. Have a lexer method called + * "split_current_token(TokenType newLeft, TokenType newRight)", which is + * called here with whatever arguments are appropriate. That lexer method + * handles "replacing" the current token with the "newLeft" and "inserting" + * the next token with the "newRight" (and creating a location, etc. 
for it) + */ + + /* HACK: special handling for right shift '>>', greater or equal '>=', and + * right shift assig */ + // '>>=' + const_TokenPtr tok = lexer.peek_token (); + switch (tok->get_id ()) + { + case RIGHT_ANGLE: + // this is good - skip token + lexer.skip_token (); + return true; + case RIGHT_SHIFT: { + // new implementation that should be better + lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE); + lexer.skip_token (); + return true; + } + case GREATER_OR_EQUAL: { + // new implementation that should be better + lexer.split_current_token (RIGHT_ANGLE, EQUAL); + lexer.skip_token (); + return true; + } + case RIGHT_SHIFT_EQ: { + // new implementation that should be better + lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL); + lexer.skip_token (); + return true; + } + default: + add_error (Error (tok->get_locus (), + "expected %<>%> at end of generic argument - found %qs", + tok->get_token_description ())); + return false; + } +} + +/* Gets left binding power for specified token. + * Not suitable for use at the moment or possibly ever because binding power + * cannot be purely determined from operator token with Rust grammar - e.g. + * method call and field access have + * different left binding powers but the same operator token. */ +template +int +Parser::left_binding_power (const_TokenPtr token) +{ + // HACK: called with "peek_token()", so lookahead is "peek_token(1)" + switch (token->get_id ()) + { + /* TODO: issue here - distinguish between method calls and field access + * somehow? Also would have to distinguish between paths and function + * calls (:: operator), maybe more stuff. */ + /* Current plan for tackling LBP - don't do it based on token, use + * lookahead. Or alternatively, only use Pratt parsing for OperatorExpr + * and handle other expressions without it. rustc only considers + * arithmetic, logical/relational, 'as', + * '?=', ranges, colons, and assignment to have operator precedence and + * associativity rules applicable. 
It then has + * a separate "ExprPrecedence" that also includes binary operators. */ + + // TODO: handle operator overloading - have a function replace the + // operator? + + /*case DOT: + return LBP_DOT;*/ + + case SCOPE_RESOLUTION: + rust_debug ( + "possible error - looked up LBP of scope resolution operator. should " + "be handled elsewhere."); + return LBP_PATH; + + /* Resolved by lookahead HACK that should work with current code. If next + * token is identifier and token after that isn't parenthesised expression + * list, it is a field reference. */ + case DOT: + if (lexer.peek_token (1)->get_id () == IDENTIFIER + && lexer.peek_token (2)->get_id () != LEFT_PAREN) + { + return LBP_FIELD_EXPR; + } + return LBP_METHOD_CALL; + + case LEFT_PAREN: + return LBP_FUNCTION_CALL; + + case LEFT_SQUARE: + return LBP_ARRAY_REF; + + // postfix question mark (i.e. error propagation expression) + case QUESTION_MARK: + return LBP_QUESTION_MARK; + + case AS: + return LBP_AS; + + case ASTERISK: + return LBP_MUL; + case DIV: + return LBP_DIV; + case PERCENT: + return LBP_MOD; + + case PLUS: + return LBP_PLUS; + case MINUS: + return LBP_MINUS; + + case LEFT_SHIFT: + return LBP_L_SHIFT; + case RIGHT_SHIFT: + return LBP_R_SHIFT; + + // binary & operator + case AMP: + return LBP_AMP; + + // binary ^ operator + case CARET: + return LBP_CARET; + + // binary | operator + case PIPE: + return LBP_PIPE; + + case EQUAL_EQUAL: + return LBP_EQUAL; + case NOT_EQUAL: + return LBP_NOT_EQUAL; + case RIGHT_ANGLE: + return LBP_GREATER_THAN; + case GREATER_OR_EQUAL: + return LBP_GREATER_EQUAL; + case LEFT_ANGLE: + return LBP_SMALLER_THAN; + case LESS_OR_EQUAL: + return LBP_SMALLER_EQUAL; + + case LOGICAL_AND: + return LBP_LOGICAL_AND; + + case OR: + return LBP_LOGICAL_OR; + + case DOT_DOT: + return LBP_DOT_DOT; + + case DOT_DOT_EQ: + return LBP_DOT_DOT_EQ; + + case EQUAL: + return LBP_ASSIG; + case PLUS_EQ: + return LBP_PLUS_ASSIG; + case MINUS_EQ: + return LBP_MINUS_ASSIG; + case ASTERISK_EQ: + 
return LBP_MULT_ASSIG; + case DIV_EQ: + return LBP_DIV_ASSIG; + case PERCENT_EQ: + return LBP_MOD_ASSIG; + case AMP_EQ: + return LBP_AMP_ASSIG; + case PIPE_EQ: + return LBP_PIPE_ASSIG; + case CARET_EQ: + return LBP_CARET_ASSIG; + case LEFT_SHIFT_EQ: + return LBP_L_SHIFT_ASSIG; + case RIGHT_SHIFT_EQ: + return LBP_R_SHIFT_ASSIG; + + /* HACK: float literal due to lexer misidentifying a dot then an integer as + * a float */ + case FLOAT_LITERAL: + return LBP_FIELD_EXPR; + // field expr is same as tuple expr in precedence, i imagine + // TODO: is this needed anymore? lexer shouldn't do that anymore + + // anything that can't appear in an infix position is given lowest priority + default: + return LBP_LOWEST; + } +} + +// Returns true when current token is EOF. +template +bool +Parser::done_end_of_file () +{ + return lexer.peek_token ()->get_id () == END_OF_FILE; +} + +// Parses a sequence of items within a module or the implicit top-level module +// in a crate +template +std::vector> +Parser::parse_items () +{ + std::vector> items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != END_OF_FILE) + { + std::unique_ptr item = parse_item (false); + if (item == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse item in crate"); + add_error (std::move (error)); + + // TODO: should all items be cleared? + items = std::vector> (); + break; + } + + items.push_back (std::move (item)); + + t = lexer.peek_token (); + } + + return items; +} + +// Parses a crate (compilation unit) - entry point +template +std::unique_ptr +Parser::parse_crate () +{ + // parse inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse items + std::vector> items = parse_items (); + + // emit all errors + for (const auto &error : error_table) + error.emit_error (); + + return std::unique_ptr ( + new AST::Crate (std::move (items), std::move (inner_attrs))); +} + +// Parse a contiguous block of inner attributes. 
+template +AST::AttrVec +Parser::parse_inner_attributes () +{ + AST::AttrVec inner_attributes; + + // only try to parse it if it starts with "#!" not only "#" + while ((lexer.peek_token ()->get_id () == HASH + && lexer.peek_token (1)->get_id () == EXCLAM) + || lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) + { + AST::Attribute inner_attr = parse_inner_attribute (); + + /* Ensure only valid inner attributes are added to the inner_attributes + * list */ + if (!inner_attr.is_empty ()) + { + inner_attributes.push_back (std::move (inner_attr)); + } + else + { + /* If no more valid inner attributes, break out of loop (only + * contiguous inner attributes parsed). */ + break; + } + } + + inner_attributes.shrink_to_fit (); + return inner_attributes; +} + +// Parse an inner or outer doc comment into a doc attribute +template +AST::Attribute +Parser::parse_doc_comment () +{ + const_TokenPtr token = lexer.peek_token (); + Location locus = token->get_locus (); + AST::SimplePathSegment segment ("doc", locus); + std::vector segments; + segments.push_back (std::move (segment)); + AST::SimplePath attr_path (std::move (segments), false, locus); + AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING, + PrimitiveCoreType::CORETYPE_STR, {}, locus); + std::unique_ptr attr_input ( + new AST::AttrInputLiteral (std::move (lit_expr))); + lexer.skip_token (); + return AST::Attribute (std::move (attr_path), std::move (attr_input), locus); +} + +// Parse a single inner attribute. 
+template +AST::Attribute +Parser::parse_inner_attribute () +{ + if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) + return parse_doc_comment (); + + if (lexer.peek_token ()->get_id () != HASH) + { + Error error (lexer.peek_token ()->get_locus (), + "BUG: token %<#%> is missing, but % " + "was invoked"); + add_error (std::move (error)); + + return AST::Attribute::create_empty (); + } + lexer.skip_token (); + + if (lexer.peek_token ()->get_id () != EXCLAM) + { + Error error (lexer.peek_token ()->get_locus (), + "expected % or %<[%> for inner attribute"); + add_error (std::move (error)); + + return AST::Attribute::create_empty (); + } + lexer.skip_token (); + + if (!skip_token (LEFT_SQUARE)) + return AST::Attribute::create_empty (); + + AST::Attribute actual_attribute = parse_attribute_body (); + + if (!skip_token (RIGHT_SQUARE)) + return AST::Attribute::create_empty (); + + return actual_attribute; +} + +// Parses the body of an attribute (inner or outer). +template +AST::Attribute +Parser::parse_attribute_body () +{ + Location locus = lexer.peek_token ()->get_locus (); + + AST::SimplePath attr_path = parse_simple_path (); + // ensure path is valid to parse attribute input + if (attr_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "empty simple path in attribute"); + add_error (std::move (error)); + + // Skip past potential further info in attribute (i.e. attr_input) + skip_after_end_attribute (); + return AST::Attribute::create_empty (); + } + + std::unique_ptr attr_input = parse_attr_input (); + // AttrInput is allowed to be null, so no checks here + + return AST::Attribute (std::move (attr_path), std::move (attr_input), locus); +} + +/* Determines whether token is a valid simple path segment. This does not + * include scope resolution operators. 
*/ +inline bool +is_simple_path_segment (TokenId id) +{ + switch (id) + { + case IDENTIFIER: + case SUPER: + case SELF: + case CRATE: + return true; + case DOLLAR_SIGN: + // assume that dollar sign leads to $crate + return true; + default: + return false; + } +} + +// Parses a SimplePath AST node, if it exists. Does nothing otherwise. +template +AST::SimplePath +Parser::parse_simple_path () +{ + bool has_opening_scope_resolution = false; + Location locus = Linemap::unknown_location (); + + // don't parse anything if not a path upfront + if (!is_simple_path_segment (lexer.peek_token ()->get_id ()) + && !is_simple_path_segment (lexer.peek_token (1)->get_id ())) + return AST::SimplePath::create_empty (); + + /* Checks for opening scope resolution (i.e. global scope fully-qualified + * path) */ + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + has_opening_scope_resolution = true; + + locus = lexer.peek_token ()->get_locus (); + + lexer.skip_token (); + } + + // Parse single required simple path segment + AST::SimplePathSegment segment = parse_simple_path_segment (); + + // get location if not gotten already + if (locus == Linemap::unknown_location ()) + locus = segment.get_locus (); + + std::vector segments; + + // Return empty vector if first, actually required segment is an error + if (segment.is_error ()) + return AST::SimplePath::create_empty (); + + segments.push_back (std::move (segment)); + + // Parse all other simple path segments + while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + // Skip scope resolution operator + lexer.skip_token (); + + AST::SimplePathSegment new_segment = parse_simple_path_segment (); + + // Return path as currently constructed if segment in error state. 
+ if (new_segment.is_error ()) + break; + + segments.push_back (std::move (new_segment)); + } + + // DEBUG: check for any empty segments + for (const auto &seg : segments) + { + if (seg.is_error ()) + { + rust_debug ( + "when parsing simple path, somehow empty path segment was " + "not filtered out. Path begins with '%s'", + segments.at (0).as_string ().c_str ()); + } + } + + return AST::SimplePath (std::move (segments), has_opening_scope_resolution, + locus); + /* TODO: now that is_simple_path_segment exists, could probably start + * actually making errors upon parse failure of segments and whatever */ +} + +/* Parses a single SimplePathSegment (does not handle the scope resolution + * operators) */ +template +AST::SimplePathSegment +Parser::parse_simple_path_segment () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + lexer.skip_token (); + + return AST::SimplePathSegment (t->get_str (), t->get_locus ()); + case SUPER: + lexer.skip_token (); + + return AST::SimplePathSegment ("super", t->get_locus ()); + case SELF: + lexer.skip_token (); + + return AST::SimplePathSegment ("self", t->get_locus ()); + case CRATE: + lexer.skip_token (); + + return AST::SimplePathSegment ("crate", t->get_locus ()); + case DOLLAR_SIGN: + if (lexer.peek_token (1)->get_id () == CRATE) + { + lexer.skip_token (1); + + return AST::SimplePathSegment ("$crate", t->get_locus ()); + } + gcc_fallthrough (); + default: + // do nothing but inactivates warning from gcc when compiling + /* could put the rust_error_at thing here but fallthrough (from failing + * $crate condition) isn't completely obvious if it is. */ + + // test prevent error + return AST::SimplePathSegment::create_error (); + } + gcc_unreachable (); + /*rust_error_at( + t->get_locus(), "invalid token '%s' in simple path segment", + t->get_token_description());*/ + // this is not necessarily an error, e.g. 
end of path + // return AST::SimplePathSegment::create_error(); +} + +// Parses a PathIdentSegment - an identifier segment of a non-SimplePath path. +template +AST::PathIdentSegment +Parser::parse_path_ident_segment () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + lexer.skip_token (); + + return AST::PathIdentSegment (t->get_str (), t->get_locus ()); + case SUPER: + lexer.skip_token (); + + return AST::PathIdentSegment ("super", t->get_locus ()); + case SELF: + lexer.skip_token (); + + return AST::PathIdentSegment ("self", t->get_locus ()); + case SELF_ALIAS: + lexer.skip_token (); + + return AST::PathIdentSegment ("Self", t->get_locus ()); + case CRATE: + lexer.skip_token (); + + return AST::PathIdentSegment ("crate", t->get_locus ()); + case DOLLAR_SIGN: + if (lexer.peek_token (1)->get_id () == CRATE) + { + lexer.skip_token (1); + + return AST::PathIdentSegment ("$crate", t->get_locus ()); + } + gcc_fallthrough (); + default: + /* do nothing but inactivates warning from gcc when compiling + * could put the error_at thing here but fallthrough (from failing $crate + * condition) isn't completely obvious if it is. 
*/ + + // test prevent error + return AST::PathIdentSegment::create_error (); + } + gcc_unreachable (); + // not necessarily an error +} + +// Parses an AttrInput AST node (polymorphic, as AttrInput is abstract) +template +std::unique_ptr +Parser::parse_attr_input () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + case LEFT_SQUARE: + case LEFT_CURLY: { + // must be a delimited token tree, so parse that + std::unique_ptr input_tree ( + new AST::DelimTokenTree (parse_delim_token_tree ())); + + // TODO: potential checks on DelimTokenTree before returning + + return input_tree; + } + case EQUAL: { + // = LiteralExpr + lexer.skip_token (); + + t = lexer.peek_token (); + + /* Ensure token is a "literal expression" (literally only a literal + * token of any type) */ + if (!t->is_literal ()) + { + Error error ( + t->get_locus (), + "unknown token %qs in attribute body - literal expected", + t->get_token_description ()); + add_error (std::move (error)); + + skip_after_end_attribute (); + return nullptr; + } + + AST::Literal::LitType lit_type = AST::Literal::STRING; + // Crappy mapping of token type to literal type + switch (t->get_id ()) + { + case INT_LITERAL: + lit_type = AST::Literal::INT; + break; + case FLOAT_LITERAL: + lit_type = AST::Literal::FLOAT; + break; + case CHAR_LITERAL: + lit_type = AST::Literal::CHAR; + break; + case BYTE_CHAR_LITERAL: + lit_type = AST::Literal::BYTE; + break; + case BYTE_STRING_LITERAL: + lit_type = AST::Literal::BYTE_STRING; + break; + case STRING_LITERAL: + default: + lit_type = AST::Literal::STRING; + break; // TODO: raw string? don't eliminate it from lexer? + } + + // create actual LiteralExpr + AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (), + {}, t->get_locus ()); + lexer.skip_token (); + + std::unique_ptr attr_input_lit ( + new AST::AttrInputLiteral (std::move (lit_expr))); + + // do checks or whatever? 
none required, really + + // FIXME: shouldn't a skip token be required here? + + return attr_input_lit; + } + break; + case RIGHT_SQUARE: + // means AttrInput is missing, which is allowed + return nullptr; + default: + add_error ( + Error (t->get_locus (), + "unknown token %qs in attribute body - attribute input or " + "none expected", + t->get_token_description ())); + + skip_after_end_attribute (); + return nullptr; + } + gcc_unreachable (); + // TODO: find out how to stop gcc error on "no return value" +} + +/* Returns true if the token id matches the delimiter type. Note that this only + * operates for END delimiter tokens. */ +inline bool +token_id_matches_delims (TokenId token_id, AST::DelimType delim_type) +{ + return ((token_id == RIGHT_PAREN && delim_type == AST::PARENS) + || (token_id == RIGHT_SQUARE && delim_type == AST::SQUARE) + || (token_id == RIGHT_CURLY && delim_type == AST::CURLY)); +} + +/* Returns true if the likely result of parsing the next few tokens is a path. + * Not guaranteed, though, especially in the case of syntax errors. */ +inline bool +is_likely_path_next (TokenId next_token_id) +{ + switch (next_token_id) + { + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + // maybe - maybe do extra check. But then requires another TokenId. 
+ case DOLLAR_SIGN: + case SCOPE_RESOLUTION: + return true; + default: + return false; + } +} + +// Parses a delimited token tree +template +AST::DelimTokenTree +Parser::parse_delim_token_tree () +{ + const_TokenPtr t = lexer.peek_token (); + lexer.skip_token (); + Location initial_loc = t->get_locus (); + + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // Map tokens to DelimType + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters (for a " + "delimited token tree)", + t->get_token_description ())); + + return AST::DelimTokenTree::create_empty (); + } + + // parse actual token tree vector - 0 or more + std::vector> token_trees_in_tree; + auto delim_open + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees_in_tree.push_back (std::move (delim_open)); + + // repeat loop until finding the matching delimiter + t = lexer.peek_token (); + while (!token_id_matches_delims (t->get_id (), delim_type) + && t->get_id () != END_OF_FILE) + { + std::unique_ptr tok_tree = parse_token_tree (); + + if (tok_tree == nullptr) + { + // TODO: is this error handling appropriate? 
+ Error error ( + t->get_locus (), + "failed to parse token tree in delimited token tree - found %qs", + t->get_token_description ()); + add_error (std::move (error)); + + return AST::DelimTokenTree::create_empty (); + } + + token_trees_in_tree.push_back (std::move (tok_tree)); + + // lexer.skip_token(); + t = lexer.peek_token (); + } + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees_in_tree.push_back (std::move (delim_close)); + + AST::DelimTokenTree token_tree (delim_type, std::move (token_trees_in_tree), + initial_loc); + + // parse end delimiters + t = lexer.peek_token (); + + if (token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + // DEBUG + rust_debug ("finished parsing new delim token tree - peeked token is now " + "'%s' while t is '%s'", + lexer.peek_token ()->get_token_description (), + t->get_token_description ()); + + return token_tree; + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a delimited token tree)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + /* return empty token tree despite possibly parsing valid token tree - + * TODO is this a good idea? */ + return AST::DelimTokenTree::create_empty (); + } +} + +/* Parses a TokenTree syntactical production. This is either a delimited token + * tree or a non-delimiter token. 
*/ +template +std::unique_ptr +Parser::parse_token_tree () +{ + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case LEFT_PAREN: + case LEFT_SQUARE: + case LEFT_CURLY: + // Parse delimited token tree + // TODO: use move rather than copy constructor + return std::unique_ptr ( + new AST::DelimTokenTree (parse_delim_token_tree ())); + case RIGHT_PAREN: + case RIGHT_SQUARE: + case RIGHT_CURLY: + // error - should not be called when this a token + add_error ( + Error (t->get_locus (), + "unexpected closing delimiter %qs - token tree requires " + "either paired delimiters or non-delimiter tokens", + t->get_token_description ())); + + lexer.skip_token (); + return nullptr; + default: + // parse token itself as TokenTree + lexer.skip_token (); + return std::unique_ptr (new AST::Token (std::move (t))); + } +} + +// Parses a single item +template +std::unique_ptr +Parser::parse_item (bool called_from_statement) +{ + // has a "called_from_statement" parameter for better error message handling + + // parse outer attributes for item + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // TODO: decide how to deal with VisItem vs MacroItem dichotomy + /* best current solution: catch all keywords that would imply a VisItem in a + * switch and have MacroItem as a last resort */ + + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case END_OF_FILE: + // not necessarily an error, unless we just read outer + // attributes which needs to be attached + if (!outer_attrs.empty ()) + { + Rust::AST::Attribute attr = outer_attrs.back (); + Error error (attr.get_locus (), + "expected item after outer attribute or doc comment"); + add_error (std::move (error)); + } + return nullptr; + case PUB: + case MOD: + case EXTERN_TOK: + case USE: + case FN_TOK: + case TYPE: + case STRUCT_TOK: + case ENUM_TOK: + case CONST: + case STATIC_TOK: + case TRAIT: + case IMPL: + /* TODO: implement union keyword but not really because of + * context-dependence 
crappy hack way to parse a union written below to + * separate it from the good code. */ + // case UNION: + case UNSAFE: // maybe - unsafe traits are a thing + // if any of these (should be all possible VisItem prefixes), parse a + // VisItem + return parse_vis_item (std::move (outer_attrs)); + break; + case SUPER: + case SELF: + case CRATE: + case DOLLAR_SIGN: + // almost certainly macro invocation semi + return parse_macro_item (std::move (outer_attrs)); + break; + // crappy hack to do union "keyword" + case IDENTIFIER: + // TODO: ensure std::string and literal comparison works + if (t->get_str () == "union" + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + return parse_vis_item (std::move (outer_attrs)); + // or should this go straight to parsing union? + } + else if (t->get_str () == "macro_rules") + { + // macro_rules! macro item + return parse_macro_item (std::move (outer_attrs)); + } + else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION + || lexer.peek_token (1)->get_id () == EXCLAM) + { + /* path (probably) or macro invocation, so probably a macro invocation + * semi */ + return parse_macro_item (std::move (outer_attrs)); + } + gcc_fallthrough (); + default: + // otherwise unrecognised + // return parse_macro_item(std::move(outer_attrs)); + add_error (Error (t->get_locus (), + "unrecognised token %qs for start of %s", + t->get_token_description (), + called_from_statement ? "statement" : "item")); + + // skip somewhere? + return nullptr; + break; + } +} + +// Parses a contiguous block of outer attributes. +template +AST::AttrVec +Parser::parse_outer_attributes () +{ + AST::AttrVec outer_attributes; + + while (lexer.peek_token ()->get_id () + == HASH /* Can also be #!, which catches errors. */ + || lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT + || lexer.peek_token ()->get_id () + == INNER_DOC_COMMENT) /* For error handling. 
*/ + { + AST::Attribute outer_attr = parse_outer_attribute (); + + /* Ensure only valid outer attributes are added to the outer_attributes + * list */ + if (!outer_attr.is_empty ()) + { + outer_attributes.push_back (std::move (outer_attr)); + } + else + { + /* If no more valid outer attributes, break out of loop (only + * contiguous outer attributes parsed). */ + break; + } + } + + outer_attributes.shrink_to_fit (); + return outer_attributes; + + /* TODO: this shares basically all code with parse_inner_attributes except + * function call - find way of making it more modular? function pointer? */ +} + +// Parse a single outer attribute. +template +AST::Attribute +Parser::parse_outer_attribute () +{ + if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT) + return parse_doc_comment (); + + if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) + { + Error error ( + lexer.peek_token ()->get_locus (), + "inner doc (% or %) only allowed at start of item " + "and before any outer attribute or doc (%<#[%>, % or %)"); + add_error (std::move (error)); + lexer.skip_token (); + return AST::Attribute::create_empty (); + } + + /* OuterAttribute -> '#' '[' Attr ']' */ + + if (lexer.peek_token ()->get_id () != HASH) + return AST::Attribute::create_empty (); + + lexer.skip_token (); + + TokenId id = lexer.peek_token ()->get_id (); + if (id != LEFT_SQUARE) + { + if (id == EXCLAM) + { + // this is inner attribute syntax, so throw error + // inner attributes were either already parsed or not allowed here. + Error error ( + lexer.peek_token ()->get_locus (), + "token % found, indicating inner attribute definition. 
Inner " + "attributes are not possible at this location"); + add_error (std::move (error)); + } + return AST::Attribute::create_empty (); + } + + lexer.skip_token (); + + AST::Attribute actual_attribute = parse_attribute_body (); + + if (lexer.peek_token ()->get_id () != RIGHT_SQUARE) + return AST::Attribute::create_empty (); + + lexer.skip_token (); + + return actual_attribute; +} + +// Parses a VisItem (item that can have non-default visibility). +template +std::unique_ptr +Parser::parse_vis_item (AST::AttrVec outer_attrs) +{ + // parse visibility, which may or may not exist + AST::Visibility vis = parse_visibility (); + + // select VisItem to create depending on keyword + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case MOD: + return parse_module (std::move (vis), std::move (outer_attrs)); + case EXTERN_TOK: + // lookahead to resolve syntactical production + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case CRATE: + return parse_extern_crate (std::move (vis), std::move (outer_attrs)); + case FN_TOK: // extern function + return parse_function (std::move (vis), std::move (outer_attrs)); + case LEFT_CURLY: // extern block + return parse_extern_block (std::move (vis), std::move (outer_attrs)); + case STRING_LITERAL: // for specifying extern ABI + // could be extern block or extern function, so more lookahead + t = lexer.peek_token (2); + + switch (t->get_id ()) + { + case FN_TOK: + return parse_function (std::move (vis), std::move (outer_attrs)); + case LEFT_CURLY: + return parse_extern_block (std::move (vis), + std::move (outer_attrs)); + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of extern production", + t->get_token_description ())); + + lexer.skip_token (2); // TODO: is this right thing to do? 
+ return nullptr; + } + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of extern production", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? + return nullptr; + } + case USE: + return parse_use_decl (std::move (vis), std::move (outer_attrs)); + case FN_TOK: + return parse_function (std::move (vis), std::move (outer_attrs)); + case TYPE: + return parse_type_alias (std::move (vis), std::move (outer_attrs)); + case STRUCT_TOK: + return parse_struct (std::move (vis), std::move (outer_attrs)); + case ENUM_TOK: + return parse_enum (std::move (vis), std::move (outer_attrs)); + // TODO: implement union keyword but not really because of + // context-dependence case UNION: crappy hack to do union "keyword" + case IDENTIFIER: + if (t->get_str () == "union" + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + return parse_union (std::move (vis), std::move (outer_attrs)); + // or should item switch go straight to parsing union? + } + else + { + break; + } + case CONST: + // lookahead to resolve syntactical production + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case IDENTIFIER: + case UNDERSCORE: + return parse_const_item (std::move (vis), std::move (outer_attrs)); + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + return parse_function (std::move (vis), std::move (outer_attrs)); + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of const production", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? 
+ return nullptr; + } + case STATIC_TOK: + return parse_static_item (std::move (vis), std::move (outer_attrs)); + case TRAIT: + return parse_trait (std::move (vis), std::move (outer_attrs)); + case IMPL: + return parse_impl (std::move (vis), std::move (outer_attrs)); + case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls), + // lookahead to resolve syntactical production + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case TRAIT: + return parse_trait (std::move (vis), std::move (outer_attrs)); + case EXTERN_TOK: + case FN_TOK: + return parse_function (std::move (vis), std::move (outer_attrs)); + case IMPL: + return parse_impl (std::move (vis), std::move (outer_attrs)); + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of unsafe production", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? + return nullptr; + } + default: + // otherwise vis item clearly doesn't exist, which is not an error + // has a catch-all post-switch return to allow other breaks to occur + break; + } + return nullptr; +} + +// Parses a MacroItem (either a MacroInvocationSemi or MacroRulesDefinition). +template +std::unique_ptr +Parser::parse_macro_item (AST::AttrVec outer_attrs) +{ + const_TokenPtr t = lexer.peek_token (); + + /* dodgy way of detecting macro due to weird context-dependence thing. 
+ * probably can be improved */ + // TODO: ensure that string compare works properly + if (t->get_id () == IDENTIFIER && t->get_str () == "macro_rules") + { + return parse_macro_rules_def (std::move (outer_attrs)); + } + else + { + // DEBUG: TODO: remove + rust_debug ( + "DEBUG - parse_macro_item called and token is not macro_rules"); + if (t->get_id () == IDENTIFIER) + { + rust_debug ("just add to last error: token is not macro_rules and is " + "instead '%s'", + t->get_str ().c_str ()); + } + else + { + rust_debug ("just add to last error: token is not macro_rules and is " + "not an identifier either - it is '%s'", + t->get_token_description ()); + } + + return parse_macro_invocation_semi (std::move (outer_attrs)); + } +} + +// Parses a macro rules definition syntax extension whatever thing. +template +std::unique_ptr +Parser::parse_macro_rules_def (AST::AttrVec outer_attrs) +{ + // ensure that first token is identifier saying "macro_rules" + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () != IDENTIFIER || t->get_str () != "macro_rules") + { + Error error ( + t->get_locus (), + "macro rules definition does not start with %"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + lexer.skip_token (); + Location macro_locus = t->get_locus (); + + if (!skip_token (EXCLAM)) + { + // skip after somewhere? 
+ return nullptr; + } + + // parse macro name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier rule_name = ident_tok->get_str (); + + // DEBUG + rust_debug ("in macro rules def, about to parse parens."); + + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // Map tokens to DelimType + t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters (for a " + "macro rules definition)", + t->get_token_description ())); + + return nullptr; + } + lexer.skip_token (); + + // parse actual macro rules + std::vector macro_rules; + + // must be at least one macro rule, so parse it + AST::MacroRule initial_rule = parse_macro_rule (); + if (initial_rule.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "required first macro rule in macro rules definition " + "could not be parsed"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return nullptr; + } + macro_rules.push_back (std::move (initial_rule)); + + // DEBUG + rust_debug ("successfully pushed back initial macro rule"); + + t = lexer.peek_token (); + // parse macro rules + while (t->get_id () == SEMICOLON) + { + // skip semicolon + lexer.skip_token (); + + // don't parse if end of macro rules + if (token_id_matches_delims (lexer.peek_token ()->get_id (), delim_type)) + { + // DEBUG + rust_debug ( + "broke out of parsing macro rules loop due to finding delim"); + + break; + } + + // try to parse next rule + AST::MacroRule rule = parse_macro_rule (); + if (rule.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro rule in macro rules definition"); + add_error (std::move (error)); + + return nullptr; + } + + macro_rules.push_back (std::move (rule)); + + // DEBUG + rust_debug ("successfully pushed back another macro rule"); + + t = lexer.peek_token (); + } + + // parse end delimiters + t = lexer.peek_token (); + if (token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + if (delim_type != AST::CURLY) + { + // skip semicolon at end of non-curly macro definitions + if (!skip_token (SEMICOLON)) + { + // as this is the end, allow recovery (probably) - may change + return std::unique_ptr ( + new AST::MacroRulesDefinition ( + std::move (rule_name), delim_type, std::move (macro_rules), + std::move (outer_attrs), macro_locus)); + } + } + + return std::unique_ptr ( + new AST::MacroRulesDefinition (std::move (rule_name), delim_type, + std::move (macro_rules), + std::move (outer_attrs), macro_locus)); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a macro rules definition)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? 
"]" : "}"))); + add_error (std::move (error)); + + /* return empty macro definiton despite possibly parsing mostly valid one + * - TODO is this a good idea? */ + return nullptr; + } +} + +// Parses a semi-coloned (except for full block) macro invocation item. +template +std::unique_ptr +Parser::parse_macro_invocation_semi ( + AST::AttrVec outer_attrs) +{ + Location macro_locus = lexer.peek_token ()->get_locus (); + AST::SimplePath path = parse_simple_path (); + + if (!skip_token (EXCLAM)) + { + // skip after somewhere? + return nullptr; + } + + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // Map tokens to DelimType + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters (for a " + "macro invocation semi body)", + t->get_token_description ())); + + return nullptr; + } + Location tok_tree_locus = t->get_locus (); + lexer.skip_token (); + + // parse actual token trees + std::vector> token_trees; + auto delim_open + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees.push_back (std::move (delim_open)); + + t = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!token_id_matches_delims (t->get_id (), delim_type)) + { + std::unique_ptr tree = parse_token_tree (); + + if (tree == nullptr) + { + Error error (t->get_locus (), + "failed to parse token tree for macro invocation semi " + "- found %qs", + t->get_token_description ()); + add_error (std::move (error)); + + return nullptr; + } + + token_trees.push_back (std::move (tree)); + + t = lexer.peek_token (); + } + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees.push_back (std::move (delim_close)); + + 
AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees), + tok_tree_locus); + AST::MacroInvocData invoc_data (std::move (path), std::move (delim_tok_tree)); + + // parse end delimiters + t = lexer.peek_token (); + if (token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + if (delim_type != AST::CURLY) + { + // skip semicolon at end of non-curly macro invocation semis + if (!skip_token (SEMICOLON)) + { + // as this is the end, allow recovery (probably) - may change + + return std::unique_ptr ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), macro_locus, + true)); + } + } + + // DEBUG: + rust_debug ("skipped token is '%s', next token (current peek) is '%s'", + t->get_token_description (), + lexer.peek_token ()->get_token_description ()); + + return std::unique_ptr ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), macro_locus, true)); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a macro invocation semi)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + /* return empty macro invocation despite possibly parsing mostly valid one + * - TODO is this a good idea? */ + return nullptr; + } +} + +// Parses a non-semicoloned macro invocation (i.e. as pattern or expression). +template +std::unique_ptr +Parser::parse_macro_invocation (AST::AttrVec outer_attrs) +{ + // parse macro path + AST::SimplePath macro_path = parse_simple_path (); + if (macro_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro invocation path"); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + if (!skip_token (EXCLAM)) + { + // skip after somewhere? 
+ return nullptr; + } + + // parse internal delim token tree + AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree (); + + Location macro_locus = macro_path.get_locus (); + + return std::unique_ptr ( + new AST::MacroInvocation (AST::MacroInvocData (std::move (macro_path), + std::move (delim_tok_tree)), + std::move (outer_attrs), macro_locus)); +} + +// Parses a macro rule definition - does not parse semicolons. +template +AST::MacroRule +Parser::parse_macro_rule () +{ + Location locus = lexer.peek_token ()->get_locus (); + + // parse macro matcher + AST::MacroMatcher matcher = parse_macro_matcher (); + + if (matcher.is_error ()) + return AST::MacroRule::create_error (locus); + + if (!skip_token (MATCH_ARROW)) + { + // skip after somewhere? + return AST::MacroRule::create_error (locus); + } + + // parse transcriber (this is just a delim token tree) + Location token_tree_loc = lexer.peek_token ()->get_locus (); + AST::MacroTranscriber transcriber (parse_delim_token_tree (), token_tree_loc); + + return AST::MacroRule (std::move (matcher), std::move (transcriber), locus); +} + +// Parses a macro matcher (part of a macro rule definition). 
+template +AST::MacroMatcher +Parser::parse_macro_matcher () +{ + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // DEBUG + rust_debug ("begun parsing macro matcher"); + + // Map tokens to DelimType + const_TokenPtr t = lexer.peek_token (); + Location locus = t->get_locus (); + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error ( + t->get_locus (), + "unexpected token %qs - expecting delimiters (for a macro matcher)", + t->get_token_description ())); + + return AST::MacroMatcher::create_error (t->get_locus ()); + } + lexer.skip_token (); + + // parse actual macro matches + std::vector> matches; + // Set of possible preceding macro matches to make sure follow-set + // restrictions are respected. + // TODO: Consider using std::reference_wrapper instead of raw pointers? + std::vector last_matches; + + t = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!token_id_matches_delims (t->get_id (), delim_type)) + { + std::unique_ptr match = parse_macro_match (); + + if (match == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse macro match for macro matcher - found %qs", + t->get_token_description ()); + add_error (std::move (error)); + + return AST::MacroMatcher::create_error (t->get_locus ()); + } + + if (matches.size () > 0) + { + const auto *last_match = matches.back ().get (); + + // We want to check if we are dealing with a zeroable repetition + bool zeroable = false; + if (last_match->get_macro_match_type () + == AST::MacroMatch::MacroMatchType::Repetition) + { + auto repetition + = static_cast (last_match); + + if (repetition->get_op () + != AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE) + zeroable = true; + } + + if (!zeroable) + last_matches.clear (); + + 
last_matches.emplace_back (last_match); + + for (auto last : last_matches) + if (!is_match_compatible (*last, *match)) + return AST::MacroMatcher::create_error ( + match->get_match_locus ()); + } + + matches.push_back (std::move (match)); + + // DEBUG + rust_debug ("pushed back a match in macro matcher"); + + t = lexer.peek_token (); + } + + // parse end delimiters + t = lexer.peek_token (); + if (token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + return AST::MacroMatcher (delim_type, std::move (matches), locus); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a macro matcher)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + /* return error macro matcher despite possibly parsing mostly correct one? + * TODO is this the best idea? */ + return AST::MacroMatcher::create_error (t->get_locus ()); + } +} + +// Parses a macro match (syntax match inside a matcher in a macro rule). 
+template +std::unique_ptr +Parser::parse_macro_match () +{ + // branch based on token available + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + case LEFT_SQUARE: + case LEFT_CURLY: { + // must be macro matcher as delimited + AST::MacroMatcher matcher = parse_macro_matcher (); + if (matcher.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro matcher in macro match"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::MacroMatcher (std::move (matcher))); + } + case DOLLAR_SIGN: { + // have to do more lookahead to determine if fragment or repetition + const_TokenPtr t2 = lexer.peek_token (1); + switch (t2->get_id ()) + { + case ABSTRACT: + case AS: + case ASYNC: + case BECOME: + case BOX: + case BREAK: + case CONST: + case CONTINUE: + case CRATE: + case DO: + case DYN: + case ELSE: + case ENUM_TOK: + case EXTERN_TOK: + case FALSE_LITERAL: + case FINAL_TOK: + case FN_TOK: + case FOR: + case IF: + case IMPL: + case IN: + case LET: + case LOOP: + case MACRO: + case MATCH_TOK: + case MOD: + case MOVE: + case MUT: + case OVERRIDE_TOK: + case PRIV: + case PUB: + case REF: + case RETURN_TOK: + case SELF_ALIAS: + case SELF: + case STATIC_TOK: + case STRUCT_TOK: + case SUPER: + case TRAIT: + case TRUE_LITERAL: + case TRY: + case TYPE: + case TYPEOF: + case UNSAFE: + case UNSIZED: + case USE: + case VIRTUAL: + case WHERE: + case WHILE: + case YIELD: + case IDENTIFIER: + // macro fragment + return parse_macro_match_fragment (); + case LEFT_PAREN: + // macro repetition + return parse_macro_match_repetition (); + default: + // error: unrecognised + add_error ( + Error (t2->get_locus (), + "unrecognised token combination %<$%s%> at start of " + "macro match - did you mean %<$identifier%> or %<$(%>?", + t2->get_token_description ())); + + // skip somewhere? 
+ return nullptr; + } + } + case RIGHT_PAREN: + case RIGHT_SQUARE: + case RIGHT_CURLY: + // not allowed + add_error (Error ( + t->get_locus (), + "closing delimiters like %qs are not allowed at the start of a macro " + "match", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + default: + // just the token + lexer.skip_token (); + return std::unique_ptr (new AST::Token (std::move (t))); + } +} + +// Parses a fragment macro match. +template +std::unique_ptr +Parser::parse_macro_match_fragment () +{ + Location fragment_locus = lexer.peek_token ()->get_locus (); + skip_token (DOLLAR_SIGN); + + Identifier ident = ""; + auto identifier = lexer.peek_token (); + if (identifier->has_str ()) + ident = identifier->get_str (); + else + ident = std::string (token_id_to_str (identifier->get_id ())); + + if (ident.empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "missing identifier in macro match fragment"); + add_error (std::move (error)); + + return nullptr; + } + skip_token (identifier->get_id ()); + + if (!skip_token (COLON)) + { + // skip after somewhere? + return nullptr; + } + + // get MacroFragSpec for macro + const_TokenPtr t = expect_token (IDENTIFIER); + if (t == nullptr) + return nullptr; + + AST::MacroFragSpec frag + = AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ()); + if (frag.is_error ()) + { + Error error (t->get_locus (), + "invalid fragment specifier %qs in fragment macro match", + t->get_str ().c_str ()); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::MacroMatchFragment (std::move (ident), frag, fragment_locus)); +} + +// Parses a repetition macro match. 
+template +std::unique_ptr +Parser::parse_macro_match_repetition () +{ + skip_token (DOLLAR_SIGN); + skip_token (LEFT_PAREN); + + std::vector> matches; + + // parse required first macro match + std::unique_ptr initial_match = parse_macro_match (); + if (initial_match == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "could not parse required first macro match in macro match repetition"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + matches.push_back (std::move (initial_match)); + + // parse optional later macro matches + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr match = parse_macro_match (); + + if (match == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro match in macro match repetition"); + add_error (std::move (error)); + + return nullptr; + } + + matches.push_back (std::move (match)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere? 
+ return nullptr; + } + + t = lexer.peek_token (); + // see if separator token exists + std::unique_ptr separator = nullptr; + switch (t->get_id ()) + { + // repetition operators + case ASTERISK: + case PLUS: + case QUESTION_MARK: + // delimiters + case LEFT_PAREN: + case LEFT_CURLY: + case LEFT_SQUARE: + case RIGHT_PAREN: + case RIGHT_CURLY: + case RIGHT_SQUARE: + // separator does not exist, so still null and don't skip token + break; + default: + // separator does exist + separator = std::unique_ptr (new AST::Token (std::move (t))); + lexer.skip_token (); + break; + } + + // parse repetition operator + t = lexer.peek_token (); + AST::MacroMatchRepetition::MacroRepOp op = AST::MacroMatchRepetition::NONE; + switch (t->get_id ()) + { + case ASTERISK: + op = AST::MacroMatchRepetition::ANY; + lexer.skip_token (); + break; + case PLUS: + op = AST::MacroMatchRepetition::ONE_OR_MORE; + lexer.skip_token (); + break; + case QUESTION_MARK: + op = AST::MacroMatchRepetition::ZERO_OR_ONE; + lexer.skip_token (); + break; + default: + add_error ( + Error (t->get_locus (), + "expected macro repetition operator (%<*%>, %<+%>, or %) in " + "macro match - found %qs", + t->get_token_description ())); + + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::MacroMatchRepetition (std::move (matches), op, + std::move (separator), t->get_locus ())); +} + +/* Parses a visibility syntactical production (i.e. 
creating a non-default + * visibility) */ +template +AST::Visibility +Parser::parse_visibility () +{ + // check for no visibility + if (lexer.peek_token ()->get_id () != PUB) + { + return AST::Visibility::create_private (); + } + + lexer.skip_token (); + + // create simple pub visibility if no parentheses + if (lexer.peek_token ()->get_id () != LEFT_PAREN) + { + return AST::Visibility::create_public (); + // or whatever + } + + lexer.skip_token (); + + const_TokenPtr t = lexer.peek_token (); + auto path_loc = t->get_locus (); + + switch (t->get_id ()) + { + case CRATE: + lexer.skip_token (); + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_crate (path_loc); + case SELF: + lexer.skip_token (); + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_self (path_loc); + case SUPER: + lexer.skip_token (); + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_super (path_loc); + case IN: { + lexer.skip_token (); + + // parse the "in" path as well + AST::SimplePath path = parse_simple_path (); + if (path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "missing path in pub(in path) visibility"); + add_error (std::move (error)); + + // skip after somewhere? + return AST::Visibility::create_error (); + } + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_in_path (std::move (path)); + } + default: + add_error (Error (t->get_locus (), "unexpected token %qs in visibility", + t->get_token_description ())); + + lexer.skip_token (); + return AST::Visibility::create_error (); + } +} + +// Parses a module - either a bodied module or a module defined in another file. 
+template +std::unique_ptr +Parser::parse_module (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (MOD); + + const_TokenPtr module_name = expect_token (IDENTIFIER); + if (module_name == nullptr) + { + return nullptr; + } + Identifier name = module_name->get_str (); + + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case SEMICOLON: + lexer.skip_token (); + + // Construct an external module + return std::unique_ptr ( + new AST::Module (std::move (name), std::move (vis), + std::move (outer_attrs), locus, lexer.get_filename (), + inline_module_stack)); + case LEFT_CURLY: { + lexer.skip_token (); + + // parse inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + std::string module_path_name + = extract_module_path (inner_attrs, outer_attrs, name); + InlineModuleStackScope scope (*this, std::move (module_path_name)); + + // parse items + std::vector> items; + const_TokenPtr tok = lexer.peek_token (); + while (tok->get_id () != RIGHT_CURLY) + { + std::unique_ptr item = parse_item (false); + if (item == nullptr) + { + Error error (tok->get_locus (), + "failed to parse item in module"); + add_error (std::move (error)); + + return nullptr; + } + + items.push_back (std::move (item)); + + tok = lexer.peek_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::Module (std::move (name), locus, std::move (items), + std::move (vis), std::move (inner_attrs), + std::move (outer_attrs))); // module name? 
+ } + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in module declaration/definition item", + t->get_token_description ())); + + lexer.skip_token (); + return nullptr; + } +} + +// Parses an extern crate declaration (dependency on external crate) +template +std::unique_ptr +Parser::parse_extern_crate (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + if (!skip_token (EXTERN_TOK)) + { + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (CRATE)) + { + skip_after_semicolon (); + return nullptr; + } + + /* parse crate reference name - this has its own syntactical rule in reference + * but seems to not be used elsewhere, so i'm putting it here */ + const_TokenPtr crate_name_tok = lexer.peek_token (); + std::string crate_name; + + switch (crate_name_tok->get_id ()) + { + case IDENTIFIER: + crate_name = crate_name_tok->get_str (); + lexer.skip_token (); + break; + case SELF: + crate_name = "self"; + lexer.skip_token (); + break; + default: + add_error ( + Error (crate_name_tok->get_locus (), + "expecting crate name (identifier or %), found %qs", + crate_name_tok->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } + + // don't parse as clause if it doesn't exist + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + lexer.skip_token (); + + return std::unique_ptr ( + new AST::ExternCrate (std::move (crate_name), std::move (vis), + std::move (outer_attrs), locus)); + } + + /* parse as clause - this also has its own syntactical rule in reference and + * also seems to not be used elsewhere, so including here again. 
*/ + if (!skip_token (AS)) + { + skip_after_semicolon (); + return nullptr; + } + + const_TokenPtr as_name_tok = lexer.peek_token (); + std::string as_name; + + switch (as_name_tok->get_id ()) + { + case IDENTIFIER: + as_name = as_name_tok->get_str (); + lexer.skip_token (); + break; + case UNDERSCORE: + as_name = "_"; + lexer.skip_token (); + break; + default: + add_error ( + Error (as_name_tok->get_locus (), + "expecting as clause name (identifier or %<_%>), found %qs", + as_name_tok->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (SEMICOLON)) + { + skip_after_semicolon (); + return nullptr; + } + + return std::unique_ptr ( + new AST::ExternCrate (std::move (crate_name), std::move (vis), + std::move (outer_attrs), locus, std::move (as_name))); +} + +// Parses a use declaration. +template +std::unique_ptr +Parser::parse_use_decl (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + if (!skip_token (USE)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse use tree, which is required + std::unique_ptr use_tree = parse_use_tree (); + if (use_tree == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse use tree in use declaration"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (SEMICOLON)) + { + skip_after_semicolon (); + return nullptr; + } + + return std::unique_ptr ( + new AST::UseDeclaration (std::move (use_tree), std::move (vis), + std::move (outer_attrs), locus)); +} + +// Parses a use tree (which can be recursive and is actually a base class). 
+template +std::unique_ptr +Parser::parse_use_tree () +{ + /* potential syntax definitions in attempt to get algorithm: + * Glob: + * <- SimplePath :: * + * <- :: * + * <- * + * Nested tree thing: + * <- SimplePath :: { COMPLICATED_INNER_TREE_THING } + * <- :: COMPLICATED_INNER_TREE_THING } + * <- { COMPLICATED_INNER_TREE_THING } + * Rebind thing: + * <- SimplePath as IDENTIFIER + * <- SimplePath as _ + * <- SimplePath + */ + + /* current plan of attack: try to parse SimplePath first - if fails, one of + * top two then try parse :: - if fails, one of top two. Next is deciding + * character for top two. */ + + /* Thus, parsing smaller parts of use tree may require feeding into function + * via parameters (or could handle all in this single function because other + * use tree types aren't recognised as separate in the spec) */ + + // TODO: I think this function is too complex, probably should split it + + Location locus = lexer.peek_token ()->get_locus (); + + // bool has_path = false; + AST::SimplePath path = parse_simple_path (); + + if (path.is_empty ()) + { + // has no path, so must be glob or nested tree UseTree type + + bool is_global = false; + + // check for global scope resolution operator + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + lexer.skip_token (); + is_global = true; + } + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case ASTERISK: + // glob UseTree type + lexer.skip_token (); + + if (is_global) + return std::unique_ptr ( + new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL, + AST::SimplePath::create_empty (), locus)); + else + return std::unique_ptr ( + new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH, + AST::SimplePath::create_empty (), locus)); + case LEFT_CURLY: { + // nested tree UseTree type + lexer.skip_token (); + + std::vector> use_trees; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr use_tree = parse_use_tree (); + if (use_tree == nullptr) + { + 
break; + } + + use_trees.push_back (std::move (use_tree)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip end curly delimiter + if (!skip_token (RIGHT_CURLY)) + { + // skip after somewhere? + return nullptr; + } + + if (is_global) + return std::unique_ptr ( + new AST::UseTreeList (AST::UseTreeList::GLOBAL, + AST::SimplePath::create_empty (), + std::move (use_trees), locus)); + else + return std::unique_ptr ( + new AST::UseTreeList (AST::UseTreeList::NO_PATH, + AST::SimplePath::create_empty (), + std::move (use_trees), locus)); + } + case AS: + // this is not allowed + add_error (Error ( + t->get_locus (), + "use declaration with rebind % requires a valid simple path - " + "none found")); + + skip_after_semicolon (); + return nullptr; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in use tree with " + "no valid simple path (i.e. list" + " or glob use tree)", + t->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } + } + else + { + /* Due to aforementioned implementation issues, the trailing :: token is + * consumed by the path, so it can not be used as a disambiguator. + * NOPE, not true anymore - TODO what are the consequences of this? 
*/ + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case ASTERISK: + // glob UseTree type + lexer.skip_token (); + + return std::unique_ptr ( + new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED, + std::move (path), locus)); + case LEFT_CURLY: { + // nested tree UseTree type + lexer.skip_token (); + + std::vector> use_trees; + + // TODO: think of better control structure + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr use_tree = parse_use_tree (); + if (use_tree == nullptr) + { + break; + } + + use_trees.push_back (std::move (use_tree)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip end curly delimiter + if (!skip_token (RIGHT_CURLY)) + { + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED, + std::move (path), std::move (use_trees), + locus)); + } + case AS: { + // rebind UseTree type + lexer.skip_token (); + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + // skip lexer token + lexer.skip_token (); + + return std::unique_ptr ( + new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER, + std::move (path), locus, + t->get_str ())); + case UNDERSCORE: + // skip lexer token + lexer.skip_token (); + + return std::unique_ptr ( + new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD, + std::move (path), locus, "_")); + default: + add_error (Error ( + t->get_locus (), + "unexpected token %qs in use tree with as clause - expected " + "identifier or %<_%>", + t->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } + } + case SEMICOLON: + // rebind UseTree type without rebinding - path only + + // don't skip semicolon - handled in parse_use_tree + // lexer.skip_token(); + + return std::unique_ptr ( + new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path), + 
locus)); + case COMMA: + case RIGHT_CURLY: + // this may occur in recursive calls - assume it is ok and ignore it + return std::unique_ptr ( + new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path), + locus)); + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in use tree with valid path", + t->get_token_description ())); + + // skip_after_semicolon(); + return nullptr; + } + } +} + +// Parses a function (not a method). +template +std::unique_ptr +Parser::parse_function (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + // Get qualifiers for function if they exist + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + skip_token (FN_TOK); + + // Save function name token + const_TokenPtr function_name_tok = expect_token (IDENTIFIER); + if (function_name_tok == nullptr) + { + skip_after_next_block (); + return nullptr; + } + Identifier function_name = function_name_tok->get_str (); + + // parse generic params - if exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + if (!skip_token (LEFT_PAREN)) + { + Error error (lexer.peek_token ()->get_locus (), + "function declaration missing opening parentheses before " + "parameter list"); + add_error (std::move (error)); + + skip_after_next_block (); + return nullptr; + } + + // parse function parameters (only if next token isn't right paren) + std::vector function_params; + if (lexer.peek_token ()->get_id () != RIGHT_PAREN) + function_params + = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); + + if (!skip_token (RIGHT_PAREN)) + { + Error error (lexer.peek_token ()->get_locus (), + "function declaration missing closing parentheses after " + "parameter list"); + add_error (std::move (error)); + + skip_after_next_block (); + return nullptr; + } + + // parse function return type - if exists + std::unique_ptr return_type = parse_function_return_type (); + + // parse where 
clause - if exists + AST::WhereClause where_clause = parse_where_clause (); + + // parse block expression + std::unique_ptr block_expr = parse_block_expr (); + + return std::unique_ptr ( + new AST::Function (std::move (function_name), std::move (qualifiers), + std::move (generic_params), std::move (function_params), + std::move (return_type), std::move (where_clause), + std::move (block_expr), std::move (vis), + std::move (outer_attrs), locus)); +} + +// Parses function or method qualifiers (i.e. const, unsafe, and extern). +template +AST::FunctionQualifiers +Parser::parse_function_qualifiers () +{ + AsyncConstStatus const_status = NONE; + bool has_unsafe = false; + bool has_extern = false; + std::string abi; + + // Check in order of const, unsafe, then extern + const_TokenPtr t = lexer.peek_token (); + Location locus = t->get_locus (); + switch (t->get_id ()) + { + case CONST: + lexer.skip_token (); + const_status = CONST_FN; + break; + case ASYNC: + lexer.skip_token (); + const_status = ASYNC_FN; + break; + default: + // const status is still none + break; + } + + if (lexer.peek_token ()->get_id () == UNSAFE) + { + lexer.skip_token (); + has_unsafe = true; + } + + if (lexer.peek_token ()->get_id () == EXTERN_TOK) + { + lexer.skip_token (); + has_extern = true; + + // detect optional abi name + const_TokenPtr next_tok = lexer.peek_token (); + if (next_tok->get_id () == STRING_LITERAL) + { + lexer.skip_token (); + abi = next_tok->get_str (); + } + } + + return AST::FunctionQualifiers (locus, const_status, has_unsafe, has_extern, + std::move (abi)); +} + +// Parses generic (lifetime or type) params inside angle brackets (optional). 
+template +std::vector> +Parser::parse_generic_params_in_angles () +{ + if (lexer.peek_token ()->get_id () != LEFT_ANGLE) + { + // seems to be no generic params, so exit with empty vector + return std::vector> (); + } + lexer.skip_token (); + + // DEBUG: + rust_debug ("skipped left angle in generic param"); + + std::vector> generic_params + = parse_generic_params (is_right_angle_tok); + + // DEBUG: + rust_debug ("finished parsing actual generic params (i.e. inside angles)"); + + if (!skip_generics_right_angle ()) + { + // DEBUG + rust_debug ("failed to skip generics right angle - returning empty " + "generic params"); + + return std::vector> (); + } + + return generic_params; +} + +template +template +std::unique_ptr +Parser::parse_generic_param (EndTokenPred is_end_token) +{ + auto token = lexer.peek_token (); + auto outer_attrs = parse_outer_attribute (); + std::unique_ptr param; + + switch (token->get_id ()) + { + case LIFETIME: { + auto lifetime = parse_lifetime (); + if (lifetime.is_error ()) + { + rust_error_at ( + token->get_locus (), + "failed to parse lifetime in generic parameter list"); + return nullptr; + } + + std::vector lifetime_bounds; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + // parse required bounds + lifetime_bounds + = parse_lifetime_bounds ([is_end_token] (TokenId id) { + return is_end_token (id) || id == COMMA; + }); + } + + param = std::unique_ptr (new AST::LifetimeParam ( + std::move (lifetime), std::move (lifetime_bounds), + std::move (outer_attrs), token->get_locus ())); + break; + } + case IDENTIFIER: { + auto type_ident = token->get_str (); + lexer.skip_token (); + + std::vector> type_param_bounds; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // parse optional type param bounds + type_param_bounds = parse_type_param_bounds (); + } + + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == EQUAL) + { + lexer.skip_token (); + + // parse required type + type 
= parse_type (); + if (!type) + { + rust_error_at ( + lexer.peek_token ()->get_locus (), + "failed to parse type in type param in generic params"); + return nullptr; + } + } + + param = std::unique_ptr ( + new AST::TypeParam (std::move (type_ident), token->get_locus (), + std::move (type_param_bounds), std::move (type), + std::move (outer_attrs))); + break; + } + case CONST: { + lexer.skip_token (); + + auto name_token = expect_token (IDENTIFIER); + + if (!name_token || !expect_token (COLON)) + return nullptr; + + auto type = parse_type (); + if (!type) + return nullptr; + + // optional default value + auto default_expr = AST::GenericArg::create_error (); + if (lexer.peek_token ()->get_id () == EQUAL) + { + lexer.skip_token (); + auto tok = lexer.peek_token (); + default_expr = parse_generic_arg (); + + if (default_expr.is_error ()) + rust_error_at (tok->get_locus (), + "invalid token for start of default value for " + "const generic parameter: expected %, " + "% or %, got %qs", + token_id_to_str (tok->get_id ())); + + // At this point, we *know* that we are parsing a const + // expression + if (default_expr.get_kind () == AST::GenericArg::Kind::Either) + default_expr = default_expr.disambiguate_to_const (); + } + + param = std::unique_ptr ( + new AST::ConstGenericParam (name_token->get_str (), std::move (type), + default_expr, std::move (outer_attrs), + token->get_locus ())); + + break; + } + default: + // FIXME: Can we clean this last call with a method call? + rust_error_at (token->get_locus (), + "unexpected token when parsing generic parameters: %qs", + token->get_str ().c_str ()); + return nullptr; + } + + return param; +} + +/* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost + * always parse_generic_params_in_angles is what is wanted. 
*/ +template +template +std::vector> +Parser::parse_generic_params (EndTokenPred is_end_token) +{ + std::vector> generic_params; + + /* can't parse lifetime and type params separately due to lookahead issues + * thus, parse them all here */ + + /* HACK: used to retain attribute data if a lifetime param is tentatively + * parsed but it turns out to be type param */ + AST::Attribute parsed_outer_attr = AST::Attribute::create_empty (); + + // Did we parse a generic type param yet + auto type_seen = false; + // Did the user write a lifetime parameter after a type one + auto order_error = false; + + // parse lifetime params + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto param = parse_generic_param (is_end_token); + if (param) + { + // TODO: Handle `Const` here as well if necessary + if (param->get_kind () == AST::GenericParam::Kind::Type) + type_seen = true; + else if (param->get_kind () == AST::GenericParam::Kind::Lifetime + && type_seen) + order_error = true; + + generic_params.emplace_back (std::move (param)); + maybe_skip_token (COMMA); + } + } + + // FIXME: Add reordering hint + if (order_error) + rust_error_at (generic_params.front ()->get_locus (), + "invalid order for generic parameters: lifetimes should " + "always come before types"); + + generic_params.shrink_to_fit (); + return generic_params; +} + +/* Parses lifetime generic parameters (pointers). Will also consume any + * trailing comma. No extra checks for end token. 
*/ +template +std::vector> +Parser::parse_lifetime_params () +{ + std::vector> lifetime_params; + + while (lexer.peek_token ()->get_id () != END_OF_FILE) + { + AST::LifetimeParam lifetime_param = parse_lifetime_param (); + + if (lifetime_param.is_error ()) + { + // can't treat as error as only way to get out with trailing comma + break; + } + + lifetime_params.push_back (std::unique_ptr ( + new AST::LifetimeParam (std::move (lifetime_param)))); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses lifetime generic parameters (pointers). Will also consume any + * trailing comma. Has extra is_end_token predicate checking. */ +template +template +std::vector> +Parser::parse_lifetime_params (EndTokenPred is_end_token) +{ + std::vector> lifetime_params; + + // if end_token is not specified, it defaults to EOF, so should work fine + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + AST::LifetimeParam lifetime_param = parse_lifetime_param (); + + if (lifetime_param.is_error ()) + { + /* TODO: is it worth throwing away all lifetime params just because + * one failed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime param in lifetime params"); + add_error (std::move (error)); + + return {}; + } + + lifetime_params.push_back (std::unique_ptr ( + new AST::LifetimeParam (std::move (lifetime_param)))); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses lifetime generic parameters (objects). Will also consume any + * trailing comma. No extra checks for end token. + * TODO: is this best solution? implements most of the same algorithm. 
*/ +template +std::vector +Parser::parse_lifetime_params_objs () +{ + std::vector lifetime_params; + + // bad control structure as end token cannot be guaranteed + while (true) + { + AST::LifetimeParam lifetime_param = parse_lifetime_param (); + + if (lifetime_param.is_error ()) + { + // not an error as only way to exit if trailing comma + break; + } + + lifetime_params.push_back (std::move (lifetime_param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses lifetime generic parameters (objects). Will also consume any + * trailing comma. Has extra is_end_token predicate checking. + * TODO: is this best solution? implements most of the same algorithm. */ +template +template +std::vector +Parser::parse_lifetime_params_objs ( + EndTokenPred is_end_token) +{ + std::vector lifetime_params; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + AST::LifetimeParam lifetime_param = parse_lifetime_param (); + + if (lifetime_param.is_error ()) + { + /* TODO: is it worth throwing away all lifetime params just because + * one failed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime param in lifetime params"); + add_error (std::move (error)); + + return {}; + } + + lifetime_params.push_back (std::move (lifetime_param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses a sequence of a certain grammar rule in object form (not pointer or + * smart pointer), delimited by commas and ending when 'is_end_token' is + * satisfied (templated). Will also consume any trailing comma. + * FIXME: this cannot be used due to member function pointer problems (i.e. 
+ * parsing_function cannot be specified properly) */ +template +template +auto +Parser::parse_non_ptr_sequence ( + ParseFunction parsing_function, EndTokenPred is_end_token, + std::string error_msg) -> std::vector +{ + std::vector params; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto param = parsing_function (); + + if (param.is_error ()) + { + // TODO: is it worth throwing away all params just because one + // failed? + Error error (lexer.peek_token ()->get_locus (), + std::move (error_msg)); + add_error (std::move (error)); + + return {}; + } + + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + params.shrink_to_fit (); + + return params; +} + +/* Parses a single lifetime generic parameter (not including comma). */ +template +AST::LifetimeParam +Parser::parse_lifetime_param () +{ + // parse outer attribute, which is optional and may not exist + AST::Attribute outer_attr = parse_outer_attribute (); + + // save lifetime token - required + const_TokenPtr lifetime_tok = lexer.peek_token (); + if (lifetime_tok->get_id () != LIFETIME) + { + // if lifetime is missing, must not be a lifetime param, so return null + return AST::LifetimeParam::create_error (); + } + lexer.skip_token (); + /* TODO: does this always create a named lifetime? or can a different type + * be made? */ + AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (), + lifetime_tok->get_locus ()); + + // parse lifetime bounds, if it exists + std::vector lifetime_bounds; + if (lexer.peek_token ()->get_id () == COLON) + { + // parse lifetime bounds + lifetime_bounds = parse_lifetime_bounds (); + // TODO: have end token passed in? + } + + return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds), + std::move (outer_attr), + lifetime_tok->get_locus ()); +} + +// Parses type generic parameters. 
Will also consume any trailing comma. +template +std::vector> +Parser::parse_type_params () +{ + std::vector> type_params; + + // infinite loop with break on failure as no info on ending token + while (true) + { + std::unique_ptr type_param = parse_type_param (); + + if (type_param == nullptr) + { + // break if fails to parse + break; + } + + type_params.push_back (std::move (type_param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + type_params.shrink_to_fit (); + return type_params; +} + +// Parses type generic parameters. Will also consume any trailing comma. +template +template +std::vector> +Parser::parse_type_params (EndTokenPred is_end_token) +{ + std::vector> type_params; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + std::unique_ptr type_param = parse_type_param (); + + if (type_param == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type param in type params"); + add_error (std::move (error)); + + return {}; + } + + type_params.push_back (std::move (type_param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + type_params.shrink_to_fit (); + return type_params; + /* TODO: this shares most code with parse_lifetime_params - good place to + * use template (i.e. parse_non_ptr_sequence if doable) */ +} + +/* Parses a single type (generic) parameter, not including commas. May change + * to return value. 
*/ +template +std::unique_ptr +Parser::parse_type_param () +{ + // parse outer attribute, which is optional and may not exist + AST::Attribute outer_attr = parse_outer_attribute (); + + const_TokenPtr identifier_tok = lexer.peek_token (); + if (identifier_tok->get_id () != IDENTIFIER) + { + // return null as type param can't exist without this required + // identifier + return nullptr; + } + // TODO: create identifier from identifier token + Identifier ident = identifier_tok->get_str (); + lexer.skip_token (); + + // parse type param bounds (if they exist) + std::vector> type_param_bounds; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // parse type param bounds, which may or may not exist + type_param_bounds = parse_type_param_bounds (); + } + + // parse type (if it exists) + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == EQUAL) + { + lexer.skip_token (); + + // parse type (now required) + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in type param"); + add_error (std::move (error)); + + return nullptr; + } + } + + return std::unique_ptr ( + new AST::TypeParam (std::move (ident), identifier_tok->get_locus (), + std::move (type_param_bounds), std::move (type), + std::move (outer_attr))); +} + +/* Parses regular (i.e. non-generic) parameters in functions or methods. Also + * has end token handling. */ +template +template +std::vector +Parser::parse_function_params (EndTokenPred is_end_token) +{ + std::vector params; + + if (is_end_token (lexer.peek_token ()->get_id ())) + return params; + + AST::FunctionParam initial_param = parse_function_param (); + + // Return empty parameter list if no parameter there + if (initial_param.is_error ()) + { + // TODO: is this an error? 
+ return params; + } + + params.push_back (std::move (initial_param)); + + // maybe think of a better control structure here - do-while with an initial + // error state? basically, loop through parameter list until can't find any + // more params + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + // skip comma if applies + lexer.skip_token (); + + // TODO: strictly speaking, shouldn't there be no trailing comma? + if (is_end_token (lexer.peek_token ()->get_id ())) + break; + + // now, as right paren would break, function param is required + AST::FunctionParam param = parse_function_param (); + if (param.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse function param (in function params)"); + add_error (std::move (error)); + + // skip somewhere? + return std::vector (); + } + + params.push_back (std::move (param)); + + t = lexer.peek_token (); + } + + params.shrink_to_fit (); + return params; +} + +/* Parses a single regular (i.e. non-generic) parameter in a function or + * method, i.e. the "name: type" bit. Also handles it not existing. */ +template +AST::FunctionParam +Parser::parse_function_param () +{ + // parse outer attributes if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // TODO: should saved location be at start of outer attributes or pattern? + Location locus = lexer.peek_token ()->get_locus (); + std::unique_ptr param_pattern = parse_pattern (); + + // create error function param if it doesn't exist + if (param_pattern == nullptr) + { + // skip after something + return AST::FunctionParam::create_error (); + } + + if (!skip_token (COLON)) + { + // skip after something + return AST::FunctionParam::create_error (); + } + + std::unique_ptr param_type = parse_type (); + if (param_type == nullptr) + { + // skip? 
+ return AST::FunctionParam::create_error (); + } + + return AST::FunctionParam (std::move (param_pattern), std::move (param_type), + std::move (outer_attrs), locus); +} + +/* Parses a function or method return type syntactical construction. Also + * handles a function return type not existing. */ +template +std::unique_ptr +Parser::parse_function_return_type () +{ + if (lexer.peek_token ()->get_id () != RETURN_TYPE) + return nullptr; + + // skip return type, as it now obviously exists + lexer.skip_token (); + + std::unique_ptr type = parse_type (); + + return type; +} + +/* Parses a "where clause" (in a function, struct, method, etc.). Also handles + * a where clause not existing, in which it will return + * WhereClause::create_empty(), which can be checked via + * WhereClause::is_empty(). */ +template +AST::WhereClause +Parser::parse_where_clause () +{ + const_TokenPtr where_tok = lexer.peek_token (); + if (where_tok->get_id () != WHERE) + { + // where clause doesn't exist, so create empty one + return AST::WhereClause::create_empty (); + } + + lexer.skip_token (); + + /* parse where clause items - this is not a separate rule in the reference + * so won't be here */ + std::vector> where_clause_items; + + /* HACK: where clauses end with a right curly or semicolon or equals in all + * uses currently */ + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON + && t->get_id () != EQUAL) + { + std::unique_ptr where_clause_item + = parse_where_clause_item (); + + if (where_clause_item == nullptr) + { + Error error (t->get_locus (), "failed to parse where clause item"); + add_error (std::move (error)); + + return AST::WhereClause::create_empty (); + } + + where_clause_items.push_back (std::move (where_clause_item)); + + // also skip comma if it exists + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + where_clause_items.shrink_to_fit (); + return 
AST::WhereClause (std::move (where_clause_items)); +} + +/* Parses a where clause item (lifetime or type bound). Does not parse any + * commas. */ +template +std::unique_ptr +Parser::parse_where_clause_item () +{ + // shitty cheat way of determining lifetime or type bound - test for + // lifetime + const_TokenPtr t = lexer.peek_token (); + + if (t->get_id () == LIFETIME) + return parse_lifetime_where_clause_item (); + else + return parse_type_bound_where_clause_item (); +} + +// Parses a lifetime where clause item. +template +std::unique_ptr +Parser::parse_lifetime_where_clause_item () +{ + AST::Lifetime lifetime = parse_lifetime (); + if (lifetime.is_error ()) + { + // TODO: error here? + return nullptr; + } + + if (!skip_token (COLON)) + { + // TODO: skip after somewhere + return nullptr; + } + + std::vector lifetime_bounds = parse_lifetime_bounds (); + // TODO: have end token passed in? + + Location locus = lifetime.get_locus (); + + return std::unique_ptr ( + new AST::LifetimeWhereClauseItem (std::move (lifetime), + std::move (lifetime_bounds), locus)); +} + +// Parses a type bound where clause item. +template +std::unique_ptr +Parser::parse_type_bound_where_clause_item () +{ + // parse for lifetimes, if it exists + std::vector for_lifetimes; + if (lexer.peek_token ()->get_id () == FOR) + for_lifetimes = parse_for_lifetimes (); + + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + return nullptr; + } + + if (!skip_token (COLON)) + { + // TODO: skip after somewhere + return nullptr; + } + + // parse type param bounds if they exist + std::vector> type_param_bounds + = parse_type_param_bounds (); + + Location locus = lexer.peek_token ()->get_locus (); + + return std::unique_ptr ( + new AST::TypeBoundWhereClauseItem (std::move (for_lifetimes), + std::move (type), + std::move (type_param_bounds), locus)); +} + +// Parses a for lifetimes clause, including the for keyword and angle +// brackets. 
+template +std::vector +Parser::parse_for_lifetimes () +{ + std::vector params; + + if (!skip_token (FOR)) + { + // skip after somewhere? + return params; + } + + if (!skip_token (LEFT_ANGLE)) + { + // skip after somewhere? + return params; + } + + /* cannot specify end token due to parsing problems with '>' tokens being + * nested */ + params = parse_lifetime_params_objs (is_right_angle_tok); + + if (!skip_generics_right_angle ()) + { + // DEBUG + rust_debug ("failed to skip generics right angle after (supposedly) " + "finished parsing where clause items"); + // ok, well this gets called. + + // skip after somewhere? + return params; + } + + return params; +} + +// Parses type parameter bounds in where clause or generic arguments. +template +std::vector> +Parser::parse_type_param_bounds () +{ + std::vector> type_param_bounds; + + std::unique_ptr initial_bound + = parse_type_param_bound (); + + // quick exit if null + if (initial_bound == nullptr) + { + /* error? type param bounds must have at least one term, but are bounds + * optional? */ + return type_param_bounds; + } + type_param_bounds.push_back (std::move (initial_bound)); + + while (lexer.peek_token ()->get_id () == PLUS) + { + lexer.skip_token (); + + std::unique_ptr bound = parse_type_param_bound (); + if (bound == nullptr) + { + /* not an error: bound is allowed to be null as trailing plus is + * allowed */ + return type_param_bounds; + } + + type_param_bounds.push_back (std::move (bound)); + } + + type_param_bounds.shrink_to_fit (); + return type_param_bounds; +} + +/* Parses type parameter bounds in where clause or generic arguments, with end + * token handling. */ +template +template +std::vector> +Parser::parse_type_param_bounds (EndTokenPred is_end_token) +{ + std::vector> type_param_bounds; + + std::unique_ptr initial_bound + = parse_type_param_bound (); + + // quick exit if null + if (initial_bound == nullptr) + { + /* error? 
type param bounds must have at least one term, but are bounds + * optional? */ + return type_param_bounds; + } + type_param_bounds.push_back (std::move (initial_bound)); + + while (lexer.peek_token ()->get_id () == PLUS) + { + lexer.skip_token (); + + // break if end token character + if (is_end_token (lexer.peek_token ()->get_id ())) + break; + + std::unique_ptr bound = parse_type_param_bound (); + if (bound == nullptr) + { + // TODO how wise is it to ditch all bounds if only one failed? + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type param bound in type param bounds"); + add_error (std::move (error)); + + return {}; + } + + type_param_bounds.push_back (std::move (bound)); + } + + type_param_bounds.shrink_to_fit (); + return type_param_bounds; +} + +/* Parses a single type parameter bound in a where clause or generic argument. + * Does not parse the '+' between arguments. */ +template +std::unique_ptr +Parser::parse_type_param_bound () +{ + // shitty cheat way of determining lifetime or trait bound - test for + // lifetime + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LIFETIME: + return std::unique_ptr ( + new AST::Lifetime (parse_lifetime ())); + case LEFT_PAREN: + case QUESTION_MARK: + case FOR: + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + return parse_trait_bound (); + default: + // don't error - assume this is fine TODO + return nullptr; + } +} + +// Parses a trait bound type param bound. 
+template +std::unique_ptr +Parser::parse_trait_bound () +{ + bool has_parens = false; + bool has_question_mark = false; + + Location locus = lexer.peek_token ()->get_locus (); + + // handle trait bound being in parentheses + if (lexer.peek_token ()->get_id () == LEFT_PAREN) + { + has_parens = true; + lexer.skip_token (); + } + + // handle having question mark (optional) + if (lexer.peek_token ()->get_id () == QUESTION_MARK) + { + has_question_mark = true; + lexer.skip_token (); + } + + /* parse for lifetimes, if it exists (although empty for lifetimes is ok to + * handle this) */ + std::vector for_lifetimes; + if (lexer.peek_token ()->get_id () == FOR) + for_lifetimes = parse_for_lifetimes (); + + // handle TypePath + AST::TypePath type_path = parse_type_path (); + + // handle closing parentheses + if (has_parens) + { + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + } + + return std::unique_ptr ( + new AST::TraitBound (std::move (type_path), locus, has_parens, + has_question_mark, std::move (for_lifetimes))); +} + +// Parses lifetime bounds. +template +std::vector +Parser::parse_lifetime_bounds () +{ + std::vector lifetime_bounds; + + while (true) + { + AST::Lifetime lifetime = parse_lifetime (); + + // quick exit for parsing failure + if (lifetime.is_error ()) + break; + + lifetime_bounds.push_back (std::move (lifetime)); + + /* plus is maybe not allowed at end - spec defines it weirdly, so + * assuming allowed at end */ + if (lexer.peek_token ()->get_id () != PLUS) + break; + + lexer.skip_token (); + } + + lifetime_bounds.shrink_to_fit (); + return lifetime_bounds; +} + +// Parses lifetime bounds, with added check for ending token. 
+template +template +std::vector +Parser::parse_lifetime_bounds (EndTokenPred is_end_token) +{ + std::vector lifetime_bounds; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + AST::Lifetime lifetime = parse_lifetime (); + + if (lifetime.is_error ()) + { + /* TODO: is it worth throwing away all lifetime bound info just + * because one failed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime in lifetime bounds"); + add_error (std::move (error)); + + return {}; + } + + lifetime_bounds.push_back (std::move (lifetime)); + + /* plus is maybe not allowed at end - spec defines it weirdly, so + * assuming allowed at end */ + if (lexer.peek_token ()->get_id () != PLUS) + break; + + lexer.skip_token (); + } + + lifetime_bounds.shrink_to_fit (); + return lifetime_bounds; +} + +/* Parses a lifetime token (named, 'static, or '_). Also handles lifetime not + * existing. */ +template +AST::Lifetime +Parser::parse_lifetime () +{ + const_TokenPtr lifetime_tok = lexer.peek_token (); + Location locus = lifetime_tok->get_locus (); + // create error lifetime if doesn't exist + if (lifetime_tok->get_id () != LIFETIME) + { + return AST::Lifetime::error (); + } + lexer.skip_token (); + + std::string lifetime_ident = lifetime_tok->get_str (); + + if (lifetime_ident == "'static") + { + return AST::Lifetime (AST::Lifetime::STATIC, "", locus); + } + else if (lifetime_ident == "'_") + { + return AST::Lifetime (AST::Lifetime::WILDCARD, "", locus); + } + else + { + return AST::Lifetime (AST::Lifetime::NAMED, std::move (lifetime_ident), + locus); + } +} + +// Parses a "type alias" (typedef) item. 
+template +std::unique_ptr +Parser::parse_type_alias (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (TYPE); + + // TODO: use this token for identifier when finished that + const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); + if (alias_name_tok == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse identifier in type alias"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + Identifier alias_name = alias_name_tok->get_str (); + + // parse generic params, which may not exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + // parse where clause, which may not exist + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (EQUAL)) + { + skip_after_semicolon (); + return nullptr; + } + + std::unique_ptr type_to_alias = parse_type (); + + if (!skip_token (SEMICOLON)) + { + // should be skipping past this, not the next line + return nullptr; + } + + return std::unique_ptr ( + new AST::TypeAlias (std::move (alias_name), std::move (generic_params), + std::move (where_clause), std::move (type_to_alias), + std::move (vis), std::move (outer_attrs), locus)); +} + +// Parse a struct item AST node. +template +std::unique_ptr +Parser::parse_struct (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + /* TODO: determine best way to parse the proper struct vs tuple struct - + * share most of initial constructs so lookahead might be impossible, and if + * not probably too expensive. Best way is probably unified parsing for the + * initial parts and then pass them in as params to more derived functions. + * Alternatively, just parse everything in this one function - do this if + * function not too long. */ + + /* Proper struct <- 'struct' IDENTIFIER generic_params? where_clause? ( '{' + * struct_fields? '}' | ';' ) */ + /* Tuple struct <- 'struct' IDENTIFIER generic_params? '(' tuple_fields? 
')' + * where_clause? ';' */ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (STRUCT_TOK); + + // parse struct name + const_TokenPtr name_tok = expect_token (IDENTIFIER); + if (name_tok == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse struct or tuple struct identifier"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + Identifier struct_name = name_tok->get_str (); + + // parse generic params, which may or may not exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + // branch on next token - determines whether proper struct or tuple struct + if (lexer.peek_token ()->get_id () == LEFT_PAREN) + { + // tuple struct + + // skip left parenthesis + lexer.skip_token (); + + // parse tuple fields + std::vector tuple_fields; + // Might be empty tuple for unit tuple struct. + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + tuple_fields = std::vector (); + else + tuple_fields = parse_tuple_fields (); + + // tuple parameters must have closing parenthesis + if (!skip_token (RIGHT_PAREN)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse where clause, which is optional + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (SEMICOLON)) + { + // can't skip after semicolon because it's meant to be here + return nullptr; + } + + return std::unique_ptr ( + new AST::TupleStruct (std::move (tuple_fields), std::move (struct_name), + std::move (generic_params), + std::move (where_clause), std::move (vis), + std::move (outer_attrs), locus)); + } + + // assume it is a proper struct being parsed and continue outside of switch + // - label only here to suppress warning + + // parse where clause, which is optional + AST::WhereClause where_clause = parse_where_clause (); + + // branch on next token - determines whether struct is a unit struct + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: { + 
// struct with body + + // skip curly bracket + lexer.skip_token (); + + // parse struct fields, if any + std::vector struct_fields + = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere? + return nullptr; + } + + return std::unique_ptr (new AST::StructStruct ( + std::move (struct_fields), std::move (struct_name), + std::move (generic_params), std::move (where_clause), false, + std::move (vis), std::move (outer_attrs), locus)); + } + case SEMICOLON: + // unit struct declaration + + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructStruct (std::move (struct_name), + std::move (generic_params), + std::move (where_clause), std::move (vis), + std::move (outer_attrs), locus)); + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in struct declaration", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } +} + +// Parses struct fields in struct declarations. +template +std::vector +Parser::parse_struct_fields () +{ + std::vector fields; + + AST::StructField initial_field = parse_struct_field (); + + // Return empty field list if no field there + if (initial_field.is_error ()) + return fields; + + fields.push_back (std::move (initial_field)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + AST::StructField field = parse_struct_field (); + + if (field.is_error ()) + { + // would occur with trailing comma, so allowed + break; + } + + fields.push_back (std::move (field)); + } + + fields.shrink_to_fit (); + return fields; + // TODO: template if possible (parse_non_ptr_seq) +} + +// Parses struct fields in struct declarations. 
+template +template +std::vector +Parser::parse_struct_fields (EndTokenPred is_end_tok) +{ + std::vector fields; + + AST::StructField initial_field = parse_struct_field (); + + // Return empty field list if no field there + if (initial_field.is_error ()) + return fields; + + fields.push_back (std::move (initial_field)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + if (is_end_tok (lexer.peek_token ()->get_id ())) + break; + + AST::StructField field = parse_struct_field (); + if (field.is_error ()) + { + /* TODO: should every field be ditched just because one couldn't be + * parsed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct field in struct fields"); + add_error (std::move (error)); + + return {}; + } + + fields.push_back (std::move (field)); + } + + fields.shrink_to_fit (); + return fields; + // TODO: template if possible (parse_non_ptr_seq) +} + +// Parses a single struct field (in a struct definition). Does not parse +// commas. +template +AST::StructField +Parser::parse_struct_field () +{ + // parse outer attributes, if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse visibility, if it exists + AST::Visibility vis = parse_visibility (); + + Location locus = lexer.peek_token ()->get_locus (); + + // parse field name + const_TokenPtr field_name_tok = lexer.peek_token (); + if (field_name_tok->get_id () != IDENTIFIER) + { + // if not identifier, assumes there is no struct field and exits - not + // necessarily error + return AST::StructField::create_error (); + } + Identifier field_name = field_name_tok->get_str (); + lexer.skip_token (); + + if (!skip_token (COLON)) + { + // skip after somewhere? 
+ return AST::StructField::create_error (); + } + + // parse field type - this is required + std::unique_ptr field_type = parse_type (); + if (field_type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in struct field definition"); + add_error (std::move (error)); + + // skip after somewhere + return AST::StructField::create_error (); + } + + return AST::StructField (std::move (field_name), std::move (field_type), + std::move (vis), locus, std::move (outer_attrs)); +} + +// Parses tuple fields in tuple/tuple struct declarations. +template +std::vector +Parser::parse_tuple_fields () +{ + std::vector fields; + + AST::TupleField initial_field = parse_tuple_field (); + + // Return empty field list if no field there + if (initial_field.is_error ()) + { + return fields; + } + + fields.push_back (std::move (initial_field)); + + // maybe think of a better control structure here - do-while with an initial + // error state? basically, loop through field list until can't find any more + // params HACK: all current syntax uses of tuple fields have them ending + // with a right paren token + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + // skip comma if applies - e.g. trailing comma + lexer.skip_token (); + + // break out due to right paren if it exists + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + break; + } + + AST::TupleField field = parse_tuple_field (); + if (field.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple field in tuple fields"); + add_error (std::move (error)); + + return std::vector (); + } + + fields.push_back (std::move (field)); + + t = lexer.peek_token (); + } + + fields.shrink_to_fit (); + return fields; + + // TODO: this shares basically all code with function params and struct + // fields + // - templates? +} + +/* Parses a single tuple struct field in a tuple struct definition. Does not + * parse commas. 
*/ +template +AST::TupleField +Parser::parse_tuple_field () +{ + // parse outer attributes if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse visibility if it exists + AST::Visibility vis = parse_visibility (); + + Location locus = lexer.peek_token ()->get_locus (); + + // parse type, which is required + std::unique_ptr field_type = parse_type (); + if (field_type == nullptr) + { + // error if null + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in tuple struct field"); + add_error (std::move (error)); + + // skip after something + return AST::TupleField::create_error (); + } + + return AST::TupleField (std::move (field_type), std::move (vis), locus, + std::move (outer_attrs)); +} + +// Parses a Rust "enum" tagged union item definition. +template +std::unique_ptr +Parser::parse_enum (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (ENUM_TOK); + + // parse enum name + const_TokenPtr enum_name_tok = expect_token (IDENTIFIER); + if (enum_name_tok == nullptr) + return nullptr; + + Identifier enum_name = enum_name_tok->get_str (); + + // parse generic params (of enum container, not enum variants) if they exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + // parse where clause if it exists + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + // parse actual enum variant definitions + std::vector> enum_items + = parse_enum_items ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + return std::unique_ptr ( + new AST::Enum (std::move (enum_name), std::move (vis), + std::move (generic_params), std::move (where_clause), + std::move (enum_items), std::move (outer_attrs), locus)); +} + +// Parses the enum variants inside an enum definiton. 
+template +std::vector> +Parser::parse_enum_items () +{ + std::vector> items; + + std::unique_ptr initial_item = parse_enum_item (); + + // Return empty item list if no field there + if (initial_item == nullptr) + return items; + + items.push_back (std::move (initial_item)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + std::unique_ptr item = parse_enum_item (); + if (item == nullptr) + { + // this would occur with a trailing comma, which is allowed + break; + } + + items.push_back (std::move (item)); + } + + items.shrink_to_fit (); + return items; + + /* TODO: use template if doable (parse_non_ptr_sequence) */ +} + +// Parses the enum variants inside an enum definiton. +template +template +std::vector> +Parser::parse_enum_items (EndTokenPred is_end_tok) +{ + std::vector> items; + + std::unique_ptr initial_item = parse_enum_item (); + + // Return empty item list if no field there + if (initial_item == nullptr) + return items; + + items.push_back (std::move (initial_item)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + if (is_end_tok (lexer.peek_token ()->get_id ())) + break; + + std::unique_ptr item = parse_enum_item (); + if (item == nullptr) + { + /* TODO should this ignore all successfully parsed enum items just + * because one failed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse enum item in enum items"); + add_error (std::move (error)); + + return {}; + } + + items.push_back (std::move (item)); + } + + items.shrink_to_fit (); + return items; + + /* TODO: use template if doable (parse_non_ptr_sequence) */ +} + +/* Parses a single enum variant item in an enum definition. Does not parse + * commas. 
*/ +template +std::unique_ptr +Parser::parse_enum_item () +{ + // parse outer attributes if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse visibility, which may or may not exist + AST::Visibility vis = parse_visibility (); + + // parse name for enum item, which is required + const_TokenPtr item_name_tok = lexer.peek_token (); + if (item_name_tok->get_id () != IDENTIFIER) + { + // this may not be an error but it means there is no enum item here + return nullptr; + } + lexer.skip_token (); + Identifier item_name = item_name_tok->get_str (); + + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: { + // tuple enum item + lexer.skip_token (); + + std::vector tuple_fields; + // Might be empty tuple for unit tuple enum variant. + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + tuple_fields = std::vector (); + else + tuple_fields = parse_tuple_fields (); + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr (new AST::EnumItemTuple ( + std::move (item_name), std::move (vis), std::move (tuple_fields), + std::move (outer_attrs), item_name_tok->get_locus ())); + } + case LEFT_CURLY: { + // struct enum item + lexer.skip_token (); + + std::vector struct_fields + = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr (new AST::EnumItemStruct ( + std::move (item_name), std::move (vis), std::move (struct_fields), + std::move (outer_attrs), item_name_tok->get_locus ())); + } + case EQUAL: { + // discriminant enum item + lexer.skip_token (); + + std::unique_ptr discriminant_expr = parse_expr (); + + return std::unique_ptr ( + new AST::EnumItemDiscriminant (std::move (item_name), std::move (vis), + std::move (discriminant_expr), + std::move (outer_attrs), + item_name_tok->get_locus ())); + } + 
default: + // regular enum with just an identifier + return std::unique_ptr ( + new AST::EnumItem (std::move (item_name), std::move (vis), + std::move (outer_attrs), + item_name_tok->get_locus ())); + } +} + +// Parses a C-style (and C-compat) untagged union declaration. +template +std::unique_ptr +Parser::parse_union (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + /* hack - "weak keyword" by finding identifier called "union" (lookahead in + * item switch) */ + const_TokenPtr union_keyword = expect_token (IDENTIFIER); + rust_assert (union_keyword->get_str () == "union"); + Location locus = union_keyword->get_locus (); + + // parse actual union name + const_TokenPtr union_name_tok = expect_token (IDENTIFIER); + if (union_name_tok == nullptr) + { + skip_after_next_block (); + return nullptr; + } + Identifier union_name = union_name_tok->get_str (); + + // parse optional generic parameters + std::vector> generic_params + = parse_generic_params_in_angles (); + + // parse optional where clause + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + /* parse union inner items as "struct fields" because hey, syntax reuse. + * Spec said so. */ + std::vector union_fields + = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::Union (std::move (union_name), std::move (vis), + std::move (generic_params), std::move (where_clause), + std::move (union_fields), std::move (outer_attrs), locus)); +} + +/* Parses a "constant item" (compile-time constant to maybe "inline" + * throughout the program - like constexpr). 
*/ +template +std::unique_ptr +Parser::parse_const_item (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (CONST); + + /* get constant identifier - this is either a proper identifier or the _ + * wildcard */ + const_TokenPtr ident_tok = lexer.peek_token (); + // make default identifier the underscore wildcard one + std::string ident ("_"); + switch (ident_tok->get_id ()) + { + case IDENTIFIER: + ident = ident_tok->get_str (); + lexer.skip_token (); + break; + case UNDERSCORE: + // do nothing - identifier is already "_" + lexer.skip_token (); + break; + default: + add_error ( + Error (ident_tok->get_locus (), + "expected item name (identifier or %<_%>) in constant item " + "declaration - found %qs", + ident_tok->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (COLON)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse constant type (required) + std::unique_ptr type = parse_type (); + + if (!skip_token (EQUAL)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse constant expression (required) + std::unique_ptr expr = parse_expr (); + + if (!skip_token (SEMICOLON)) + { + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ConstantItem (std::move (ident), std::move (vis), std::move (type), + std::move (expr), std::move (outer_attrs), locus)); +} + +// Parses a "static item" (static storage item, with 'static lifetime). 
+template +std::unique_ptr +Parser::parse_static_item (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (STATIC_TOK); + + // determine whether static item is mutable + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + is_mut = true; + lexer.skip_token (); + } + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident = ident_tok->get_str (); + + if (!skip_token (COLON)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse static item type (required) + std::unique_ptr type = parse_type (); + + if (!skip_token (EQUAL)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse static item expression (required) + std::unique_ptr expr = parse_expr (); + + if (!skip_token (SEMICOLON)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::StaticItem (std::move (ident), is_mut, std::move (type), + std::move (expr), std::move (vis), + std::move (outer_attrs), locus)); +} + +// Parses a trait definition item, including unsafe ones. 
+template +std::unique_ptr +Parser::parse_trait (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + bool is_unsafe = false; + if (lexer.peek_token ()->get_id () == UNSAFE) + { + is_unsafe = true; + lexer.skip_token (); + } + + skip_token (TRAIT); + + // parse trait name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident = ident_tok->get_str (); + + // parse generic parameters (if they exist) + std::vector> generic_params + = parse_generic_params_in_angles (); + + // create placeholder type param bounds in case they don't exist + std::vector> type_param_bounds; + + // parse type param bounds (if they exist) + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + type_param_bounds = parse_type_param_bounds ( + [] (TokenId id) { return id == WHERE || id == LEFT_CURLY; }); + // type_param_bounds = parse_type_param_bounds (); + } + + // parse where clause (if it exists) + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + // parse inner attrs (if they exist) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse trait items + std::vector> trait_items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr trait_item = parse_trait_item (); + + if (trait_item == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse trait item in trait"); + add_error (std::move (error)); + + return nullptr; + } + trait_items.push_back (std::move (trait_item)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip after something + return nullptr; + } + + trait_items.shrink_to_fit (); + return std::unique_ptr ( + new AST::Trait (std::move (ident), is_unsafe, std::move (generic_params), + std::move (type_param_bounds), std::move 
(where_clause),
+                        std::move (trait_items), std::move (vis),
+                        std::move (outer_attrs), std::move (inner_attrs), locus));
+}
+
+/* Parses a single item inside a trait body (not the trait Item itself).
+ * After any outer attributes, the next token selects the production:
+ *   TYPE                           -> parse_trait_type
+ *   CONST followed by IDENTIFIER   -> parse_trait_const
+ *   CONST (otherwise), UNSAFE, EXTERN_TOK, FN_TOK
+ *                                  -> trait function or method, parsed here
+ *   anything else                  -> parse_macro_invocation_semi
+ * Returns nullptr if the item could not be parsed. */
+template
+std::unique_ptr
+Parser::parse_trait_item ()
+{
+  // parse outer attributes (if they exist)
+  AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+  /* lookahead to determine what type of trait item to parse; this token's
+   * location is also the one recorded on function and method items */
+  const_TokenPtr tok = lexer.peek_token ();
+  switch (tok->get_id ())
+    {
+    case TYPE:
+      return parse_trait_type (std::move (outer_attrs));
+    case CONST:
+      // disambiguate with function qualifier
+      if (lexer.peek_token (1)->get_id () == IDENTIFIER)
+        {
+          return parse_trait_const (std::move (outer_attrs));
+        }
+      // else, fallthrough to function
+      // TODO: find out how to disable gcc "implicit fallthrough" error
+      gcc_fallthrough ();
+    case UNSAFE:
+    case EXTERN_TOK:
+      case FN_TOK: {
+        /* function and method can't be disambiguated by lookahead alone
+         * (without a lot of work and waste), so either make a
+         * "parse_trait_function_or_method" or parse here mostly and pass in
+         * most parameters (or if short enough, parse whole thing here). */
+        // parse function and method here
+
+        // parse function or method qualifiers
+        AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();
+
+        skip_token (FN_TOK);
+
+        // parse function or method name
+        const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+        if (ident_tok == nullptr)
+          return nullptr;
+
+        Identifier ident = ident_tok->get_str ();
+
+        // parse generic params
+        std::vector> generic_params
+          = parse_generic_params_in_angles ();
+
+        if (!skip_token (LEFT_PAREN))
+          {
+            // skip after somewhere?
+            return nullptr;
+          }
+
+        /* now for function vs method disambiguation - method has opening
+         * "self" param */
+        AST::SelfParam self_param = parse_self_param ();
+        /* FIXME: ensure that self param doesn't accidentally consume tokens
+         * for a function */
+        bool is_method = false;
+        if (!self_param.is_error ())
+          {
+            is_method = true;
+
+            /* skip comma so function and method regular params can be parsed
+             * in same way */
+            if (lexer.peek_token ()->get_id () == COMMA)
+              lexer.skip_token ();
+          }
+
+        // parse trait function params (the lambda marks the list terminator)
+        std::vector function_params
+          = parse_function_params (
+            [] (TokenId id) { return id == RIGHT_PAREN; });
+
+        if (!skip_token (RIGHT_PAREN))
+          {
+            // skip after somewhere?
+            return nullptr;
+          }
+
+        // parse return type (optional)
+        std::unique_ptr return_type = parse_function_return_type ();
+
+        // parse where clause (optional)
+        AST::WhereClause where_clause = parse_where_clause ();
+
+        // parse semicolon or function definition (in block)
+        const_TokenPtr t = lexer.peek_token ();
+        std::unique_ptr definition = nullptr;
+        switch (t->get_id ())
+          {
+          case SEMICOLON:
+            lexer.skip_token ();
+            // definition is already nullptr, so don't need to change it
+            break;
+          case LEFT_CURLY:
+            definition = parse_block_expr ();
+            /* FIXME: are these outer attributes meant to be passed into the
+             * block? */
+            break;
+          default:
+            add_error (
+              Error (t->get_locus (),
+                     "expected %<;%> or definition at the end of trait %s "
+                     "definition - found %qs instead",
+                     is_method ? "method" : "function",
+                     t->get_token_description ()));
+
+            // skip?
+            return nullptr;
+          }
+
+        // do actual if instead of ternary for return value optimisation
+        if (is_method)
+          {
+            AST::TraitMethodDecl method_decl (std::move (ident),
+                                              std::move (qualifiers),
+                                              std::move (generic_params),
+                                              std::move (self_param),
+                                              std::move (function_params),
+                                              std::move (return_type),
+                                              std::move (where_clause));
+
+            // TODO: does this (method_decl) need move?
+            return std::unique_ptr (
+              new AST::TraitItemMethod (std::move (method_decl),
+                                        std::move (definition),
+                                        std::move (outer_attrs),
+                                        tok->get_locus ()));
+          }
+        else
+          {
+            AST::TraitFunctionDecl function_decl (std::move (ident),
+                                                  std::move (qualifiers),
+                                                  std::move (generic_params),
+                                                  std::move (function_params),
+                                                  std::move (return_type),
+                                                  std::move (where_clause));
+
+            return std::unique_ptr (new AST::TraitItemFunc (
+              std::move (function_decl), std::move (definition),
+              std::move (outer_attrs), tok->get_locus ()));
+          }
+      }
+      default: {
+        // TODO: try and parse macro invocation semi - if fails, maybe error.
+        /* outer_attrs is not used again on this path, so hand it over
+         * instead of copying (as parse_trait_impl_item does) */
+        std::unique_ptr macro_invoc
+          = parse_macro_invocation_semi (std::move (outer_attrs));
+
+        if (macro_invoc == nullptr)
+          {
+            // TODO: error?
+            return nullptr;
+          }
+        else
+          {
+            return macro_invoc;
+          }
+        /* FIXME: macro invocations can only start with certain tokens. be
+         * more picky with these? */
+      }
+    }
+}
+
+/* Parses a typedef trait item: `type' IDENTIFIER, an optional `:' followed
+ * by type param bounds, then `;'.  Returns nullptr if the identifier or the
+ * closing semicolon is missing. */
+template
+std::unique_ptr
+Parser::parse_trait_type (AST::AttrVec outer_attrs)
+{
+  Location locus = lexer.peek_token ()->get_locus ();
+  skip_token (TYPE);
+
+  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+  if (ident_tok == nullptr)
+    return nullptr;
+
+  Identifier ident = ident_tok->get_str ();
+
+  // stays empty when no `:' follows the name
+  std::vector> bounds;
+
+  // parse optional colon
+  if (lexer.peek_token ()->get_id () == COLON)
+    {
+      lexer.skip_token ();
+
+      // parse optional type param bounds
+      bounds
+        = parse_type_param_bounds ([] (TokenId id) { return id == SEMICOLON; });
+      // bounds = parse_type_param_bounds ();
+    }
+
+  if (!skip_token (SEMICOLON))
+    {
+      // skip?
+      return nullptr;
+    }
+
+  return std::unique_ptr (
+    new AST::TraitItemType (std::move (ident), std::move (bounds),
+                            std::move (outer_attrs), locus));
+}
+
+// Parses a constant trait item.
+template
+std::unique_ptr
+Parser::parse_trait_const (AST::AttrVec outer_attrs)
+{
+  /* Form handled: `const' IDENTIFIER `:' TYPE, an optional `=' EXPR, then
+   * `;'.  Returns nullptr on any parse failure. */
+  Location locus = lexer.peek_token ()->get_locus ();
+  skip_token (CONST);
+
+  // parse constant item name
+  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+  if (ident_tok == nullptr)
+    return nullptr;
+
+  Identifier ident = ident_tok->get_str ();
+
+  if (!skip_token (COLON))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  // parse constant trait item type (required, so bail out if missing)
+  std::unique_ptr type = parse_type ();
+  if (type == nullptr)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "failed to parse type in trait constant item");
+      add_error (std::move (error));
+
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  // parse constant trait body expression, if it exists
+  std::unique_ptr const_body = nullptr;
+  if (lexer.peek_token ()->get_id () == EQUAL)
+    {
+      lexer.skip_token ();
+
+      // expression must exist once `=' has been seen, so parse and check it
+      const_body = parse_expr ();
+      if (const_body == nullptr)
+        {
+          Error error (lexer.peek_token ()->get_locus (),
+                       "failed to parse expression in trait constant item");
+          add_error (std::move (error));
+
+          skip_after_semicolon ();
+          return nullptr;
+        }
+    }
+
+  if (!skip_token (SEMICOLON))
+    {
+      // skip after something?
+      return nullptr;
+    }
+
+  return std::unique_ptr (
+    new AST::TraitItemConst (std::move (ident), std::move (type),
+                             std::move (const_body), std::move (outer_attrs),
+                             locus));
+}
+
+/* Parses an "impl" item; both inherent impls and trait impls are parsed
+ * here.  After the optional `unsafe', `impl', generic params and optional
+ * `!', a type path is parsed: if it is valid and followed by `for', the
+ * item is a trait impl, otherwise it is an inherent impl.  Returns nullptr
+ * on failure. */
+template
+std::unique_ptr
+Parser::parse_impl (AST::Visibility vis,
+                                         AST::AttrVec outer_attrs)
+{
+  /* Note that only trait impls are allowed to be unsafe. So if unsafe, it
+   * must be a trait impl. However, this isn't enough for full disambiguation,
+   * so don't branch here. */
+  Location locus = lexer.peek_token ()->get_locus ();
+  bool is_unsafe = false;
+  if (lexer.peek_token ()->get_id () == UNSAFE)
+    {
+      lexer.skip_token ();
+      is_unsafe = true;
+    }
+
+  if (!skip_token (IMPL))
+    {
+      skip_after_next_block ();
+      return nullptr;
+    }
+
+  // parse generic params (shared by trait and inherent impls)
+  std::vector> generic_params
+    = parse_generic_params_in_angles ();
+
+  // Again, a trait impl-only feature, but an optional one, so it can't be
+  // used for branching yet.
+  bool has_exclam = false;
+  if (lexer.peek_token ()->get_id () == EXCLAM)
+    {
+      lexer.skip_token ();
+      has_exclam = true;
+    }
+
+  /* FIXME: find a cleaner way to handle the TypePath here. Also, make sure
+   * this doesn't parse too much and not work. */
+  AST::TypePath type_path = parse_type_path ();
+  if (type_path.is_error () || lexer.peek_token ()->get_id () != FOR)
+    {
+      /* cannot parse type path (or not for token next, at least), so must be
+       * inherent impl */
+      /* NOTE(review): is_unsafe and has_exclam are not used on this path -
+       * confirm whether they should be diagnosed here. */
+
+      // hacky conversion of TypePath stack object to Type pointer
+      std::unique_ptr type = nullptr;
+      if (!type_path.is_error ())
+        type = std::unique_ptr (
+          new AST::TypePath (std::move (type_path)));
+      else
+        type = parse_type ();
+
+      // Type is required, so error if null
+      if (type == nullptr)
+        {
+          Error error (lexer.peek_token ()->get_locus (),
+                       "could not parse type in inherent impl");
+          add_error (std::move (error));
+
+          skip_after_next_block ();
+          return nullptr;
+        }
+
+      // parse optional where clause
+      AST::WhereClause where_clause = parse_where_clause ();
+
+      if (!skip_token (LEFT_CURLY))
+        {
+          // TODO: does this still skip properly?
+          skip_after_end_block ();
+          return nullptr;
+        }
+
+      // parse inner attributes (optional)
+      AST::AttrVec inner_attrs = parse_inner_attributes ();
+
+      // parse inherent impl items until the closing brace
+      std::vector> impl_items;
+
+      const_TokenPtr t = lexer.peek_token ();
+      while (t->get_id () != RIGHT_CURLY)
+        {
+          std::unique_ptr impl_item
+            = parse_inherent_impl_item ();
+
+          if (impl_item == nullptr)
+            {
+              Error error (
+                lexer.peek_token ()->get_locus (),
+                "failed to parse inherent impl item in inherent impl");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          impl_items.push_back (std::move (impl_item));
+
+          t = lexer.peek_token ();
+        }
+
+      if (!skip_token (RIGHT_CURLY))
+        {
+          // skip somewhere
+          return nullptr;
+        }
+
+      // DEBUG
+      rust_debug ("successfully parsed inherent impl");
+
+      impl_items.shrink_to_fit ();
+
+      return std::unique_ptr (new AST::InherentImpl (
+        std::move (impl_items), std::move (generic_params), std::move (type),
+        std::move (where_clause), std::move (vis), std::move (inner_attrs),
+        std::move (outer_attrs), locus));
+    }
+  else
+    {
+      // type path must both be valid and next token is for, so trait impl
+      if (!skip_token (FOR))
+        {
+          skip_after_next_block ();
+          return nullptr;
+        }
+
+      // parse type
+      std::unique_ptr type = parse_type ();
+      // ensure type is included as it is required
+      if (type == nullptr)
+        {
+          Error error (lexer.peek_token ()->get_locus (),
+                       "could not parse type in trait impl");
+          add_error (std::move (error));
+
+          skip_after_next_block ();
+          return nullptr;
+        }
+
+      // parse optional where clause
+      AST::WhereClause where_clause = parse_where_clause ();
+
+      if (!skip_token (LEFT_CURLY))
+        {
+          // TODO: does this still skip properly?
+          skip_after_end_block ();
+          return nullptr;
+        }
+
+      // parse inner attributes (optional)
+      AST::AttrVec inner_attrs = parse_inner_attributes ();
+
+      // parse trait impl items until the closing brace
+      std::vector> impl_items;
+
+      const_TokenPtr t = lexer.peek_token ();
+      while (t->get_id () != RIGHT_CURLY)
+        {
+          std::unique_ptr impl_item
+            = parse_trait_impl_item ();
+
+          if (impl_item == nullptr)
+            {
+              Error error (lexer.peek_token ()->get_locus (),
+                           "failed to parse trait impl item in trait impl");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          impl_items.push_back (std::move (impl_item));
+
+          t = lexer.peek_token ();
+
+          // DEBUG
+          rust_debug ("successfully parsed a trait impl item");
+        }
+      // DEBUG
+      rust_debug ("successfully finished trait impl items");
+
+      if (!skip_token (RIGHT_CURLY))
+        {
+          // skip somewhere
+          return nullptr;
+        }
+
+      // DEBUG
+      rust_debug ("successfully parsed trait impl");
+
+      impl_items.shrink_to_fit ();
+
+      return std::unique_ptr (
+        new AST::TraitImpl (std::move (type_path), is_unsafe, has_exclam,
+                            std::move (impl_items), std::move (generic_params),
+                            std::move (type), std::move (where_clause),
+                            std::move (vis), std::move (inner_attrs),
+                            std::move (outer_attrs), locus));
+    }
+}
+
+/* Parses a single inherent impl item (item inside an inherent impl block).
+ * After any outer attributes the item is a macro invocation (IDENTIFIER), a
+ * constant item, or a function/method, the latter two optionally preceded
+ * by a visibility.  Returns nullptr on failure. */
+template
+std::unique_ptr
+Parser::parse_inherent_impl_item ()
+{
+  // parse outer attributes (if they exist)
+  AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+  // TODO: cleanup - currently an unreadable mess
+
+  // branch on next token:
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case IDENTIFIER:
+      // FIXME: Arthur: Do we need to some lookahead here?
+      // outer_attrs is not used again on this path, so hand it over
+      return parse_macro_invocation_semi (std::move (outer_attrs));
+    case SUPER:
+    case SELF:
+    case CRATE:
+      case PUB: {
+        // visibility, so not a macro invocation semi - must be constant,
+        // function, or method
+        AST::Visibility vis = parse_visibility ();
+
+        /* re-peek: T still refers to the token seen before the visibility
+         * was consumed, and the diagnostics below must name the token that
+         * is actually being rejected */
+        t = lexer.peek_token ();
+
+        // TODO: is a recursive call to parse_inherent_impl_item better?
+        switch (t->get_id ())
+          {
+          case EXTERN_TOK:
+          case UNSAFE:
+          case FN_TOK:
+            // function or method
+            return parse_inherent_impl_function_or_method (std::move (vis),
+                                                           std::move (
+                                                             outer_attrs));
+          case CONST:
+            // lookahead to resolve production - could be function/method or
+            // const item
+            t = lexer.peek_token (1);
+
+            switch (t->get_id ())
+              {
+              case IDENTIFIER:
+              case UNDERSCORE:
+                return parse_const_item (std::move (vis),
+                                         std::move (outer_attrs));
+              case UNSAFE:
+              case EXTERN_TOK:
+              case FN_TOK:
+                return parse_inherent_impl_function_or_method (std::move (vis),
+                                                               std::move (
+                                                                 outer_attrs));
+              default:
+                add_error (Error (t->get_locus (),
+                                  "unexpected token %qs in some sort of const "
+                                  "item in inherent impl",
+                                  t->get_token_description ()));
+
+                lexer.skip_token (1); // TODO: is this right thing to do?
+                return nullptr;
+              }
+          default:
+            add_error (
+              Error (t->get_locus (),
+                     "unrecognised token %qs for item in inherent impl",
+                     t->get_token_description ()));
+            // skip?
+            return nullptr;
+          }
+      }
+    case EXTERN_TOK:
+    case UNSAFE:
+    case FN_TOK:
+      // function or method
+      return parse_inherent_impl_function_or_method (
+        AST::Visibility::create_private (), std::move (outer_attrs));
+    case CONST:
+      /* lookahead to resolve production - could be function/method or const
+       * item */
+      t = lexer.peek_token (1);
+
+      switch (t->get_id ())
+        {
+        case IDENTIFIER:
+        case UNDERSCORE:
+          return parse_const_item (AST::Visibility::create_private (),
+                                   std::move (outer_attrs));
+        case UNSAFE:
+        case EXTERN_TOK:
+        case FN_TOK:
+          return parse_inherent_impl_function_or_method (
+            AST::Visibility::create_private (), std::move (outer_attrs));
+        default:
+          add_error (Error (t->get_locus (),
+                            "unexpected token %qs in some sort of const item "
+                            "in inherent impl",
+                            t->get_token_description ()));
+
+          lexer.skip_token (1); // TODO: is this right thing to do?
+          return nullptr;
+        }
+      gcc_unreachable ();
+    default:
+      add_error (Error (t->get_locus (),
+                        "unrecognised token %qs for item in inherent impl",
+                        t->get_token_description ()));
+
+      // skip?
+      return nullptr;
+    }
+}
+
+/* For internal use only by parse_inherent_impl_item() - splits giant method
+ * into smaller ones and prevents duplication of logic. Strictly, this parses
+ * a function or method item inside an inherent impl item block. */
+// TODO: make this a templated function with "return type" as type param -
+// InherentImplItem is this specialisation of the template while TraitImplItem
+// will be the other.
+template
+std::unique_ptr
+Parser::parse_inherent_impl_function_or_method (
+  AST::Visibility vis, AST::AttrVec outer_attrs)
+{
+  /* The location of the item's first token is what ends up on the
+   * resulting Method or Function node.  */
+  Location start_locus = lexer.peek_token ()->get_locus ();
+
+  // qualifiers first, then the `fn' keyword itself
+  AST::FunctionQualifiers quals = parse_function_qualifiers ();
+  skip_token (FN_TOK);
+
+  // name of the function or method; nothing can be built without it
+  const_TokenPtr name_tok = expect_token (IDENTIFIER);
+  if (name_tok == nullptr)
+    return nullptr;
+  Identifier name = name_tok->get_str ();
+
+  // generic params in angle brackets
+  std::vector> generics
+    = parse_generic_params_in_angles ();
+
+  if (!skip_token (LEFT_PAREN))
+    {
+      // skip after somewhere?
+      return nullptr;
+    }
+
+  /* A successfully parsed leading "self" param is what makes this a method
+   * rather than a function.  */
+  AST::SelfParam receiver = parse_self_param ();
+  /* FIXME: ensure that self param doesn't accidentally consume tokens for a
+   * function - one idea is to lookahead up to 4 tokens to see whether self
+   * is one of them */
+  const bool is_method = !receiver.is_error ();
+
+  /* drop the comma after the self param so the remaining params are parsed
+   * the same way for functions and methods */
+  if (is_method && lexer.peek_token ()->get_id () == COMMA)
+    lexer.skip_token ();
+
+  // word used for this item in the diagnostics below
+  const char *item_kind = is_method ? "method" : "function";
+
+  // regular params, up to (not including) the closing paren
+  std::vector params
+    = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; });
+
+  if (!skip_token (RIGHT_PAREN))
+    {
+      skip_after_end_block ();
+      return nullptr;
+    }
+
+  // optional return type, then optional where clause
+  std::unique_ptr ret_type = parse_function_return_type ();
+  AST::WhereClause where = parse_where_clause ();
+
+  // a bare declaration (ending in `;') is rejected: a body is mandatory
+  if (lexer.peek_token ()->get_id () == SEMICOLON)
+    {
+      add_error (
+        Error (lexer.peek_token ()->get_locus (),
+               "%s declaration in inherent impl not allowed - must have "
+               "a definition",
+               item_kind));
+
+      lexer.skip_token ();
+      return nullptr;
+    }
+
+  std::unique_ptr block = parse_block_expr ();
+  if (block == nullptr)
+    {
+      add_error (
+        Error (lexer.peek_token ()->get_locus (),
+               "could not parse definition in inherent impl %s definition",
+               item_kind));
+
+      skip_after_end_block ();
+      return nullptr;
+    }
+
+  // a method carries the self param; a plain function does not
+  if (!is_method)
+    {
+      return std::unique_ptr (
+        new AST::Function (std::move (name), std::move (quals),
+                           std::move (generics), std::move (params),
+                           std::move (ret_type), std::move (where),
+                           std::move (block), std::move (vis),
+                           std::move (outer_attrs), start_locus));
+    }
+
+  return std::unique_ptr (
+    new AST::Method (std::move (name), std::move (quals),
+                     std::move (generics), std::move (receiver),
+                     std::move (params), std::move (ret_type),
+                     std::move (where), std::move (block), std::move (vis),
+                     std::move (outer_attrs), start_locus));
+}
+
+// Parses a single trait impl item (item inside a trait impl block).
+template
+std::unique_ptr
+Parser::parse_trait_impl_item ()
+{
+  /* After any outer attributes the item is a macro invocation, a type
+   * alias, a constant item, or a function/method; the last three may be
+   * preceded by `pub'.  Returns nullptr on failure. */
+
+  // parse outer attributes (if they exist)
+  AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+  // TODO: clean this function up, it is basically unreadable hacks
+
+  // branch on next token:
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case IDENTIFIER:
+    case SUPER:
+    case SELF:
+    case CRATE:
+    case DOLLAR_SIGN:
+      // these seem to be SimplePath tokens, so this is a macro invocation
+      // semi
+      return parse_macro_invocation_semi (std::move (outer_attrs));
+    case TYPE:
+      return parse_type_alias (AST::Visibility::create_private (),
+                               std::move (outer_attrs));
+      case PUB: {
+        // visibility, so not a macro invocation semi - must be constant,
+        // function, or method
+        AST::Visibility vis = parse_visibility ();
+
+        /* re-peek: T still refers to the `pub' token seen before the
+         * visibility was consumed, and the diagnostics below must name the
+         * token that is actually being rejected */
+        t = lexer.peek_token ();
+
+        // TODO: is a recursive call to parse_trait_impl_item better?
+        switch (t->get_id ())
+          {
+          case TYPE:
+            return parse_type_alias (std::move (vis), std::move (outer_attrs));
+          case EXTERN_TOK:
+          case UNSAFE:
+          case FN_TOK:
+            // function or method
+            return parse_trait_impl_function_or_method (std::move (vis),
+                                                        std::move (
+                                                          outer_attrs));
+          case CONST:
+            // lookahead to resolve production - could be function/method or
+            // const item
+            t = lexer.peek_token (1);
+
+            switch (t->get_id ())
+              {
+              case IDENTIFIER:
+              case UNDERSCORE:
+                return parse_const_item (std::move (vis),
+                                         std::move (outer_attrs));
+              case UNSAFE:
+              case EXTERN_TOK:
+              case FN_TOK:
+                return parse_trait_impl_function_or_method (std::move (vis),
+                                                            std::move (
+                                                              outer_attrs));
+              default:
+                add_error (Error (t->get_locus (),
+                                  "unexpected token %qs in some sort of const "
+                                  "item in trait impl",
+                                  t->get_token_description ()));
+
+                lexer.skip_token (1); // TODO: is this right thing to do?
+                return nullptr;
+              }
+          default:
+            add_error (Error (t->get_locus (),
+                              "unrecognised token %qs for item in trait impl",
+                              t->get_token_description ()));
+
+            // skip?
+            return nullptr;
+          }
+      }
+    case EXTERN_TOK:
+    case UNSAFE:
+    case FN_TOK:
+      // function or method
+      return parse_trait_impl_function_or_method (
+        AST::Visibility::create_private (), std::move (outer_attrs));
+    case CONST:
+      // lookahead to resolve production - could be function/method or const
+      // item
+      t = lexer.peek_token (1);
+
+      switch (t->get_id ())
+        {
+        case IDENTIFIER:
+        case UNDERSCORE:
+          return parse_const_item (AST::Visibility::create_private (),
+                                   std::move (outer_attrs));
+        case UNSAFE:
+        case EXTERN_TOK:
+        case FN_TOK:
+          return parse_trait_impl_function_or_method (
+            AST::Visibility::create_private (), std::move (outer_attrs));
+        default:
+          add_error (Error (
+            t->get_locus (),
+            "unexpected token %qs in some sort of const item in trait impl",
+            t->get_token_description ()));
+
+          lexer.skip_token (1); // TODO: is this right thing to do?
+          return nullptr;
+        }
+      gcc_unreachable ();
+    default:
+      add_error (Error (t->get_locus (),
+                        "unrecognised token %qs for item in trait impl",
+                        t->get_token_description ()));
+
+      // skip?
+      return nullptr;
+    }
+}
+
+/* For internal use only by parse_trait_impl_item() - splits giant method into
+ * smaller ones and prevents duplication of logic. Strictly, this parses a
+ * function or method item inside a trait impl item block.  The item is a
+ * method when a leading self param parses successfully, otherwise a
+ * function; a body is mandatory.  Returns nullptr on failure. */
+template
+std::unique_ptr
+Parser::parse_trait_impl_function_or_method (
+  AST::Visibility vis, AST::AttrVec outer_attrs)
+{
+  // this shares virtually all logic with
+  // parse_inherent_impl_function_or_method
+  // - template?
+  Location locus = lexer.peek_token ()->get_locus ();
+
+  // parse function or method qualifiers
+  AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();
+
+  skip_token (FN_TOK);
+
+  // parse function or method name
+  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+  if (ident_tok == nullptr)
+    {
+      return nullptr;
+    }
+  Identifier ident = ident_tok->get_str ();
+
+  // DEBUG:
+  rust_debug (
+    "about to start parsing generic params in trait impl function or method");
+
+  // parse generic params
+  std::vector> generic_params
+    = parse_generic_params_in_angles ();
+
+  // DEBUG:
+  rust_debug (
+    "finished parsing generic params in trait impl function or method");
+
+  if (!skip_token (LEFT_PAREN))
+    {
+      // skip after somewhere?
+      return nullptr;
+    }
+
+  // now for function vs method disambiguation - method has opening "self"
+  // param
+  AST::SelfParam self_param = parse_self_param ();
+  // FIXME: ensure that self param doesn't accidentally consume tokens for a
+  // function
+  bool is_method = false;
+  if (!self_param.is_error ())
+    {
+      is_method = true;
+
+      // skip comma so function and method regular params can be parsed in
+      // same way
+      if (lexer.peek_token ()->get_id () == COMMA)
+        {
+          lexer.skip_token ();
+        }
+
+      // DEBUG
+      rust_debug ("successfully parsed self param in method trait impl item");
+    }
+
+  // DEBUG
+  rust_debug (
+    "started to parse function params in function or method trait impl item");
+
+  // parse trait function params (only if next token isn't right paren)
+  std::vector function_params;
+  if (lexer.peek_token ()->get_id () != RIGHT_PAREN)
+    {
+      function_params
+        = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; });
+
+      // tokens were present before `)', so an empty result is a failure
+      if (function_params.empty ())
+        {
+          Error error (
+            lexer.peek_token ()->get_locus (),
+            "failed to parse function params in trait impl %s definition",
+            is_method ? "method" : "function");
+          add_error (std::move (error));
+
+          skip_after_next_block ();
+          return nullptr;
+        }
+    }
+
+  // DEBUG
+  rust_debug ("successfully parsed function params in function or method "
+              "trait impl item");
+
+  if (!skip_token (RIGHT_PAREN))
+    {
+      skip_after_next_block ();
+      return nullptr;
+    }
+
+  // parse return type (optional)
+  std::unique_ptr return_type = parse_function_return_type ();
+
+  // DEBUG
+  rust_debug (
+    "successfully parsed return type in function or method trait impl item");
+
+  // parse where clause (optional)
+  AST::WhereClause where_clause = parse_where_clause ();
+
+  // DEBUG
+  rust_debug (
+    "successfully parsed where clause in function or method trait impl item");
+
+  // parse function definition (in block) - semicolon not allowed
+  if (lexer.peek_token ()->get_id () == SEMICOLON)
+    {
+      Error error (
+        lexer.peek_token ()->get_locus (),
+        "%s declaration in trait impl not allowed - must have a definition",
+        is_method ? "method" : "function");
+      add_error (std::move (error));
+
+      lexer.skip_token ();
+      return nullptr;
+    }
+  std::unique_ptr body = parse_block_expr ();
+  if (body == nullptr)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "could not parse definition in trait impl %s definition",
+                   is_method ? "method" : "function");
+      add_error (std::move (error));
+
+      skip_after_end_block ();
+      return nullptr;
+    }
+
+  // do actual if instead of ternary for return value optimisation
+  if (is_method)
+    {
+      return std::unique_ptr (
+        new AST::Method (std::move (ident), std::move (qualifiers),
+                         std::move (generic_params), std::move (self_param),
+                         std::move (function_params), std::move (return_type),
+                         std::move (where_clause), std::move (body),
+                         std::move (vis), std::move (outer_attrs), locus));
+    }
+  else
+    {
+      return std::unique_ptr (
+        new AST::Function (std::move (ident), std::move (qualifiers),
+                           std::move (generic_params),
+                           std::move (function_params), std::move (return_type),
+                           std::move (where_clause), std::move (body),
+                           std::move (vis), std::move (outer_attrs), locus));
+    }
+}
+
+/* Parses an extern block of declarations: `extern', an optional string
+ * literal ABI name, then `{', inner attributes, external items and `}'.
+ * Returns nullptr on failure. */
+template
+std::unique_ptr
+Parser::parse_extern_block (AST::Visibility vis,
+                                                 AST::AttrVec outer_attrs)
+{
+  Location locus = lexer.peek_token ()->get_locus ();
+  skip_token (EXTERN_TOK);
+
+  // detect optional abi name (left empty when absent)
+  std::string abi;
+  const_TokenPtr next_tok = lexer.peek_token ();
+  if (next_tok->get_id () == STRING_LITERAL)
+    {
+      lexer.skip_token ();
+      abi = next_tok->get_str ();
+    }
+
+  if (!skip_token (LEFT_CURLY))
+    {
+      skip_after_end_block ();
+      return nullptr;
+    }
+
+  AST::AttrVec inner_attrs = parse_inner_attributes ();
+
+  // parse declarations inside extern block
+  std::vector> extern_items;
+
+  const_TokenPtr t = lexer.peek_token ();
+  while (t->get_id () != RIGHT_CURLY)
+    {
+      std::unique_ptr extern_item = parse_external_item ();
+
+      if (extern_item == nullptr)
+        {
+          // T is the token at which the failed item started
+          Error error (t->get_locus (),
+                       "failed to parse external item despite not reaching "
+                       "end of extern block");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+
+      extern_items.push_back (std::move (extern_item));
+
+      t = lexer.peek_token ();
+    }
+
+  if (!skip_token (RIGHT_CURLY))
+    {
+      // skip somewhere
+      return nullptr;
+    }
+
+  extern_items.shrink_to_fit ();
+
+  return std::unique_ptr (
+    new AST::ExternBlock (std::move (abi), std::move (extern_items),
+                          std::move (vis), std::move (inner_attrs),
+                          std::move (outer_attrs), locus));
+}
+
+/* Parses a single extern block item: a macro invocation, a static
+ * declaration, or a function declaration (possibly variadic).  Returns
+ * nullptr on failure. */
+template
+std::unique_ptr
+Parser::parse_external_item ()
+{
+  // parse optional outer attributes
+  AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+  Location locus = lexer.peek_token ()->get_locus ();
+
+  // parse optional visibility
+  AST::Visibility vis = parse_visibility ();
+
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case IDENTIFIER:
+      // outer_attrs is not used again on this path, so hand it over
+      return parse_macro_invocation_semi (std::move (outer_attrs));
+      case STATIC_TOK: {
+        // parse extern static item
+        lexer.skip_token ();
+
+        // parse mut (optional)
+        bool has_mut = false;
+        if (lexer.peek_token ()->get_id () == MUT)
+          {
+            lexer.skip_token ();
+            has_mut = true;
+          }
+
+        // parse identifier
+        const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+        if (ident_tok == nullptr)
+          {
+            skip_after_semicolon ();
+            return nullptr;
+          }
+        Identifier ident = ident_tok->get_str ();
+
+        if (!skip_token (COLON))
+          {
+            skip_after_semicolon ();
+            return nullptr;
+          }
+
+        // parse type (required)
+        std::unique_ptr type = parse_type ();
+        if (type == nullptr)
+          {
+            Error error (lexer.peek_token ()->get_locus (),
+                         "failed to parse type in external static item");
+            add_error (std::move (error));
+
+            skip_after_semicolon ();
+            return nullptr;
+          }
+
+        if (!skip_token (SEMICOLON))
+          {
+            // skip after somewhere?
+            return nullptr;
+          }
+
+        return std::unique_ptr (
+          new AST::ExternalStaticItem (std::move (ident), std::move (type),
+                                       has_mut, std::move (vis),
+                                       std::move (outer_attrs), locus));
+      }
+      case FN_TOK: {
+        // parse extern function declaration item
+        // skip function token
+        lexer.skip_token ();
+
+        // parse identifier
+        const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+        if (ident_tok == nullptr)
+          {
+            skip_after_semicolon ();
+            return nullptr;
+          }
+        Identifier ident = ident_tok->get_str ();
+
+        // parse (optional) generic params
+        std::vector> generic_params
+          = parse_generic_params_in_angles ();
+
+        if (!skip_token (LEFT_PAREN))
+          {
+            skip_after_semicolon ();
+            return nullptr;
+          }
+
+        // parse parameters
+        std::vector function_params;
+        bool is_variadic = false;
+        AST::AttrVec variadic_attrs;
+
+        /* named separately from the outer T so the dispatch token is not
+         * shadowed */
+        const_TokenPtr param_tok = lexer.peek_token ();
+        while (param_tok->get_id () != RIGHT_PAREN)
+          {
+            /* attributes are parsed before it is known whether they belong
+             * to a `...' or to a named param */
+            AST::AttrVec maybe_variadic_attrs = parse_outer_attributes ();
+            if (lexer.peek_token ()->get_id () == ELLIPSIS)
+              {
+                // variadic - use attrs for this
+                lexer.skip_token ();
+                is_variadic = true;
+                variadic_attrs = std::move (maybe_variadic_attrs);
+                param_tok = lexer.peek_token ();
+
+                // `...' must be the last thing in the param list
+                if (param_tok->get_id () != RIGHT_PAREN)
+                  {
+                    Error error (param_tok->get_locus (),
+                                 "expected right parentheses after variadic in "
+                                 "named function "
+                                 "parameters, found %qs",
+                                 param_tok->get_token_description ());
+                    add_error (std::move (error));
+
+                    skip_after_semicolon ();
+                    return nullptr;
+                  }
+
+                break;
+              }
+
+            AST::NamedFunctionParam param
+              = parse_named_function_param (std::move (maybe_variadic_attrs));
+            if (param.is_error ())
+              {
+                /* NOTE(review): parse_named_function_param already calls
+                 * skip_after_semicolon () when the param type fails to
+                 * parse, so that path skips twice - confirm intended. */
+                Error error (param_tok->get_locus (),
+                             "could not parse named function "
+                             "parameter in external function");
+                add_error (std::move (error));
+
+                skip_after_semicolon ();
+                return nullptr;
+              }
+            function_params.push_back (std::move (param));
+
+            if (lexer.peek_token ()->get_id () != COMMA)
+              break;
+
+            // skip comma
+            lexer.skip_token ();
+            param_tok = lexer.peek_token ();
+          }
+
+        if (!skip_token (RIGHT_PAREN))
+          {
+            skip_after_semicolon ();
+            return nullptr;
+          }
+
+        // parse (optional) return type
+        std::unique_ptr return_type = parse_function_return_type ();
+
+        // parse (optional) where clause
+        AST::WhereClause where_clause = parse_where_clause ();
+
+        if (!skip_token (SEMICOLON))
+          {
+            // skip somewhere?
+            return nullptr;
+          }
+
+        function_params.shrink_to_fit ();
+
+        return std::unique_ptr (
+          new AST::ExternalFunctionItem (
+            std::move (ident), std::move (generic_params),
+            std::move (return_type), std::move (where_clause),
+            std::move (function_params), is_variadic,
+            std::move (variadic_attrs), std::move (vis),
+            std::move (outer_attrs), locus));
+      }
+    default:
+      // error
+      add_error (
+        Error (t->get_locus (),
+               "unrecognised token %qs in extern block item declaration",
+               t->get_token_description ()));
+
+      skip_after_semicolon ();
+      return nullptr;
+    }
+}
+
+/* Parses an extern block function param (with "pattern" being _ or an
+ * identifier), i.e. a name, `:' and a type.  Returns an error param (see
+ * AST::NamedFunctionParam::create_error) when the next token is neither
+ * an identifier nor `_', or when the colon or the type is missing. */
+template
+AST::NamedFunctionParam
+Parser::parse_named_function_param (
+  AST::AttrVec outer_attrs)
+{
+  // parse identifier/_
+  std::string name;
+
+  const_TokenPtr t = lexer.peek_token ();
+  Location name_location = t->get_locus ();
+  switch (t->get_id ())
+    {
+    case IDENTIFIER:
+      name = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case UNDERSCORE:
+      name = "_";
+      lexer.skip_token ();
+      break;
+    default:
+      // this is not a function param, but not necessarily an error
+      return AST::NamedFunctionParam::create_error ();
+    }
+
+  if (!skip_token (COLON))
+    {
+      // skip after somewhere?
+      return AST::NamedFunctionParam::create_error ();
+    }
+
+  // parse (required) type
+  std::unique_ptr param_type = parse_type ();
+  if (param_type == nullptr)
+    {
+      Error error (
+        lexer.peek_token ()->get_locus (),
+        "could not parse param type in extern block function declaration");
+      add_error (std::move (error));
+
+      skip_after_semicolon ();
+      return AST::NamedFunctionParam::create_error ();
+    }
+
+  return AST::NamedFunctionParam (std::move (name), std::move (param_type),
+                                  std::move (outer_attrs), name_location);
+}
+
+/* Parses a statement (will further disambiguate any statement): a lone `;'
+ * gives an empty statement; otherwise, after outer attributes, the next
+ * token selects a let statement, a VisItem, a macro item, or (fallback) an
+ * expression statement. */
+template
+std::unique_ptr
+Parser::parse_stmt (ParseRestrictions restrictions)
+{
+  // quick exit for empty statement
+  // FIXME: Can we have empty statements without semicolons? Just nothing?
+  const_TokenPtr t = lexer.peek_token ();
+  if (t->get_id () == SEMICOLON)
+    {
+      lexer.skip_token ();
+      return std::unique_ptr (
+        new AST::EmptyStmt (t->get_locus ()));
+    }
+
+  // parse outer attributes
+  AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+  // parsing this will be annoying because of the many different possibilities
+  /* best may be just to copy paste in parse_item switch, and failing that try
+   * to parse outer attributes, and then pass them in to either a let
+   * statement or (fallback) expression statement. */
+  // FIXME: think of a way to do this without such a large switch?
+  t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case LET:
+      // let statement
+      return parse_let_stmt (std::move (outer_attrs), restrictions);
+    case PUB:
+    case MOD:
+    case EXTERN_TOK:
+    case USE:
+    case FN_TOK:
+    case TYPE:
+    case STRUCT_TOK:
+    case ENUM_TOK:
+    case CONST:
+    case STATIC_TOK:
+    case TRAIT:
+    case IMPL:
+    /* TODO: implement union keyword properly; it is context-dependent, so
+     * it is handled by the workaround in the IDENTIFIER case below, kept
+     * separate from the rest of this list. */
+    // case UNION:
+    case UNSAFE: // maybe - unsafe traits are a thing
+      /* if any of these (should be all possible VisItem prefixes), parse a
+       * VisItem can't parse item because would require reparsing outer
+       * attributes */
+      return parse_vis_item (std::move (outer_attrs));
+    case SUPER:
+    case SELF:
+    case CRATE:
+    case DOLLAR_SIGN:
+      // almost certainly macro invocation semi
+      return parse_macro_item (std::move (outer_attrs));
+    // workaround for the union "keyword", which lexes as an identifier
+    case IDENTIFIER:
+      if (t->get_str () == "union"
+          && lexer.peek_token (1)->get_id () == IDENTIFIER)
+        {
+          return parse_vis_item (std::move (outer_attrs));
+          // or should this go straight to parsing union?
+        }
+      else if (t->get_str () == "macro_rules")
+        {
+          // macro_rules! macro item
+          return parse_macro_item (std::move (outer_attrs));
+        }
+      else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION
+               || lexer.peek_token (1)->get_id () == EXCLAM)
+        {
+          // FIXME: ensure doesn't take any expressions by mistake
+          /* path (probably) or macro invocation, so probably a macro
+           * invocation semi */
+          return parse_macro_item (std::move (outer_attrs));
+        }
+      gcc_fallthrough ();
+      // TODO: find out how to disable gcc "implicit fallthrough" warning
+    default:
+      // fallback: expression statement
+      return parse_expr_stmt (std::move (outer_attrs), restrictions);
+    }
+}
+
+// Parses a let statement.
+/* Form handled: `let' PATTERN, an optional `:' TYPE, an optional `=' EXPR,
+ * and a closing `;' that is consumed only when restrictions.consume_semi is
+ * set.  Returns nullptr on failure. */
+template
+std::unique_ptr
+Parser::parse_let_stmt (AST::AttrVec outer_attrs,
+                                             ParseRestrictions restrictions)
+{
+  // location of the `let' keyword, recorded on the statement
+  Location let_locus = lexer.peek_token ()->get_locus ();
+  skip_token (LET);
+
+  // the pattern is mandatory
+  std::unique_ptr binding = parse_pattern ();
+  if (binding == nullptr)
+    {
+      add_error (Error (lexer.peek_token ()->get_locus (),
+                        "failed to parse pattern in let statement"));
+
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  // a type annotation is optional, but once `:' is seen it must parse
+  std::unique_ptr declared_type = nullptr;
+  if (lexer.peek_token ()->get_id () == COLON)
+    {
+      lexer.skip_token ();
+
+      declared_type = parse_type ();
+      if (declared_type == nullptr)
+        {
+          add_error (Error (lexer.peek_token ()->get_locus (),
+                            "failed to parse type in let statement"));
+
+          skip_after_semicolon ();
+          return nullptr;
+        }
+    }
+
+  // likewise the initialiser: optional, but required after `='
+  std::unique_ptr init_expr = nullptr;
+  if (lexer.peek_token ()->get_id () == EQUAL)
+    {
+      lexer.skip_token ();
+
+      init_expr = parse_expr ();
+      if (init_expr == nullptr)
+        {
+          add_error (Error (lexer.peek_token ()->get_locus (),
+                            "failed to parse expression in let statement"));
+
+          skip_after_semicolon ();
+          return nullptr;
+        }
+    }
+
+  // the terminating semicolon is only this function's job when asked
+  if (restrictions.consume_semi && !skip_token (SEMICOLON))
+    return nullptr;
+
+  return std::unique_ptr (
+    new AST::LetStmt (std::move (binding), std::move (init_expr),
+                      std::move (declared_type), std::move (outer_attrs),
+                      let_locus));
+}
+
+// Parses a type path.
+template +AST::TypePath +Parser::parse_type_path () +{ + bool has_opening_scope_resolution = false; + Location locus = lexer.peek_token ()->get_locus (); + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + has_opening_scope_resolution = true; + lexer.skip_token (); + } + + // create segment vector + std::vector> segments; + + // parse required initial segment + std::unique_ptr initial_segment + = parse_type_path_segment (); + if (initial_segment == nullptr) + { + // skip after somewhere? + // don't necessarily throw error but yeah + return AST::TypePath::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + std::unique_ptr segment + = parse_type_path_segment (); + if (segment == nullptr) + { + // skip after somewhere? + Error error (t->get_locus (), "could not parse type path segment"); + add_error (std::move (error)); + + return AST::TypePath::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + return AST::TypePath (std::move (segments), locus, + has_opening_scope_resolution); +} + +template +AST::GenericArg +Parser::parse_generic_arg () +{ + auto tok = lexer.peek_token (); + std::unique_ptr expr = nullptr; + + switch (tok->get_id ()) + { + case IDENTIFIER: { + // This is a bit of a weird situation: With an identifier token, we + // could either have a valid type or a macro (FIXME: anything else?). 
So + // we need one bit of lookahead to differentiate if this is really + auto next_tok = lexer.peek_token (1); + if (next_tok->get_id () == EXCLAM) + { + auto type = parse_type (); + if (type) + return AST::GenericArg::create_type (std::move (type)); + else + return AST::GenericArg::create_error (); + } + lexer.skip_token (); + return AST::GenericArg::create_ambiguous (tok->get_str (), + tok->get_locus ()); + } + case LEFT_CURLY: + expr = parse_block_expr (); + break; + case MINUS: + case STRING_LITERAL: + case CHAR_LITERAL: + case INT_LITERAL: + case FLOAT_LITERAL: + case TRUE_LITERAL: + case FALSE_LITERAL: + expr = parse_literal_expr (); + break; + // FIXME: Because of this, error reporting is garbage for const generic + // parameter's default values + default: { + auto type = parse_type (); + // FIXME: Find a better way to do this? + if (type) + return AST::GenericArg::create_type (std::move (type)); + else + return AST::GenericArg::create_error (); + } + } + + if (!expr) + return AST::GenericArg::create_error (); + + return AST::GenericArg::create_const (std::move (expr)); +} + +// Parses the generic arguments in each path segment. +template +AST::GenericArgs +Parser::parse_path_generic_args () +{ + if (!skip_token (LEFT_ANGLE)) + { + // skip after somewhere? + return AST::GenericArgs::create_empty (); + } + + // We need to parse all lifetimes, then parse types and const generics in + // any order. 
+ + // try to parse lifetimes first + std::vector lifetime_args; + + const_TokenPtr t = lexer.peek_token (); + Location locus = t->get_locus (); + while (!is_right_angle_tok (t->get_id ())) + { + AST::Lifetime lifetime = parse_lifetime (); + if (lifetime.is_error ()) + { + // not necessarily an error + break; + } + + lifetime_args.push_back (std::move (lifetime)); + + // if next token isn't comma, then it must be end of list + if (lexer.peek_token ()->get_id () != COMMA) + { + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // try to parse types and const generics second + std::vector generic_args; + + // TODO: think of better control structure + t = lexer.peek_token (); + while (!is_right_angle_tok (t->get_id ())) + { + // FIXME: Is it fine to break if there is one binding? Can't there be + // bindings in between types? + + // ensure not binding being parsed as type accidently + if (t->get_id () == IDENTIFIER + && lexer.peek_token (1)->get_id () == EQUAL) + break; + + auto arg = parse_generic_arg (); + if (!arg.is_error ()) + { + generic_args.emplace_back (std::move (arg)); + } + + // FIXME: Do we need to break if we encounter an error? 
+ + // if next token isn't comma, then it must be end of list + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip comma + lexer.skip_token (); + t = lexer.peek_token (); + } + + // try to parse bindings third + std::vector binding_args; + + // TODO: think of better control structure + t = lexer.peek_token (); + while (!is_right_angle_tok (t->get_id ())) + { + AST::GenericArgsBinding binding = parse_generic_args_binding (); + if (binding.is_error ()) + { + // not necessarily an error + break; + } + + binding_args.push_back (std::move (binding)); + + // if next token isn't comma, then it must be end of list + if (lexer.peek_token ()->get_id () != COMMA) + { + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // skip any trailing commas + if (lexer.peek_token ()->get_id () == COMMA) + lexer.skip_token (); + + if (!skip_generics_right_angle ()) + return AST::GenericArgs::create_empty (); + + lifetime_args.shrink_to_fit (); + generic_args.shrink_to_fit (); + binding_args.shrink_to_fit (); + + return AST::GenericArgs (std::move (lifetime_args), std::move (generic_args), + std::move (binding_args), locus); +} + +// Parses a binding in a generic args path segment. +template +AST::GenericArgsBinding +Parser::parse_generic_args_binding () +{ + const_TokenPtr ident_tok = lexer.peek_token (); + if (ident_tok->get_id () != IDENTIFIER) + { + // allow non error-inducing use + // skip somewhere? + return AST::GenericArgsBinding::create_error (); + } + lexer.skip_token (); + Identifier ident = ident_tok->get_str (); + + if (!skip_token (EQUAL)) + { + // skip after somewhere? + return AST::GenericArgsBinding::create_error (); + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + // skip somewhere? 
+ return AST::GenericArgsBinding::create_error (); + } + + return AST::GenericArgsBinding (std::move (ident), std::move (type), + ident_tok->get_locus ()); +} + +/* Parses a single type path segment (not including opening scope resolution, + * but includes any internal ones). Includes generic args or type path + * functions too. */ +template +std::unique_ptr +Parser::parse_type_path_segment () +{ + Location locus = lexer.peek_token ()->get_locus (); + // parse ident segment part + AST::PathIdentSegment ident_segment = parse_path_ident_segment (); + if (ident_segment.is_error ()) + { + // not necessarily an error + return nullptr; + } + + /* lookahead to determine if variants exist - only consume scope resolution + * then */ + bool has_separating_scope_resolution = false; + const_TokenPtr next = lexer.peek_token (1); + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION + && (next->get_id () == LEFT_ANGLE || next->get_id () == LEFT_PAREN)) + { + has_separating_scope_resolution = true; + lexer.skip_token (); + } + + // branch into variants on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_ANGLE: { + // parse generic args + AST::GenericArgs generic_args = parse_path_generic_args (); + + return std::unique_ptr ( + new AST::TypePathSegmentGeneric (std::move (ident_segment), + has_separating_scope_resolution, + std::move (generic_args), locus)); + } + case LEFT_PAREN: { + // parse type path function + AST::TypePathFunction type_path_function + = parse_type_path_function (locus); + + if (type_path_function.is_error ()) + { + // skip after somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::TypePathSegmentFunction (std::move (ident_segment), + has_separating_scope_resolution, + std::move (type_path_function), + locus)); + } + default: + // neither of them + return std::unique_ptr ( + new AST::TypePathSegment (std::move (ident_segment), + has_separating_scope_resolution, locus)); + } + gcc_unreachable (); +} + +// Parses a function call representation inside a type path. +template +AST::TypePathFunction +Parser::parse_type_path_function (Location id_location) +{ + if (!skip_token (LEFT_PAREN)) + { + // skip somewhere? + return AST::TypePathFunction::create_error (); + } + + // parse function inputs + std::vector> inputs; + + while (lexer.peek_token ()->get_id () != RIGHT_PAREN) + { + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + /* this is an error as there should've been a ')' there if there + * wasn't a type */ + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse type in parameters of type path function"); + add_error (std::move (error)); + + // skip somewhere? + return AST::TypePathFunction::create_error (); + } + + inputs.push_back (std::move (type)); + + // skip commas, including trailing commas + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return AST::TypePathFunction::create_error (); + } + + // parse optional return type + std::unique_ptr return_type = parse_function_return_type (); + + inputs.shrink_to_fit (); + return AST::TypePathFunction (std::move (inputs), id_location, + std::move (return_type)); +} + +// Parses a path inside an expression that allows generic arguments. 
+template +AST::PathInExpression +Parser::parse_path_in_expression () +{ + Location locus = Linemap::unknown_location (); + bool has_opening_scope_resolution = false; + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + has_opening_scope_resolution = true; + + locus = lexer.peek_token ()->get_locus (); + + lexer.skip_token (); + } + + // create segment vector + std::vector segments; + + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + } + + // parse required initial segment + AST::PathExprSegment initial_segment = parse_path_expr_segment (); + if (initial_segment.is_error ()) + { + // skip after somewhere? + // don't necessarily throw error but yeah + return AST::PathInExpression::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + // skip after somewhere? + Error error (t->get_locus (), + "could not parse path expression segment"); + add_error (std::move (error)); + + return AST::PathInExpression::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + return AST::PathInExpression (std::move (segments), {}, locus, + has_opening_scope_resolution); +} + +/* Parses a single path in expression path segment (including generic + * arguments). */ +template +AST::PathExprSegment +Parser::parse_path_expr_segment () +{ + Location locus = lexer.peek_token ()->get_locus (); + // parse ident segment + AST::PathIdentSegment ident = parse_path_ident_segment (); + if (ident.is_error ()) + { + // not necessarily an error? 
+ return AST::PathExprSegment::create_error (); + } + + // parse generic args (and turbofish), if they exist + /* use lookahead to determine if they actually exist (don't want to + * accidently parse over next ident segment) */ + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION + && lexer.peek_token (1)->get_id () == LEFT_ANGLE) + { + // skip scope resolution + lexer.skip_token (); + + AST::GenericArgs generic_args = parse_path_generic_args (); + + return AST::PathExprSegment (std::move (ident), locus, + std::move (generic_args)); + } + + // return a generic parameter-less expr segment if not found + return AST::PathExprSegment (std::move (ident), locus); +} + +/* Parses a fully qualified path in expression (i.e. a pattern). FIXME does + * not parse outer attrs. */ +template +AST::QualifiedPathInExpression +Parser::parse_qualified_path_in_expression ( + Location pratt_parsed_loc) +{ + /* Note: the Rust grammar is defined in such a way that it is impossible to + * determine whether a prospective qualified path is a + * QualifiedPathInExpression or QualifiedPathInType in all cases by the + * rules themselves (the only possible difference is a TypePathSegment with + * function, and lookahead to find this is too difficult). However, as this + * is a pattern and QualifiedPathInType is a type, I believe it that their + * construction will not be confused (due to rules regarding patterns vs + * types). + * As such, this function will not attempt to minimise errors created by + * their confusion. */ + + // parse the qualified path type (required) + AST::QualifiedPathType qual_path_type + = parse_qualified_path_type (pratt_parsed_loc); + if (qual_path_type.is_error ()) + { + // TODO: should this create a parse error? 
+ return AST::QualifiedPathInExpression::create_error (); + } + Location locus = qual_path_type.get_locus (); + + // parse path segments + std::vector segments; + + // parse initial required segment + if (!expect_token (SCOPE_RESOLUTION)) + { + // skip after somewhere? + + return AST::QualifiedPathInExpression::create_error (); + } + AST::PathExprSegment initial_segment = parse_path_expr_segment (); + if (initial_segment.is_error ()) + { + // skip after somewhere? + Error error (lexer.peek_token ()->get_locus (), + "required initial path expression segment in " + "qualified path in expression could not be parsed"); + add_error (std::move (error)); + + return AST::QualifiedPathInExpression::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + // skip after somewhere? + Error error (t->get_locus (), + "could not parse path expression segment in qualified " + "path in expression"); + add_error (std::move (error)); + + return AST::QualifiedPathInExpression::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + // FIXME: outer attr parsing + return AST::QualifiedPathInExpression (std::move (qual_path_type), + std::move (segments), {}, locus); +} + +// Parses the type syntactical construction at the start of a qualified path. +template +AST::QualifiedPathType +Parser::parse_qualified_path_type ( + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + /* TODO: should this actually be error? is there anywhere where this could + * be valid? 
*/ + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (LEFT_ANGLE)) + { + // skip after somewhere? + return AST::QualifiedPathType::create_error (); + } + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in qualified path type"); + add_error (std::move (error)); + + // skip somewhere? + return AST::QualifiedPathType::create_error (); + } + + // parse optional as clause + AST::TypePath as_type_path = AST::TypePath::create_error (); + if (lexer.peek_token ()->get_id () == AS) + { + lexer.skip_token (); + + // parse type path, which is required now + as_type_path = parse_type_path (); + if (as_type_path.is_error ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "could not parse type path in as clause in qualified path type"); + add_error (std::move (error)); + + // skip somewhere? + return AST::QualifiedPathType::create_error (); + } + } + + /* NOTE: should actually be a right-angle token, so + * skip_generics_right_angle shouldn't be required */ + if (!skip_token (RIGHT_ANGLE)) + { + // skip after somewhere? + return AST::QualifiedPathType::create_error (); + } + + return AST::QualifiedPathType (std::move (type), locus, + std::move (as_type_path)); +} + +// Parses a fully qualified path in type (i.e. a type). +template +AST::QualifiedPathInType +Parser::parse_qualified_path_in_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + // parse the qualified path type (required) + AST::QualifiedPathType qual_path_type = parse_qualified_path_type (); + if (qual_path_type.is_error ()) + { + // TODO: should this create a parse error? + return AST::QualifiedPathInType::create_error (); + } + + // parse initial required segment + if (!expect_token (SCOPE_RESOLUTION)) + { + // skip after somewhere? 
+ + return AST::QualifiedPathInType::create_error (); + } + std::unique_ptr initial_segment + = parse_type_path_segment (); + if (initial_segment == nullptr) + { + // skip after somewhere? + Error error (lexer.peek_token ()->get_locus (), + "required initial type path segment in qualified path in " + "type could not be parsed"); + add_error (std::move (error)); + + return AST::QualifiedPathInType::create_error (); + } + + // parse optional segments (as long as scope resolution operator exists) + std::vector> segments; + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + std::unique_ptr segment + = parse_type_path_segment (); + if (segment == nullptr) + { + // skip after somewhere? + Error error ( + t->get_locus (), + "could not parse type path segment in qualified path in type"); + add_error (std::move (error)); + + return AST::QualifiedPathInType::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + return AST::QualifiedPathInType (std::move (qual_path_type), + std::move (initial_segment), + std::move (segments), locus); +} + +// Parses a self param. Also handles self param not existing. 
+template +AST::SelfParam +Parser::parse_self_param () +{ + bool has_reference = false; + AST::Lifetime lifetime = AST::Lifetime::error (); + + Location locus = lexer.peek_token ()->get_locus (); + + // test if self is a reference parameter + if (lexer.peek_token ()->get_id () == AMP) + { + has_reference = true; + lexer.skip_token (); + + // now test whether it has a lifetime + if (lexer.peek_token ()->get_id () == LIFETIME) + { + lifetime = parse_lifetime (); + + // something went wrong somehow + if (lifetime.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime in self param"); + add_error (std::move (error)); + + // skip after somewhere? + return AST::SelfParam::create_error (); + } + } + } + + // test for mut + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + // skip self token + const_TokenPtr self_tok = lexer.peek_token (); + if (self_tok->get_id () != SELF) + { + // skip after somewhere? + return AST::SelfParam::create_error (); + } + lexer.skip_token (); + + // parse optional type + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // type is now required + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in self param"); + add_error (std::move (error)); + + // skip after somewhere? + return AST::SelfParam::create_error (); + } + } + + // ensure that cannot have both type and reference + if (type != nullptr && has_reference) + { + Error error ( + lexer.peek_token ()->get_locus (), + "cannot have both a reference and a type specified in a self param"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return AST::SelfParam::create_error (); + } + + if (has_reference) + { + return AST::SelfParam (std::move (lifetime), has_mut, locus); + } + else + { + // note that type may be nullptr here and that's fine + return AST::SelfParam (std::move (type), has_mut, locus); + } +} + +/* Parses a method. Note that this function is probably useless because using + * lookahead to determine whether a function is a method is a PITA (maybe not + * even doable), so most places probably parse a "function or method" and then + * resolve it into whatever it is afterward. As such, this is only here for + * algorithmically defining the grammar rule. */ +template +AST::Method +Parser::parse_method () +{ + Location locus = lexer.peek_token ()->get_locus (); + /* Note: as a result of the above, this will not attempt to disambiguate a + * function parse qualifiers */ + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + skip_token (FN_TOK); + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + skip_after_next_block (); + return AST::Method::create_error (); + } + Identifier method_name = ident_tok->get_str (); + + // parse generic params - if exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + if (!skip_token (LEFT_PAREN)) + { + Error error (lexer.peek_token ()->get_locus (), + "method missing opening parentheses before parameter list"); + add_error (std::move (error)); + + skip_after_next_block (); + return AST::Method::create_error (); + } + + // parse self param + AST::SelfParam self_param = parse_self_param (); + if (self_param.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse self param in method"); + add_error (std::move (error)); + + skip_after_next_block (); + return AST::Method::create_error (); + } + + // skip comma if it exists + if (lexer.peek_token ()->get_id () == COMMA) + lexer.skip_token (); + + // parse function parameters + std::vector function_params 
+ = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); + + if (!skip_token (RIGHT_PAREN)) + { + Error error (lexer.peek_token ()->get_locus (), + "method declaration missing closing parentheses after " + "parameter list"); + add_error (std::move (error)); + + skip_after_next_block (); + return AST::Method::create_error (); + } + + // parse function return type - if exists + std::unique_ptr return_type = parse_function_return_type (); + + // parse where clause - if exists + AST::WhereClause where_clause = parse_where_clause (); + + // parse block expression + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "method declaration missing block expression"); + add_error (std::move (error)); + + skip_after_end_block (); + return AST::Method::create_error (); + } + + // does not parse visibility, but this method isn't used, so doesn't matter + return AST::Method (std::move (method_name), std::move (qualifiers), + std::move (generic_params), std::move (self_param), + std::move (function_params), std::move (return_type), + std::move (where_clause), std::move (block_expr), + AST::Visibility::create_error (), AST::AttrVec (), locus); +} + +/* Parses an expression statement (disambiguates to expression with or without + * block statement). */ +template +std::unique_ptr +Parser::parse_expr_stmt (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + /* potential thoughts - define new virtual method "has_block()" on expr. + * parse expr and then determine whether semicolon is needed as a result of + * this method. but then this would require dynamic_cast, which is not + * allowed. */ + + /* okay new thought - big switch to disambiguate exprs with blocks - either + * block expr, async block expr, unsafe block expr, loop expr, if expr, if + * let expr, or match expr. So all others are exprs without block. 
*/ + /* new thought: possible initial tokens: 'loop', 'while', 'for', lifetime + * (and then ':' and then loop), 'if', 'match', '{', 'async', 'unsafe' (and + * then + * '{')). This seems to have no ambiguity. */ + + const_TokenPtr t = lexer.peek_token (); + /* TODO: should the switch just directly call the individual parse methods + * rather than adding another layer of indirection with + * parse_expr_stmt_with_block()? */ + switch (t->get_id ()) + { + case LOOP: + case WHILE: + case FOR: + case IF: + case MATCH_TOK: + case LEFT_CURLY: + case ASYNC: + // expression with block + return parse_expr_stmt_with_block (std::move (outer_attrs)); + case LIFETIME: { + /* FIXME: are there any expressions without blocks that can have + * lifetime as their first token? Or is loop expr the only one? */ + // safe side for now: + if (lexer.peek_token (1)->get_id () == COLON + && lexer.peek_token (2)->get_id () == LOOP) + { + return parse_expr_stmt_with_block (std::move (outer_attrs)); + } + else + { + return parse_expr_stmt_without_block (std::move (outer_attrs), + restrictions); + } + } + case UNSAFE: { + /* FIXME: are there any expressions without blocks that can have + * unsafe as their first token? Or is unsafe the only one? */ + // safe side for now + if (lexer.peek_token (1)->get_id () == LEFT_CURLY) + { + return parse_expr_stmt_with_block (std::move (outer_attrs)); + } + else + { + return parse_expr_stmt_without_block (std::move (outer_attrs), + restrictions); + } + } + default: + // not a parse expr with block, so must be expr without block + /* TODO: if possible, be more selective about possible expr without + * block initial tokens in order to prevent more syntactical errors at + * parse time. 
*/ + return parse_expr_stmt_without_block (std::move (outer_attrs), + restrictions); + } +} + +template +std::unique_ptr +Parser::parse_expr_with_block (AST::AttrVec outer_attrs) +{ + std::unique_ptr expr_parsed = nullptr; + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IF: + // if or if let, so more lookahead to find out + if (lexer.peek_token (1)->get_id () == LET) + { + // if let expr + expr_parsed = parse_if_let_expr (std::move (outer_attrs)); + break; + } + else + { + // if expr + expr_parsed = parse_if_expr (std::move (outer_attrs)); + break; + } + case LOOP: + // infinite loop + expr_parsed = parse_loop_expr (std::move (outer_attrs)); + break; + case FOR: + // "for" iterator loop + expr_parsed = parse_for_loop_expr (std::move (outer_attrs)); + break; + case WHILE: { + // while or while let, so more lookahead to find out + if (lexer.peek_token (1)->get_id () == LET) + { + // while let loop expr + expr_parsed = parse_while_let_loop_expr (std::move (outer_attrs)); + break; + } + else + { + // while loop expr + expr_parsed = parse_while_loop_expr (std::move (outer_attrs)); + break; + } + } + case MATCH_TOK: + // match expression + expr_parsed = parse_match_expr (std::move (outer_attrs)); + break; + case LEFT_CURLY: + // block expression + expr_parsed = parse_block_expr (std::move (outer_attrs)); + break; + case ASYNC: + // async block expression + expr_parsed = parse_async_block_expr (std::move (outer_attrs)); + break; + case UNSAFE: + // unsafe block expression + expr_parsed = parse_unsafe_block_expr (std::move (outer_attrs)); + break; + case LIFETIME: + // some kind of loop expr (with loop label) + expr_parsed = parse_labelled_loop_expr (std::move (outer_attrs)); + break; + default: + add_error (Error ( + t->get_locus (), + "could not recognise expr beginning with %qs as an expr with block in" + " parsing expr statement", + t->get_token_description ())); + + skip_after_next_block (); + return nullptr; + } + + // ensure expr 
parsed exists + if (expr_parsed == nullptr) + { + Error error (t->get_locus (), + "failed to parse expr with block in parsing expr statement"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + + return expr_parsed; +} + +/* Parses a expression statement containing an expression with block. + * Disambiguates internally. */ +template +std::unique_ptr +Parser::parse_expr_stmt_with_block ( + AST::AttrVec outer_attrs) +{ + auto expr_parsed = parse_expr_with_block (std::move (outer_attrs)); + auto locus = expr_parsed->get_locus (); + + // return expr stmt created from expr + return std::unique_ptr ( + new AST::ExprStmtWithBlock (std::move (expr_parsed), locus, + lexer.peek_token ()->get_id () == SEMICOLON)); +} + +/* Parses an expression statement containing an expression without block. + * Disambiguates further. */ +template +std::unique_ptr +Parser::parse_expr_stmt_without_block ( + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + /* TODO: maybe move more logic for expr without block in here for better + * error handling */ + + // attempt to parse via parse_expr_without_block - seems to work + std::unique_ptr expr = nullptr; + Location locus = lexer.peek_token ()->get_locus (); + + restrictions.expr_can_be_stmt = true; + + expr = parse_expr_without_block (std::move (outer_attrs), restrictions); + if (expr == nullptr) + { + // expr is required, error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr without block in expr statement"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + if (restrictions.consume_semi) + if (!skip_token (SEMICOLON)) + return nullptr; + + return std::unique_ptr ( + new AST::ExprStmtWithoutBlock (std::move (expr), locus)); +} + +/* Parses an expression without a block associated with it (further + * disambiguates). 
*/ +template +std::unique_ptr +Parser::parse_expr_without_block ( + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + /* Notes on types of expr without block: + * - literal expr tokens that are literals + * - path expr path_in_expr or qual_path_in_expr + * - operator expr many different types + * unary: + * borrow expr ( '&' | '&&' ) 'mut'? expr + * dereference expr '*' expr + * error propagation expr '?' + * negation '-' expr + * not '!' expr + * binary: all start with expr + * - grouped/paren expr '(' inner_attributes expr ')' + * - array expr '[' inner_attributes array_elems? ']' + * - await expr expr '.' 'await' + * - (array/slice) index expr expr '[' expr ']' + * - tuple expr '(' inner_attributes tuple_elems? ')' + * note that a single elem tuple is distinguished from a grouped expr + * by a trailing comma, i.e. a grouped expr is preferred over a tuple expr + * - tuple index expr expr '.' tuple_index + * - struct expr path_in_expr (and optional other stuff) + * - enum variant expr path_in_expr (and optional other stuff) + * this means that there is no syntactic difference between an enum + * variant and a struct + * - only name resolution can tell the difference. Thus, maybe rework + * AST to take this into account ("struct or enum" nodes?) + * - (function) call expr expr '(' call_params? ')' + * - method call expr expr '.' path_expr_segment '(' call_params? ')' + * - field expr expr '.' identifier + * note that method call expr is preferred, i.e. field expr must not be + * followed by parenthesised expression sequence. + * - closure expr 'move'? ( '||' | '|' closure_params? '|' ) ( + * expr | '->' type_no_bounds block_expr ) + * - continue expr 'continue' labelled_lifetime? + * - break expr 'break' labelled_lifetime? expr? + * - range expr many different types but all involve '..' or + * '..=' + * - return expr 'return' as 1st tok + * - macro invocation identifier then :: or identifier then ! 
+ * (simple_path '!') + * + * any that have rules beginning with 'expr' should probably be + * pratt-parsed, + * with parsing type to use determined by token AND lookahead. */ + + // ok well at least can do easy ones + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RETURN_TOK: + // return expr + return parse_return_expr (std::move (outer_attrs)); + case BREAK: + // break expr + return parse_break_expr (std::move (outer_attrs)); + case CONTINUE: + // continue expr + return parse_continue_expr (std::move (outer_attrs)); + case MOVE: + // closure expr (though not all closure exprs require this) + return parse_closure_expr (std::move (outer_attrs)); + case LEFT_SQUARE: + // array expr (creation, not index) + return parse_array_expr (std::move (outer_attrs)); + default: { + /* HACK: piggyback on pratt parsed expr and abuse polymorphism to + * essentially downcast */ + + std::unique_ptr expr + = parse_expr (std::move (outer_attrs), restrictions); + + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expression for expression without " + "block (pratt-parsed expression is null)"); + add_error (std::move (error)); + + return nullptr; + } + + std::unique_ptr expr_without_block ( + expr->as_expr_without_block ()); + + if (expr_without_block != nullptr) + { + return expr_without_block; + } + else + { + Error error (t->get_locus (), + "converted expr without block is null"); + add_error (std::move (error)); + + return nullptr; + } + } + } +} + +// Parses a block expression, including the curly braces at start and end. 
+template +std::unique_ptr +Parser::parse_block_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse statements and expression + std::vector> stmts; + std::unique_ptr expr = nullptr; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + ExprOrStmt expr_or_stmt = parse_stmt_or_expr_without_block (); + if (expr_or_stmt.is_error ()) + { + Error error (t->get_locus (), + "failed to parse statement or expression without " + "block in block expression"); + add_error (std::move (error)); + + return nullptr; + } + + t = lexer.peek_token (); + + if (expr_or_stmt.stmt != nullptr) + { + stmts.push_back (std::move (expr_or_stmt.stmt)); + } + else + { + // assign to expression and end parsing inside + expr = std::move (expr_or_stmt.expr); + break; + } + } + + Location end_locus = t->get_locus (); + + if (!skip_token (RIGHT_CURLY)) + { + Error error (t->get_locus (), + "error may be from having an expression (as opposed to " + "statement) in the body of the function but not last"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + + // grammar allows for empty block expressions + + stmts.shrink_to_fit (); + + return std::unique_ptr ( + new AST::BlockExpr (std::move (stmts), std::move (expr), + std::move (inner_attrs), std::move (outer_attrs), locus, + end_locus)); +} + +/* Parses a "grouped" expression (expression in parentheses), used to control + * precedence. 
*/ +template +std::unique_ptr +Parser::parse_grouped_expr (AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse required expr inside parentheses + std::unique_ptr expr_in_parens = parse_expr (); + if (expr_in_parens == nullptr) + { + // skip after somewhere? + // error? + return nullptr; + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::GroupedExpr (std::move (expr_in_parens), std::move (inner_attrs), + std::move (outer_attrs), locus)); +} + +// Parses a closure expression (closure definition). +template +std::unique_ptr +Parser::parse_closure_expr (AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + // detect optional "move" + bool has_move = false; + if (lexer.peek_token ()->get_id () == MOVE) + { + lexer.skip_token (); + has_move = true; + } + + // handle parameter list + std::vector params; + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case OR: + // skip token, no parameters + lexer.skip_token (); + break; + case PIPE: + // actually may have parameters + lexer.skip_token (); + + while (t->get_id () != PIPE) + { + AST::ClosureParam param = parse_closure_param (); + if (param.is_error ()) + { + // TODO is this really an error? + Error error (t->get_locus (), "could not parse closure param"); + add_error (std::move (error)); + + break; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // not an error but means param list is done + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + params.shrink_to_fit (); + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in closure expression - expected " + "%<|%> or %<||%>", + t->get_token_description ())); + + // skip somewhere? 
+ return nullptr; + } + + // again branch based on next token + t = lexer.peek_token (); + if (t->get_id () == RETURN_TYPE) + { + // must be return type closure with block expr + + // skip "return type" token + lexer.skip_token (); + + // parse actual type, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + // error + Error error (t->get_locus (), "failed to parse type for closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse block expr, which is required + std::unique_ptr block = parse_block_expr (); + if (block == nullptr) + { + // error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInnerTyped (std::move (type), std::move (block), + std::move (params), locus, has_move, + std::move (outer_attrs))); + } + else + { + // must be expr-only closure + + // parse expr, which is required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expression in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInner (std::move (expr), std::move (params), locus, + has_move, std::move (outer_attrs))); + } +} + +// Parses a literal token (to literal expression). 
+template +std::unique_ptr +Parser::parse_literal_expr (AST::AttrVec outer_attrs) +{ + // TODO: change if literal representation in lexer changes + + std::string literal_value; + AST::Literal::LitType type = AST::Literal::STRING; + + // branch based on token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case CHAR_LITERAL: + type = AST::Literal::CHAR; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case STRING_LITERAL: + type = AST::Literal::STRING; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case BYTE_CHAR_LITERAL: + type = AST::Literal::BYTE; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case BYTE_STRING_LITERAL: + type = AST::Literal::BYTE_STRING; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case INT_LITERAL: + type = AST::Literal::INT; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case FLOAT_LITERAL: + type = AST::Literal::FLOAT; + literal_value = t->get_str (); + lexer.skip_token (); + break; + // case BOOL_LITERAL + // use true and false keywords rather than "bool literal" Rust terminology + case TRUE_LITERAL: + type = AST::Literal::BOOL; + literal_value = "true"; + lexer.skip_token (); + break; + case FALSE_LITERAL: + type = AST::Literal::BOOL; + literal_value = "false"; + lexer.skip_token (); + break; + default: + // error - cannot be a literal expr + add_error (Error (t->get_locus (), + "unexpected token %qs when parsing literal expression", + t->get_token_description ())); + + // skip? + return nullptr; + } + + // create literal based on stuff in switch + return std::unique_ptr ( + new AST::LiteralExpr (std::move (literal_value), std::move (type), + t->get_type_hint (), std::move (outer_attrs), + t->get_locus ())); +} + +// Parses a return expression (including any expression to return). 
+template +std::unique_ptr +Parser::parse_return_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (RETURN_TOK); + } + + // parse expression to return, if it exists + ParseRestrictions restrictions; + restrictions.expr_can_be_null = true; + std::unique_ptr returned_expr + = parse_expr (AST::AttrVec (), restrictions); + + return std::unique_ptr ( + new AST::ReturnExpr (std::move (returned_expr), std::move (outer_attrs), + locus)); +} + +/* Parses a break expression (including any label to break to AND any return + * expression). */ +template +std::unique_ptr +Parser::parse_break_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (BREAK); + } + + // parse label (lifetime) if it exists - create dummy first + AST::Lifetime label = AST::Lifetime::error (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + label = parse_lifetime (); + } + + // parse break return expression if it exists + ParseRestrictions restrictions; + restrictions.expr_can_be_null = true; + std::unique_ptr return_expr + = parse_expr (AST::AttrVec (), restrictions); + + return std::unique_ptr ( + new AST::BreakExpr (std::move (label), std::move (return_expr), + std::move (outer_attrs), locus)); +} + +// Parses a continue expression (including any label to continue from). 
+template +std::unique_ptr +Parser::parse_continue_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (CONTINUE); + } + + // parse label (lifetime) if it exists - create dummy first + AST::Lifetime label = AST::Lifetime::error (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + label = parse_lifetime (); + } + + return std::unique_ptr ( + new AST::ContinueExpr (std::move (label), std::move (outer_attrs), locus)); +} + +// Parses a loop label used in loop expressions. +template +AST::LoopLabel +Parser::parse_loop_label () +{ + // parse lifetime - if doesn't exist, assume no label + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () != LIFETIME) + { + // not necessarily an error + return AST::LoopLabel::error (); + } + /* FIXME: check for named lifetime requirement here? or check in semantic + * analysis phase? */ + AST::Lifetime label = parse_lifetime (); + + if (!skip_token (COLON)) + { + // skip somewhere? + return AST::LoopLabel::error (); + } + + return AST::LoopLabel (std::move (label), t->get_locus ()); +} + +/* Parses an if expression of any kind, including with else, else if, else if + * let, and neither. Note that any outer attributes will be ignored because if + * expressions don't support them. */ +template +std::unique_ptr +Parser::parse_if_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + // TODO: make having outer attributes an error? + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (IF)) + { + skip_after_end_block (); + return nullptr; + } + } + + // detect accidental if let + if (lexer.peek_token ()->get_id () == LET) + { + Error error (lexer.peek_token ()->get_locus (), + "if let expression probably exists, but is being parsed " + "as an if expression. 
This may be a parser error"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + /* parse required condition expr - HACK to prevent struct expr from being + * parsed */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr condition = parse_expr ({}, no_struct_expr); + if (condition == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse condition expression in if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse required block expr + std::unique_ptr if_body = parse_block_expr (); + if (if_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse if body block expression in if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // branch to parse end or else (and then else, else if, or else if let) + if (lexer.peek_token ()->get_id () != ELSE) + { + // single selection - end of if expression + return std::unique_ptr ( + new AST::IfExpr (std::move (condition), std::move (if_body), + std::move (outer_attrs), locus)); + } + else + { + // double or multiple selection - branch on end, else if, or else if let + + // skip "else" + lexer.skip_token (); + + // branch on whether next token is '{' or 'if' + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: { + // double selection - else + // parse else block expr (required) + std::unique_ptr else_body = parse_block_expr (); + if (else_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse else body block expression in " + "if expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqElse (std::move (condition), + std::move (if_body), + std::move (else_body), + std::move (outer_attrs), locus)); + } + case IF: { + // multiple selection - else if or else if let + // branch on whether next token is 'let' or not + if (lexer.peek_token (1)->get_id () == LET) + { + // parse if let expr (required) + std::unique_ptr if_let_expr + = parse_if_let_expr (); + if (if_let_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if let expression " + "after if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqIfLet (std::move (condition), + std::move (if_body), + std::move (if_let_expr), + std::move (outer_attrs), locus)); + } + else + { + // parse if expr (required) + std::unique_ptr if_expr = parse_if_expr (); + if (if_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if expression after " + "if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqIf (std::move (condition), + std::move (if_body), + std::move (if_expr), + std::move (outer_attrs), locus)); + } + } + default: + // error - invalid token + add_error (Error (t->get_locus (), + "unexpected token %qs after else in if expression", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } +} + +/* Parses an if let expression of any kind, including with else, else if, else + * if let, and none. Note that any outer attributes will be ignored as if let + * expressions don't support them. */ +template +std::unique_ptr +Parser::parse_if_let_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + // TODO: make having outer attributes an error? 
+ Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (IF)) + { + skip_after_end_block (); + return nullptr; + } + } + + // detect accidental if expr parsed as if let expr + if (lexer.peek_token ()->get_id () != LET) + { + Error error (lexer.peek_token ()->get_locus (), + "if expression probably exists, but is being parsed as an " + "if let expression. This may be a parser error"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + lexer.skip_token (); + + // parse match arm patterns (which are required) + std::vector> match_arm_patterns + = parse_match_arm_patterns (EQUAL); + if (match_arm_patterns.empty ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse any match arm patterns in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (!skip_token (EQUAL)) + { + // skip somewhere? + return nullptr; + } + + // parse expression (required) - HACK to prevent struct expr being parsed + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr scrutinee_expr = parse_expr ({}, no_struct_expr); + if (scrutinee_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse scrutinee expression in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check for expression not being a struct expression or lazy boolean + * expression here? or actually probably in semantic analysis. */ + + // parse block expression (required) + std::unique_ptr if_let_body = parse_block_expr (); + if (if_let_body == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse if let body block expression in if let expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + // branch to parse end or else (and then else, else if, or else if let) + if (lexer.peek_token ()->get_id () != ELSE) + { + // single selection - end of if let expression + return std::unique_ptr ( + new AST::IfLetExpr (std::move (match_arm_patterns), + std::move (scrutinee_expr), std::move (if_let_body), + std::move (outer_attrs), locus)); + } + else + { + // double or multiple selection - branch on end, else if, or else if let + + // skip "else" + lexer.skip_token (); + + // branch on whether next token is '{' or 'if' + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: { + // double selection - else + // parse else block expr (required) + std::unique_ptr else_body = parse_block_expr (); + if (else_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse else body block expression in " + "if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqElse (std::move (match_arm_patterns), + std::move (scrutinee_expr), + std::move (if_let_body), + std::move (else_body), + std::move (outer_attrs), locus)); + } + case IF: { + // multiple selection - else if or else if let + // branch on whether next token is 'let' or not + if (lexer.peek_token (1)->get_id () == LET) + { + // parse if let expr (required) + std::unique_ptr if_let_expr + = parse_if_let_expr (); + if (if_let_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if let expression " + "after if let expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqIfLet ( + std::move (match_arm_patterns), std::move (scrutinee_expr), + std::move (if_let_body), std::move (if_let_expr), + std::move (outer_attrs), locus)); + } + else + { + // parse if expr (required) + std::unique_ptr if_expr = parse_if_expr (); + if (if_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if expression after " + "if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqIf (std::move (match_arm_patterns), + std::move (scrutinee_expr), + std::move (if_let_body), + std::move (if_expr), + std::move (outer_attrs), locus)); + } + } + default: + // error - invalid token + add_error ( + Error (t->get_locus (), + "unexpected token %qs after else in if let expression", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } +} + +/* TODO: possibly decide on different method of handling label (i.e. not + * parameter) */ + +/* Parses a "loop" infinite loop expression. Label is not parsed and should be + * parsed via parse_labelled_loop_expr, which would call this. 
*/ +template +std::unique_ptr +Parser::parse_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + + if (!skip_token (LOOP)) + { + skip_after_end_block (); + return nullptr; + } + } + else + { + if (!label.is_error ()) + locus = label.get_locus (); + } + + // parse loop body, which is required + std::unique_ptr loop_body = parse_block_expr (); + if (loop_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse loop body in (infinite) loop expression"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::LoopExpr (std::move (loop_body), locus, std::move (label), + std::move (outer_attrs))); +} + +/* Parses a "while" loop expression. Label is not parsed and should be parsed + * via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_while_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + + if (!skip_token (WHILE)) + { + skip_after_end_block (); + return nullptr; + } + } + else + { + if (!label.is_error ()) + locus = label.get_locus (); + } + + // ensure it isn't a while let loop + if (lexer.peek_token ()->get_id () == LET) + { + Error error (lexer.peek_token ()->get_locus (), + "appears to be while let loop but is being parsed by " + "while loop - this may be a compiler issue"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + // parse loop predicate (required) with HACK to prevent struct expr parsing + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr predicate = parse_expr ({}, no_struct_expr); + if (predicate == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse predicate expression in while loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check that it isn't struct expression here? actually, probably in + * semantic analysis */ + + // parse loop body (required) + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop body block expression in while loop"); + add_error (std::move (error)); + + // skip somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::WhileLoopExpr (std::move (predicate), std::move (body), locus, + std::move (label), std::move (outer_attrs))); +} + +/* Parses a "while let" loop expression. Label is not parsed and should be + * parsed via parse_labelled_loop_expr, which would call this. 
*/ +template +std::unique_ptr +Parser::parse_while_let_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label) +{ + Location locus = Linemap::unknown_location (); + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + skip_token (WHILE); + + /* check for possible accidental recognition of a while loop as a while let + * loop */ + if (lexer.peek_token ()->get_id () != LET) + { + Error error (lexer.peek_token ()->get_locus (), + "appears to be a while loop but is being parsed by " + "while let loop - this may be a compiler issue"); + add_error (std::move (error)); + + // skip somewhere + return nullptr; + } + // as this token is definitely let now, save the computation of comparison + lexer.skip_token (); + + // parse predicate patterns + std::vector> predicate_patterns + = parse_match_arm_patterns (EQUAL); + // TODO: have to ensure that there is at least 1 pattern? + + if (!skip_token (EQUAL)) + { + // skip somewhere? + return nullptr; + } + + /* parse predicate expression, which is required (and HACK to prevent struct + * expr) */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr predicate_expr = parse_expr ({}, no_struct_expr); + if (predicate_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse predicate expression in while let loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: ensure that struct expression is not parsed? Actually, probably in + * semantic analysis. */ + + // parse loop body, which is required + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr (loop body) of while let loop"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr (new AST::WhileLetLoopExpr ( + std::move (predicate_patterns), std::move (predicate_expr), + std::move (body), locus, std::move (label), std::move (outer_attrs))); +} + +/* Parses a "for" iterative loop. Label is not parsed and should be parsed via + * parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_for_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label) +{ + Location locus = Linemap::unknown_location (); + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + skip_token (FOR); + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse iterator pattern in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (!skip_token (IN)) + { + // skip somewhere? + return nullptr; + } + + /* parse iterator expression, which is required - also HACK to prevent + * struct expr */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr expr = parse_expr ({}, no_struct_expr); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse iterator expression in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + // TODO: check to ensure this isn't struct expr? Or in semantic analysis. + + // parse loop body, which is required + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop body block expression in for loop"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::ForLoopExpr (std::move (pattern), std::move (expr), + std::move (body), locus, std::move (label), + std::move (outer_attrs))); +} + +// Parses a loop expression with label (any kind of loop - disambiguates). +template +std::unique_ptr +Parser::parse_labelled_loop_expr (AST::AttrVec outer_attrs) +{ + /* TODO: decide whether it should not work if there is no label, or parse it + * with no label at the moment, I will make it not work with no label + * because that's the implication. */ + + if (lexer.peek_token ()->get_id () != LIFETIME) + { + Error error (lexer.peek_token ()->get_locus (), + "expected lifetime in labelled loop expr (to parse loop " + "label) - found %qs", + lexer.peek_token ()->get_token_description ()); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + // parse loop label (required) + AST::LoopLabel label = parse_loop_label (); + if (label.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop label in labelled loop expr"); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LOOP: + return parse_loop_expr (std::move (outer_attrs), std::move (label)); + case FOR: + return parse_for_loop_expr (std::move (outer_attrs), std::move (label)); + case WHILE: + // further disambiguate into while vs while let + if (lexer.peek_token (1)->get_id () == LET) + { + return parse_while_let_loop_expr (std::move (outer_attrs), + std::move (label)); + } + else + { + return parse_while_loop_expr (std::move (outer_attrs), + std::move (label)); + } + default: + // error + add_error (Error (t->get_locus (), + "unexpected token %qs when parsing labelled loop", + t->get_token_description ())); + + // skip? + return nullptr; + } +} + +// Parses a match expression. 
+template +std::unique_ptr +Parser::parse_match_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (MATCH_TOK); + } + + /* parse scrutinee expression, which is required (and HACK to prevent struct + * expr) */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr scrutinee = parse_expr ({}, no_struct_expr); + if (scrutinee == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse scrutinee expression in match expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check for scrutinee expr not being struct expr? or do so in + * semantic analysis */ + + if (!skip_token (LEFT_CURLY)) + { + // skip somewhere? + return nullptr; + } + + // parse inner attributes (if they exist) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse match arms (if they exist) + // std::vector > match_arms; + std::vector match_arms; + + // parse match cases + while (lexer.peek_token ()->get_id () != RIGHT_CURLY) + { + // parse match arm itself, which is required + AST::MatchArm arm = parse_match_arm (); + if (arm.is_error ()) + { + // TODO is this worth throwing everything away? + Error error (lexer.peek_token ()->get_locus (), + "failed to parse match arm in match arms"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (MATCH_ARROW)) + { + // skip after somewhere? + // TODO is returning here a good idea? or is break better? 
+ return nullptr; + } + + ParseRestrictions restrictions; + restrictions.expr_can_be_stmt = true; + restrictions.consume_semi = false; + + std::unique_ptr expr = parse_expr_stmt ({}, restrictions); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in match arm in match expr"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + bool is_expr_without_block + = expr->get_type () == AST::ExprStmt::ExprStmtType::WITHOUT_BLOCK; + + // construct match case expr and add to cases + switch (expr->get_type ()) + { + case AST::ExprStmt::ExprStmtType::WITH_BLOCK: { + AST::ExprStmtWithBlock *cast + = static_cast (expr.get ()); + std::unique_ptr e = cast->get_expr ()->clone_expr (); + match_arms.push_back ( + AST::MatchCase (std::move (arm), std::move (e))); + } + break; + + case AST::ExprStmt::ExprStmtType::WITHOUT_BLOCK: { + AST::ExprStmtWithoutBlock *cast + = static_cast (expr.get ()); + std::unique_ptr e = cast->get_expr ()->clone_expr (); + match_arms.push_back ( + AST::MatchCase (std::move (arm), std::move (e))); + } + break; + } + + // handle comma presence + if (lexer.peek_token ()->get_id () != COMMA) + { + if (!is_expr_without_block) + { + // allowed even if not final case + continue; + } + else if (is_expr_without_block + && lexer.peek_token ()->get_id () != RIGHT_CURLY) + { + // not allowed if not final case + Error error (lexer.peek_token ()->get_locus (), + "exprwithoutblock requires comma after match case " + "expression in match arm (if not final case)"); + add_error (std::move (error)); + + return nullptr; + } + else + { + // otherwise, must be final case, so fine + break; + } + } + lexer.skip_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere? 
+ return nullptr; + } + + match_arms.shrink_to_fit (); + + return std::unique_ptr ( + new AST::MatchExpr (std::move (scrutinee), std::move (match_arms), + std::move (inner_attrs), std::move (outer_attrs), + locus)); +} + +// Parses the "pattern" part of the match arm (the 'case x:' equivalent). +template +AST::MatchArm +Parser::parse_match_arm () +{ + // parse optional outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // DEBUG + rust_debug ("about to start parsing match arm patterns"); + + // break early if find right curly + if (lexer.peek_token ()->get_id () == RIGHT_CURLY) + { + // not an error + return AST::MatchArm::create_error (); + } + + // parse match arm patterns - at least 1 is required + std::vector> match_arm_patterns + = parse_match_arm_patterns (RIGHT_CURLY); + if (match_arm_patterns.empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse any patterns in match arm"); + add_error (std::move (error)); + + // skip somewhere? + return AST::MatchArm::create_error (); + } + + // DEBUG + rust_debug ("successfully parsed match arm patterns"); + + // parse match arm guard expr if it exists + std::unique_ptr guard_expr = nullptr; + if (lexer.peek_token ()->get_id () == IF) + { + lexer.skip_token (); + + guard_expr = parse_expr (); + if (guard_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse guard expression in match arm"); + add_error (std::move (error)); + + // skip somewhere? + return AST::MatchArm::create_error (); + } + } + + // DEBUG + rust_debug ("successfully parsed match arm"); + + return AST::MatchArm (std::move (match_arm_patterns), + lexer.peek_token ()->get_locus (), + std::move (guard_expr), std::move (outer_attrs)); +} + +/* Parses the patterns used in a match arm. End token id is the id of the + * token that would exist after the patterns are done (e.g. '}' for match + * expr, '=' for if let and while let). 
*/ +template +std::vector> +Parser::parse_match_arm_patterns (TokenId end_token_id) +{ + // skip optional leading '|' + if (lexer.peek_token ()->get_id () == PIPE) + lexer.skip_token (); + /* TODO: do I even need to store the result of this? can't be used. + * If semantically different, I need a wrapped "match arm patterns" object + * for this. */ + + std::vector> patterns; + + // quick break out if end_token_id + if (lexer.peek_token ()->get_id () == end_token_id) + return patterns; + + // parse required pattern - if doesn't exist, return empty + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + // FIXME: should this be an error? + return patterns; + } + patterns.push_back (std::move (initial_pattern)); + + // DEBUG + rust_debug ("successfully parsed initial match arm pattern"); + + // parse new patterns as long as next char is '|' + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == PIPE) + { + // skip pipe token + lexer.skip_token (); + + // break if hit end token id + if (lexer.peek_token ()->get_id () == end_token_id) + break; + + // parse pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + // this is an error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in match arm patterns"); + add_error (std::move (error)); + + // skip somewhere? + return {}; + } + + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + patterns.shrink_to_fit (); + + return patterns; +} + +// Parses an async block expression. 
+template +std::unique_ptr +Parser::parse_async_block_expr (AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (ASYNC); + + // detect optional move token + bool has_move = false; + if (lexer.peek_token ()->get_id () == MOVE) + { + lexer.skip_token (); + has_move = true; + } + + // parse block expression (required) + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse block expression of async block expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::AsyncBlockExpr (std::move (block_expr), has_move, + std::move (outer_attrs), locus)); +} + +// Parses an unsafe block expression. +template +std::unique_ptr +Parser::parse_unsafe_block_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (UNSAFE); + } + + // parse block expression (required) + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse block expression of unsafe block expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::UnsafeBlockExpr (std::move (block_expr), std::move (outer_attrs), + locus)); +} + +// Parses an array definition expression. 
+template +std::unique_ptr +Parser::parse_array_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + } + + // parse optional inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse the "array elements" section, which is optional + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + // no array elements + lexer.skip_token (); + + std::vector> exprs; + auto array_elems + = Rust::make_unique (std::move (exprs), locus); + return Rust::make_unique (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus); + } + else + { + // should have array elements + // parse initial expression, which is required for either + std::unique_ptr initial_expr = parse_expr (); + if (initial_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse expression in array expression " + "(even though arrayelems seems to be present)"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // copy array elems + lexer.skip_token (); + + // parse copy amount expression (required) + std::unique_ptr copy_amount = parse_expr (); + if (copy_amount == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse copy amount expression in array " + "expression (arrayelems)"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + skip_token (RIGHT_SQUARE); + + std::unique_ptr copied_array_elems ( + new AST::ArrayElemsCopied (std::move (initial_expr), + std::move (copy_amount), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (copied_array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + // single-element array expression + std::vector> exprs; + exprs.reserve (1); + exprs.push_back (std::move (initial_expr)); + exprs.shrink_to_fit (); + + skip_token (RIGHT_SQUARE); + + std::unique_ptr array_elems ( + new AST::ArrayElemsValues (std::move (exprs), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == COMMA) + { + // multi-element array expression (or trailing comma) + std::vector> exprs; + exprs.push_back (std::move (initial_expr)); + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // quick break if right square bracket + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + break; + + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse element in array expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + exprs.push_back (std::move (expr)); + + t = lexer.peek_token (); + } + + skip_token (RIGHT_SQUARE); + + exprs.shrink_to_fit (); + + std::unique_ptr array_elems ( + new AST::ArrayElemsValues (std::move (exprs), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // error + Error error (lexer.peek_token ()->get_locus (), + "unexpected token %qs in array expression (arrayelems)", + lexer.peek_token ()->get_token_description ()); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + } +} + +// Parses a single parameter used in a closure definition. +template +AST::ClosureParam +Parser::parse_closure_param () +{ + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse pattern (which is required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + // not necessarily an error + return AST::ClosureParam::create_error (); + } + + // parse optional type of param + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // parse type, which is now required + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in closure parameter"); + add_error (std::move (error)); + + // skip somewhere? + return AST::ClosureParam::create_error (); + } + } + + return AST::ClosureParam (std::move (pattern), pattern->get_locus (), + std::move (type), std::move (outer_attrs)); +} + +// Parses a grouped or tuple expression (disambiguates). 
+template +std::unique_ptr +Parser::parse_grouped_or_tuple_expr ( + AST::AttrVec outer_attrs, Location pratt_parsed_loc) +{ + // adjustment to allow Pratt parsing to reuse function without copy-paste + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + } + + // parse optional inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + // must be empty tuple + lexer.skip_token (); + + // create tuple with empty tuple elems + return std::unique_ptr ( + new AST::TupleExpr (std::vector> (), + std::move (inner_attrs), std::move (outer_attrs), + locus)); + } + + // parse first expression (required) + std::unique_ptr first_expr = parse_expr (); + if (first_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expression in grouped or tuple expression"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return nullptr; + } + + // detect whether grouped expression with right parentheses as next token + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + // must be grouped expr + lexer.skip_token (); + + // create grouped expr + return std::unique_ptr ( + new AST::GroupedExpr (std::move (first_expr), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == COMMA) + { + // tuple expr + std::vector> exprs; + exprs.push_back (std::move (first_expr)); + + // parse potential other tuple exprs + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break out if right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse expr, which is now required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in tuple expr"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + exprs.push_back (std::move (expr)); + + t = lexer.peek_token (); + } + + // skip right paren + skip_token (RIGHT_PAREN); + + return std::unique_ptr ( + new AST::TupleExpr (std::move (exprs), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // error + const_TokenPtr t = lexer.peek_token (); + Error error (t->get_locus (), + "unexpected token %qs in grouped or tuple expression " + "(parenthesised expression) - expected %<)%> for grouped " + "expr and %<,%> for tuple expr", + t->get_token_description ()); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } +} + +// Parses a type (will further disambiguate any type). +template +std::unique_ptr +Parser::parse_type (bool save_errors) +{ + /* rules for all types: + * NeverType: '!' + * SliceType: '[' Type ']' + * InferredType: '_' + * MacroInvocation: SimplePath '!' 
DelimTokenTree + * ParenthesisedType: '(' Type ')' + * ImplTraitType: 'impl' TypeParamBounds + * TypeParamBounds (not type) TypeParamBound ( '+' TypeParamBound )* '+'? + * TypeParamBound Lifetime | TraitBound + * ImplTraitTypeOneBound: 'impl' TraitBound + * TraitObjectType: 'dyn'? TypeParamBounds + * TraitObjectTypeOneBound: 'dyn'? TraitBound + * TraitBound '?'? ForLifetimes? TypePath | '(' '?'? + * ForLifetimes? TypePath ')' BareFunctionType: ForLifetimes? + * FunctionQualifiers 'fn' etc. ForLifetimes (not type) 'for' '<' + * LifetimeParams '>' FunctionQualifiers ( 'async' | 'const' )? + * 'unsafe'? + * ('extern' abi?)? QualifiedPathInType: '<' Type ( 'as' TypePath )? '>' + * ( + * '::' TypePathSegment )+ TypePath: '::'? TypePathSegment ( + * '::' TypePathSegment)* ArrayType: '[' Type ';' Expr ']' + * ReferenceType: '&' Lifetime? 'mut'? TypeNoBounds + * RawPointerType: '*' ( 'mut' | 'const' ) TypeNoBounds + * TupleType: '(' Type etc. - regular tuple stuff. Also + * regular tuple vs parenthesised precedence + * + * Disambiguate between macro and type path via type path being parsed, and + * then if '!' found, convert type path to simple path for macro. Usual + * disambiguation for tuple vs parenthesised. For ImplTraitType and + * TraitObjectType individual disambiguations, they seem more like "special + * cases", so probably just try to parse the more general ImplTraitType or + * TraitObjectType and return OneBound versions if they satisfy those + * criteria. 
*/ + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // never type - can't be macro as no path beforehand + lexer.skip_token (); + return std::unique_ptr ( + new AST::NeverType (t->get_locus ())); + case LEFT_SQUARE: + // slice type or array type - requires further disambiguation + return parse_slice_or_array_type (); + case LEFT_ANGLE: { + // qualified path in type + AST::QualifiedPathInType path = parse_qualified_path_in_type (); + if (path.is_error ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse qualified path in type"); + add_error (std::move (error)); + } + + return nullptr; + } + return std::unique_ptr ( + new AST::QualifiedPathInType (std::move (path))); + } + case UNDERSCORE: + // inferred type + lexer.skip_token (); + return std::unique_ptr ( + new AST::InferredType (t->get_locus ())); + case ASTERISK: + // raw pointer type + return parse_raw_pointer_type (); + case AMP: // does this also include AMP_AMP? + // reference type + return parse_reference_type (); + case LIFETIME: { + /* probably a lifetime bound, so probably type param bounds in + * TraitObjectType */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + false)); + } + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: { + // macro invocation or type path - requires further disambiguation. + /* for parsing path component of each rule, perhaps parse it as a + * typepath and attempt conversion to simplepath if a trailing '!' is + * found */ + /* Type path also includes TraitObjectTypeOneBound BUT if it starts + * with it, it is exactly the same as a TypePath syntactically, so + * this is a syntactical ambiguity. As such, the parser will parse it + * as a TypePath. This, however, does not prevent TraitObjectType from + * starting with a typepath. 
*/ + + // parse path as type path + AST::TypePath path = parse_type_path (); + if (path.is_error ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse path as first component of type"); + add_error (std::move (error)); + } + + return nullptr; + } + Location locus = path.get_locus (); + + // branch on next token + t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: { + // macro invocation + // convert to simple path + AST::SimplePath macro_path = path.as_simple_path (); + if (macro_path.is_empty ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse simple path in macro " + "invocation (for type)"); + add_error (std::move (error)); + } + + return nullptr; + } + + lexer.skip_token (); + + AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + + return std::unique_ptr ( + new AST::MacroInvocation ( + AST::MacroInvocData (std::move (macro_path), + std::move (tok_tree)), + {}, locus)); + } + case PLUS: { + // type param bounds + std::vector> bounds; + + // convert type path to trait bound + std::unique_ptr path_bound ( + new AST::TraitBound (std::move (path), locus, false, false)); + bounds.push_back (std::move (path_bound)); + + /* parse rest of bounds - FIXME: better way to find when to stop + * parsing */ + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists - if not, assume end of sequence + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), locus, false)); + } + default: + // assume that this is a type path and not an error + return std::unique_ptr ( + new AST::TypePath (std::move (path))); + } + } + case LEFT_PAREN: + /* tuple type or parenthesised type - requires further disambiguation + * (the usual). 
ok apparently can be a parenthesised TraitBound too, so + * could be TraitObjectTypeOneBound or TraitObjectType */ + return parse_paren_prefixed_type (); + case FOR: + // TraitObjectTypeOneBound or BareFunctionType + return parse_for_prefixed_type (); + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + // bare function type (with no for lifetimes) + return parse_bare_function_type (std::vector ()); + case IMPL: + lexer.skip_token (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::ImplTraitType (std::move (bounds), t->get_locus ())); + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound = parse_trait_bound (); + if (initial_bound == nullptr) + { + if (save_errors) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse ImplTraitType initial bound"); + add_error (std::move (error)); + } + + return nullptr; + } + + Location locus = t->get_locus (); + + // short cut if next token isn't '+' + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + // DEBUG: removed as unique ptr, so should auto-delete + // delete initial_bound; + + return std::unique_ptr ( + new AST::ImplTraitTypeOneBound (std::move (value_bound), + locus)); + } + + // parse additional type param bounds + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error as trailing plus may exist + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::ImplTraitType (std::move (bounds), 
locus)); + } + case DYN: + case QUESTION_MARK: { + // either TraitObjectType or TraitObjectTypeOneBound + bool has_dyn = false; + if (t->get_id () == DYN) + { + lexer.skip_token (); + has_dyn = true; + } + + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + has_dyn)); + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound + = parse_trait_bound (); + if (initial_bound == nullptr) + { + if (save_errors) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse TraitObjectType initial bound"); + add_error (std::move (error)); + } + + return nullptr; + } + + // short cut if next token isn't '+' + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + // DEBUG: removed as unique ptr, so should auto delete + // delete initial_bound; + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (std::move (value_bound), + t->get_locus (), has_dyn)); + } + + // parse additional type param bounds + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error as trailing plus may exist + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + has_dyn)); + } + } + default: + if (save_errors) + add_error (Error (t->get_locus (), "unrecognised token %qs in type", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a type that has '(' as its first character. 
Returns a tuple type, + * parenthesised type, TraitObjectTypeOneBound, or TraitObjectType depending + * on following characters. */ +template +std::unique_ptr +Parser::parse_paren_prefixed_type () +{ + /* NOTE: Syntactical ambiguity of a parenthesised trait bound is considered + * a trait bound, not a parenthesised type, so that it can still be used in + * type param bounds. */ + + /* NOTE: this implementation is really shit but I couldn't think of a better + * one. It requires essentially breaking polymorphism and downcasting via + * virtual method abuse, as it was copied from the rustc implementation (in + * which types are reified due to tagged union), after a more OOP attempt by + * me failed. */ + Location left_delim_locus = lexer.peek_token ()->get_locus (); + + // skip left delim + lexer.skip_token (); + /* while next token isn't close delim, parse comma-separated types, saving + * whether trailing comma happens */ + const_TokenPtr t = lexer.peek_token (); + bool trailing_comma = true; + std::vector> types; + + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (t->get_locus (), + "failed to parse type inside parentheses (probably " + "tuple or parenthesised)"); + add_error (std::move (error)); + + return nullptr; + } + types.push_back (std::move (type)); + + t = lexer.peek_token (); + if (t->get_id () != COMMA) + { + trailing_comma = false; + break; + } + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // if only one type and no trailing comma, then not a tuple type + if (types.size () == 1 && !trailing_comma) + { + // must be a TraitObjectType (with more than one bound) + if (lexer.peek_token ()->get_id () == PLUS) + { + // create type param bounds vector + std::vector> bounds; + + // HACK: convert type to traitbound and add to bounds + std::unique_ptr released_ptr = std::move (types[0]); + std::unique_ptr 
converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to hackily converted parsed type to trait bound"); + add_error (std::move (error)); + + return nullptr; + } + bounds.push_back (std::move (converted_bound)); + + t = lexer.peek_token (); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // attempt to parse typeparambound + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error if null + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), left_delim_locus, + false)); + } + else + { + // release vector pointer + std::unique_ptr released_ptr = std::move (types[0]); + /* HACK: attempt to convert to trait bound. if fails, parenthesised + * type */ + std::unique_ptr converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + // parenthesised type + return std::unique_ptr ( + new AST::ParenthesisedType (std::move (released_ptr), + left_delim_locus)); + } + else + { + // trait object type (one bound) + + // get value semantics trait bound + AST::TraitBound value_bound (*converted_bound); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (value_bound, + left_delim_locus)); + } + } + } + else + { + return std::unique_ptr ( + new AST::TupleType (std::move (types), left_delim_locus)); + } + /* TODO: ensure that this ensures that dynamic dispatch for traits is not + * lost somehow */ +} + +/* Parses a type that has 'for' as its first character. This means it has a + * "for lifetimes", so returns either a BareFunctionType, TraitObjectType, or + * TraitObjectTypeOneBound depending on following characters. 
*/ +template +std::unique_ptr +Parser::parse_for_prefixed_type () +{ + Location for_locus = lexer.peek_token ()->get_locus (); + // parse for lifetimes in type + std::vector for_lifetimes = parse_for_lifetimes (); + + // branch on next token - either function or a trait type + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + return parse_bare_function_type (std::move (for_lifetimes)); + case SCOPE_RESOLUTION: + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: { + // path, so trait type + + // parse type path to finish parsing trait bound + AST::TypePath path = parse_type_path (); + + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // must be one-bound trait type + // create trait bound value object + AST::TraitBound bound (std::move (path), for_locus, false, false, + std::move (for_lifetimes)); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (std::move (bound), for_locus)); + } + + /* more than one bound trait type (or at least parsed as it - could be + * trailing '+') create trait bound pointer and bounds */ + std::unique_ptr initial_bound ( + new AST::TraitBound (std::move (path), for_locus, false, false, + std::move (for_lifetimes))); + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse type param bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error - e.g. 
trailing plus + return nullptr; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), for_locus, false)); + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in bare function type or trait " + "object type or trait object type one bound", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a maybe named param used in bare function types. +template +AST::MaybeNamedParam +Parser::parse_maybe_named_param (AST::AttrVec outer_attrs) +{ + /* Basically guess that param is named if first token is identifier or + * underscore and second token is semicolon. This should probably have no + * exceptions. rustc uses backtracking to parse these, but at the time of + * writing gccrs has no backtracking capabilities. */ + const_TokenPtr current = lexer.peek_token (); + const_TokenPtr next = lexer.peek_token (1); + + Identifier name; + AST::MaybeNamedParam::ParamKind kind = AST::MaybeNamedParam::UNNAMED; + + if (current->get_id () == IDENTIFIER && next->get_id () == COLON) + { + // named param + name = current->get_str (); + kind = AST::MaybeNamedParam::IDENTIFIER; + lexer.skip_token (1); + } + else if (current->get_id () == UNDERSCORE && next->get_id () == COLON) + { + // wildcard param + name = "_"; + kind = AST::MaybeNamedParam::WILDCARD; + lexer.skip_token (1); + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in maybe named param"); + add_error (std::move (error)); + + return AST::MaybeNamedParam::create_error (); + } + + return AST::MaybeNamedParam (std::move (name), kind, std::move (type), + std::move (outer_attrs), current->get_locus ()); +} + +/* Parses a bare function type (with the given for lifetimes for convenience - + * does not parse them itself). 
*/ +template +std::unique_ptr +Parser::parse_bare_function_type ( + std::vector for_lifetimes) +{ + // TODO: pass in for lifetime location as param + Location best_try_locus = lexer.peek_token ()->get_locus (); + + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + if (!skip_token (FN_TOK)) + return nullptr; + + if (!skip_token (LEFT_PAREN)) + return nullptr; + + // parse function params, if they exist + std::vector params; + bool is_variadic = false; + AST::AttrVec variadic_attrs; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + AST::AttrVec temp_attrs = parse_outer_attributes (); + + if (lexer.peek_token ()->get_id () == ELLIPSIS) + { + lexer.skip_token (); + is_variadic = true; + variadic_attrs = std::move (temp_attrs); + + t = lexer.peek_token (); + + if (t->get_id () != RIGHT_PAREN) + { + Error error (t->get_locus (), + "expected right parentheses after variadic in maybe " + "named function " + "parameters, found %qs", + t->get_token_description ()); + add_error (std::move (error)); + + return nullptr; + } + + break; + } + + AST::MaybeNamedParam param + = parse_maybe_named_param (std::move (temp_attrs)); + if (param.is_error ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse maybe named param in bare function type"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + return nullptr; + + // bare function return type, if exists + std::unique_ptr return_type = nullptr; + if (lexer.peek_token ()->get_id () == RETURN_TYPE) + { + lexer.skip_token (); + + // parse required TypeNoBounds + return_type = parse_type_no_bounds (); + if (return_type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse return type (type no bounds) in bare " + "function type"); + 
add_error (std::move (error)); + + return nullptr; + } + } + + return std::unique_ptr ( + new AST::BareFunctionType (std::move (for_lifetimes), + std::move (qualifiers), std::move (params), + is_variadic, std::move (variadic_attrs), + std::move (return_type), best_try_locus)); +} + +// Parses a reference type (mutable or immutable, with given lifetime). +template +std::unique_ptr +Parser::parse_reference_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (AMP); + + // parse optional lifetime + AST::Lifetime lifetime = AST::Lifetime::error (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + lifetime = parse_lifetime (); + if (lifetime.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime in reference type"); + add_error (std::move (error)); + + return nullptr; + } + } + + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + is_mut = true; + } + + // parse type no bounds, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse referenced type in reference type"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::ReferenceType (is_mut, std::move (type), locus, + std::move (lifetime))); +} + +// Parses a raw (unsafe) pointer type. 
+template +std::unique_ptr +Parser::parse_raw_pointer_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (ASTERISK); + + AST::RawPointerType::PointerType kind = AST::RawPointerType::CONST; + + // branch on next token for pointer kind info + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case MUT: + kind = AST::RawPointerType::MUT; + lexer.skip_token (); + break; + case CONST: + kind = AST::RawPointerType::CONST; + lexer.skip_token (); + break; + default: + add_error (Error (t->get_locus (), + "unrecognised token %qs in raw pointer type", + t->get_token_description ())); + + return nullptr; + } + + // parse type no bounds (required) + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pointed type of raw pointer type"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::RawPointerType (kind, std::move (type), locus)); +} + +/* Parses a slice or array type, depending on following arguments (as + * lookahead is not possible). 
*/ +template +std::unique_ptr +Parser::parse_slice_or_array_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + + // parse inner type (required) + std::unique_ptr inner_type = parse_type (); + if (inner_type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse inner type in slice or array type"); + add_error (std::move (error)); + + return nullptr; + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_SQUARE: + // slice type + lexer.skip_token (); + + return std::unique_ptr ( + new AST::SliceType (std::move (inner_type), locus)); + case SEMICOLON: { + // array type + lexer.skip_token (); + + // parse required array size expression + std::unique_ptr size = parse_expr (); + if (size == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse size expression in array type"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (RIGHT_SQUARE)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::ArrayType (std::move (inner_type), std::move (size), locus)); + } + default: + // error + add_error ( + Error (t->get_locus (), + "unrecognised token %qs in slice or array type after inner type", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a type, taking into account type boundary disambiguation. 
+/* Dispatches on the next token to one of the type forms that may appear
+ * where no '+'-separated bounds are allowed. Forms that would need several
+ * bounds (a leading lifetime, or a '+' after the first trait bound) are
+ * detected only so that a specific error can be recorded. Returns nullptr
+ * after recording an error on any failure. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TypeNoBounds>
+Parser<ManagedTokenSource>::parse_type_no_bounds ()
+{
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case EXCLAM:
+      // never type - can't be macro as no path beforehand
+      lexer.skip_token ();
+      return std::unique_ptr<AST::NeverType> (
+        new AST::NeverType (t->get_locus ()));
+    case LEFT_SQUARE:
+      // slice type or array type - requires further disambiguation
+      return parse_slice_or_array_type ();
+    case LEFT_ANGLE: {
+        // qualified path in type
+        AST::QualifiedPathInType path = parse_qualified_path_in_type ();
+        if (path.is_error ())
+          {
+            Error error (t->get_locus (),
+                         "failed to parse qualified path in type");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+        return std::unique_ptr<AST::QualifiedPathInType> (
+          new AST::QualifiedPathInType (std::move (path)));
+      }
+    case UNDERSCORE:
+      // inferred type
+      lexer.skip_token ();
+      return std::unique_ptr<AST::InferredType> (
+        new AST::InferredType (t->get_locus ()));
+    case ASTERISK:
+      // raw pointer type
+      return parse_raw_pointer_type ();
+    case AMP: // does this also include AMP_AMP?
+      // reference type
+      return parse_reference_type ();
+    case LIFETIME:
+      /* probably a lifetime bound, so probably type param bounds in
+       * TraitObjectType. this is not allowed, but detection here for error
+       * message */
+      add_error (Error (t->get_locus (),
+                        "lifetime bounds (i.e. in type param bounds, in "
+                        "TraitObjectType) are not allowed as TypeNoBounds"));
+
+      return nullptr;
+    case IDENTIFIER:
+    case SUPER:
+    case SELF:
+    case SELF_ALIAS:
+    case CRATE:
+    case DOLLAR_SIGN:
+    case SCOPE_RESOLUTION: {
+        // macro invocation or type path - requires further disambiguation.
+        /* for parsing path component of each rule, perhaps parse it as a
+         * typepath and attempt conversion to simplepath if a trailing '!' is
+         * found */
+        /* Type path also includes TraitObjectTypeOneBound BUT if it starts
+         * with it, it is exactly the same as a TypePath syntactically, so
+         * this is a syntactical ambiguity. As such, the parser will parse it
+         * as a TypePath. This, however, does not prevent TraitObjectType from
+         * starting with a typepath. */
+
+        // parse path as type path
+        AST::TypePath path = parse_type_path ();
+        if (path.is_error ())
+          {
+            Error error (
+              t->get_locus (),
+              "failed to parse path as first component of type no bounds");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+        Location locus = path.get_locus ();
+
+        // branch on next token
+        t = lexer.peek_token ();
+        switch (t->get_id ())
+          {
+          case EXCLAM: {
+              // macro invocation
+              // convert to simple path
+              AST::SimplePath macro_path = path.as_simple_path ();
+              if (macro_path.is_empty ())
+                {
+                  Error error (t->get_locus (),
+                               "failed to parse simple path in macro "
+                               "invocation (for type)");
+                  add_error (std::move (error));
+
+                  return nullptr;
+                }
+
+              lexer.skip_token ();
+
+              AST::DelimTokenTree tok_tree = parse_delim_token_tree ();
+
+              return std::unique_ptr<AST::MacroInvocation> (
+                new AST::MacroInvocation (
+                  AST::MacroInvocData (std::move (macro_path),
+                                       std::move (tok_tree)),
+                  {}, locus));
+            }
+          default:
+            // assume that this is a type path and not an error
+            return std::unique_ptr<AST::TypePath> (
+              new AST::TypePath (std::move (path)));
+          }
+      }
+    case LEFT_PAREN:
+      /* tuple type or parenthesised type - requires further disambiguation
+       * (the usual). ok apparently can be a parenthesised TraitBound too, so
+       * could be TraitObjectTypeOneBound */
+      return parse_paren_prefixed_type_no_bounds ();
+    case FOR:
+    case ASYNC:
+    case CONST:
+    case UNSAFE:
+    case EXTERN_TOK:
+    case FN_TOK:
+      // bare function type (with no for lifetimes)
+      return parse_bare_function_type (std::vector<AST::LifetimeParam> ());
+    case IMPL:
+      lexer.skip_token ();
+      if (lexer.peek_token ()->get_id () == LIFETIME)
+        {
+          /* cannot be one bound because lifetime prevents it from being
+           * traitbound not allowed as type no bounds, only here for error
+           * message */
+          Error error (
+            lexer.peek_token ()->get_locus (),
+            "lifetime (probably lifetime bound, in type param "
+            "bounds, in ImplTraitType) is not allowed in TypeNoBounds");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      else
+        {
+          // should be trait bound, so parse trait bound
+          auto initial_bound = parse_trait_bound ();
+          if (initial_bound == nullptr)
+            {
+              Error error (lexer.peek_token ()->get_locus (),
+                           "failed to parse ImplTraitTypeOneBound bound");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          // t still refers to the 'impl' token here
+          Location locus = t->get_locus ();
+
+          // ensure not a trait with multiple bounds
+          t = lexer.peek_token ();
+          if (t->get_id () == PLUS)
+            {
+              Error error (t->get_locus (),
+                           "plus after trait bound means an ImplTraitType, "
+                           "which is not allowed as a TypeNoBounds");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          // convert trait bound to value object
+          AST::TraitBound value_bound (*initial_bound);
+
+          return std::unique_ptr<AST::ImplTraitTypeOneBound> (
+            new AST::ImplTraitTypeOneBound (std::move (value_bound), locus));
+        }
+    case DYN:
+    case QUESTION_MARK: {
+        // either TraitObjectTypeOneBound
+        bool has_dyn = false;
+        if (t->get_id () == DYN)
+          {
+            lexer.skip_token ();
+            has_dyn = true;
+          }
+
+        if (lexer.peek_token ()->get_id () == LIFETIME)
+          {
+            /* means that cannot be TraitObjectTypeOneBound - so here for
+             * error message */
+            Error error (lexer.peek_token ()->get_locus (),
+                         "lifetime as bound in TraitObjectTypeOneBound "
+                         "is not allowed, so cannot be TypeNoBounds");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        // should be trait bound, so parse trait bound
+        auto initial_bound = parse_trait_bound ();
+        if (initial_bound == nullptr)
+          {
+            Error error (
+              lexer.peek_token ()->get_locus (),
+              "failed to parse TraitObjectTypeOneBound initial bound");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        // t still refers to the 'dyn' or '?' token here
+        Location locus = t->get_locus ();
+
+        // detect error with plus as next token
+        t = lexer.peek_token ();
+        if (t->get_id () == PLUS)
+          {
+            Error error (t->get_locus (),
+                         "plus after trait bound means a TraitObjectType, "
+                         "which is not allowed as a TypeNoBounds");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        // convert trait bound to value object
+        AST::TraitBound value_bound (*initial_bound);
+
+        return std::unique_ptr<AST::TraitObjectTypeOneBound> (
+          new AST::TraitObjectTypeOneBound (std::move (value_bound), locus,
+                                            has_dyn));
+      }
+    default:
+      add_error (Error (t->get_locus (),
+                        "unrecognised token %qs in type no bounds",
+                        t->get_token_description ()));
+
+      return nullptr;
+    }
+}
+
+/* Parses a type no bounds beginning with '('. The result is a tuple type
+ * unless exactly one type without a trailing comma was parsed, in which
+ * case it is a parenthesised type or, if the inner type converts to a trait
+ * bound, a one-bound trait object type. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TypeNoBounds>
+Parser<ManagedTokenSource>::parse_paren_prefixed_type_no_bounds ()
+{
+  /* NOTE: this could probably be parsed without the HACK solution of
+   * parse_paren_prefixed_type, but I was lazy. So FIXME for future.*/
+
+  /* NOTE: again, syntactical ambiguity of a parenthesised trait bound is
+   * considered a trait bound, not a parenthesised type, so that it can still
+   * be used in type param bounds. */
+
+  Location left_paren_locus = lexer.peek_token ()->get_locus ();
+
+  // skip left delim
+  lexer.skip_token ();
+  /* while next token isn't close delim, parse comma-separated types, saving
+   * whether trailing comma happens */
+  const_TokenPtr t = lexer.peek_token ();
+  bool trailing_comma = true;
+  std::vector<std::unique_ptr<AST::Type>> types;
+
+  while (t->get_id () != RIGHT_PAREN)
+    {
+      auto type = parse_type ();
+      if (type == nullptr)
+        {
+          Error error (t->get_locus (),
+                       "failed to parse type inside parentheses (probably "
+                       "tuple or parenthesised)");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      types.push_back (std::move (type));
+
+      t = lexer.peek_token ();
+      if (t->get_id () != COMMA)
+        {
+          trailing_comma = false;
+          break;
+        }
+      lexer.skip_token ();
+
+      t = lexer.peek_token ();
+    }
+
+  if (!skip_token (RIGHT_PAREN))
+    {
+      return nullptr;
+    }
+
+  // anything but "one type, no trailing comma" is a tuple type
+  if (types.size () != 1 || trailing_comma)
+    {
+      return std::unique_ptr<AST::TupleType> (
+        new AST::TupleType (std::move (types), left_paren_locus));
+    }
+
+  // must be a TraitObjectType (with more than one bound)
+  if (lexer.peek_token ()->get_id () == PLUS)
+    {
+      // error - this is not allowed for type no bounds
+      Error error (lexer.peek_token ()->get_locus (),
+                   "plus (implying TraitObjectType as type param "
+                   "bounds) is not allowed in type no bounds");
+      add_error (std::move (error));
+
+      return nullptr;
+    }
+
+  // release vector pointer
+  std::unique_ptr<AST::Type> released_ptr = std::move (types[0]);
+  /* HACK: attempt to convert to trait bound. if fails, parenthesised
+   * type */
+  std::unique_ptr<AST::TraitBound> converted_bound (
+    released_ptr->to_trait_bound (true));
+  if (converted_bound == nullptr)
+    {
+      // parenthesised type
+      return std::unique_ptr<AST::ParenthesisedType> (
+        new AST::ParenthesisedType (std::move (released_ptr),
+                                    left_paren_locus));
+    }
+
+  // trait object type (one bound)
+
+  // get value semantics trait bound
+  AST::TraitBound value_bound (*converted_bound);
+
+  // moved rather than copied, matching parse_type_no_bounds
+  return std::unique_ptr<AST::TraitObjectTypeOneBound> (
+    new AST::TraitObjectTypeOneBound (std::move (value_bound),
+                                      left_paren_locus));
+  /* TODO: ensure that this ensures that dynamic dispatch for traits is not
+   * lost somehow */
+}
+
+/* Parses a literal pattern or range pattern. Assumes that literals passed in
+ * are valid range pattern bounds. Do not pass in paths in expressions, for
+ * instance. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Pattern>
+Parser<ManagedTokenSource>::parse_literal_or_range_pattern ()
+{
+  const_TokenPtr range_lower = lexer.peek_token ();
+  AST::Literal::LitType type = AST::Literal::STRING;
+  bool has_minus = false;
+
+  // get lit type
+  switch (range_lower->get_id ())
+    {
+    case CHAR_LITERAL:
+      type = AST::Literal::CHAR;
+      lexer.skip_token ();
+      break;
+    case BYTE_CHAR_LITERAL:
+      type = AST::Literal::BYTE;
+      lexer.skip_token ();
+      break;
+    case INT_LITERAL:
+      type = AST::Literal::INT;
+      lexer.skip_token ();
+      break;
+    case FLOAT_LITERAL:
+      type = AST::Literal::FLOAT;
+      lexer.skip_token ();
+      break;
+    case MINUS:
+      // branch on next token
+      range_lower = lexer.peek_token (1);
+      switch (range_lower->get_id ())
+        {
+        case INT_LITERAL:
+          type = AST::Literal::INT;
+          has_minus = true;
+          // skips both the '-' and the literal
+          lexer.skip_token (1);
+          break;
+        case FLOAT_LITERAL:
+          type = AST::Literal::FLOAT;
+          has_minus = true;
+          // skips both the '-' and the literal
+          lexer.skip_token (1);
+          break;
+        default:
+          add_error (Error (range_lower->get_locus (),
+                            "token type %qs cannot be parsed as range pattern "
+                            "bound or literal after minus symbol",
+                            range_lower->get_token_description ()));
+
+          return nullptr;
+        }
+      break;
+    default:
+      add_error (
+        Error (range_lower->get_locus (),
+               "token type %qs cannot be parsed as range pattern bound",
+               range_lower->get_token_description ()));
+
+      return nullptr;
+    }
+
+  const_TokenPtr next = lexer.peek_token ();
+  if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS)
+    {
+      // range pattern
+      bool has_ellipsis_syntax = next->get_id () == ELLIPSIS;
+      lexer.skip_token ();
+      /* NOTE(review): parse_range_pattern_bound keeps the token's type hint
+       * for its literals, whereas the lower bound here discards it - confirm
+       * whether that is intended. */
+      std::unique_ptr<AST::RangePatternBoundLiteral> lower (
+        new AST::RangePatternBoundLiteral (
+          AST::Literal (range_lower->get_str (), type,
+                        PrimitiveCoreType::CORETYPE_UNKNOWN),
+          range_lower->get_locus (), has_minus));
+
+      auto upper = parse_range_pattern_bound ();
+      if (upper == nullptr)
+        {
+          Error error (next->get_locus (),
+                       "failed to parse range pattern bound in range pattern");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+
+      // record whether the deprecated '...' spelling was used
+      return std::unique_ptr<AST::RangePattern> (
+        new AST::RangePattern (std::move (lower), std::move (upper),
+                               range_lower->get_locus (),
+                               has_ellipsis_syntax));
+    }
+  else
+    {
+      // literal pattern
+      /* NOTE(review): has_minus is not forwarded here, so a pattern such as
+       * '-1' appears to be recorded without its sign - confirm against
+       * AST::LiteralPattern. */
+      return std::unique_ptr<AST::LiteralPattern> (
+        new AST::LiteralPattern (range_lower->get_str (), type,
+                                 range_lower->get_locus ()));
+    }
+}
+
+// Parses a range pattern bound (value only).
+/* Accepts a char, byte, integer or float literal (the latter two optionally
+ * preceded by '-'), a path in expression, or a qualified path in
+ * expression. Records an error and returns nullptr for anything else. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::RangePatternBound>
+Parser<ManagedTokenSource>::parse_range_pattern_bound ()
+{
+  const_TokenPtr range_lower = lexer.peek_token ();
+  // location of the first token, i.e. of the '-' for negative literals
+  Location range_lower_locus = range_lower->get_locus ();
+
+  // get lit type
+  switch (range_lower->get_id ())
+    {
+    case CHAR_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::RangePatternBoundLiteral> (
+        new AST::RangePatternBoundLiteral (
+          AST::Literal (range_lower->get_str (), AST::Literal::CHAR,
+                        range_lower->get_type_hint ()),
+          range_lower_locus));
+    case BYTE_CHAR_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::RangePatternBoundLiteral> (
+        new AST::RangePatternBoundLiteral (
+          AST::Literal (range_lower->get_str (), AST::Literal::BYTE,
+                        range_lower->get_type_hint ()),
+          range_lower_locus));
+    case INT_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::RangePatternBoundLiteral> (
+        new AST::RangePatternBoundLiteral (
+          AST::Literal (range_lower->get_str (), AST::Literal::INT,
+                        range_lower->get_type_hint ()),
+          range_lower_locus));
+    case FLOAT_LITERAL:
+      lexer.skip_token ();
+      rust_debug ("warning: used deprecated float range pattern bound");
+      return std::unique_ptr<AST::RangePatternBoundLiteral> (
+        new AST::RangePatternBoundLiteral (
+          AST::Literal (range_lower->get_str (), AST::Literal::FLOAT,
+                        range_lower->get_type_hint ()),
+          range_lower_locus));
+    case MINUS:
+      // branch on next token
+      range_lower = lexer.peek_token (1);
+      switch (range_lower->get_id ())
+        {
+        case INT_LITERAL:
+          // skips both the '-' and the literal
+          lexer.skip_token (1);
+          return std::unique_ptr<AST::RangePatternBoundLiteral> (
+            new AST::RangePatternBoundLiteral (
+              AST::Literal (range_lower->get_str (), AST::Literal::INT,
+                            range_lower->get_type_hint ()),
+              range_lower_locus, true));
+        case FLOAT_LITERAL:
+          // skips both the '-' and the literal
+          lexer.skip_token (1);
+          rust_debug ("warning: used deprecated float range pattern bound");
+          return std::unique_ptr<AST::RangePatternBoundLiteral> (
+            new AST::RangePatternBoundLiteral (
+              AST::Literal (range_lower->get_str (), AST::Literal::FLOAT,
+                            range_lower->get_type_hint ()),
+              range_lower_locus, true));
+        default:
+          add_error (Error (range_lower->get_locus (),
+                            "token type %qs cannot be parsed as range pattern "
+                            "bound after minus symbol",
+                            range_lower->get_token_description ()));
+
+          return nullptr;
+        }
+    case IDENTIFIER:
+    case SUPER:
+    case SELF:
+    case SELF_ALIAS:
+    case CRATE:
+    case SCOPE_RESOLUTION:
+    case DOLLAR_SIGN: {
+        // path in expression
+        AST::PathInExpression path = parse_path_in_expression ();
+        if (path.is_error ())
+          {
+            Error error (
+              range_lower->get_locus (),
+              "failed to parse path in expression range pattern bound");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+        return std::unique_ptr<AST::RangePatternBoundPath> (
+          new AST::RangePatternBoundPath (std::move (path)));
+      }
+    case LEFT_ANGLE: {
+        // qualified path in expression
+        AST::QualifiedPathInExpression path
+          = parse_qualified_path_in_expression ();
+        if (path.is_error ())
+          {
+            Error error (range_lower->get_locus (),
+                         "failed to parse qualified path in expression range "
+                         "pattern bound");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+        return std::unique_ptr<AST::RangePatternBoundQualPath> (
+          new AST::RangePatternBoundQualPath (std::move (path)));
+      }
+    default:
+      add_error (
+        Error (range_lower->get_locus (),
+               "token type %qs cannot be parsed as range pattern bound",
+               range_lower->get_token_description ()));
+
+      return nullptr;
+    }
+}
+
+// Parses a pattern (will further disambiguate any pattern).
+/* Dispatches on the next token: simple literal, wildcard and path-led
+ * patterns are built here, everything else is delegated to the specialised
+ * pattern parsers. Returns nullptr after an error has been recorded when no
+ * pattern can be parsed. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Pattern>
+Parser<ManagedTokenSource>::parse_pattern ()
+{
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case TRUE_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::LiteralPattern> (
+        new AST::LiteralPattern ("true", AST::Literal::BOOL, t->get_locus ()));
+    case FALSE_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::LiteralPattern> (
+        new AST::LiteralPattern ("false", AST::Literal::BOOL, t->get_locus ()));
+    case CHAR_LITERAL:
+    case BYTE_CHAR_LITERAL:
+    case INT_LITERAL:
+    case FLOAT_LITERAL:
+      return parse_literal_or_range_pattern ();
+    case STRING_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::LiteralPattern> (
+        new AST::LiteralPattern (t->get_str (), AST::Literal::STRING,
+                                 t->get_locus ()));
+    case BYTE_STRING_LITERAL:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::LiteralPattern> (
+        new AST::LiteralPattern (t->get_str (), AST::Literal::BYTE_STRING,
+                                 t->get_locus ()));
+    // raw string and raw byte string literals too if they are readded to
+    // lexer
+    case MINUS: {
+        // a '-' is only valid directly in front of a numeric literal
+        const_TokenPtr after_minus = lexer.peek_token (1);
+        if (after_minus->get_id () == INT_LITERAL
+            || after_minus->get_id () == FLOAT_LITERAL)
+          {
+            return parse_literal_or_range_pattern ();
+          }
+
+        Error error (t->get_locus (), "unexpected token %<-%> in pattern - "
+                                      "did you forget an integer literal");
+        add_error (std::move (error));
+
+        return nullptr;
+      }
+    case UNDERSCORE:
+      lexer.skip_token ();
+      return std::unique_ptr<AST::WildcardPattern> (
+        new AST::WildcardPattern (t->get_locus ()));
+    case REF:
+    case MUT:
+      return parse_identifier_pattern ();
+    case IDENTIFIER:
+      /* if identifier with no scope resolution afterwards, identifier
+       * pattern. if scope resolution afterwards, path pattern (or range
+       * pattern or struct pattern or tuple struct pattern) or macro
+       * invocation */
+      return parse_ident_leading_pattern ();
+    case AMP:
+    case LOGICAL_AND:
+      // reference pattern
+      return parse_reference_pattern ();
+    case LEFT_PAREN:
+      // tuple pattern or grouped pattern
+      return parse_grouped_or_tuple_pattern ();
+    case LEFT_SQUARE:
+      // slice pattern
+      return parse_slice_pattern ();
+    case LEFT_ANGLE: {
+        // qualified path in expression or qualified range pattern bound
+        AST::QualifiedPathInExpression path
+          = parse_qualified_path_in_expression ();
+        if (path.is_error ())
+          {
+            Error error (
+              t->get_locus (),
+              "failed to parse qualified path in expression in pattern");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        const_TokenPtr next = lexer.peek_token ();
+        if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS)
+          {
+            // qualified range pattern bound, so parse rest of range pattern
+            bool has_ellipsis_syntax = next->get_id () == ELLIPSIS;
+            lexer.skip_token ();
+
+            std::unique_ptr<AST::RangePatternBoundQualPath> lower_bound (
+              new AST::RangePatternBoundQualPath (std::move (path)));
+            auto upper_bound = parse_range_pattern_bound ();
+            if (upper_bound == nullptr)
+              {
+                // do not build a range pattern around a missing bound
+                Error error (
+                  next->get_locus (),
+                  "failed to parse range pattern bound in range pattern");
+                add_error (std::move (error));
+
+                return nullptr;
+              }
+
+            return std::unique_ptr<AST::RangePattern> (
+              new AST::RangePattern (std::move (lower_bound),
+                                     std::move (upper_bound), t->get_locus (),
+                                     has_ellipsis_syntax));
+          }
+        else
+          {
+            // just qualified path in expression
+            return std::unique_ptr<AST::QualifiedPathInExpression> (
+              new AST::QualifiedPathInExpression (std::move (path)));
+          }
+      }
+    case SUPER:
+    case SELF:
+    case SELF_ALIAS:
+    case CRATE:
+    case SCOPE_RESOLUTION:
+    case DOLLAR_SIGN: {
+        // path in expression or range pattern bound
+        AST::PathInExpression path = parse_path_in_expression ();
+        if (path.is_error ())
+          {
+            Error error (t->get_locus (),
+                         "failed to parse path in expression in pattern");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        const_TokenPtr next = lexer.peek_token ();
+        switch (next->get_id ())
+          {
+          case DOT_DOT_EQ:
+          case ELLIPSIS: {
+              // path range pattern bound, so parse rest of range pattern
+              bool has_ellipsis_syntax = next->get_id () == ELLIPSIS;
+              lexer.skip_token ();
+
+              std::unique_ptr<AST::RangePatternBoundPath> lower_bound (
+                new AST::RangePatternBoundPath (std::move (path)));
+              auto upper_bound = parse_range_pattern_bound ();
+              if (upper_bound == nullptr)
+                {
+                  // do not build a range pattern around a missing bound
+                  Error error (
+                    next->get_locus (),
+                    "failed to parse range pattern bound in range pattern");
+                  add_error (std::move (error));
+
+                  return nullptr;
+                }
+
+              /* use the location of the first path token, as the qualified
+               * path branch does, rather than an unknown location */
+              return std::unique_ptr<AST::RangePattern> (
+                new AST::RangePattern (std::move (lower_bound),
+                                       std::move (upper_bound),
+                                       t->get_locus (), has_ellipsis_syntax));
+            }
+          case EXCLAM:
+            return parse_macro_invocation_partial (std::move (path),
+                                                   AST::AttrVec ());
+          case LEFT_PAREN: {
+              // tuple struct
+              lexer.skip_token ();
+
+              // check if empty tuple
+              if (lexer.peek_token ()->get_id () == RIGHT_PAREN)
+                {
+                  lexer.skip_token ();
+                  return std::unique_ptr<AST::TupleStructPattern> (
+                    new AST::TupleStructPattern (std::move (path), nullptr));
+                }
+
+              // parse items
+              auto items = parse_tuple_struct_items ();
+              if (items == nullptr)
+                {
+                  Error error (lexer.peek_token ()->get_locus (),
+                               "failed to parse tuple struct items");
+                  add_error (std::move (error));
+
+                  return nullptr;
+                }
+
+              if (!skip_token (RIGHT_PAREN))
+                {
+                  return nullptr;
+                }
+
+              return std::unique_ptr<AST::TupleStructPattern> (
+                new AST::TupleStructPattern (std::move (path),
+                                             std::move (items)));
+            }
+          case LEFT_CURLY: {
+              // struct
+              lexer.skip_token ();
+
+              // parse elements (optional)
+              AST::StructPatternElements elems = parse_struct_pattern_elems ();
+
+              if (!skip_token (RIGHT_CURLY))
+                {
+                  return nullptr;
+                }
+
+              return std::unique_ptr<AST::StructPattern> (
+                new AST::StructPattern (std::move (path), t->get_locus (),
+                                        std::move (elems)));
+            }
+          default:
+            // assume path in expression
+            return std::unique_ptr<AST::PathInExpression> (
+              new AST::PathInExpression (std::move (path)));
+          }
+      }
+    default:
+      add_error (Error (t->get_locus (), "unexpected token %qs in pattern",
+                        t->get_token_description ()));
+
+      return nullptr;
+    }
+}
+
+// Parses a single or double reference pattern.
+template +std::unique_ptr +Parser::parse_reference_pattern () +{ + // parse double or single ref + bool is_double_ref = false; + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case AMP: + // still false + lexer.skip_token (); + break; + case LOGICAL_AND: + is_double_ref = true; + lexer.skip_token (); + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in reference pattern", + t->get_token_description ())); + + return nullptr; + } + + // parse mut (if it exists) + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + is_mut = true; + lexer.skip_token (); + } + + // parse pattern to get reference of (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in reference pattern"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ReferencePattern (std::move (pattern), is_mut, is_double_ref, + t->get_locus ())); +} + +/* Parses a grouped pattern or tuple pattern. Prefers grouped over tuple if + * only a single element with no commas. */ +template +std::unique_ptr +Parser::parse_grouped_or_tuple_pattern () +{ + Location paren_locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + + // detect '..' 
token (ranged with no lower range) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + + // parse new patterns while next token is a comma + std::vector> patterns; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if next token is ')' + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + break; + } + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse pattern inside ranged tuple pattern"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return nullptr; + } + + // create ranged tuple pattern items with only upper items + std::unique_ptr items ( + new AST::TuplePatternItemsRanged ( + std::vector> (), std::move (patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + + // parse initial pattern (required) + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in grouped or tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + + // branch on whether next token is a comma or not + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_PAREN: + // grouped pattern + lexer.skip_token (); + + return std::unique_ptr ( + new AST::GroupedPattern (std::move (initial_pattern), paren_locus)); + case COMMA: { + // tuple pattern + lexer.skip_token (); + + // create vector of patterns + std::vector> patterns; + patterns.push_back (std::move (initial_pattern)); + + t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) + { + // parse pattern 
(required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + patterns.push_back (std::move (pattern)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + t = lexer.peek_token (); + if (t->get_id () == RIGHT_PAREN) + { + // non-ranged tuple pattern + lexer.skip_token (); + + std::unique_ptr items ( + new AST::TuplePatternItemsMultiple (std::move (patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else if (t->get_id () == DOT_DOT) + { + // ranged tuple pattern + lexer.skip_token (); + + // parse upper patterns + std::vector> upper_patterns; + t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if end + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + std::unique_ptr items ( + new AST::TuplePatternItemsRanged (std::move (patterns), + std::move (upper_patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else + { + // some kind of error + Error error (t->get_locus (), + "failed to parse tuple pattern (probably) or maybe " + "grouped pattern"); + add_error (std::move (error)); + + return nullptr; + } + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in grouped or tuple pattern " + "after first pattern", + t->get_token_description ())); + + 
return nullptr; + } +} + +/* Parses a slice pattern that can match arrays or slices. Parses the square + * brackets too. */ +template +std::unique_ptr +Parser::parse_slice_pattern () +{ + Location square_locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + + // parse initial pattern (required) + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse initial pattern in slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + + std::vector> patterns; + patterns.push_back (std::move (initial_pattern)); + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if end bracket + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + break; + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_SQUARE)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::SlicePattern (std::move (patterns), square_locus)); +} + +/* Parses an identifier pattern (pattern that binds a value matched to a + * variable). 
*/ +template +std::unique_ptr +Parser::parse_identifier_pattern () +{ + Location locus = lexer.peek_token ()->get_locus (); + + bool has_ref = false; + if (lexer.peek_token ()->get_id () == REF) + { + has_ref = true; + lexer.skip_token (); + + // DEBUG + rust_debug ("parsed ref in identifier pattern"); + } + + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + // parse identifier (required) + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + // skip somewhere? + return nullptr; + } + Identifier ident = ident_tok->get_str (); + + // DEBUG + rust_debug ("parsed identifier in identifier pattern"); + + // parse optional pattern binding thing + std::unique_ptr bind_pattern = nullptr; + if (lexer.peek_token ()->get_id () == PATTERN_BIND) + { + lexer.skip_token (); + + // parse required pattern to bind + bind_pattern = parse_pattern (); + if (bind_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern to bind in identifier pattern"); + add_error (std::move (error)); + + return nullptr; + } + } + + // DEBUG + rust_debug ("about to return identifier pattern"); + + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (ident), locus, has_ref, has_mut, + std::move (bind_pattern))); +} + +/* Parses a pattern that opens with an identifier. This includes identifier + * patterns, path patterns (and derivatives such as struct patterns, tuple + * struct patterns, and macro invocations), and ranges. 
*/ +template +std::unique_ptr +Parser::parse_ident_leading_pattern () +{ + // ensure first token is actually identifier + const_TokenPtr initial_tok = lexer.peek_token (); + if (initial_tok->get_id () != IDENTIFIER) + { + return nullptr; + } + + // save initial identifier as it may be useful (but don't skip) + std::string initial_ident = initial_tok->get_str (); + + // parse next tokens as a PathInExpression + AST::PathInExpression path = parse_path_in_expression (); + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + return parse_macro_invocation_partial (std::move (path), AST::AttrVec ()); + case LEFT_PAREN: { + // tuple struct + lexer.skip_token (); + + // DEBUG + rust_debug ("parsing tuple struct pattern"); + + // check if empty tuple + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + lexer.skip_token (); + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), nullptr)); + } + + // parse items + std::unique_ptr items + = parse_tuple_struct_items (); + if (items == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed tuple struct items"); + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed tuple struct pattern"); + + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), std::move (items))); + } + case LEFT_CURLY: { + // struct + lexer.skip_token (); + + // parse elements (optional) + AST::StructPatternElements elems = parse_struct_pattern_elems (); + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed struct pattern"); + + return std::unique_ptr ( + new AST::StructPattern (std::move (path), initial_tok->get_locus (), + std::move (elems))); + } + case DOT_DOT_EQ: + case ELLIPSIS: { + // 
range + bool has_ellipsis_syntax = lexer.peek_token ()->get_id () == ELLIPSIS; + + lexer.skip_token (); + + std::unique_ptr lower_bound ( + new AST::RangePatternBoundPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr (new AST::RangePattern ( + std::move (lower_bound), std::move (upper_bound), + Linemap::unknown_location (), has_ellipsis_syntax)); + } + case PATTERN_BIND: { + // only allow on single-segment paths + if (path.is_single_segment ()) + { + // identifier with pattern bind + lexer.skip_token (); + + std::unique_ptr bind_pattern = parse_pattern (); + if (bind_pattern == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse pattern to bind to identifier pattern"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (initial_ident), + initial_tok->get_locus (), false, + false, std::move (bind_pattern))); + } + Error error ( + t->get_locus (), + "failed to parse pattern bind to a path, not an identifier"); + add_error (std::move (error)); + + return nullptr; + } + default: + // assume identifier if single segment + if (path.is_single_segment ()) + { + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (initial_ident), + initial_tok->get_locus ())); + } + // return path otherwise + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } +} + +// Parses tuple struct items if they exist. Does not parse parentheses. +template +std::unique_ptr +Parser::parse_tuple_struct_items () +{ + std::vector> lower_patterns; + + // DEBUG + rust_debug ("started parsing tuple struct items"); + + // check for '..' at front + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + // only parse upper patterns + lexer.skip_token (); + + // DEBUG + rust_debug ("'..' 
at front in tuple struct items detected"); + + std::vector> upper_patterns; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern, which is now required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + // DEBUG + rust_debug ( + "finished parsing tuple struct items ranged (upper/none only)"); + + return std::unique_ptr ( + new AST::TupleStructItemsRange (std::move (lower_patterns), + std::move (upper_patterns))); + } + + // has at least some lower patterns + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) + { + // DEBUG + rust_debug ("about to parse pattern in tuple struct items"); + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + lower_patterns.push_back (std::move (pattern)); + + // DEBUG + rust_debug ("successfully parsed pattern in tuple struct items"); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // DEBUG + rust_debug ("broke out of parsing patterns in tuple struct " + "items as no comma"); + + break; + } + lexer.skip_token (); + t = lexer.peek_token (); + } + + // branch on next token + t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_PAREN: + return std::unique_ptr ( + new AST::TupleStructItemsNoRange (std::move (lower_patterns))); + case DOT_DOT: { + // has an upper range that must be parsed separately + lexer.skip_token (); + + std::vector> 
upper_patterns; + + t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if next token is right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TupleStructItemsRange (std::move (lower_patterns), + std::move (upper_patterns))); + } + default: + // error + add_error (Error (t->get_locus (), + "unexpected token %qs in tuple struct items", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses struct pattern elements if they exist. +template +AST::StructPatternElements +Parser::parse_struct_pattern_elems () +{ + std::vector> fields; + + AST::AttrVec etc_attrs; + bool has_etc = false; + + // try parsing struct pattern fields + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse etc (must be last in struct pattern, so breaks) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + etc_attrs = std::move (outer_attrs); + has_etc = true; + break; + } + + std::unique_ptr field + = parse_struct_pattern_field_partial (std::move (outer_attrs)); + if (field == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct pattern field"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return AST::StructPatternElements::create_empty (); + } + fields.push_back (std::move (field)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip comma + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (has_etc) + return AST::StructPatternElements (std::move (fields), + std::move (etc_attrs)); + else + return AST::StructPatternElements (std::move (fields)); +} + +/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or + * identifier). */ +template +std::unique_ptr +Parser::parse_struct_pattern_field () +{ + // parse outer attributes (if they exist) + AST::AttrVec outer_attrs = parse_outer_attributes (); + + return parse_struct_pattern_field_partial (std::move (outer_attrs)); +} + +/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or + * identifier), with outer attributes passed in. */ +template +std::unique_ptr +Parser::parse_struct_pattern_field_partial ( + AST::AttrVec outer_attrs) +{ + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case INT_LITERAL: { + // tuple index + std::string index_str = t->get_str (); + int index = atoi (index_str.c_str ()); + + if (!skip_token (COLON)) + { + return nullptr; + } + + // parse required pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse pattern in tuple index struct pattern field"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPatternFieldTuplePat (index, std::move (pattern), + std::move (outer_attrs), + t->get_locus ())); + } + case IDENTIFIER: + // identifier-pattern OR only identifier + // branch on next token + switch (lexer.peek_token (1)->get_id ()) + { + case COLON: { + // identifier-pattern + Identifier ident = t->get_str (); + lexer.skip_token (); + + skip_token (COLON); + + // parse required pattern + std::unique_ptr pattern = parse_pattern 
(); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in struct pattern field"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPatternFieldIdentPat (std::move (ident), + std::move (pattern), + std::move (outer_attrs), + t->get_locus ())); + } + case COMMA: + case RIGHT_CURLY: { + // identifier only + Identifier ident = t->get_str (); + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructPatternFieldIdent (std::move (ident), false, false, + std::move (outer_attrs), + t->get_locus ())); + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in struct pattern field", + t->get_token_description ())); + + return nullptr; + } + case REF: + case MUT: { + // only identifier + bool has_ref = false; + if (t->get_id () == REF) + { + has_ref = true; + lexer.skip_token (); + } + + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier ident = ident_tok->get_str (); + + return std::unique_ptr ( + new AST::StructPatternFieldIdent (std::move (ident), has_ref, has_mut, + std::move (outer_attrs), + t->get_locus ())); + } + default: + // not necessarily an error + return nullptr; + } +} + +template +ExprOrStmt +Parser::parse_stmt_or_expr_with_block ( + AST::AttrVec outer_attrs) +{ + auto expr = parse_expr_with_block (std::move (outer_attrs)); + if (expr == nullptr) + return ExprOrStmt::create_error (); + + auto tok = lexer.peek_token (); + + // tail expr in a block expr + if (tok->get_id () == RIGHT_CURLY) + return ExprOrStmt (std::move (expr)); + + // internal block expr must either have semicolons followed, or evaluate to + // () + auto locus = expr->get_locus (); + std::unique_ptr stmt ( + new AST::ExprStmtWithBlock (std::move (expr), locus, + tok->get_id 
() == SEMICOLON)); + if (tok->get_id () == SEMICOLON) + lexer.skip_token (); + + return ExprOrStmt (std::move (stmt)); +} + +/* Parses a statement or expression (depending on whether a trailing semicolon + * exists). Useful for block expressions where it cannot be determined through + * lookahead whether it is a statement or expression to be parsed. */ +template +ExprOrStmt +Parser::parse_stmt_or_expr_without_block () +{ + // quick exit for empty statement + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == SEMICOLON) + { + lexer.skip_token (); + std::unique_ptr stmt ( + new AST::EmptyStmt (t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // parse outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parsing this will be annoying because of the many different possibilities + /* best may be just to copy paste in parse_item switch, and failing that try + * to parse outer attributes, and then pass them in to either a let + * statement or (fallback) expression statement. */ + // FIXME: think of a way to do this without such a large switch? + + /* FIXME: for expressions at least, the only way that they can really be + * parsed properly in this way is if they don't support operators on them. + * They must be pratt-parsed otherwise. As such due to composability, only + * explicit statements will have special cases here. This should roughly + * correspond to "expr-with-block", but this warning is here in case it + * isn't the case. 
*/ + t = lexer.peek_token (); + switch (t->get_id ()) + { + case LET: { + // let statement + std::unique_ptr stmt ( + parse_let_stmt (std::move (outer_attrs))); + return ExprOrStmt (std::move (stmt)); + } + case PUB: + case MOD: + case EXTERN_TOK: + case USE: + case FN_TOK: + case TYPE: + case STRUCT_TOK: + case ENUM_TOK: + case CONST: + case STATIC_TOK: + case TRAIT: + case IMPL: { + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + /* TODO: implement union keyword but not really because of + * context-dependence crappy hack way to parse a union written below to + * separate it from the good code. */ + // case UNION: + case UNSAFE: { // maybe - unsafe traits are a thing + /* if any of these (should be all possible VisItem prefixes), parse a + * VisItem - can't parse item because would require reparsing outer + * attributes */ + const_TokenPtr t2 = lexer.peek_token (1); + switch (t2->get_id ()) + { + case LEFT_CURLY: { + // unsafe block + return parse_stmt_or_expr_with_block (std::move (outer_attrs)); + } + case TRAIT: { + // unsafe trait + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + case EXTERN_TOK: + case FN_TOK: { + // unsafe function + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + case IMPL: { + // unsafe trait impl + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + default: + add_error (Error (t2->get_locus (), + "unrecognised token %qs after parsing unsafe - " + "expected beginning of expression or statement", + t->get_token_description ())); + + // skip somewhere? + return ExprOrStmt::create_error (); + } + } + case SUPER: + case SELF: + case CRATE: + case DOLLAR_SIGN: { + /* path-based thing so struct/enum or path or macro invocation of a + * kind. 
however, the expressions are composable (i think) */ + + std::unique_ptr expr + = parse_expr_without_block (); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + /* FIXME: this is either a macro invocation or macro invocation semi. + * start parsing to determine which one it is. */ + // FIXME: or this is another path-based thing - struct/enum or path + // itself return parse_path_based_stmt_or_expr(std::move(outer_attrs)); + // FIXME: old code there + case LOOP: + case WHILE: + case FOR: + case IF: + case MATCH_TOK: + case LEFT_CURLY: + case ASYNC: { + return parse_stmt_or_expr_with_block (std::move (outer_attrs)); + } + case LIFETIME: { + /* FIXME: are there any expressions without blocks that can have + * lifetime as their first token? Or is loop expr the only one? 
*/ + // safe side for now: + const_TokenPtr t2 = lexer.peek_token (2); + if (lexer.peek_token (1)->get_id () == COLON + && (t2->get_id () == LOOP || t2->get_id () == WHILE + || t2->get_id () == FOR)) + { + return parse_stmt_or_expr_with_block (std::move (outer_attrs)); + } + else + { + // should be expr without block + std::unique_ptr expr + = parse_expr_without_block (std::move (outer_attrs)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + } + // crappy hack to do union "keyword" + case IDENTIFIER: + if (t->get_str () == "union" + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + // or should this go straight to parsing union? + } + else if (t->get_str () == "macro_rules") + { + // macro_rules! 
macro item + std::unique_ptr item ( + parse_macro_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION + || lexer.peek_token (1)->get_id () == EXCLAM + || lexer.peek_token (1)->get_id () == LEFT_CURLY) + { + /* path (probably) or macro invocation or struct or enum, so + * probably a macro invocation semi decide how to parse - probably + * parse path and then get macro from it */ + + // FIXME: old code was good until composability was required + // return parse_path_based_stmt_or_expr(std::move(outer_attrs)); + std::unique_ptr expr + = parse_expr_without_block (std::move (outer_attrs)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + gcc_fallthrough (); + // TODO: find out how to disable gcc "implicit fallthrough" warning + default: { + /* expression statement (without block) or expression itself - parse + * expression then make it statement if semi afterwards */ + + std::unique_ptr expr + = parse_expr_without_block (std::move (outer_attrs)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + } +} + +/* Parses a statement or expression beginning with a path (i.e. 
macro,
 * struct/enum, or path expr) */
template <typename ManagedTokenSource>
ExprOrStmt
Parser<ManagedTokenSource>::parse_path_based_stmt_or_expr (
  AST::AttrVec outer_attrs)
{
  // attempt to parse path
  Location stmt_or_expr_loc = lexer.peek_token ()->get_locus ();
  AST::PathInExpression path = parse_path_in_expression ();

  // branch on next token
  const_TokenPtr t2 = lexer.peek_token ();
  switch (t2->get_id ())
    {
    case EXCLAM:
      {
        /* macro invocation or macro invocation semi - depends on whether
         * there is a final ';' */
        // convert path in expr to simple path (as that is used in macros)
        AST::SimplePath macro_path = path.as_simple_path ();
        if (macro_path.is_empty ())
          {
            Error error (t2->get_locus (),
                         "failed to convert parsed path to simple "
                         "path (for macro invocation or semi)");
            add_error (std::move (error));

            return ExprOrStmt::create_error ();
          }

        // skip exclamation mark
        lexer.skip_token ();

        const_TokenPtr t3 = lexer.peek_token ();
        Location tok_tree_loc = t3->get_locus ();

        AST::DelimType type = AST::PARENS;
        switch (t3->get_id ())
          {
          case LEFT_PAREN:
            type = AST::PARENS;
            break;
          case LEFT_SQUARE:
            type = AST::SQUARE;
            break;
          case LEFT_CURLY:
            type = AST::CURLY;
            break;
          default:
            add_error (
              Error (t3->get_locus (),
                     "unrecognised token %qs in macro invocation - (opening) "
                     "delimiter expected",
                     t3->get_token_description ()));

            return ExprOrStmt::create_error ();
          }
        lexer.skip_token ();

        // parse actual token trees; the opening delimiter is stored first
        std::vector<std::unique_ptr<AST::TokenTree>> token_trees;
        auto delim_open
          = std::unique_ptr<AST::Token> (new AST::Token (std::move (t3)));
        token_trees.push_back (std::move (delim_open));

        t3 = lexer.peek_token ();
        // parse token trees until the initial delimiter token is found again
        while (!token_id_matches_delims (t3->get_id (), type))
          {
            auto tree = parse_token_tree ();

            if (tree == nullptr)
              {
                Error error (t3->get_locus (),
                             "failed to parse token tree for macro "
                             "invocation (or semi) - "
                             "found %qs",
                             t3->get_token_description ());
                add_error (std::move (error));

                return ExprOrStmt::create_error ();
              }

            token_trees.push_back (std::move (tree));

            t3 = lexer.peek_token ();
          }

        // the closing delimiter is stored as the last token tree
        auto delim_close
          = std::unique_ptr<AST::Token> (new AST::Token (std::move (t3)));
        token_trees.push_back (std::move (delim_close));

        // parse end delimiters
        t3 = lexer.peek_token ();
        if (!token_id_matches_delims (t3->get_id (), type))
          {
            /* tokens don't match opening delimiters, so produce error.
             * Report the offending token (t3); t2 is the '!' that was
             * consumed long before this point.  */
            Error error (
              t3->get_locus (),
              "unexpected token %qs - expecting closing delimiter %qs (for a "
              "macro invocation)",
              t3->get_token_description (),
              (type == AST::PARENS ? ")" : (type == AST::SQUARE ? "]" : "}")));
            add_error (std::move (error));

            return ExprOrStmt::create_error ();
          }

        // tokens match opening delimiter, so skip.
        lexer.skip_token ();

        /* with curly bracketed macros, assume it is a macro invocation
         * unless a semicolon is explicitly put at the end. this is not
         * necessarily true (i.e. context-dependence) and so may have to
         * be fixed up via HACKs in semantic analysis (by checking whether
         * it is the last elem in the vector). */

        AST::DelimTokenTree delim_tok_tree (type, std::move (token_trees),
                                            tok_tree_loc);
        AST::MacroInvocData invoc_data (std::move (macro_path),
                                        std::move (delim_tok_tree));

        if (lexer.peek_token ()->get_id () == SEMICOLON)
          {
            lexer.skip_token ();

            std::unique_ptr<AST::MacroInvocation> stmt (
              new AST::MacroInvocation (std::move (invoc_data),
                                        std::move (outer_attrs),
                                        stmt_or_expr_loc, true));
            return ExprOrStmt (std::move (stmt));
          }

        // otherwise, create macro invocation
        std::unique_ptr<AST::MacroInvocation> expr (
          new AST::MacroInvocation (std::move (invoc_data),
                                    std::move (outer_attrs), stmt_or_expr_loc,
                                    false));
        return ExprOrStmt (std::move (expr));
      }
    case LEFT_CURLY:
      {
        /* definitely not a block:
         *  path '{' ident ','
         *  path '{' ident ':' [anything] ','
         *  path '{' ident ':' [not a type]
         * otherwise, assume block expr and thus path */
        bool not_a_block = lexer.peek_token (1)->get_id () == IDENTIFIER
                           && (lexer.peek_token (2)->get_id () == COMMA
                               || (lexer.peek_token (2)->get_id () == COLON
                                   && (lexer.peek_token (4)->get_id () == COMMA
                                       || !can_tok_start_type (
                                         lexer.peek_token (3)->get_id ()))));
        std::unique_ptr<AST::ExprWithoutBlock> expr = nullptr;

        if (not_a_block)
          {
            /* assume struct expr struct (as struct-enum disambiguation
             * requires name lookup) again, make statement if final ';' */
            expr = parse_struct_expr_struct_partial (std::move (path),
                                                     std::move (outer_attrs));
            if (expr == nullptr)
              {
                Error error (t2->get_locus (),
                             "failed to parse struct expr struct");
                add_error (std::move (error));

                return ExprOrStmt::create_error ();
              }
          }
        else
          {
            // assume path - make statement if final ';'
            // lexer.skip_token();

            // HACK: add outer attrs to path
            path.set_outer_attrs (std::move (outer_attrs));
            expr = std::unique_ptr<AST::ExprWithoutBlock> (
              new AST::PathInExpression (std::move (path)));
          }

        // determine if statement if ends with semicolon
        if (lexer.peek_token ()->get_id () == SEMICOLON)
          {
            // statement
            lexer.skip_token ();
            std::unique_ptr<AST::ExprStmtWithoutBlock> stmt (
              new AST::ExprStmtWithoutBlock (std::move (expr),
                                             stmt_or_expr_loc));
            return ExprOrStmt (std::move (stmt));
          }

        // otherwise, expression
        return ExprOrStmt (std::move (expr));
      }
    case LEFT_PAREN:
      {
        /* assume struct expr tuple (as struct-enum disambiguation requires
         * name lookup) again, make statement if final ';' */
        auto struct_expr
          = parse_struct_expr_tuple_partial (std::move (path),
                                             std::move (outer_attrs));
        if (struct_expr == nullptr)
          {
            Error error (t2->get_locus (), "failed to parse struct expr tuple");
            add_error (std::move (error));

            return ExprOrStmt::create_error ();
          }

        // determine if statement if ends with semicolon
        if (lexer.peek_token ()->get_id () == SEMICOLON)
          {
            // statement
            lexer.skip_token ();
            std::unique_ptr<AST::ExprStmtWithoutBlock> stmt (
              new AST::ExprStmtWithoutBlock (std::move (struct_expr),
                                             stmt_or_expr_loc));
            return ExprOrStmt (std::move (stmt));
          }

        // otherwise, expression
        return ExprOrStmt (std::move (struct_expr));
      }
    default:
      {
        // assume path - make statement if final ';'
        // lexer.skip_token();

        // HACK: replace outer attributes in path
        path.set_outer_attrs (std::move (outer_attrs));
        std::unique_ptr<AST::PathInExpression> expr (
          new AST::PathInExpression (std::move (path)));

        if (lexer.peek_token ()->get_id () == SEMICOLON)
          {
            lexer.skip_token ();

            std::unique_ptr<AST::ExprStmtWithoutBlock> stmt (
              new AST::ExprStmtWithoutBlock (std::move (expr),
                                             stmt_or_expr_loc));
            return ExprOrStmt (std::move (stmt));
          }

        return ExprOrStmt (std::move (expr));
      }
    }
}

// Parses a struct expression field.
+template +std::unique_ptr +Parser::parse_struct_expr_field () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + if (lexer.peek_token (1)->get_id () == COLON) + { + // struct expr field with identifier and expr + Identifier ident = t->get_str (); + lexer.skip_token (1); + + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse struct expression field with " + "identifier and expression"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructExprFieldIdentifierValue (std::move (ident), + std::move (expr), + t->get_locus ())); + } + else + { + // struct expr field with identifier only + Identifier ident = t->get_str (); + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructExprFieldIdentifier (std::move (ident), + t->get_locus ())); + } + case INT_LITERAL: { + // parse tuple index field + int index = atoi (t->get_str ().c_str ()); + lexer.skip_token (); + + if (!skip_token (COLON)) + { + // skip somewhere? + return nullptr; + } + + // parse field expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expr in struct (or enum) expr " + "field with tuple index"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructExprFieldIndexValue (index, std::move (expr), + t->get_locus ())); + } + case DOT_DOT: + /* this is a struct base and can't be parsed here, so just return + * nothing without erroring */ + + return nullptr; + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs as first token of struct expr field - " + "expected identifier or integer literal", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a macro invocation or macro invocation semi. 
+template +ExprOrStmt +Parser::parse_macro_invocation_maybe_semi ( + AST::AttrVec outer_attrs) +{ + Location macro_locus = lexer.peek_token ()->get_locus (); + AST::SimplePath macro_path = parse_simple_path (); + if (macro_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse simple path in macro invocation or semi"); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + if (!skip_token (EXCLAM)) + { + return ExprOrStmt::create_error (); + } + + const_TokenPtr t3 = lexer.peek_token (); + Location tok_tree_loc = t3->get_locus (); + + AST::DelimType type = AST::PARENS; + switch (t3->get_id ()) + { + case LEFT_PAREN: + type = AST::PARENS; + break; + case LEFT_SQUARE: + type = AST::SQUARE; + break; + case LEFT_CURLY: + type = AST::CURLY; + break; + default: + add_error ( + Error (t3->get_locus (), + "unrecognised token %qs in macro invocation - (opening) " + "delimiter expected", + t3->get_token_description ())); + + return ExprOrStmt::create_error (); + } + lexer.skip_token (); + + // parse actual token trees + std::vector> token_trees; + auto delim_open + = std::unique_ptr (new AST::Token (std::move (t3))); + token_trees.push_back (std::move (delim_open)); + + t3 = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!token_id_matches_delims (t3->get_id (), type)) + { + std::unique_ptr tree = parse_token_tree (); + + if (tree == nullptr) + { + Error error (t3->get_locus (), + "failed to parse token tree for macro invocation (or " + "semi) - found %qs", + t3->get_token_description ()); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + token_trees.push_back (std::move (tree)); + + t3 = lexer.peek_token (); + } + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t3))); + token_trees.push_back (std::move (delim_close)); + + // parse end delimiters + t3 = lexer.peek_token (); + if (token_id_matches_delims 
(t3->get_id (), type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + /* with curly bracketed macros, assume it is a macro invocation unless + * a semicolon is explicitly put at the end. this is not necessarily + * true (i.e. context-dependence) and so may have to be fixed up via + * HACKs in semantic analysis (by checking whether it is the last elem + * in the vector). */ + + AST::DelimTokenTree delim_tok_tree (type, std::move (token_trees), + tok_tree_loc); + AST::MacroInvocData invoc_data (std::move (macro_path), + std::move (delim_tok_tree)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), macro_locus, + true)); + return ExprOrStmt (std::move (stmt)); + } + + // otherwise, create macro invocation + std::unique_ptr expr ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), macro_locus)); + return ExprOrStmt (std::move (expr)); + } + else + { + const_TokenPtr t = lexer.peek_token (); + // tokens don't match opening delimiters, so produce error + Error error ( + t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs (for a " + "macro invocation)", + t->get_token_description (), + (type == AST::PARENS ? ")" : (type == AST::SQUARE ? 
"]" : "}"))); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } +} + +// "Unexpected token" panic mode - flags gcc error at unexpected token +template +void +Parser::unexpected_token (const_TokenPtr t) +{ + Error error (t->get_locus (), "unexpected token %qs\n", + t->get_token_description ()); + add_error (std::move (error)); +} + +/* Crappy "error recovery" performed after error by skipping tokens until a + * semi-colon is found */ +template +void +Parser::skip_after_semicolon () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != END_OF_FILE && t->get_id () != SEMICOLON) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (t->get_id () == SEMICOLON) + lexer.skip_token (); +} + +/* Checks if current token has inputted id - skips it and returns true if so, + * diagnoses an error and returns false otherwise. */ +template +bool +Parser::skip_token (TokenId token_id) +{ + return expect_token (token_id) != const_TokenPtr (); +} + +/* Checks if current token has inputted id - skips it and returns true if so, + * returns false otherwise without diagnosing an error */ +template +bool +Parser::maybe_skip_token (TokenId token_id) +{ + if (lexer.peek_token ()->get_id () != token_id) + return false; + else + return skip_token (token_id); +} + +/* Checks the current token - if id is same as expected, skips and returns it, + * otherwise diagnoses error and returns null. */ +template +const_TokenPtr +Parser::expect_token (TokenId token_id) +{ + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == token_id) + { + lexer.skip_token (); + return t; + } + else + { + Error error (t->get_locus (), "expecting %qs but %qs found", + get_token_description (token_id), + t->get_token_description ()); + add_error (std::move (error)); + + return const_TokenPtr (); + } +} + +// Skips all tokens until EOF or }. Don't use. 
+template +void +Parser::skip_after_end () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != END_OF_FILE && t->get_id () != RIGHT_CURLY) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (t->get_id () == RIGHT_CURLY) + { + lexer.skip_token (); + } +} + +/* A slightly more aware error-handler that skips all tokens until it reaches + * the end of the block scope (i.e. when left curly brackets = right curly + * brackets). Note: assumes currently in the middle of a block. Use + * skip_after_next_block to skip based on the assumption that the block + * has not been entered yet. */ +template +void +Parser::skip_after_end_block () +{ + const_TokenPtr t = lexer.peek_token (); + int curly_count = 1; + + while (curly_count > 0 && t->get_id () != END_OF_FILE) + { + switch (t->get_id ()) + { + case LEFT_CURLY: + curly_count++; + break; + case RIGHT_CURLY: + curly_count--; + break; + default: + break; + } + lexer.skip_token (); + t = lexer.peek_token (); + } +} + +/* Skips tokens until the end of the next block. i.e. assumes that the block + * has not been entered yet. 
*/ +template +void +Parser::skip_after_next_block () +{ + const_TokenPtr t = lexer.peek_token (); + + // initial loop - skip until EOF if no left curlies encountered + while (t->get_id () != END_OF_FILE && t->get_id () != LEFT_CURLY) + { + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // if next token is left, skip it and then skip after the block ends + if (t->get_id () == LEFT_CURLY) + { + lexer.skip_token (); + + skip_after_end_block (); + } + // otherwise, do nothing as EOF +} + +/* Skips all tokens until ] (the end of an attribute) - does not skip the ] + * (as designed for attribute body use) */ +template +void +Parser::skip_after_end_attribute () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != RIGHT_SQUARE) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + // Don't skip the RIGHT_SQUARE token +} + +/* Pratt parser impl of parse_expr. FIXME: this is only provisional and + * probably will be changed. + * FIXME: this may only parse expressions without blocks as they are the only + * expressions to have precedence? */ +template +std::unique_ptr +Parser::parse_expr (int right_binding_power, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + const_TokenPtr current_token = lexer.peek_token (); + // Special hack because we are allowed to return nullptr, in that case we + // don't want to skip the token, since we don't actually parse it. But if + // null isn't allowed it indicates an error, and we want to skip past that. + // So return early if it is one of the tokens that ends an expression + // (or at least cannot start a new expression). 
+ if (restrictions.expr_can_be_null) + { + TokenId id = current_token->get_id (); + if (id == SEMICOLON || id == RIGHT_PAREN || id == RIGHT_CURLY + || id == RIGHT_SQUARE) + return nullptr; + } + lexer.skip_token (); + + // parse null denotation (unary part of expression) + std::unique_ptr expr + = null_denotation (current_token, {}, restrictions); + + if (expr == nullptr) + { + // DEBUG + rust_debug ("null denotation is null; returning null for parse_expr"); + return nullptr; + } + + // stop parsing if find lower priority token - parse higher priority first + while (right_binding_power < left_binding_power (lexer.peek_token ())) + { + current_token = lexer.peek_token (); + lexer.skip_token (); + + expr = left_denotation (current_token, std::move (expr), + std::move (outer_attrs), restrictions); + + if (expr == nullptr) + { + // DEBUG + rust_debug ("left denotation is null; returning null for parse_expr"); + + return nullptr; + } + } + + return expr; +} + +// Parse expression with lowest left binding power. +template +std::unique_ptr +Parser::parse_expr (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + return parse_expr (LBP_LOWEST, std::move (outer_attrs), restrictions); +} + +/* Determines action to take when finding token at beginning of expression. + * FIXME: this may only apply to precedence-capable expressions (which are all + * expressions without blocks), so make return type ExprWithoutBlock? It would + * simplify stuff. 
*/ +template +std::unique_ptr +Parser::null_denotation (const_TokenPtr tok, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + /* note: tok is previous character in input stream, not current one, as + * parse_expr skips it before passing it in */ + + /* as a Pratt parser (which works by decomposing expressions into a null + * denotation and then a left denotation), null denotations handle primaries + * and unary operands (but only prefix unary operands) */ + + switch (tok->get_id ()) + { + case IDENTIFIER: { + // DEBUG + rust_debug ("beginning null denotation identifier handling"); + + /* best option: parse as path, then extract identifier, macro, + * struct/enum, or just path info from it */ + AST::PathInExpression path = parse_path_in_expression_pratt (tok); + + // DEBUG: + rust_debug ("finished null denotation identifier path parsing - " + "next is branching"); + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // macro + return parse_macro_invocation_partial (std::move (path), + std::move (outer_attrs), + restrictions); + case LEFT_CURLY: { + bool not_a_block + = lexer.peek_token (1)->get_id () == IDENTIFIER + && (lexer.peek_token (2)->get_id () == COMMA + || (lexer.peek_token (2)->get_id () == COLON + && (lexer.peek_token (4)->get_id () == COMMA + || !can_tok_start_type ( + lexer.peek_token (3)->get_id ())))); + + /* definitely not a block: + * path '{' ident ',' + * path '{' ident ':' [anything] ',' + * path '{' ident ':' [not a type] + * otherwise, assume block expr and thus path */ + // DEBUG + rust_debug ("values of lookahead: '%s' '%s' '%s' '%s' ", + lexer.peek_token (1)->get_token_description (), + lexer.peek_token (2)->get_token_description (), + lexer.peek_token (3)->get_token_description (), + lexer.peek_token (4)->get_token_description ()); + + rust_debug ("can be struct expr: '%s', not a block: '%s'", + restrictions.can_be_struct_expr ? "true" : "false", + not_a_block ? 
"true" : "false"); + + // struct/enum expr struct + if (!restrictions.can_be_struct_expr && !not_a_block) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_struct_partial (std::move (path), + std::move (outer_attrs)); + } + case LEFT_PAREN: + // struct/enum expr tuple + if (!restrictions.can_be_struct_expr) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_tuple_partial (std::move (path), + std::move (outer_attrs)); + default: + // assume path is returned if not single segment + if (path.is_single_segment ()) + { + // have to return an identifier expression or something, idk + /* HACK: may have to become permanent, but this is my current + * identifier expression */ + return std::unique_ptr ( + new AST::IdentifierExpr (tok->get_str (), {}, + tok->get_locus ())); + } + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + gcc_unreachable (); + } + /* FIXME: delegate to parse_literal_expr instead? would have to rejig + * tokens and whatever. */ + /* FIXME: could also be path expression (and hence macro expression, + * struct/enum expr) */ + case LEFT_ANGLE: { + // qualified path + // HACK: add outer attrs to path + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (tok->get_locus ()); + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::QualifiedPathInExpression (std::move (path))); + } + // FIXME: for literal exprs, should outer attrs be passed in or just + // ignored? + case INT_LITERAL: + // we should check the range, but ignore for now + // encode as int? 
+ return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::INT, + tok->get_type_hint (), {}, tok->get_locus ())); + case FLOAT_LITERAL: + // encode as float? + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT, + tok->get_type_hint (), {}, tok->get_locus ())); + case STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case BYTE_STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE_STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case CHAR_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::CHAR, + tok->get_type_hint (), {}, tok->get_locus ())); + case BYTE_CHAR_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE, + tok->get_type_hint (), {}, tok->get_locus ())); + case TRUE_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr ("true", AST::Literal::BOOL, tok->get_type_hint (), + {}, tok->get_locus ())); + case FALSE_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr ("false", AST::Literal::BOOL, + tok->get_type_hint (), {}, tok->get_locus ())); + case LEFT_PAREN: + return parse_grouped_or_tuple_expr (std::move (outer_attrs), + tok->get_locus ()); + + /*case PLUS: { // unary plus operator + // invoke parse_expr recursively with appropriate priority, etc. 
for + below AST::Expr* expr = parse_expr(LBP_UNARY_PLUS); + + if (expr == nullptr) + return nullptr; + // can only apply to integer and float expressions + if (expr->get_type() != integer_type_node || expr->get_type() != + float_type_node) { rust_error_at(tok->get_locus(), "operand of unary + plus must be int or float but it is %s", print_type(expr->get_type())); + return nullptr; + } + + return Tree(expr, tok->get_locus()); + }*/ + // Rust has no unary plus operator + case MINUS: { // unary minus + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if (!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_MINUS, {}, entered_from_unary); + + if (expr == nullptr) + return nullptr; + // can only apply to integer and float expressions + /*if (expr.get_type() != integer_type_node || expr.get_type() != + float_type_node) { rust_error_at(tok->get_locus(), "operand of unary + minus must be int or float but it is %s", + print_type(expr.get_type())); return Tree::error(); + }*/ + /* FIXME: when implemented the "get type" method on expr, ensure it is + * int or float type (except unsigned int). Actually, this would + * probably have to be done in semantic analysis (as type checking). 
+ */ + + /* FIXME: allow outer attributes on these expressions by having an + * outer attrs parameter in function*/ + return std::unique_ptr ( + new AST::NegationExpr (std::move (expr), NegationOperator::NEGATE, + std::move (outer_attrs), tok->get_locus ())); + } + case EXCLAM: { // logical or bitwise not + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if (!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_EXCLAM, {}, entered_from_unary); + + if (expr == nullptr) + return nullptr; + // can only apply to boolean expressions + /*if (expr.get_type() != boolean_type_node) { + rust_error_at(tok->get_locus(), + "operand of logical not must be a boolean but it is %s", + print_type(expr.get_type())); + return Tree::error(); + }*/ + /* FIXME: type checking for boolean or integer expressions in semantic + * analysis */ + + // FIXME: allow outer attributes on these expressions + return std::unique_ptr ( + new AST::NegationExpr (std::move (expr), NegationOperator::NOT, + std::move (outer_attrs), tok->get_locus ())); + } + case ASTERISK: { + /* pointer dereference only - HACK: as struct expressions should + * always be value expressions, cannot be dereferenced */ + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_ASTERISK, {}, entered_from_unary); + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::DereferenceExpr (std::move (expr), std::move (outer_attrs), + tok->get_locus ())); + } + case AMP: { + // (single) "borrow" expression - shared (mutable) or immutable + std::unique_ptr expr = nullptr; + bool is_mut_borrow = false; + + /* HACK: as struct expressions should always be value expressions, + * cannot be referenced */ + ParseRestrictions entered_from_unary; + 
entered_from_unary.entered_from_unary = true; + entered_from_unary.can_be_struct_expr = false; + + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + is_mut_borrow = true; + } + else + { + expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); + } + + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::BorrowExpr (std::move (expr), is_mut_borrow, false, + std::move (outer_attrs), tok->get_locus ())); + } + case LOGICAL_AND: { + // (double) "borrow" expression - shared (mutable) or immutable + std::unique_ptr expr = nullptr; + bool is_mut_borrow = false; + + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + is_mut_borrow = true; + } + else + { + expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); + } + + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::BorrowExpr (std::move (expr), is_mut_borrow, true, + std::move (outer_attrs), tok->get_locus ())); + } + case SCOPE_RESOLUTION: { + // TODO: fix: this is for global paths, i.e. 
std::string::whatever + Error error (tok->get_locus (), + "found null denotation scope resolution operator, and " + "have not written handling for it"); + add_error (std::move (error)); + + return nullptr; + } + case SELF: + case SELF_ALIAS: + case DOLLAR_SIGN: + case CRATE: + case SUPER: { + // DEBUG + rust_debug ("beginning null denotation " + "self/self-alias/dollar/crate/super handling"); + + /* best option: parse as path, then extract identifier, macro, + * struct/enum, or just path info from it */ + AST::PathInExpression path = parse_path_in_expression_pratt (tok); + + // DEBUG + rust_debug ( + "just finished parsing path (going to disambiguate) - peeked " + "token is '%s'", + lexer.peek_token ()->get_token_description ()); + + // HACK: always make "self" by itself a path (regardless of next + // tokens) + if (tok->get_id () == SELF && path.is_single_segment ()) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // macro + return parse_macro_invocation_partial (std::move (path), + std::move (outer_attrs)); + case LEFT_CURLY: { + // struct/enum expr struct + rust_debug ("can_be_struct_expr: %s", + restrictions.can_be_struct_expr ? 
"true" : "false"); + + bool not_a_block + = lexer.peek_token (1)->get_id () == IDENTIFIER + && (lexer.peek_token (2)->get_id () == COMMA + || (lexer.peek_token (2)->get_id () == COLON + && (lexer.peek_token (4)->get_id () == COMMA + || !can_tok_start_type ( + lexer.peek_token (3)->get_id ())))); + + if (!restrictions.can_be_struct_expr && !not_a_block) + { + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_struct_partial (std::move (path), + std::move (outer_attrs)); + } + case LEFT_PAREN: + // struct/enum expr tuple + if (!restrictions.can_be_struct_expr) + { + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_tuple_partial (std::move (path), + std::move (outer_attrs)); + default: + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + gcc_unreachable (); + } + case OR: + case PIPE: + case MOVE: + // closure expression + return parse_closure_expr_pratt (tok, std::move (outer_attrs)); + case DOT_DOT: + // either "range to" or "range full" expressions + return parse_nud_range_exclusive_expr (tok, std::move (outer_attrs)); + case DOT_DOT_EQ: + // range to inclusive expr + return parse_range_to_inclusive_expr (tok, std::move (outer_attrs)); + case RETURN_TOK: + // FIXME: is this really a null denotation expression? + return parse_return_expr (std::move (outer_attrs), tok->get_locus ()); + case BREAK: + // FIXME: is this really a null denotation expression? 
+ return parse_break_expr (std::move (outer_attrs), tok->get_locus ()); + case CONTINUE: + return parse_continue_expr (std::move (outer_attrs), tok->get_locus ()); + case LEFT_CURLY: + // ok - this is an expression with block for once. + return parse_block_expr (std::move (outer_attrs), tok->get_locus ()); + case IF: + // if or if let, so more lookahead to find out + if (lexer.peek_token (1)->get_id () == LET) + { + // if let expr + return parse_if_let_expr (std::move (outer_attrs), tok->get_locus ()); + } + else + { + // if expr + return parse_if_expr (std::move (outer_attrs), tok->get_locus ()); + } + case LOOP: + return parse_loop_expr (std::move (outer_attrs), AST::LoopLabel::error (), + tok->get_locus ()); + case WHILE: + return parse_while_loop_expr (std::move (outer_attrs), + AST::LoopLabel::error (), + tok->get_locus ()); + case MATCH_TOK: + // also an expression with block + return parse_match_expr (std::move (outer_attrs), tok->get_locus ()); + case LEFT_SQUARE: + // array definition expr (not indexing) + return parse_array_expr (std::move (outer_attrs), tok->get_locus ()); + case UNSAFE: + return parse_unsafe_block_expr (std::move (outer_attrs), + tok->get_locus ()); + default: + if (!restrictions.expr_can_be_null) + add_error (Error (tok->get_locus (), + "found unexpected token %qs in null denotation", + tok->get_token_description ())); + return nullptr; + } +} + +/* Called for each token that can appear in infix (between) position. Can be + * operators or other punctuation. Returns a function pointer to member + * function that implements the left denotation for the token given. 
*/ +template +std::unique_ptr +Parser::left_denotation (const_TokenPtr tok, + std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + // Token passed in has already been skipped, so peek gives "next" token + switch (tok->get_id ()) + { + // FIXME: allow for outer attributes to be applied + case QUESTION_MARK: { + Location left_locus = left->get_locus (); + // error propagation expression - unary postfix + return std::unique_ptr ( + new AST::ErrorPropagationExpr (std::move (left), + std::move (outer_attrs), left_locus)); + } + case PLUS: + // sum expression - binary infix + /*return parse_binary_plus_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr (tok, std::move (left), + std::move (outer_attrs), + ArithmeticOrLogicalOperator::ADD, + restrictions); + case MINUS: + // difference expression - binary infix + /*return parse_binary_minus_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::SUBTRACT, restrictions); + case ASTERISK: + // product expression - binary infix + /*return parse_binary_mult_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::MULTIPLY, restrictions); + case DIV: + // quotient expression - binary infix + /*return parse_binary_div_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::DIVIDE, restrictions); + case PERCENT: + // modulo expression - binary infix + /*return parse_binary_mod_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move 
(left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::MODULUS, restrictions); + case AMP: + // logical or bitwise and expression - binary infix + /*return parse_bitwise_and_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_AND, restrictions); + case PIPE: + // logical or bitwise or expression - binary infix + /*return parse_bitwise_or_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_OR, restrictions); + case CARET: + // logical or bitwise xor expression - binary infix + /*return parse_bitwise_xor_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_XOR, restrictions); + case LEFT_SHIFT: + // left shift expression - binary infix + /*return parse_left_shift_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::LEFT_SHIFT, restrictions); + case RIGHT_SHIFT: + // right shift expression - binary infix + /*return parse_right_shift_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::RIGHT_SHIFT, restrictions); + case EQUAL_EQUAL: + // equal to expression - binary infix (no associativity) + /*return parse_binary_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::EQUAL, restrictions); + case 
NOT_EQUAL: + // not equal to expression - binary infix (no associativity) + /*return parse_binary_not_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::NOT_EQUAL, + restrictions); + case RIGHT_ANGLE: + // greater than expression - binary infix (no associativity) + /*return parse_binary_greater_than_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::GREATER_THAN, + restrictions); + case LEFT_ANGLE: + // less than expression - binary infix (no associativity) + /*return parse_binary_less_than_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::LESS_THAN, + restrictions); + case GREATER_OR_EQUAL: + // greater than or equal to expression - binary infix (no associativity) + /*return parse_binary_greater_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::GREATER_OR_EQUAL, + restrictions); + case LESS_OR_EQUAL: + // less than or equal to expression - binary infix (no associativity) + /*return parse_binary_less_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::LESS_OR_EQUAL, + restrictions); + case OR: + // lazy logical or expression - binary infix + return parse_lazy_or_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case LOGICAL_AND: + // lazy logical and expression - binary infix + return parse_lazy_and_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case AS: + /* type cast 
expression - kind of binary infix (RHS is actually a + * TypeNoBounds) */ + return parse_type_cast_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case EQUAL: + // assignment expression - binary infix (note right-to-left + // associativity) + return parse_assig_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case PLUS_EQ: + /* plus-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_plus_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr (tok, std::move (left), + std::move (outer_attrs), + CompoundAssignmentOperator::ADD, + restrictions); + case MINUS_EQ: + /* minus-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_minus_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::SUBTRACT, restrictions); + case ASTERISK_EQ: + /* multiply-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_mult_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::MULTIPLY, restrictions); + case DIV_EQ: + /* division-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_div_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr (tok, std::move (left), + std::move (outer_attrs), + CompoundAssignmentOperator::DIVIDE, + restrictions); + case PERCENT_EQ: + /* modulo-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_mod_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return 
parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::MODULUS, restrictions); + case AMP_EQ: + /* bitwise and-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_and_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_AND, restrictions); + case PIPE_EQ: + /* bitwise or-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_or_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_OR, restrictions); + case CARET_EQ: + /* bitwise xor-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_xor_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_XOR, restrictions); + case LEFT_SHIFT_EQ: + /* left shift-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_left_shift_assig_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::LEFT_SHIFT, restrictions); + case RIGHT_SHIFT_EQ: + /* right shift-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_right_shift_assig_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::RIGHT_SHIFT, restrictions); + case DOT_DOT: + /* 
range exclusive expression - binary infix (no associativity) + * either "range" or "range from" */ + return parse_led_range_exclusive_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + case DOT_DOT_EQ: + /* range inclusive expression - binary infix (no associativity) + * unambiguously RangeInclusiveExpr */ + return parse_range_inclusive_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case SCOPE_RESOLUTION: + // path expression - binary infix? FIXME should this even be parsed + // here? + add_error ( + Error (tok->get_locus (), + "found scope resolution operator in left denotation " + "function - this should probably be handled elsewhere")); + + return nullptr; + case DOT: { + /* field expression or method call - relies on parentheses after next + * identifier or await if token after is "await" (unary postfix) or + * tuple index if token after is a decimal int literal */ + + const_TokenPtr next_tok = lexer.peek_token (); + if (next_tok->get_id () == IDENTIFIER + && next_tok->get_str () == "await") + { + // await expression + return parse_await_expr (tok, std::move (left), + std::move (outer_attrs)); + } + else if (next_tok->get_id () == INT_LITERAL) + { + // tuple index expression - TODO check for decimal int literal + return parse_tuple_index_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else if (next_tok->get_id () == IDENTIFIER + && lexer.peek_token (1)->get_id () != LEFT_PAREN + && lexer.peek_token (1)->get_id () != SCOPE_RESOLUTION) + { + /* field expression (or should be) - FIXME: scope resolution right + * after identifier should always be method, I'm pretty sure */ + return parse_field_access_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else + { + // method call (probably) + return parse_method_call_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + } + case LEFT_PAREN: + // function call - method call is based on 
dot notation first + return parse_function_call_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case LEFT_SQUARE: + // array or slice index expression (pseudo binary infix) + return parse_index_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case FLOAT_LITERAL: + /* HACK: get around lexer mis-identifying '.0' or '.1' or whatever as a + * float literal - TODO does this happen anymore? It shouldn't. */ + return parse_tuple_index_expr_float (tok, std::move (left), + std::move (outer_attrs), + restrictions); + default: + add_error (Error (tok->get_locus (), + "found unexpected token %qs in left denotation", + tok->get_token_description ())); + + return nullptr; + } +} + +/* Returns the left binding power for the given ArithmeticOrLogicalExpr type. + * TODO make constexpr? Would that even do anything useful? */ +inline binding_powers +get_lbp_for_arithmetic_or_logical_expr ( + AST::ArithmeticOrLogicalExpr::ExprType expr_type) +{ + switch (expr_type) + { + case ArithmeticOrLogicalOperator::ADD: + return LBP_PLUS; + case ArithmeticOrLogicalOperator::SUBTRACT: + return LBP_MINUS; + case ArithmeticOrLogicalOperator::MULTIPLY: + return LBP_MUL; + case ArithmeticOrLogicalOperator::DIVIDE: + return LBP_DIV; + case ArithmeticOrLogicalOperator::MODULUS: + return LBP_MOD; + case ArithmeticOrLogicalOperator::BITWISE_AND: + return LBP_AMP; + case ArithmeticOrLogicalOperator::BITWISE_OR: + return LBP_PIPE; + case ArithmeticOrLogicalOperator::BITWISE_XOR: + return LBP_CARET; + case ArithmeticOrLogicalOperator::LEFT_SHIFT: + return LBP_L_SHIFT; + case ArithmeticOrLogicalOperator::RIGHT_SHIFT: + return LBP_R_SHIFT; + default: + // WTF? should not happen, this is an error + gcc_unreachable (); + + return LBP_PLUS; + } +} + +// Parses an arithmetic or logical expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_arithmetic_or_logical_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::ArithmeticOrLogicalExpr::ExprType expr_type, + ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_arithmetic_or_logical_expr (expr_type), + AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + expr_type, locus)); +} + +// Parses a binary addition expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_plus_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PLUS, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::ADD, locus)); +} + +// Parses a binary subtraction expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_minus_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MINUS, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::SUBTRACT, + locus)); +} + +// Parses a binary multiplication expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_mult_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MUL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::MULTIPLY, + locus)); +} + +// Parses a binary division expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_div_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DIV, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::DIVIDE, + locus)); +} + +// Parses a binary modulo expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_binary_mod_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MOD, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::MODULUS, + locus)); +} + +/* Parses a binary bitwise (or eager logical) and expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_and_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_AMP, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_AND, + locus)); +} + +/* Parses a binary bitwise (or eager logical) or expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_or_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PIPE, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_OR, + locus)); +} + +/* Parses a binary bitwise (or eager logical) xor expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_xor_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_CARET, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_XOR, + locus)); +} + +// Parses a binary left shift expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_left_shift_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_L_SHIFT, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::LEFT_SHIFT, + locus)); +} + +// Parses a binary right shift expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_right_shift_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_R_SHIFT, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::RIGHT_SHIFT, + locus)); +} + +/* Returns the left binding power for the given ComparisonExpr type. + * TODO make constexpr? Would that even do anything useful? */ +inline binding_powers +get_lbp_for_comparison_expr (AST::ComparisonExpr::ExprType expr_type) +{ + switch (expr_type) + { + case ComparisonOperator::EQUAL: + return LBP_EQUAL; + case ComparisonOperator::NOT_EQUAL: + return LBP_NOT_EQUAL; + case ComparisonOperator::GREATER_THAN: + return LBP_GREATER_THAN; + case ComparisonOperator::LESS_THAN: + return LBP_SMALLER_THAN; + case ComparisonOperator::GREATER_OR_EQUAL: + return LBP_GREATER_EQUAL; + case ComparisonOperator::LESS_OR_EQUAL: + return LBP_SMALLER_EQUAL; + default: + // WTF? should not happen, this is an error + gcc_unreachable (); + + return LBP_EQUAL; + } +} + +/* Parses a ComparisonExpr of given type and LBP. TODO find a way to only + * specify one and have the other looked up - e.g. specify ExprType and + * binding power is looked up? 
*/ +template +std::unique_ptr +Parser::parse_comparison_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::ComparisonExpr::ExprType expr_type, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_comparison_expr (expr_type), AST::AttrVec (), + restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), expr_type, + locus)); +} + +// Parses a binary equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::EQUAL, locus)); +} + +// Parses a binary not equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_not_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_NOT_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::NOT_EQUAL, locus)); +} + +// Parses a binary greater than expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_greater_than_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_GREATER_THAN, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::GREATER_THAN, locus)); +} + +// Parses a binary less than expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_less_than_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_SMALLER_THAN, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::LESS_THAN, locus)); +} + +// Parses a binary greater than or equal to expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_binary_greater_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_GREATER_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::GREATER_OR_EQUAL, locus)); +} + +// Parses a binary less than or equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_less_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_SMALLER_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::LESS_OR_EQUAL, locus)); +} + +// Parses a binary lazy boolean or expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_lazy_or_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_LOGICAL_OR, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::LazyBooleanExpr (std::move (left), std::move (right), + LazyBooleanOperator::LOGICAL_OR, locus)); +} + +// Parses a binary lazy boolean and expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_lazy_and_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_LOGICAL_AND, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::LazyBooleanExpr (std::move (left), std::move (right), + LazyBooleanOperator::LOGICAL_AND, locus)); +} + +// Parses a pseudo-binary infix type cast expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_type_cast_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr expr_to_cast, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, + ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + return nullptr; + // FIXME: how do I get precedence put in here? + + // TODO: check types. actually, do so during semantic analysis + Location locus = expr_to_cast->get_locus (); + + return std::unique_ptr ( + new AST::TypeCastExpr (std::move (expr_to_cast), std::move (type), locus)); +} + +// Parses a binary assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::AssignmentExpr (std::move (left), std::move (right), + std::move (outer_attrs), locus)); +} + +/* Returns the left binding power for the given CompoundAssignmentExpr type. + * TODO make constexpr? Would that even do anything useful? */ +inline binding_powers +get_lbp_for_compound_assignment_expr ( + AST::CompoundAssignmentExpr::ExprType expr_type) +{ + switch (expr_type) + { + case CompoundAssignmentOperator::ADD: + return LBP_PLUS; + case CompoundAssignmentOperator::SUBTRACT: + return LBP_MINUS; + case CompoundAssignmentOperator::MULTIPLY: + return LBP_MUL; + case CompoundAssignmentOperator::DIVIDE: + return LBP_DIV; + case CompoundAssignmentOperator::MODULUS: + return LBP_MOD; + case CompoundAssignmentOperator::BITWISE_AND: + return LBP_AMP; + case CompoundAssignmentOperator::BITWISE_OR: + return LBP_PIPE; + case CompoundAssignmentOperator::BITWISE_XOR: + return LBP_CARET; + case CompoundAssignmentOperator::LEFT_SHIFT: + return LBP_L_SHIFT; + case CompoundAssignmentOperator::RIGHT_SHIFT: + return LBP_R_SHIFT; + default: + // WTF? should not happen, this is an error + gcc_unreachable (); + + return LBP_PLUS; + } +} + +// Parses a compound assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_compound_assignment_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::CompoundAssignmentExpr::ExprType expr_type, + ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_compound_assignment_expr (expr_type) - 1, + AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + expr_type, locus)); +} + +// Parses a binary add-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_plus_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PLUS_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::ADD, locus)); +} + +// Parses a binary minus-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_minus_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MINUS_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::SUBTRACT, + locus)); +} + +// Parses a binary multiplication-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_mult_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MULT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::MULTIPLY, + locus)); +} + +// Parses a binary division-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_div_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DIV_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::DIVIDE, + locus)); +} + +// Parses a binary modulo-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_mod_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MOD_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::MODULUS, + locus)); +} + +// Parses a binary and-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_and_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_AMP_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_AND, + locus)); +} + +// Parses a binary or-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_or_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PIPE_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_OR, + locus)); +} + +// Parses a binary xor-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_xor_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_CARET_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_XOR, + locus)); +} + +// Parses a binary left shift-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_left_shift_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_L_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::LEFT_SHIFT, + locus)); +} + +// Parses a binary right shift-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_right_shift_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_R_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::RIGHT_SHIFT, + locus)); +} + +// Parses a postfix unary await expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_await_expr ( + const_TokenPtr tok, std::unique_ptr expr_to_await, + AST::AttrVec outer_attrs) +{ + /* skip "await" identifier (as "." has already been consumed in + * parse_expression) this assumes that the identifier was already identified + * as await */ + if (!skip_token (IDENTIFIER)) + { + Error error (tok->get_locus (), "failed to skip % in await expr " + "- this is probably a deep issue"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // TODO: check inside async block in semantic analysis + Location locus = expr_to_await->get_locus (); + + return std::unique_ptr ( + new AST::AwaitExpr (std::move (expr_to_await), std::move (outer_attrs), + locus)); +} + +/* Parses an exclusive range ('..') in left denotation position (i.e. + * RangeFromExpr or RangeFromToExpr). 
*/ +template +std::unique_ptr +Parser::parse_led_range_exclusive_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // FIXME: this probably parses expressions accidently or whatever + // try parsing RHS (as tok has already been consumed in parse_expression) + // Can be nullptr, in which case it is a RangeFromExpr, otherwise a + // RangeFromToExpr. + restrictions.expr_can_be_null = true; + std::unique_ptr right + = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions); + + Location locus = left->get_locus (); + + if (right == nullptr) + { + // range from expr + return std::unique_ptr ( + new AST::RangeFromExpr (std::move (left), locus)); + } + else + { + return std::unique_ptr ( + new AST::RangeFromToExpr (std::move (left), std::move (right), locus)); + } + // FIXME: make non-associative +} + +/* Parses an exclusive range ('..') in null denotation position (i.e. + * RangeToExpr or RangeFullExpr). */ +template +std::unique_ptr +Parser::parse_nud_range_exclusive_expr ( + const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) +{ + // FIXME: this probably parses expressions accidently or whatever + // try parsing RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right = parse_expr (LBP_DOT_DOT, AST::AttrVec ()); + + Location locus = tok->get_locus (); + + if (right == nullptr) + { + // range from expr + return std::unique_ptr ( + new AST::RangeFullExpr (locus)); + } + else + { + return std::unique_ptr ( + new AST::RangeToExpr (std::move (right), locus)); + } + // FIXME: make non-associative +} + +// Parses a full binary range inclusive expression. 
+template +std::unique_ptr +Parser::parse_range_inclusive_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DOT_DOT_EQ, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: make non-associative + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::RangeFromToInclExpr (std::move (left), std::move (right), locus)); +} + +// Parses an inclusive range-to prefix unary expression. +template +std::unique_ptr +Parser::parse_range_to_inclusive_expr ( + const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right = parse_expr (LBP_DOT_DOT_EQ); + if (right == nullptr) + return nullptr; + // FIXME: make non-associative + + // TODO: check types. actually, do so during semantic analysis + + return std::unique_ptr ( + new AST::RangeToInclExpr (std::move (right), tok->get_locus ())); +} + +// Parses a pseudo-binary infix tuple index expression. 
+template +std::unique_ptr +Parser::parse_tuple_index_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // parse int literal (as token already skipped) + const_TokenPtr index_tok = expect_token (INT_LITERAL); + if (index_tok == nullptr) + { + return nullptr; + } + std::string index = index_tok->get_str (); + + // convert to integer + if (!index_tok->is_pure_decimal ()) + { + Error error (index_tok->get_locus (), + "tuple index should be a pure decimal literal"); + add_error (std::move (error)); + } + int index_int = atoi (index.c_str ()); + + Location locus = tuple_expr->get_locus (); + + return std::unique_ptr ( + new AST::TupleIndexExpr (std::move (tuple_expr), index_int, + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix array (or slice) index expression. +template +std::unique_ptr +Parser::parse_index_expr ( + const_TokenPtr, std::unique_ptr array_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + /*std::unique_ptr index_expr + = parse_expr (LBP_ARRAY_REF, AST::AttrVec (), + restrictions);*/ + // TODO: conceptually, should treat [] as brackets, so just parse all expr + std::unique_ptr index_expr = parse_expr (); + if (index_expr == nullptr) + return nullptr; + + // skip ']' at end of array + if (!skip_token (RIGHT_SQUARE)) + { + // skip somewhere? + return nullptr; + } + + // TODO: check types. actually, do so during semantic analysis + Location locus = array_expr->get_locus (); + + return std::unique_ptr ( + new AST::ArrayIndexExpr (std::move (array_expr), std::move (index_expr), + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix struct field access expression. 
+template +std::unique_ptr +Parser::parse_field_access_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr struct_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + /* get field name identifier (assume that this is a field access expr and + * not await, for instance) */ + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident = ident_tok->get_str (); + + Location locus = struct_expr->get_locus (); + + // TODO: check types. actually, do so during semantic analysis + return std::unique_ptr ( + new AST::FieldAccessExpr (std::move (struct_expr), std::move (ident), + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix method call expression. +template +std::unique_ptr +Parser::parse_method_call_expr ( + const_TokenPtr tok, std::unique_ptr receiver_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse path expr segment + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + Error error (tok->get_locus (), + "failed to parse path expr segment of method call expr"); + add_error (std::move (error)); + + return nullptr; + } + + // skip left parentheses + if (!skip_token (LEFT_PAREN)) + { + return nullptr; + } + + // parse method params (if they exist) + std::vector> params; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr param = parse_expr (); + if (param == nullptr) + { + Error error (t->get_locus (), + "failed to parse method param in method call"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip right paren + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // TODO: check types. actually do so in semantic analysis pass. 
+ Location locus = receiver_expr->get_locus (); + + return std::unique_ptr ( + new AST::MethodCallExpr (std::move (receiver_expr), std::move (segment), + std::move (params), std::move (outer_attrs), + locus)); +} + +// Parses a pseudo-binary infix function call expression. +template +std::unique_ptr +Parser::parse_function_call_expr ( + const_TokenPtr, std::unique_ptr function_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse function params (if they exist) + std::vector> params; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr param = parse_expr (); + if (param == nullptr) + { + Error error (t->get_locus (), + "failed to parse function param in function call"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip ')' at end of param list + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return nullptr; + } + + // TODO: check types. actually, do so during semantic analysis + Location locus = function_expr->get_locus (); + + return std::unique_ptr ( + new AST::CallExpr (std::move (function_expr), std::move (params), + std::move (outer_attrs), locus)); +} + +/* Parses a macro invocation with a path in expression already parsed (but not + * '!' token). 
*/ +template +std::unique_ptr +Parser::parse_macro_invocation_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + // macro invocation + if (!skip_token (EXCLAM)) + { + return nullptr; + } + + // convert PathInExpression to SimplePath - if this isn't possible, error + AST::SimplePath converted_path = path.as_simple_path (); + if (converted_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse simple path in macro invocation"); + add_error (std::move (error)); + + return nullptr; + } + + AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + + rust_debug ("successfully parsed macro invocation (via partial)"); + + Location macro_locus = converted_path.get_locus (); + + return std::unique_ptr (new AST::MacroInvocation ( + AST::MacroInvocData (std::move (converted_path), std::move (tok_tree)), + std::move (outer_attrs), macro_locus, restrictions.expr_can_be_stmt)); +} + +/* Parses a struct expr struct with a path in expression already parsed (but + * not + * '{' token). */ +template +std::unique_ptr +Parser::parse_struct_expr_struct_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs) +{ + // assume struct expr struct (as struct-enum disambiguation requires name + // lookup) again, make statement if final ';' + if (!skip_token (LEFT_CURLY)) + { + return nullptr; + } + + // parse inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + Location path_locus = path.get_locus (); + switch (t->get_id ()) + { + case RIGHT_CURLY: + // struct with no body + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructExprStruct (std::move (path), std::move (inner_attrs), + std::move (outer_attrs), path_locus)); + case DOT_DOT: + /* technically this would give a struct base-only struct, but this + * algorithm should work too. As such, AST type not happening. 
*/ + case IDENTIFIER: + case INT_LITERAL: { + // struct with struct expr fields + + // parse struct expr fields + std::vector> fields; + + while (t->get_id () != RIGHT_CURLY && t->get_id () != DOT_DOT) + { + std::unique_ptr field + = parse_struct_expr_field (); + if (field == nullptr) + { + Error error (t->get_locus (), + "failed to parse struct (or enum) expr field"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG: + rust_debug ("struct/enum expr field validated to not be null"); + + fields.push_back (std::move (field)); + + // DEBUG: + rust_debug ("struct/enum expr field pushed back"); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // DEBUG: + rust_debug ("lack of comma detected in struct/enum expr " + "fields - break"); + break; + } + lexer.skip_token (); + + // DEBUG: + rust_debug ("struct/enum expr fields comma skipped "); + + t = lexer.peek_token (); + } + + // DEBUG: + rust_debug ("struct/enum expr about to parse struct base "); + + // parse struct base if it exists + AST::StructBase struct_base = AST::StructBase::error (); + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + Location dot_dot_location = lexer.peek_token ()->get_locus (); + lexer.skip_token (); + + // parse required struct base expr + std::unique_ptr base_expr = parse_expr (); + if (base_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct base expression in struct " + "expression"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG: + rust_debug ("struct/enum expr - parsed and validated base expr"); + + struct_base + = AST::StructBase (std::move (base_expr), dot_dot_location); + + // DEBUG: + rust_debug ("assigned struct base to new struct base "); + } + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + // DEBUG: + rust_debug ( + "struct/enum expr skipped right curly - done and ready to return"); + + return std::unique_ptr ( + new AST::StructExprStructFields (std::move (path), 
std::move (fields), + path_locus, std::move (struct_base), + std::move (inner_attrs), + std::move (outer_attrs))); + } + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs in struct (or enum) expression - " + "expected %<}%>, identifier, integer literal, or %<..%>", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a struct expr tuple with a path in expression already parsed (but + * not + * '(' token). + * FIXME: this currently outputs a call expr, as they cannot be disambiguated. + * A better solution would be to just get this to call that function directly. + * */ +template +std::unique_ptr +Parser::parse_struct_expr_tuple_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs) +{ + if (!skip_token (LEFT_PAREN)) + { + return nullptr; + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + std::vector> exprs; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), "failed to parse expression in " + "struct (or enum) expression tuple"); + add_error (std::move (error)); + + return nullptr; + } + exprs.push_back (std::move (expr)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + Location path_locus = path.get_locus (); + + auto pathExpr = std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + + return std::unique_ptr ( + new AST::CallExpr (std::move (pathExpr), std::move (exprs), + std::move (outer_attrs), path_locus)); +} + +/* Parses a path in expression with the first token passed as a parameter (as + * it is skipped in token stream). Note that this only parses segment-first + * paths, not global ones. 
*/ +template +AST::PathInExpression +Parser::parse_path_in_expression_pratt (const_TokenPtr tok) +{ + // HACK-y way of making up for pratt-parsing consuming first token + + // DEBUG + rust_debug ("current peek token when starting path pratt parse: '%s'", + lexer.peek_token ()->get_token_description ()); + + // create segment vector + std::vector segments; + + std::string initial_str; + + switch (tok->get_id ()) + { + case IDENTIFIER: + initial_str = tok->get_str (); + break; + case SUPER: + initial_str = "super"; + break; + case SELF: + initial_str = "self"; + break; + case SELF_ALIAS: + initial_str = "Self"; + break; + case CRATE: + initial_str = "crate"; + break; + case DOLLAR_SIGN: + if (lexer.peek_token ()->get_id () == CRATE) + { + initial_str = "$crate"; + break; + } + gcc_fallthrough (); + default: + add_error (Error (tok->get_locus (), + "unrecognised token %qs in path in expression", + tok->get_token_description ())); + + return AST::PathInExpression::create_error (); + } + + // parse required initial segment + AST::PathExprSegment initial_segment (initial_str, tok->get_locus ()); + // parse generic args (and turbofish), if they exist + /* use lookahead to determine if they actually exist (don't want to + * accidently parse over next ident segment) */ + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION + && lexer.peek_token (1)->get_id () == LEFT_ANGLE) + { + // skip scope resolution + lexer.skip_token (); + + AST::GenericArgs generic_args = parse_path_generic_args (); + + initial_segment + = AST::PathExprSegment (AST::PathIdentSegment (initial_str, + tok->get_locus ()), + tok->get_locus (), std::move (generic_args)); + } + if (initial_segment.is_error ()) + { + // skip after somewhere? 
+ // don't necessarily throw error but yeah + + // DEBUG + rust_debug ("initial segment is error - returning null"); + + return AST::PathInExpression::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + // skip after somewhere? + Error error (t->get_locus (), + "could not parse path expression segment"); + add_error (std::move (error)); + + return AST::PathInExpression::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + // DEBUG: + rust_debug ( + "current token (just about to return path to null denotation): '%s'", + lexer.peek_token ()->get_token_description ()); + + return AST::PathInExpression (std::move (segments), {}, tok->get_locus (), + false); +} + +// Parses a closure expression with pratt parsing (from null denotation). +template +std::unique_ptr +Parser::parse_closure_expr_pratt (const_TokenPtr tok, + AST::AttrVec outer_attrs) +{ + // TODO: does this need pratt parsing (for precedence)? 
probably not, but + // idk + Location locus = tok->get_locus (); + bool has_move = false; + if (tok->get_id () == MOVE) + { + has_move = true; + tok = lexer.peek_token (); + lexer.skip_token (); + // skip token and reassign + } + + // handle parameter list + std::vector params; + + switch (tok->get_id ()) + { + case OR: + // no parameters, don't skip token + break; + case PIPE: { + // actually may have parameters + // don't skip token + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != PIPE) + { + AST::ClosureParam param = parse_closure_param (); + if (param.is_error ()) + { + // TODO is this really an error? + Error error (t->get_locus (), "could not parse closure param"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // not an error but means param list is done + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (PIPE)) + { + return nullptr; + } + break; + } + default: + add_error (Error (tok->get_locus (), + "unexpected token %qs in closure expression - expected " + "%<|%> or %<||%>", + tok->get_token_description ())); + + // skip somewhere? + return nullptr; + } + + // again branch based on next token + tok = lexer.peek_token (); + if (tok->get_id () == RETURN_TYPE) + { + // must be return type closure with block expr + + // skip "return type" token + lexer.skip_token (); + + // parse actual type, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + // error + Error error (tok->get_locus (), "failed to parse type for closure"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + // parse block expr, which is required + std::unique_ptr block = parse_block_expr (); + if (block == nullptr) + { + // error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInnerTyped (std::move (type), std::move (block), + std::move (params), locus, has_move, + std::move (outer_attrs))); + } + else + { + // must be expr-only closure + + // parse expr, which is required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (tok->get_locus (), + "failed to parse expression in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInner (std::move (expr), std::move (params), locus, + has_move, std::move (outer_attrs))); + } +} + +/* Parses a tuple index expression (pratt-parsed) from a 'float' token as a + * result of lexer misidentification. 
*/ +template +std::unique_ptr +Parser::parse_tuple_index_expr_float ( + const_TokenPtr tok, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // only works on float literals + if (tok->get_id () != FLOAT_LITERAL) + return nullptr; + + // DEBUG: + rust_debug ("exact string form of float: '%s'", tok->get_str ().c_str ()); + + // get float string and remove dot and initial 0 + std::string index_str = tok->get_str (); + index_str.erase (index_str.begin ()); + + // get int from string + int index = atoi (index_str.c_str ()); + + Location locus = tuple_expr->get_locus (); + + return std::unique_ptr ( + new AST::TupleIndexExpr (std::move (tuple_expr), index, + std::move (outer_attrs), locus)); +} + +// Returns true if the next token is END, ELSE, or EOF; +template +bool +Parser::done_end_or_else () +{ + const_TokenPtr t = lexer.peek_token (); + return (t->get_id () == RIGHT_CURLY || t->get_id () == ELSE + || t->get_id () == END_OF_FILE); +} + +// Returns true if the next token is END or EOF. +template +bool +Parser::done_end () +{ + const_TokenPtr t = lexer.peek_token (); + return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); +} + +// Dumps lexer output to stderr. +template +void +Parser::debug_dump_lex_output (std::ostream &out) +{ + /* TODO: a better implementation of "lexer dump" (as in dump what was + * actually tokenised) would actually be to "write" a token to a file every + * time skip_token() here was called. This would reflect the parser + * modifications to the token stream, such as fixing the template angle + * brackets. 
*/ + + const_TokenPtr tok = lexer.peek_token (); + + while (true) + { + if (tok->get_id () == Rust::END_OF_FILE) + break; + + bool has_text = tok->get_id () == Rust::IDENTIFIER + || tok->get_id () == Rust::INT_LITERAL + || tok->get_id () == Rust::FLOAT_LITERAL + || tok->get_id () == Rust::STRING_LITERAL + || tok->get_id () == Rust::CHAR_LITERAL + || tok->get_id () == Rust::BYTE_STRING_LITERAL + || tok->get_id () == Rust::BYTE_CHAR_LITERAL; + + Location loc = tok->get_locus (); + + out << "token_id_to_str (); + out << has_text ? (std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : ""; + out << lexer.get_line_map ()->to_string (loc); + + lexer.skip_token (); + tok = lexer.peek_token (); + } +} + +// Parses crate and dumps AST to stderr, recursively. +template +void +Parser::debug_dump_ast_output (AST::Crate &crate, + std::ostream &out) +{ + out << crate.as_string (); +} +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse.cc b/gcc/rust/parse/rust-parse.cc new file mode 100644 index 00000000000..f1e2caa258b --- /dev/null +++ b/gcc/rust/parse/rust-parse.cc @@ -0,0 +1,328 @@ +/* This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "rust-parse.h" +#include "rust-linemap.h" +#include "rust-diagnostics.h" + +namespace Rust { + +std::string +extract_module_path (const AST::AttrVec &inner_attrs, + const AST::AttrVec &outer_attrs, const std::string &name) +{ + AST::Attribute path_attr = AST::Attribute::create_empty (); + for (const auto &attr : inner_attrs) + { + if (attr.get_path ().as_string () == "path") + { + path_attr = attr; + break; + } + } + + // Here, we found a path attribute, but it has no associated string. This is + // invalid + if (!path_attr.is_empty () && !path_attr.has_attr_input ()) + { + rust_error_at ( + path_attr.get_locus (), + // Split the format string so that -Wformat-diag does not complain... + "path attributes must contain a filename: '%s'", "#![path = \"file\"]"); + return name; + } + + for (const auto &attr : outer_attrs) + { + if (attr.get_path ().as_string () == "path") + { + path_attr = attr; + break; + } + } + + // We didn't find a path attribute. This is not an error, there simply isn't + // one present + if (path_attr.is_empty ()) + return name; + + // Here, we found a path attribute, but it has no associated string. This is + // invalid + if (!path_attr.has_attr_input ()) + { + rust_error_at ( + path_attr.get_locus (), + // Split the format string so that -Wformat-diag does not complain... + "path attributes must contain a filename: '%s'", "#[path = \"file\"]"); + return name; + } + + auto path_value = path_attr.get_attr_input ().as_string (); + + // At this point, the 'path' is of the following format: '= ""' + // We need to remove the equal sign and only keep the actual filename. + // In order to do this, we can simply go through the string until we find + // a character that is not an equal sign or whitespace + auto filename_begin = path_value.find_first_not_of ("=\t "); + + auto path = path_value.substr (filename_begin); + + // On windows, the path might mix '/' and '\' separators. 
Replace the + // UNIX-like separators by MSDOS separators to make sure the path will resolve + // properly. + // + // Source: rustc compiler + // (https://github.com/rust-lang/rust/blob/9863bf51a52b8e61bcad312f81b5193d53099f9f/compiler/rustc_expand/src/module.rs#L174) +#if defined(HAVE_DOS_BASED_FILE_SYSTEM) + std::replace (path.begin (), path.end (), '/', '\\'); +#endif /* HAVE_DOS_BASED_FILE_SYSTEM */ + + return path; +} + +template +static bool +contains (std::vector &vec, T elm) +{ + return std::find (vec.begin (), vec.end (), elm) != vec.end (); +} + +/** + * Avoid the UB of calling .front() and .back() on empty containers: these + * helpers return nullptr when the vector is empty. + */ + +template +static const T * +get_back_ptr (const std::vector> &values) +{ + if (values.empty ()) + return nullptr; + + return values.back ().get (); +} + +template +static const T * +get_front_ptr (const std::vector> &values) +{ + if (values.empty ()) + return nullptr; + + return values.front ().get (); +} + +static bool +peculiar_fragment_match_compatible_fragment ( + const AST::MacroFragSpec &last_spec, const AST::MacroFragSpec &spec, + Location match_locus) +{ + static std::unordered_map> + fragment_follow_set + = {{AST::MacroFragSpec::PATH, {AST::MacroFragSpec::BLOCK}}, + {AST::MacroFragSpec::TY, {AST::MacroFragSpec::BLOCK}}, + {AST::MacroFragSpec::VIS, + {AST::MacroFragSpec::IDENT, AST::MacroFragSpec::TY, + AST::MacroFragSpec::PATH}}}; + + auto is_valid + = contains (fragment_follow_set[last_spec.get_kind ()], spec.get_kind ()); + + if (!is_valid) + rust_error_at ( + match_locus, + "fragment specifier %<%s%> is not allowed after %<%s%> fragments", + spec.as_string ().c_str (), last_spec.as_string ().c_str ()); + + return is_valid; +} + +static bool +peculiar_fragment_match_compatible (const AST::MacroMatchFragment &last_match, + const AST::MacroMatch &match) +{ + static std::unordered_map> + follow_set + = {{AST::MacroFragSpec::EXPR, {MATCH_ARROW, COMMA, SEMICOLON}}, + {AST::MacroFragSpec::STMT, {MATCH_ARROW, COMMA, SEMICOLON}}, + {AST::MacroFragSpec::PAT,
{MATCH_ARROW, COMMA, EQUAL, PIPE, IF, IN}}, + {AST::MacroFragSpec::PATH, + {MATCH_ARROW, COMMA, EQUAL, PIPE, SEMICOLON, COLON, RIGHT_ANGLE, + RIGHT_SHIFT, LEFT_SQUARE, LEFT_CURLY, AS, WHERE}}, + {AST::MacroFragSpec::TY, + {MATCH_ARROW, COMMA, EQUAL, PIPE, SEMICOLON, COLON, RIGHT_ANGLE, + RIGHT_SHIFT, LEFT_SQUARE, LEFT_CURLY, AS, WHERE}}, + {AST::MacroFragSpec::VIS, + { + COMMA, + IDENTIFIER /* FIXME: Other than `priv` */, + LEFT_PAREN, + LEFT_SQUARE, + EXCLAM, + ASTERISK, + AMP, + LOGICAL_AND, + QUESTION_MARK, + LIFETIME, + LEFT_ANGLE, + LEFT_SHIFT, + SUPER, + SELF, + SELF_ALIAS, + EXTERN_TOK, + CRATE, + UNDERSCORE, + FOR, + IMPL, + FN_TOK, + UNSAFE, + TYPEOF, + DYN + // FIXME: Add Non kw identifiers + // FIXME: Add $crate as valid + }}}; + + Location error_locus = match.get_match_locus (); + std::string kind_str = "fragment"; + auto &allowed_toks = follow_set[last_match.get_frag_spec ().get_kind ()]; + + // There are two behaviors to handle here: If the follow-up match is a token, + // we want to check if it is allowed. + // If it is a fragment, repetition or matcher then we know that it will be + // an error. + // For repetitions and matchers we want to extract a proper location to report + // the error. 
+ switch (match.get_macro_match_type ()) + { + case AST::MacroMatch::Tok: { + auto tok = static_cast (&match); + if (contains (allowed_toks, tok->get_id ())) + return true; + kind_str = "token `" + + std::string (get_token_description (tok->get_id ())) + "`"; + error_locus = tok->get_match_locus (); + break; + } + break; + case AST::MacroMatch::Repetition: { + auto repetition + = static_cast (&match); + auto &matches = repetition->get_matches (); + auto first_frag = get_front_ptr (matches); + if (first_frag) + return peculiar_fragment_match_compatible (last_match, *first_frag); + break; + } + case AST::MacroMatch::Matcher: { + auto matcher = static_cast (&match); + auto first_token = matcher->get_delim_type (); + TokenId delim_id; + switch (first_token) + { + case AST::PARENS: + delim_id = LEFT_PAREN; + break; + case AST::SQUARE: + delim_id = LEFT_SQUARE; + break; + case AST::CURLY: + delim_id = LEFT_CURLY; + break; + default: + gcc_unreachable (); + break; + } + if (contains (allowed_toks, delim_id)) + return true; + kind_str = "token `" + std::string (get_token_description (delim_id)) + + "` at start of matcher"; + error_locus = matcher->get_match_locus (); + break; + } + case AST::MacroMatch::Fragment: { + auto last_spec = last_match.get_frag_spec (); + auto fragment = static_cast (&match); + if (last_spec.has_follow_set_fragment_restrictions ()) + return peculiar_fragment_match_compatible_fragment ( + last_spec, fragment->get_frag_spec (), match.get_match_locus ()); + } + break; + } + + rust_error_at (error_locus, "%s is not allowed after %<%s%> fragment", + kind_str.c_str (), + last_match.get_frag_spec ().as_string ().c_str ()); + auto allowed_toks_str + = "`" + std::string (get_token_description (allowed_toks[0])) + "`"; + for (size_t i = 1; i < allowed_toks.size (); i++) + allowed_toks_str + += ", `" + std::string (get_token_description (allowed_toks[i])) + "`"; + + rust_inform (error_locus, "allowed tokens are %s", allowed_toks_str.c_str ()); + + return 
false; +} + +bool +is_match_compatible (const AST::MacroMatch &last_match, + const AST::MacroMatch &match) +{ + const AST::MacroMatch *new_last = nullptr; + + // We want to "extract" the concerning matches. In cases such as matchers and + // repetitions, we actually store multiple matches, but are only concerned + // about the follow-set ambiguities of certain elements. + // There are some cases where we can short-circuit the algorithm: There will + // never be restrictions on token literals, or on certain fragments which do + // not have a set of follow-restrictions. + + switch (last_match.get_macro_match_type ()) + { + // This is our main stop condition: When we are finally looking at the + // last match (or its actual last component), and it is a fragment, it + // may contain some follow up restrictions. + case AST::MacroMatch::Fragment: { + auto fragment + = static_cast (&last_match); + if (fragment->get_frag_spec ().has_follow_set_restrictions ()) + return peculiar_fragment_match_compatible (*fragment, match); + else + return true; + } + case AST::MacroMatch::Repetition: { + // A repetition on the left hand side means we want to make sure the + // last match of the repetition is compatible with the new match + auto repetition + = static_cast (&last_match); + new_last = get_back_ptr (repetition->get_matches ()); + // If there are no matches in the repetition, then it can be followed by + // anything + if (!new_last) + return true; + break; + } + case AST::MacroMatch::Matcher: + case AST::MacroMatch::Tok: + return true; + } + + rust_assert (new_last); + + // We check recursively until we find a terminating condition + // FIXME: Does expansion depth/limit matter here? + return is_match_compatible (*new_last, match); +} +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h new file mode 100644 index 00000000000..e4c5a2c5c9f --- /dev/null +++ b/gcc/rust/parse/rust-parse.h @@ -0,0 +1,732 @@ +/* This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef RUST_PARSE_H +#define RUST_PARSE_H + +#include "rust-lex.h" +#include "rust-ast-full.h" +#include "rust-diagnostics.h" + +namespace Rust { +/* HACK: used to resolve the expression-or-statement problem at the end of a + * block by allowing either to be returned (technically). Tagged union would + * probably take up the same amount of space. */ +struct ExprOrStmt +{ + std::unique_ptr expr; + std::unique_ptr stmt; + + /* I was going to resist the urge to make this a real class and make it POD, + * but construction in steps is too difficult. So it'll just also have a + * constructor. */ + + // expression constructor + ExprOrStmt (std::unique_ptr expr) : expr (std::move (expr)) {} + + // statement constructor + ExprOrStmt (std::unique_ptr stmt) : stmt (std::move (stmt)) {} + + // macro constructor + ExprOrStmt (std::unique_ptr macro) + : expr (std::move (macro)) + {} + + // Returns whether this object is in an error state. + bool is_error () const + { + return (expr == nullptr && stmt == nullptr) + || (expr != nullptr && stmt != nullptr); + } + + // Returns an error state object. 
+ static ExprOrStmt create_error () { return ExprOrStmt (nullptr, nullptr); } + + ~ExprOrStmt () = default; + + /* no copy constructors/assignment as simple object like this shouldn't + * require it */ + + // move constructors + ExprOrStmt (ExprOrStmt &&other) = default; + ExprOrStmt &operator= (ExprOrStmt &&other) = default; + +private: + // private constructor only used for creating error state expr or stmt objects + ExprOrStmt (AST::Expr *expr, AST::Stmt *stmt) : expr (expr), stmt (stmt) {} + + // make this work: have a disambiguation specifically for known statements + // (i.e. ';' and 'let'). then, have a special "parse expr or stmt" function + // that returns this type. inside it, it parses an expression, and then + // determines whether to return expr or stmt via whether the next token is a + // semicolon. should be able to disambiguate inside that function between + // stmts with blocks and without blocks. +}; + +/* Restrictions on parsing used to signal that certain ambiguous grammar + * features should be parsed in a certain way. */ +struct ParseRestrictions +{ + bool can_be_struct_expr = true; + /* Whether the expression was entered from a unary expression - prevents stuff + * like struct exprs being parsed from a dereference. */ + bool entered_from_unary = false; + bool expr_can_be_null = false; + bool expr_can_be_stmt = false; + bool consume_semi = true; +}; + +// Parser implementation for gccrs. 
+// TODO: if updated to C++20, ManagedTokenSource would be useful as a concept +template class Parser +{ +public: + /** + * Consume a token, reporting an error if it isn't the next token + * + * @param t ID of the token to consume + * + * @return true if the token was next, false if it wasn't found + */ + bool skip_token (TokenId t); + + /** + * Same as `skip_token` but allows for failure without necessarily reporting + * an error + * + * @param t ID of the token to consume + * + * @return true if the token was next, false if it wasn't found + */ + bool maybe_skip_token (TokenId t); + + std::unique_ptr + parse_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + ParseRestrictions restrictions = ParseRestrictions ()); + + std::unique_ptr parse_literal_expr (AST::AttrVec outer_attrs + = AST::AttrVec ()); + + std::unique_ptr + parse_block_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + + std::unique_ptr parse_item (bool called_from_statement); + std::unique_ptr parse_pattern (); + + /** + * Parse a statement + * + * Statement : ';' + * | Item + * | LetStatement + * | ExpressionStatement + * | MacroInvocationSemi + */ + std::unique_ptr parse_stmt (ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr parse_type (bool save_errors = true); + std::unique_ptr parse_external_item (); + std::unique_ptr parse_trait_item (); + std::unique_ptr parse_inherent_impl_item (); + std::unique_ptr parse_trait_impl_item (); + AST::PathInExpression parse_path_in_expression (); + std::vector > parse_lifetime_params (); + AST::Visibility parse_visibility (); + std::unique_ptr parse_identifier_pattern (); + std::unique_ptr parse_token_tree (); + AST::Attribute parse_attribute_body (); + AST::AttrVec parse_inner_attributes (); + +private: + void skip_after_semicolon (); + void skip_after_end (); + void skip_after_end_block (); + void skip_after_next_block (); + void skip_after_end_attribute (); + + 
const_TokenPtr expect_token (TokenId t); + void unexpected_token (const_TokenPtr t); + bool skip_generics_right_angle (); + + void parse_statement_seq (bool (Parser::*done) ()); + + // AST-related stuff - maybe move or something? + AST::Attribute parse_inner_attribute (); + AST::AttrVec parse_outer_attributes (); + AST::Attribute parse_outer_attribute (); + std::unique_ptr parse_attr_input (); + AST::Attribute parse_doc_comment (); + + // Path-related + AST::SimplePath parse_simple_path (); + AST::SimplePathSegment parse_simple_path_segment (); + AST::TypePath parse_type_path (); + std::unique_ptr parse_type_path_segment (); + AST::PathIdentSegment parse_path_ident_segment (); + AST::GenericArg parse_generic_arg (); + AST::GenericArgs parse_path_generic_args (); + AST::GenericArgsBinding parse_generic_args_binding (); + AST::TypePathFunction parse_type_path_function (Location locus); + AST::PathExprSegment parse_path_expr_segment (); + AST::QualifiedPathInExpression + // When given a pratt_parsed_loc, use it as the location of the + // first token parsed in the expression (the parsing of that first + // token should be skipped). 
+ parse_qualified_path_in_expression (Location pratt_parsed_loc + = Linemap::unknown_location ()); + AST::QualifiedPathType + parse_qualified_path_type (Location pratt_parsed_loc + = Linemap::unknown_location ()); + AST::QualifiedPathInType parse_qualified_path_in_type (); + + // Token tree or macro related + AST::DelimTokenTree parse_delim_token_tree (); + std::unique_ptr + parse_macro_rules_def (AST::AttrVec outer_attrs); + std::unique_ptr + parse_macro_invocation_semi (AST::AttrVec outer_attrs); + std::unique_ptr + parse_macro_invocation (AST::AttrVec outer_attrs); + AST::MacroRule parse_macro_rule (); + AST::MacroMatcher parse_macro_matcher (); + std::unique_ptr parse_macro_match (); + std::unique_ptr parse_macro_match_fragment (); + std::unique_ptr parse_macro_match_repetition (); + + // Top-level item-related + std::unique_ptr parse_vis_item (AST::AttrVec outer_attrs); + std::unique_ptr parse_macro_item (AST::AttrVec outer_attrs); + + // VisItem subclass-related + std::unique_ptr parse_module (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr + parse_extern_crate (AST::Visibility vis, AST::AttrVec outer_attrs); + std::unique_ptr + parse_use_decl (AST::Visibility vis, AST::AttrVec outer_attrs); + std::unique_ptr parse_use_tree (); + std::unique_ptr parse_function (AST::Visibility vis, + AST::AttrVec outer_attrs); + AST::FunctionQualifiers parse_function_qualifiers (); + std::vector > + parse_generic_params_in_angles (); + template + std::vector > + parse_generic_params (EndTokenPred is_end_token); + template + std::unique_ptr + parse_generic_param (EndTokenPred is_end_token); + + template + std::vector > + parse_lifetime_params (EndTokenPred is_end_token); + std::vector parse_lifetime_params_objs (); + template + std::vector + parse_lifetime_params_objs (EndTokenPred is_end_token); + template + auto parse_non_ptr_sequence ( + ParseFunction parsing_function, EndTokenPred is_end_token, + std::string error_msg = "failed to parse generic param in 
generic params") + -> std::vector; + AST::LifetimeParam parse_lifetime_param (); + std::vector > parse_type_params (); + template + std::vector > + parse_type_params (EndTokenPred is_end_token); + std::unique_ptr parse_type_param (); + template + std::vector + parse_function_params (EndTokenPred is_end_token); + AST::FunctionParam parse_function_param (); + std::unique_ptr parse_function_return_type (); + AST::WhereClause parse_where_clause (); + std::unique_ptr parse_where_clause_item (); + std::unique_ptr + parse_lifetime_where_clause_item (); + std::unique_ptr + parse_type_bound_where_clause_item (); + std::vector parse_for_lifetimes (); + template + std::vector > + parse_type_param_bounds (EndTokenPred is_end_token); + std::vector > parse_type_param_bounds (); + std::unique_ptr parse_type_param_bound (); + std::unique_ptr parse_trait_bound (); + std::vector parse_lifetime_bounds (); + template + std::vector parse_lifetime_bounds (EndTokenPred is_end_token); + AST::Lifetime parse_lifetime (); + std::unique_ptr parse_type_alias (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr parse_struct (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::vector parse_struct_fields (); + template + std::vector parse_struct_fields (EndTokenPred is_end_token); + AST::StructField parse_struct_field (); + std::vector parse_tuple_fields (); + AST::TupleField parse_tuple_field (); + std::unique_ptr parse_enum (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::vector > parse_enum_items (); + template + std::vector > + parse_enum_items (EndTokenPred is_end_token); + std::unique_ptr parse_enum_item (); + std::unique_ptr parse_union (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr + parse_const_item (AST::Visibility vis, AST::AttrVec outer_attrs); + std::unique_ptr parse_static_item (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr parse_trait (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr + 
parse_trait_type (AST::AttrVec outer_attrs); + std::unique_ptr + parse_trait_const (AST::AttrVec outer_attrs); + AST::SelfParam parse_self_param (); + std::unique_ptr parse_impl (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr + parse_inherent_impl_function_or_method (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr + parse_trait_impl_function_or_method (AST::Visibility vis, + AST::AttrVec outer_attrs); + std::unique_ptr + parse_extern_block (AST::Visibility vis, AST::AttrVec outer_attrs); + AST::NamedFunctionParam parse_named_function_param (AST::AttrVec outer_attrs + = AST::AttrVec ()); + AST::Method parse_method (); + + // Expression-related (Pratt parsed) + std::unique_ptr + parse_expr (int right_binding_power, + AST::AttrVec outer_attrs = AST::AttrVec (), + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + null_denotation (const_TokenPtr t, AST::AttrVec outer_attrs = AST::AttrVec (), + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + left_denotation (const_TokenPtr t, std::unique_ptr left, + AST::AttrVec outer_attrs = AST::AttrVec (), + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_arithmetic_or_logical_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, AST::ArithmeticOrLogicalExpr::ExprType expr_type, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_binary_plus_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_binary_minus_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_binary_mult_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + 
parse_binary_div_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_binary_mod_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_bitwise_and_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_bitwise_or_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_bitwise_xor_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_left_shift_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_right_shift_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_comparison_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + AST::ComparisonExpr::ExprType expr_type, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_binary_equal_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr parse_binary_not_equal_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_binary_greater_than_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_binary_less_than_expr ( + const_TokenPtr 
tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_binary_greater_equal_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_binary_less_equal_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_lazy_or_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_lazy_and_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_type_cast_expr (const_TokenPtr tok, + std::unique_ptr expr_to_cast, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_compound_assignment_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, AST::CompoundAssignmentExpr::ExprType expr_type, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_plus_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_minus_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_mult_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_div_assig_expr (const_TokenPtr tok, std::unique_ptr left, + 
AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_mod_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_and_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_or_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_xor_assig_expr (const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_left_shift_assig_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_right_shift_assig_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_await_expr (const_TokenPtr tok, + std::unique_ptr expr_to_await, + AST::AttrVec outer_attrs); + std::unique_ptr parse_method_call_expr ( + const_TokenPtr tok, std::unique_ptr receiver_expr, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_function_call_expr ( + const_TokenPtr tok, std::unique_ptr function_expr, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_led_range_exclusive_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_nud_range_exclusive_expr (const_TokenPtr tok, AST::AttrVec outer_attrs); + std::unique_ptr parse_range_inclusive_expr ( + const_TokenPtr tok, std::unique_ptr left, + AST::AttrVec 
outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_range_to_inclusive_expr (const_TokenPtr tok, AST::AttrVec outer_attrs); + std::unique_ptr parse_tuple_index_expr ( + const_TokenPtr tok, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_field_access_expr ( + const_TokenPtr tok, std::unique_ptr struct_expr, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_index_expr (const_TokenPtr tok, std::unique_ptr array_expr, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr parse_macro_invocation_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + std::unique_ptr + parse_struct_expr_struct_partial (AST::PathInExpression path, + AST::AttrVec outer_attrs); + std::unique_ptr + parse_struct_expr_tuple_partial (AST::PathInExpression path, + AST::AttrVec outer_attrs); + AST::PathInExpression parse_path_in_expression_pratt (const_TokenPtr tok); + std::unique_ptr + parse_closure_expr_pratt (const_TokenPtr tok, + AST::AttrVec outer_attrs = AST::AttrVec ()); + std::unique_ptr parse_tuple_index_expr_float ( + const_TokenPtr tok, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions = ParseRestrictions ()); + + // Expression-related (non-Pratt parsed) + std::unique_ptr + parse_expr_with_block (AST::AttrVec outer_attrs); + std::unique_ptr + parse_expr_without_block (AST::AttrVec outer_attrs = AST::AttrVec (), + ParseRestrictions restrictions + = ParseRestrictions ()); + // When given a pratt_parsed_loc, use it as the location of the + // first token parsed in the expression (the parsing of that first + // token should be skipped). 
+ std::unique_ptr + parse_if_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + std::unique_ptr + parse_if_let_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + std::unique_ptr + parse_loop_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + AST::LoopLabel label = AST::LoopLabel::error (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + std::unique_ptr + parse_while_loop_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + AST::LoopLabel label = AST::LoopLabel::error (), + Location pratt_parsed_loc + = Linemap::unknown_location ()); + std::unique_ptr + parse_while_let_loop_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + AST::LoopLabel label = AST::LoopLabel::error ()); + std::unique_ptr + parse_for_loop_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + AST::LoopLabel label = AST::LoopLabel::error ()); + std::unique_ptr + parse_match_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + AST::MatchArm parse_match_arm (); + std::vector > + parse_match_arm_patterns (TokenId end_token_id); + std::unique_ptr + parse_labelled_loop_expr (AST::AttrVec outer_attrs = AST::AttrVec ()); + AST::LoopLabel parse_loop_label (); + std::unique_ptr + parse_async_block_expr (AST::AttrVec outer_attrs = AST::AttrVec ()); + std::unique_ptr parse_grouped_expr (AST::AttrVec outer_attrs + = AST::AttrVec ()); + std::unique_ptr parse_closure_expr (AST::AttrVec outer_attrs + = AST::AttrVec ()); + AST::ClosureParam parse_closure_param (); + + // When given a pratt_parsed_loc, use it as the location of the + // first token parsed in the expression (the parsing of that first + // token should be skipped). 
+ std::unique_ptr + parse_return_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + std::unique_ptr + parse_break_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + std::unique_ptr + parse_continue_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc + = Linemap::unknown_location ()); + std::unique_ptr + parse_unsafe_block_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc + = Linemap::unknown_location ()); + std::unique_ptr + parse_array_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc = Linemap::unknown_location ()); + std::unique_ptr + parse_grouped_or_tuple_expr (AST::AttrVec outer_attrs = AST::AttrVec (), + Location pratt_parsed_loc + = Linemap::unknown_location ()); + std::unique_ptr parse_struct_expr_field (); + bool will_be_expr_with_block (); + + // Type-related + std::unique_ptr parse_type_no_bounds (); + std::unique_ptr parse_slice_or_array_type (); + std::unique_ptr parse_raw_pointer_type (); + std::unique_ptr parse_reference_type (); + std::unique_ptr + parse_bare_function_type (std::vector for_lifetimes); + std::unique_ptr parse_paren_prefixed_type (); + std::unique_ptr parse_paren_prefixed_type_no_bounds (); + std::unique_ptr parse_for_prefixed_type (); + AST::MaybeNamedParam parse_maybe_named_param (AST::AttrVec outer_attrs); + + // Statement-related + + /** + *Parse a let-statement + * LetStatement : + * OuterAttribute* + * 'let' PatternNoTopAlt ( ':' Type )? ('=' Expression )? 
';' + * + * @param outer_attrs Outer attributes preceding the let-statement + * @param restrictions Parse restrictions; presumably these decide whether + * the let-statement may be parsed without a following semicolon (this + * signature has no `allow_no_semi` parameter) + */ + std::unique_ptr parse_let_stmt (AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr parse_expr_stmt (AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + std::unique_ptr + parse_expr_stmt_with_block (AST::AttrVec outer_attrs); + std::unique_ptr + parse_expr_stmt_without_block (AST::AttrVec outer_attrs, + ParseRestrictions restrictions + = ParseRestrictions ()); + ExprOrStmt parse_stmt_or_expr_without_block (); + ExprOrStmt parse_stmt_or_expr_with_block (AST::AttrVec outer_attrs); + ExprOrStmt parse_macro_invocation_maybe_semi (AST::AttrVec outer_attrs); + ExprOrStmt parse_path_based_stmt_or_expr (AST::AttrVec outer_attrs); + + // Pattern-related + std::unique_ptr parse_literal_or_range_pattern (); + std::unique_ptr parse_range_pattern_bound (); + std::unique_ptr parse_reference_pattern (); + std::unique_ptr parse_grouped_or_tuple_pattern (); + std::unique_ptr parse_slice_pattern (); + std::unique_ptr parse_ident_leading_pattern (); + std::unique_ptr parse_tuple_struct_items (); + AST::StructPatternElements parse_struct_pattern_elems (); + std::unique_ptr parse_struct_pattern_field (); + std::unique_ptr + parse_struct_pattern_field_partial (AST::AttrVec outer_attrs); + + int left_binding_power (const_TokenPtr token); + + bool done_end (); + bool done_end_or_else (); + bool done_end_of_file (); + + void add_error (Error error) { error_table.push_back (std::move (error)); } + +public: + // Construct parser with specified "managed" token source. + Parser (ManagedTokenSource &tokenSource) : lexer (tokenSource) {} + + // Parse items without parsing an entire crate. This function is the main + // parsing loop of parse_crate (). + std::vector > parse_items (); + + // Main entry point for parser. + std::unique_ptr parse_crate (); + + // Dumps all lexer output.
+ void debug_dump_lex_output (std::ostream &out); + void debug_dump_ast_output (AST::Crate &crate, std::ostream &out); + + // Returns whether any parsing errors have occurred. + bool has_errors () const { return !error_table.empty (); } + // Remove all parsing errors from the table + void clear_errors () { error_table.clear (); } + + // Get a reference to the list of errors encountered + std::vector &get_errors () { return error_table; } + + const ManagedTokenSource &get_token_source () const { return lexer; } + + const_TokenPtr peek_current_token () { return lexer.peek_token (0); } + +private: + // The token source (usually lexer) associated with the parser. + ManagedTokenSource &lexer; + // The error list. + std::vector error_table; + // The names of inline modules while parsing. + std::vector inline_module_stack; + + class InlineModuleStackScope + { + private: + Parser &parser; + + public: + InlineModuleStackScope (Parser &parser, std::string name) : parser (parser) + { + parser.inline_module_stack.emplace_back (std::move (name)); + } + ~InlineModuleStackScope () { parser.inline_module_stack.pop_back (); } + }; +}; + +std::string +extract_module_path (const AST::AttrVec &inner_attrs, + const AST::AttrVec &outer_attrs, const std::string &name); + +/** + * Check if a MacroMatch is allowed to follow the last parsed MacroMatch. + * + * @param last_match Last matcher parsed before the current match + * @param current_match Current matcher to check + * + * @return true if the follow-up is valid, false otherwise + */ +bool +is_match_compatible (const AST::MacroMatch &last_match, + const AST::MacroMatch &current_match); +} // namespace Rust + +// The parser is now a template, so include the implementations of all methods +#include "rust-parse-impl.h" + +#endif // RUST_PARSE_H -- 2.25.1