From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from gnu.wildebeest.org (wildebeest.demon.nl [212.238.236.112]) by sourceware.org (Postfix) with ESMTPS id E01D03857C48 for ; Sun, 4 Jul 2021 22:10:18 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org E01D03857C48 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=klomp.org Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=klomp.org Received: from reform (deer0x01.wildebeest.org [172.31.17.131]) (using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by gnu.wildebeest.org (Postfix) with ESMTPSA id 2A1C5302FBA6; Mon, 5 Jul 2021 00:10:17 +0200 (CEST) Received: by reform (Postfix, from userid 1000) id 123142E80FB4; Mon, 5 Jul 2021 00:10:17 +0200 (CEST) From: Mark Wielaard To: gcc-rust@gcc.gnu.org Cc: Mark Wielaard Subject: [PATCH 1/2] Handle shebang line, plus any whitespace and comment skipping in lexer Date: Mon, 5 Jul 2021 00:09:58 +0200 Message-Id: <20210704220959.85580-2-mark@klomp.org> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20210704220959.85580-1-mark@klomp.org> References: <20210704220959.85580-1-mark@klomp.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-10.8 required=5.0 tests=BAYES_00, GIT_PATCH_0, JMQ_SPF_NEUTRAL, KAM_DMARC_STATUS, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-rust@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: gcc-rust mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 04 Jul 2021 22:10:20 -0000 The lexer tried to handle the shebang line but used loc directly, instead of the current_column. And it assumed a '/' should immediately follow the "#!". But if the "#!" 
is followed by whitespace and/or comments and a '[' character, then the first line isn't seen as a shebang line (even if the kernel or shell would) but as the start of an inner attribute. Add various tests for when the first line starting with "#!" is seen as a shebang line (and should be skipped). And some tests for when there is a '[' character following some whitespace and/or comments and the "#!" is seen as part of an inner attribute. --- gcc/rust/lex/rust-lex.cc | 79 ++++++++++++++----- .../rust/compile/torture/not_shebang.rs | 3 + .../torture/not_shebang_block_comment.rs | 1 + .../compile/torture/not_shebang_comment.rs | 3 + .../torture/not_shebang_multiline_comment.rs | 7 ++ .../compile/torture/not_shebang_spaces.rs | 6 ++ gcc/testsuite/rust/compile/torture/shebang.rs | 3 + .../rust/compile/torture/shebang_plus_attr.rs | 3 + .../compile/torture/shebang_plus_attr2.rs | 3 + 9 files changed, 89 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang.rs create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_comment.rs create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs create mode 100755 gcc/testsuite/rust/compile/torture/shebang.rs create mode 100755 gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs create mode 100755 gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index d1384168731..ebd69de0fd1 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -237,28 +237,63 @@ Lexer::build_token () current_char = peek_input (); skip_input (); - // return end of file token if end of file - if (current_char == EOF) - return Token::make (END_OF_FILE, loc); - // detect shebang - if (loc == 1 && current_line == 1 && current_char == '#') + // Must 
be the first thing on the first line, starting with #! + // But since an attribute can also start with an #! we don't count it as a + // shebang line when after any whitespace or comments there is a [. If it + // is a shebang line we simple drop the line. Otherwise we don't consume + // any characters and fall through to the real tokenizer. + if (current_line == 1 && current_column == 1 && current_char == '#' + && peek_input () == '!') { - current_char = peek_input (); - - if (current_char == '!') + int n = 1; + while (true) { - skip_input (); - current_char = peek_input (); - - if (current_char == '/') + int next_char = peek_input (n); + if (is_whitespace (next_char)) + n++; + else if (next_char == '/' && peek_input (n + 1) == '/') { - // definitely shebang - - skip_input (); - - // ignore rest of line - while (current_char != '\n') + // A single line comment + n += 2; + next_char = peek_input (n); + while (next_char != '\n' && next_char != EOF) + { + n++; + next_char = peek_input (n); + } + if (next_char == '\n') + n++; + } + else if (next_char == '/' && peek_input (n + 1) == '*') + { + // Start of a block comment + n += 2; + int level = 1; + while (level > 0) + { + if (peek_input (n) == EOF) + break; + else if (peek_input (n) == '/' + && peek_input (n + 1) == '*') + { + n += 2; + level += 1; + } + else if (peek_input (n) == '*' + && peek_input (n + 1) == '/') + { + n += 2; + level -= 1; + } + else + n++; + } + } + else if (next_char != '[') + { + // definitely shebang, ignore the first line + while (current_char != '\n' && current_char != EOF) { current_char = peek_input (); skip_input (); @@ -269,11 +304,17 @@ Lexer::build_token () current_column = 1; // tell line_table that new line starts line_map->start_line (current_line, max_column_hint); - continue; + break; } + else + break; /* Definitely not a shebang line. 
*/ } } + // return end of file token if end of file + if (current_char == EOF) + return Token::make (END_OF_FILE, loc); + // if not end of file, start tokenising switch (current_char) { diff --git a/gcc/testsuite/rust/compile/torture/not_shebang.rs b/gcc/testsuite/rust/compile/torture/not_shebang.rs new file mode 100644 index 00000000000..37e01b65940 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang.rs @@ -0,0 +1,3 @@ +#! +[allow(unused)] +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs new file mode 100644 index 00000000000..662f6506749 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs @@ -0,0 +1 @@ +#!/*/this/is/a/comment*/[allow(unused)] fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs new file mode 100644 index 00000000000..273ae4e8e2a --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs @@ -0,0 +1,3 @@ +#!//this/is/a/comment +[allow(unused)] +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs new file mode 100644 index 00000000000..86800b14cb3 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs @@ -0,0 +1,7 @@ +#!//this/is/a/comment + +/* Also a /* nested */ + multiline // comment + with some more whitespace after, but then finally a [, so not a real #! line. */ + +[allow(unused)] fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs new file mode 100644 index 00000000000..6b94a69111a --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs @@ -0,0 +1,6 @@ +#! 
+ + [allow(unused)] + + fn main () { } + diff --git a/gcc/testsuite/rust/compile/torture/shebang.rs b/gcc/testsuite/rust/compile/torture/shebang.rs new file mode 100755 index 00000000000..1c8b9c9a955 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/shebang.rs @@ -0,0 +1,3 @@ +#!/usr/bin/env cat + +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs new file mode 100755 index 00000000000..075bc6cf594 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs @@ -0,0 +1,3 @@ +#!/usr/bin/env cat +#![allow(unused)] +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs new file mode 100755 index 00000000000..ece8a52381c --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs @@ -0,0 +1,3 @@ +#!//usr/bin/env cat +#![allow(unused)] +fn main () { } -- 2.32.0