From: Mark Wielaard <mark@klomp.org>
To: gcc-rust@gcc.gnu.org
Cc: Mark Wielaard <mark@klomp.org>
Subject: [PATCH 1/2] Handle shebang line, plus any whitespace and comment skipping in lexer
Date: Mon, 5 Jul 2021 00:09:58 +0200 [thread overview]
Message-ID: <20210704220959.85580-2-mark@klomp.org> (raw)
In-Reply-To: <20210704220959.85580-1-mark@klomp.org>
The lexer tried to handle the shebang line but used loc directly,
instead of the current_column. And it assumed a '/' should immediately
follow the "#!". But if the "#!" is followed by whitespace and/or
comments and a '[' character, then the first line isn't seen as a
shebang line (even if the kernel or shell would) but as the start of
an inner attribute.
Add various tests for when the first line starting with "#!" is seen
as a shebang line (and should be skipped). And some tests where there is a
'[' character following some whitespace and/or comments and the "#!"
is seen as part of an inner attribute.
---
gcc/rust/lex/rust-lex.cc | 79 ++++++++++++++-----
.../rust/compile/torture/not_shebang.rs | 3 +
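.../compile/torture/not_shebang_block_comment.rs | 1 +
.../compile/torture/not_shebang_comment.rs | 3 +
.../compile/torture/not_shebang_multiline_comment.rs | 7 ++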
.../compile/torture/not_shebang_spaces.rs | 6 ++
gcc/testsuite/rust/compile/torture/shebang.rs | 3 +
.../rust/compile/torture/shebang_plus_attr.rs | 3 +
.../compile/torture/shebang_plus_attr2.rs | 3 +
9 files changed, 89 insertions(+), 19 deletions(-)
create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang.rs
create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
create mode 100755 gcc/testsuite/rust/compile/torture/shebang.rs
create mode 100755 gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
create mode 100755 gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index d1384168731..ebd69de0fd1 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -237,28 +237,63 @@ Lexer::build_token ()
current_char = peek_input ();
skip_input ();
- // return end of file token if end of file
- if (current_char == EOF)
- return Token::make (END_OF_FILE, loc);
-
// detect shebang
- if (loc == 1 && current_line == 1 && current_char == '#')
+ // Must be the first thing on the first line, starting with #!
+ // But since an attribute can also start with an #! we don't count it as a
+ // shebang line when after any whitespace or comments there is a [. If it
+ // is a shebang line we simply drop the line. Otherwise we don't consume
+ // any characters and fall through to the real tokenizer.
+ if (current_line == 1 && current_column == 1 && current_char == '#'
+ && peek_input () == '!')
{
- current_char = peek_input ();
-
- if (current_char == '!')
+ int n = 1;
+ while (true)
{
- skip_input ();
- current_char = peek_input ();
-
- if (current_char == '/')
+ int next_char = peek_input (n);
+ if (is_whitespace (next_char))
+ n++;
+ else if (next_char == '/' && peek_input (n + 1) == '/')
{
- // definitely shebang
-
- skip_input ();
-
- // ignore rest of line
- while (current_char != '\n')
+ // A single line comment
+ n += 2;
+ next_char = peek_input (n);
+ while (next_char != '\n' && next_char != EOF)
+ {
+ n++;
+ next_char = peek_input (n);
+ }
+ if (next_char == '\n')
+ n++;
+ }
+ else if (next_char == '/' && peek_input (n + 1) == '*')
+ {
+ // Start of a block comment
+ n += 2;
+ int level = 1;
+ while (level > 0)
+ {
+ if (peek_input (n) == EOF)
+ break;
+ else if (peek_input (n) == '/'
+ && peek_input (n + 1) == '*')
+ {
+ n += 2;
+ level += 1;
+ }
+ else if (peek_input (n) == '*'
+ && peek_input (n + 1) == '/')
+ {
+ n += 2;
+ level -= 1;
+ }
+ else
+ n++;
+ }
+ }
+ else if (next_char != '[')
+ {
+ // definitely shebang, ignore the first line
+ while (current_char != '\n' && current_char != EOF)
{
current_char = peek_input ();
skip_input ();
@@ -269,11 +304,17 @@ Lexer::build_token ()
current_column = 1;
// tell line_table that new line starts
line_map->start_line (current_line, max_column_hint);
- continue;
+ break;
}
+ else
+ break; /* Definitely not a shebang line. */
}
}
+ // return end of file token if end of file
+ if (current_char == EOF)
+ return Token::make (END_OF_FILE, loc);
+
// if not end of file, start tokenising
switch (current_char)
{
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang.rs b/gcc/testsuite/rust/compile/torture/not_shebang.rs
new file mode 100644
index 00000000000..37e01b65940
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang.rs
@@ -0,0 +1,3 @@
+#!
+[allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
new file mode 100644
index 00000000000..662f6506749
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
@@ -0,0 +1 @@
+#!/*/this/is/a/comment*/[allow(unused)] fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
new file mode 100644
index 00000000000..273ae4e8e2a
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
@@ -0,0 +1,3 @@
+#!//this/is/a/comment
+[allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
new file mode 100644
index 00000000000..86800b14cb3
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
@@ -0,0 +1,7 @@
+#!//this/is/a/comment
+
+/* Also a /* nested */
+ multiline // comment
+ with some more whitespace after, but then finally a [, so not a real #! line. */
+
+[allow(unused)] fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
new file mode 100644
index 00000000000..6b94a69111a
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
@@ -0,0 +1,6 @@
+#!
+
+ [allow(unused)]
+
+ fn main () { }
+
diff --git a/gcc/testsuite/rust/compile/torture/shebang.rs b/gcc/testsuite/rust/compile/torture/shebang.rs
new file mode 100755
index 00000000000..1c8b9c9a955
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang.rs
@@ -0,0 +1,3 @@
+#!/usr/bin/env cat
+
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
new file mode 100755
index 00000000000..075bc6cf594
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
@@ -0,0 +1,3 @@
+#!/usr/bin/env cat
+#![allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
new file mode 100755
index 00000000000..ece8a52381c
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
@@ -0,0 +1,3 @@
+#!//usr/bin/env cat
+#![allow(unused)]
+fn main () { }
--
2.32.0
next prev parent reply other threads:[~2021-07-04 22:10 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-04 22:09 shebang handling Mark Wielaard
2021-07-04 22:09 ` Mark Wielaard [this message]
2021-07-04 22:09 ` [PATCH 2/2] Remove has_shebang flag from AST and HIR Crate classes Mark Wielaard
2021-07-05 5:46 ` shebang handling Marc
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210704220959.85580-2-mark@klomp.org \
--to=mark@klomp.org \
--cc=gcc-rust@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).