public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] aarch64: Check for register aliases before mnemonics
@ 2021-11-30 12:16 Richard Sandiford
  2021-11-30 12:25 ` Richard Earnshaw
  0 siblings, 1 reply; 2+ messages in thread
From: Richard Sandiford @ 2021-11-30 12:16 UTC (permalink / raw)
  To: binutils; +Cc: rearnsha

Previously we would not accept:

	A .req B

if A happened to be the name of an instruction.  Adding new
instructions could therefore invalidate existing register aliases.

I noticed this with a test that used "zero" as a register alias
for "xzr", where "zero" is now also the name of an SME instruction.
I don't have any evidence that "real" code is doing this, but it
seems at least plausible.

This patch switches things so that we check for register aliases
first.  It might slow down parsing slightly, but the difference
is unlikely to be noticeable.

Things like:

	b	.req + 0

still work, since create_register_alias checks for " .req ",
and with the input scrubber, we'll only keep whitespace after
.req if it's followed by another name.  If there's some valid
expression that I haven't thought about that is scrubbed to
" .req ", users could avoid the ambiguity by wrapping .req
in parentheses.

The new test for invalid aliases already passed.  I just wanted
something to exercise the !dot condition.

I can't find a way of exercising the (existing) p == base condition,
but I'm not brave enough to say that it can never happen.  If it does
happen, get_mnemonic_name would return an empty string.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gas/
	* config/tc-aarch64.c (opcode_lookup): Move mnemonic extraction
	code to...
	(md_assemble): ...here.  Check for register aliases first.
	* testsuite/gas/aarch64/register_aliases.d,
	testsuite/gas/aarch64/register_aliases.s: Test for a register
	alias called "zero".
	* testsuite/gas/aarch64/register_aliases_invalid.d,
	testsuite/gas/aarch64/register_aliases_invalid.l,
	testsuite/gas/aarch64/register_aliases_invalid.s: New test.
---
 gas/config/tc-aarch64.c                       | 62 +++++++++----------
 gas/testsuite/gas/aarch64/register_aliases.d  |  1 +
 gas/testsuite/gas/aarch64/register_aliases.s  |  3 +-
 .../gas/aarch64/register_aliases_invalid.d    |  1 +
 .../gas/aarch64/register_aliases_invalid.l    |  3 +
 .../gas/aarch64/register_aliases_invalid.s    |  2 +
 6 files changed, 38 insertions(+), 34 deletions(-)
 create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.d
 create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.l
 create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.s

diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index 9fc61f70c61..b6ed80e6d3a 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -5738,25 +5738,18 @@ lookup_mnemonic (const char *start, int len)
 }
 
 /* Subroutine of md_assemble, responsible for looking up the primary
-   opcode from the mnemonic the user wrote.  STR points to the
-   beginning of the mnemonic. */
+   opcode from the mnemonic the user wrote.  BASE points to the beginning
+   of the mnemonic, DOT points to the first '.' within the mnemonic
+   (if any) and END points to the end of the mnemonic.  */
 
 static templates *
-opcode_lookup (char **str)
+opcode_lookup (char *base, char *dot, char *end)
 {
-  char *end, *base, *dot;
   const aarch64_cond *cond;
   char condname[16];
   int len;
 
-  /* Scan up to the end of the mnemonic, which must end in white space,
-     '.', or end of string.  */
-  dot = 0;
-  for (base = end = *str; is_part_of_name(*end); end++)
-    if (*end == '.' && !dot)
-      dot = end;
-
-  if (end == base || dot == base)
+  if (dot == end)
     return 0;
 
   inst.cond = COND_ALWAYS;
@@ -5765,23 +5758,13 @@ opcode_lookup (char **str)
   if (dot)
     {
       cond = str_hash_find_n (aarch64_cond_hsh, dot + 1, end - dot - 1);
-      if (cond)
-	{
-	  inst.cond = cond->value;
-	  *str = end;
-	}
-      else
-	{
-	  *str = dot;
-	  return 0;
-	}
+      if (!cond)
+	return 0;
+      inst.cond = cond->value;
       len = dot - base;
     }
   else
-    {
-      *str = end;
-      len = end - base;
-    }
+    len = end - base;
 
   if (inst.cond == COND_ALWAYS)
     {
@@ -7870,7 +7853,6 @@ dump_opcode_operands (const aarch64_opcode *opcode)
 void
 md_assemble (char *str)
 {
-  char *p = str;
   templates *template;
   const aarch64_opcode *opcode;
   aarch64_inst *inst_base;
@@ -7893,14 +7875,28 @@ md_assemble (char *str)
   DEBUG_TRACE ("==============================");
   DEBUG_TRACE ("Enter md_assemble with %s", str);
 
-  template = opcode_lookup (&p);
+  /* Scan up to the end of the mnemonic, which must end in whitespace,
+     '.', or end of string.  */
+  char *p = str;
+  char *dot = 0;
+  for (; is_part_of_name (*p); p++)
+    if (*p == '.' && !dot)
+      dot = p;
+
+  if (p == str)
+    {
+      as_bad (_("unknown mnemonic -- `%s'"), str);
+      return;
+    }
+
+  if (!dot && create_register_alias (str, p))
+    return;
+
+  template = opcode_lookup (str, dot, p);
   if (!template)
     {
-      /* It wasn't an instruction, but it might be a register alias of
-         the form alias .req reg directive.  */
-      if (!create_register_alias (str, p))
-	as_bad (_("unknown mnemonic `%s' -- `%s'"), get_mnemonic_name (str),
-		str);
+      as_bad (_("unknown mnemonic `%s' -- `%s'"), get_mnemonic_name (str),
+	      str);
       return;
     }
 
diff --git a/gas/testsuite/gas/aarch64/register_aliases.d b/gas/testsuite/gas/aarch64/register_aliases.d
index eab63870dee..8d614b47606 100644
--- a/gas/testsuite/gas/aarch64/register_aliases.d
+++ b/gas/testsuite/gas/aarch64/register_aliases.d
@@ -10,3 +10,4 @@ Disassembly of section \.text:
    8:	f94003b1 	ldr	x17, \[x29\]
    c:	f90003b0 	str	x16, \[x29\]
   10:	f94003b1 	ldr	x17, \[x29\]
+  14:	f900001f 	str	xzr, \[x0\]
diff --git a/gas/testsuite/gas/aarch64/register_aliases.s b/gas/testsuite/gas/aarch64/register_aliases.s
index fcd065078bb..856be5699ce 100644
--- a/gas/testsuite/gas/aarch64/register_aliases.s
+++ b/gas/testsuite/gas/aarch64/register_aliases.s
@@ -3,9 +3,10 @@
 	fp 	.req 	x29
 	ip0	.req 	x16
 	ip1	.req 	x17
+	zero	.req	xzr
 	add	ip0, ip0, lr
 	str 	ip0, [fp]
 	ldr	ip1, [fp]
 	str 	IP0, [fp]
 	ldr	IP1, [fp]
-
+	str	zero, [x0]
diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.d b/gas/testsuite/gas/aarch64/register_aliases_invalid.d
new file mode 100644
index 00000000000..7c453ce02b8
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.d
@@ -0,0 +1 @@
+#error_output: register_aliases_invalid.l
diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.l b/gas/testsuite/gas/aarch64/register_aliases_invalid.l
new file mode 100644
index 00000000000..6350049df74
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.l
@@ -0,0 +1,3 @@
+.*:
+.*: Error: unknown mnemonic `lr\.req' -- `lr\.req x30'
+.*: Error: unknown mnemonic `lr\.a' -- `lr\.a .req x30'
diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.s b/gas/testsuite/gas/aarch64/register_aliases_invalid.s
new file mode 100644
index 00000000000..2df2eaab4d6
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.s
@@ -0,0 +1,2 @@
+lr.req 	x30
+lr.a	.req	x30
-- 
2.25.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] aarch64: Check for register aliases before mnemonics
  2021-11-30 12:16 [PATCH] aarch64: Check for register aliases before mnemonics Richard Sandiford
@ 2021-11-30 12:25 ` Richard Earnshaw
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Earnshaw @ 2021-11-30 12:25 UTC (permalink / raw)
  To: binutils, rearnsha, richard.sandiford



On 30/11/2021 12:16, Richard Sandiford via Binutils wrote:
> Previously we would not accept:
> 
> 	A .req B
> 
> if A happened to be the name of an instruction.  Adding new
> instructions could therefore invalidate existing register aliases.
> 
> I noticed this with a test that used "zero" as a register alias
> for "xzr", where "zero" is now also the name of an SME instruction.
> I don't have any evidence that "real" code is doing this, but it
> seems at least plausible.
> 
> This patch switches things so that we check for register aliases
> first.  It might slow down parsing slightly, but the difference
> is unlikely to be noticeable.
> 
> Things like:
> 
> 	b	.req + 0
> 
> still work, since create_register_alias checks for " .req ",
> and with the input scrubber, we'll only keep whitespace after
> .req if it's followed by another name.  If there's some valid
> expression that I haven't thought about that is scrubbed to
> " .req ", users could avoid the ambiguity by wrapping .req
> in parentheses.
> 
> The new test for invalid aliases already passed.  I just wanted
> something to exercise the !dot condition.
> 
> I can't find a way of exercising the (existing) p == base condition,
> but I'm not brave enough to say that it can never happen.  If it does
> happen, get_mnemonic_name would return an empty string.
> 

We inherited the `a .req b` syntax from the aarch32 assembler, which in 
turn inherited it from the syntax supported by the original Acorn 
assembler for RISC iX.  However, it's always been a bit of an anomaly.

I wonder if it would be wise to add a more conventional directive

   .req a b

and then start the process of deprecating the old form?

> Tested on aarch64-linux-gnu.  OK to install?

OK.

R.

> 
> Richard
> 
> 
> gas/
> 	* config/tc-aarch64.c (opcode_lookup): Move mnemonic extraction
> 	code to...
> 	(md_assemble): ...here.  Check for register aliases first.
> 	* testsuite/gas/aarch64/register_aliases.d,
> 	testsuite/gas/aarch64/register_aliases.s: Test for a register
> 	alias called "zero".
> 	* testsuite/gas/aarch64/register_aliases_invalid.d,
> 	testsuite/gas/aarch64/register_aliases_invalid.l,
> 	testsuite/gas/aarch64/register_aliases_invalid.s: New test.
> ---
>   gas/config/tc-aarch64.c                       | 62 +++++++++----------
>   gas/testsuite/gas/aarch64/register_aliases.d  |  1 +
>   gas/testsuite/gas/aarch64/register_aliases.s  |  3 +-
>   .../gas/aarch64/register_aliases_invalid.d    |  1 +
>   .../gas/aarch64/register_aliases_invalid.l    |  3 +
>   .../gas/aarch64/register_aliases_invalid.s    |  2 +
>   6 files changed, 38 insertions(+), 34 deletions(-)
>   create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.d
>   create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.l
>   create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.s
> 
> diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
> index 9fc61f70c61..b6ed80e6d3a 100644
> --- a/gas/config/tc-aarch64.c
> +++ b/gas/config/tc-aarch64.c
> @@ -5738,25 +5738,18 @@ lookup_mnemonic (const char *start, int len)
>   }
>   
>   /* Subroutine of md_assemble, responsible for looking up the primary
> -   opcode from the mnemonic the user wrote.  STR points to the
> -   beginning of the mnemonic. */
> +   opcode from the mnemonic the user wrote.  BASE points to the beginning
> +   of the mnemonic, DOT points to the first '.' within the mnemonic
> +   (if any) and END points to the end of the mnemonic.  */
>   
>   static templates *
> -opcode_lookup (char **str)
> +opcode_lookup (char *base, char *dot, char *end)
>   {
> -  char *end, *base, *dot;
>     const aarch64_cond *cond;
>     char condname[16];
>     int len;
>   
> -  /* Scan up to the end of the mnemonic, which must end in white space,
> -     '.', or end of string.  */
> -  dot = 0;
> -  for (base = end = *str; is_part_of_name(*end); end++)
> -    if (*end == '.' && !dot)
> -      dot = end;
> -
> -  if (end == base || dot == base)
> +  if (dot == end)
>       return 0;
>   
>     inst.cond = COND_ALWAYS;
> @@ -5765,23 +5758,13 @@ opcode_lookup (char **str)
>     if (dot)
>       {
>         cond = str_hash_find_n (aarch64_cond_hsh, dot + 1, end - dot - 1);
> -      if (cond)
> -	{
> -	  inst.cond = cond->value;
> -	  *str = end;
> -	}
> -      else
> -	{
> -	  *str = dot;
> -	  return 0;
> -	}
> +      if (!cond)
> +	return 0;
> +      inst.cond = cond->value;
>         len = dot - base;
>       }
>     else
> -    {
> -      *str = end;
> -      len = end - base;
> -    }
> +    len = end - base;
>   
>     if (inst.cond == COND_ALWAYS)
>       {
> @@ -7870,7 +7853,6 @@ dump_opcode_operands (const aarch64_opcode *opcode)
>   void
>   md_assemble (char *str)
>   {
> -  char *p = str;
>     templates *template;
>     const aarch64_opcode *opcode;
>     aarch64_inst *inst_base;
> @@ -7893,14 +7875,28 @@ md_assemble (char *str)
>     DEBUG_TRACE ("==============================");
>     DEBUG_TRACE ("Enter md_assemble with %s", str);
>   
> -  template = opcode_lookup (&p);
> +  /* Scan up to the end of the mnemonic, which must end in whitespace,
> +     '.', or end of string.  */
> +  char *p = str;
> +  char *dot = 0;
> +  for (; is_part_of_name (*p); p++)
> +    if (*p == '.' && !dot)
> +      dot = p;
> +
> +  if (p == str)
> +    {
> +      as_bad (_("unknown mnemonic -- `%s'"), str);
> +      return;
> +    }
> +
> +  if (!dot && create_register_alias (str, p))
> +    return;
> +
> +  template = opcode_lookup (str, dot, p);
>     if (!template)
>       {
> -      /* It wasn't an instruction, but it might be a register alias of
> -         the form alias .req reg directive.  */
> -      if (!create_register_alias (str, p))
> -	as_bad (_("unknown mnemonic `%s' -- `%s'"), get_mnemonic_name (str),
> -		str);
> +      as_bad (_("unknown mnemonic `%s' -- `%s'"), get_mnemonic_name (str),
> +	      str);
>         return;
>       }
>   
> diff --git a/gas/testsuite/gas/aarch64/register_aliases.d b/gas/testsuite/gas/aarch64/register_aliases.d
> index eab63870dee..8d614b47606 100644
> --- a/gas/testsuite/gas/aarch64/register_aliases.d
> +++ b/gas/testsuite/gas/aarch64/register_aliases.d
> @@ -10,3 +10,4 @@ Disassembly of section \.text:
>      8:	f94003b1 	ldr	x17, \[x29\]
>      c:	f90003b0 	str	x16, \[x29\]
>     10:	f94003b1 	ldr	x17, \[x29\]
> +  14:	f900001f 	str	xzr, \[x0\]
> diff --git a/gas/testsuite/gas/aarch64/register_aliases.s b/gas/testsuite/gas/aarch64/register_aliases.s
> index fcd065078bb..856be5699ce 100644
> --- a/gas/testsuite/gas/aarch64/register_aliases.s
> +++ b/gas/testsuite/gas/aarch64/register_aliases.s
> @@ -3,9 +3,10 @@
>   	fp 	.req 	x29
>   	ip0	.req 	x16
>   	ip1	.req 	x17
> +	zero	.req	xzr
>   	add	ip0, ip0, lr
>   	str 	ip0, [fp]
>   	ldr	ip1, [fp]
>   	str 	IP0, [fp]
>   	ldr	IP1, [fp]
> -
> +	str	zero, [x0]
> diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.d b/gas/testsuite/gas/aarch64/register_aliases_invalid.d
> new file mode 100644
> index 00000000000..7c453ce02b8
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.d
> @@ -0,0 +1 @@
> +#error_output: register_aliases_invalid.l
> diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.l b/gas/testsuite/gas/aarch64/register_aliases_invalid.l
> new file mode 100644
> index 00000000000..6350049df74
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.l
> @@ -0,0 +1,3 @@
> +.*:
> +.*: Error: unknown mnemonic `lr\.req' -- `lr\.req x30'
> +.*: Error: unknown mnemonic `lr\.a' -- `lr\.a .req x30'
> diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.s b/gas/testsuite/gas/aarch64/register_aliases_invalid.s
> new file mode 100644
> index 00000000000..2df2eaab4d6
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.s
> @@ -0,0 +1,2 @@
> +lr.req 	x30
> +lr.a	.req	x30
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-11-30 12:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-30 12:16 [PATCH] aarch64: Check for register aliases before mnemonics Richard Sandiford
2021-11-30 12:25 ` Richard Earnshaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).