public inbox for binutils-cvs@sourceware.org
 help / color / mirror / Atom feed
* [binutils-gdb] x86: accept whitespace around prefix separator
@ 2024-07-26  6:01 Jan Beulich
  0 siblings, 0 replies; only message in thread
From: Jan Beulich @ 2024-07-26  6:01 UTC (permalink / raw)
  To: binutils-cvs

https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=c97f0d71ea9cc5c3bf63aacef34b5ebc95dd3b9a

commit c97f0d71ea9cc5c3bf63aacef34b5ebc95dd3b9a
Author: Jan Beulich <jbeulich@suse.com>
Date:   Fri Jul 26 07:59:53 2024 +0200

    x86: accept whitespace around prefix separator
    
    ... and prediction suffix comma. Contrary to what is documented, /**/
    comments currently aren't really converted to a single space, at least
    not for x86 in its most common configurations. That'll be fixed
    subsequently, at which point blanks may appear where so far none were
    expected.
    Furthermore not permitting blanks around these separators wasn't quite
    logical anyway - such constructs are composite ones, and hence
    components ought to have been permitted to be separated by whitespace
    from the very beginning. Furthermore note how, due to the scrubber being
    overly aggressive in removing whitespace, some similar constructs with a
    prefix were already accepted.
    
    Note how certain other checks in parse_insn() can be simplified as a
    result.
    
    While there, for the prediction suffix also make the checks
    case-insensitive and check for a proper trailing separator.

Diff:
---
 gas/config/tc-i386.c               | 49 +++++++++++++++++++++++---------------
 gas/testsuite/gas/i386/i386.exp    |  1 +
 gas/testsuite/gas/i386/separator.d | 27 +++++++++++++++++++++
 gas/testsuite/gas/i386/separator.s | 41 +++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 19 deletions(-)

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 109fb7eb84c..68c35fbdd33 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7957,6 +7957,8 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 
   while (1)
     {
+      const char *split;
+
       mnem_p = mnemonic;
       /* Pseudo-prefixes start with an opening figure brace.  */
       if ((*mnem_p = *l) == '{')
@@ -7981,9 +7983,10 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 	    }
 	  l++;
 	}
-      /* Pseudo-prefixes end with a closing figure brace.  */
-      if (*mnemonic == '{' && is_space_char (*l))
+      split = l;
+      if (is_space_char (*l))
 	++l;
+      /* Pseudo-prefixes end with a closing figure brace.  */
       if (*mnemonic == '{' && *l == '}')
 	{
 	  *mnem_p++ = *l++;
@@ -7991,12 +7994,10 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 	    goto too_long;
 	  *mnem_p = '\0';
 
-	  /* Point l at the closing brace if there's no other separator.  */
-	  if (*l != END_OF_INSN && !is_space_char (*l)
-	      && *l != PREFIX_SEPARATOR)
-	    --l;
+	  if (is_space_char (*l))
+	    ++l;
 	}
-      else if (!is_space_char (*l)
+      else if (l == split
 	       && *l != END_OF_INSN
 	       && (intel_syntax
 		   || (*l != PREFIX_SEPARATOR && *l != ',')))
@@ -8004,7 +8005,7 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 	  if (mode != parse_all)
 	    break;
 	  as_bad (_("invalid character %s in mnemonic"),
-		  output_invalid (*l));
+		  output_invalid (*split));
 	  return NULL;
 	}
       if (token_start == l)
@@ -8020,7 +8021,6 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
       op_lookup (mnemonic);
 
       if (*l != END_OF_INSN
-	  && (!is_space_char (*l) || l[1] != END_OF_INSN)
 	  && current_templates.start
 	  && current_templates.start->opcode_modifier.isprefix)
 	{
@@ -8142,7 +8142,10 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 		}
 	    }
 	  /* Skip past PREFIX_SEPARATOR and reset token_start.  */
-	  token_start = ++l;
+	  l += (!intel_syntax && *l == PREFIX_SEPARATOR);
+	  if (is_space_char (*l))
+	    ++l;
+	  token_start = l;
 	}
       else
 	break;
@@ -8234,8 +8237,7 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 		}
 	      /* For compatibility reasons accept MOVSD and CMPSD without
 	         operands even in AT&T mode.  */
-	      else if (*l == END_OF_INSN
-		       || (is_space_char (*l) && l[1] == END_OF_INSN))
+	      else if (*l == END_OF_INSN)
 		{
 		  mnem_p[-1] = '\0';
 		  op_lookup (mnemonic);
@@ -8277,8 +8279,9 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
       l += length;
     }
 
-  if (current_templates.start->opcode_modifier.jump == JUMP
-      || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
+  if ((current_templates.start->opcode_modifier.jump == JUMP
+       || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
+      && *l == ',')
     {
       /* Check for a branch hint.  We allow ",pt" and ",pn" for
 	 predict taken and predict not taken respectively.
@@ -8286,21 +8289,29 @@ parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
 	 and jcxz insns (JumpByte) for current Pentium4 chips.  They
 	 may work in the future and it doesn't hurt to accept them
 	 now.  */
-      if (l[0] == ',' && l[1] == 'p')
+      token_start = l++;
+      if (is_space_char (*l))
+	++l;
+      if (TOLOWER (*l) == 'p' && ISALPHA (l[1])
+	  && (l[2] == END_OF_INSN || is_space_char (l[2])))
 	{
-	  if (l[2] == 't')
+	  if (TOLOWER (l[1]) == 't')
 	    {
 	      if (!add_prefix (DS_PREFIX_OPCODE))
 		return NULL;
-	      l += 3;
+	      l += 2;
 	    }
-	  else if (l[2] == 'n')
+	  else if (TOLOWER (l[1]) == 'n')
 	    {
 	      if (!add_prefix (CS_PREFIX_OPCODE))
 		return NULL;
-	      l += 3;
+	      l += 2;
 	    }
+	  else
+	    l = token_start;
 	}
+      else
+	l = token_start;
     }
   /* Any other comma loses.  */
   if (*l == ',')
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index facb4ed598c..bf9cb2637bd 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -106,6 +106,7 @@ if [gas_32_check] then {
     run_list_test "equ-2" "-al"
     run_list_test "equ-bad"
     run_dump_test "curly"
+    run_dump_test "separator"
     run_dump_test "divide"
     run_dump_test "quoted"
     run_dump_test "quoted2"
diff --git a/gas/testsuite/gas/i386/separator.d b/gas/testsuite/gas/i386/separator.d
new file mode 100644
index 00000000000..712215aa785
--- /dev/null
+++ b/gas/testsuite/gas/i386/separator.d
@@ -0,0 +1,27 @@
+#objdump: -dw
+#name: whitespace around special separators
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <separators>:
+[ 	]*[a-f0-9]+:	3e 72 fd +	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 72 fd +	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 72 fd +	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 72 fd +	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 72 fd +	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 0f 82 f9 ff ff ff 	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 0f 82 f9 ff ff ff 	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 0f 82 f9 ff ff ff 	j[cb],pt .*
+[ 	]*[a-f0-9]+:	3e 0f 82 f9 ff ff ff 	j[cb],pt .*
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+[ 	]*[a-f0-9]+:	65 f7 d8 +	gs neg %eax
+#pass
diff --git a/gas/testsuite/gas/i386/separator.s b/gas/testsuite/gas/i386/separator.s
new file mode 100644
index 00000000000..edac0738f1e
--- /dev/null
+++ b/gas/testsuite/gas/i386/separator.s
@@ -0,0 +1,41 @@
+	.text
+separators:
+	jc,pt		.
+	jc ,pt		.
+	jc, pt		.
+	jc , pt		.
+	jc/**/,/**/pt	.
+
+	{disp32} jc,pt	.
+	{disp32} jc ,pt .
+	{disp32} jc, pt .
+	{disp32} jc , pt .
+
+	# Which block to use depends on whether / starts a comment.
+	.ifeq 1/2
+
+	gs/neg	%eax
+	gs /neg	%eax
+	gs/ neg	%eax
+	gs / neg %eax
+	gs/**///**/neg %eax
+
+	{disp32} gs/neg %eax
+	{disp32} gs /neg %eax
+	{disp32} gs/ neg %eax
+	{disp32} gs / neg %eax
+
+	.else
+
+	gs\neg	%eax
+	gs \neg	%eax
+	gs\ neg	%eax
+	gs \ neg %eax
+	gs/**/\/**/neg %eax
+
+	{disp32} gs\neg %eax
+	{disp32} gs \neg %eax
+	{disp32} gs\ neg %eax
+	{disp32} gs \ neg %eax
+
+	.endif

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-07-26  6:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-26  6:01 [binutils-gdb] x86: accept whitespace around prefix separator Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).