public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 0/2] x86: lexical meaning of characters
@ 2023-05-19 13:29 Jan Beulich
  2023-05-19 13:30 ` [PATCH 1/2] x86: de-duplicate operand_special_chars[] wrt extra_symbol_chars[] Jan Beulich
  2023-05-19 13:31 ` [PATCH 2/2] x86: figure braces aren't really part of mnemonics Jan Beulich
  0 siblings, 2 replies; 3+ messages in thread
From: Jan Beulich @ 2023-05-19 13:29 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

While adding the missing '=' I've noticed two more aspects I
wanted to deal with, but separately.

1: de-duplicate operand_special_chars[] wrt extra_symbol_chars[]
2: figure braces aren't really part of mnemonics

Jan

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] x86: de-duplicate operand_special_chars[] wrt extra_symbol_chars[]
  2023-05-19 13:29 [PATCH 0/2] x86: lexical meaning of characters Jan Beulich
@ 2023-05-19 13:30 ` Jan Beulich
  2023-05-19 13:31 ` [PATCH 2/2] x86: figure braces aren't really part of mnemonics Jan Beulich
  1 sibling, 0 replies; 3+ messages in thread
From: Jan Beulich @ 2023-05-19 13:30 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Having to add characters to both arrays can easily lead to oversights.
Consuming extra_symbol_chars[] when populating operand_chars[] also
allows dropping two special cases in md_begin().

Constify operand_special_chars[] on this occasion.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -546,8 +546,9 @@ static char operand_chars[256];
 #define is_register_char(x) (register_chars[(unsigned char) x])
 #define is_space_char(x) ((x) == ' ')
 
-/* All non-digit non-letter characters that may occur in an operand.  */
-static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!=:[@]";
+/* All non-digit non-letter characters that may occur in an operand and
+   which aren't already in extra_symbol_chars[].  */
+static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";
 
 /* md_assemble() always leaves the strings it's passed unaltered.  To
    effect this we maintain a stack of saved characters that we've smashed
@@ -3068,7 +3069,7 @@ md_begin (void)
   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
   {
     int c;
-    char *p;
+    const char *p;
 
     for (c = 0; c < 256; c++)
       {
@@ -3085,10 +3086,7 @@ md_begin (void)
 	    operand_chars[c] = c;
 	  }
 	else if (c == '{' || c == '}')
-	  {
-	    mnemonic_chars[c] = c;
-	    operand_chars[c] = c;
-	  }
+	  mnemonic_chars[c] = c;
 #ifdef SVR4_COMMENT_CHARS
 	else if (c == '\\' && strchr (i386_comment_chars, '/'))
 	  operand_chars[c] = c;
@@ -3098,13 +3096,12 @@ md_begin (void)
 	  operand_chars[c] = c;
       }
 
-#ifdef LEX_QM
-    operand_chars['?'] = '?';
-#endif
     mnemonic_chars['_'] = '_';
     mnemonic_chars['-'] = '-';
     mnemonic_chars['.'] = '.';
 
+    for (p = extra_symbol_chars; *p != '\0'; p++)
+      operand_chars[(unsigned char) *p] = *p;
     for (p = operand_special_chars; *p != '\0'; p++)
       operand_chars[(unsigned char) *p] = *p;
   }


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/2] x86: figure braces aren't really part of mnemonics
  2023-05-19 13:29 [PATCH 0/2] x86: lexical meaning of characters Jan Beulich
  2023-05-19 13:30 ` [PATCH 1/2] x86: de-duplicate operand_special_chars[] wrt extra_symbol_chars[] Jan Beulich
@ 2023-05-19 13:31 ` Jan Beulich
  1 sibling, 0 replies; 3+ messages in thread
From: Jan Beulich @ 2023-05-19 13:31 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Rather, they are separators for pseudo-prefixes. Don't insert them in
mnemonic_chars[]; handle them explicitly in parse_insn() instead. Note
that this eliminates the need for another separator after a pseudo-
prefix. While maybe not overly interesting for a following real
mnemonic, I view this as quite desirable between multiple successive
pseudo-prefixes (bringing things in line with the other use of figure
braces in AVX512's zeroing-masking).

Drop the unused is_mnemonic_char() on this occasion.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -541,7 +541,6 @@ static char register_chars[256];
 static char operand_chars[256];
 
 /* Lexical macros.  */
-#define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 #define is_register_char(x) (register_chars[(unsigned char) x])
 #define is_space_char(x) ((x) == ' ')
@@ -3085,8 +3084,6 @@ md_begin (void)
 	    register_chars[c] = mnemonic_chars[c];
 	    operand_chars[c] = c;
 	  }
-	else if (c == '{' || c == '}')
-	  mnemonic_chars[c] = c;
 #ifdef SVR4_COMMENT_CHARS
 	else if (c == '\\' && strchr (i386_comment_chars, '/'))
 	  operand_chars[c] = c;
@@ -5476,6 +5473,12 @@ parse_insn (const char *line, char *mnem
   while (1)
     {
       mnem_p = mnemonic;
+      /* Pseudo-prefixes start with an opening figure brace.  */
+      if ((*mnem_p = *l) == '{')
+	{
+	  ++mnem_p;
+	  ++l;
+	}
       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
 	{
 	  if (*mnem_p == '.')
@@ -5483,16 +5486,29 @@ parse_insn (const char *line, char *mnem
 	  mnem_p++;
 	  if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
 	    {
+	    too_long:
 	      as_bad (_("no such instruction: `%s'"), token_start);
 	      return NULL;
 	    }
 	  l++;
 	}
-      if (!is_space_char (*l)
-	  && *l != END_OF_INSN
-	  && (intel_syntax
-	      || (*l != PREFIX_SEPARATOR
-		  && *l != ',')))
+      /* Pseudo-prefixes end with a closing figure brace.  */
+      if (*mnemonic == '{' && *l == '}')
+	{
+	  *mnem_p++ = *l++;
+	  if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
+	    goto too_long;
+	  *mnem_p = '\0';
+
+	  /* Point l at the closing brace if there's no other separator.  */
+	  if (*l != END_OF_INSN && !is_space_char (*l)
+	      && *l != PREFIX_SEPARATOR)
+	    --l;
+	}
+      else if (!is_space_char (*l)
+	       && *l != END_OF_INSN
+	       && (intel_syntax
+		   || (*l != PREFIX_SEPARATOR && *l != ',')))
 	{
 	  if (prefix_only)
 	    break;


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-05-19 13:31 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-19 13:29 [PATCH 0/2] x86: lexical meaning of characters Jan Beulich
2023-05-19 13:30 ` [PATCH 1/2] x86: de-duplicate operand_special_chars[] wrt extra_symbol_chars[] Jan Beulich
2023-05-19 13:31 ` [PATCH 2/2] x86: figure braces aren't really part of mnemonics Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).