public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 0/8] x86: mnemonic and register string literals
@ 2023-01-13 11:04 Jan Beulich
  2023-01-13 11:05 ` [PATCH 1/8] x86: abstract out obtaining of a template's mnemonic Jan Beulich
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:04 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

There are some inefficiencies there, which the series tries to improve
on. Likely there are further gains to be had, some pointed out in post-
commit-message remarks.

1: abstract out obtaining of a template's mnemonic
2: move insn mnemonics to a separate table
3: re-use insn mnemonic strings as much as possible
4: absorb allocation in i386-gen
5: avoid strcmp() in a few places
6: embed register names in reg_entry
7: embed register and alike names in disassembler
8: split i386-gen's opcode hash entry struct

Jan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/8] x86: abstract out obtaining of a template's mnemonic
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
@ 2023-01-13 11:05 ` Jan Beulich
  2023-01-13 11:06 ` [PATCH 1/8] x86: move insn mnemonics to a separate table Jan Beulich
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:05 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

In preparation for changing the representation of the "name" field
introduce a wrapper function. This keeps the mechanical change separate
from the functional one.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -2426,6 +2426,11 @@ offset_in_range (offsetT val, int size)
   return val & mask;
 }
 
+static INLINE const char *insn_name (const insn_template *t)
+{
+  return t->name;
+}
+
 enum PREFIX_GROUP
 {
   PREFIX_EXIST = 0,
@@ -2989,8 +2994,8 @@ md_begin (void)
     (void) sizeof (sets == &current_templates->start);
     (void) sizeof (end == &current_templates->end);
     for (; sets < end; ++sets)
-      if (str_hash_insert (op_hash, (*sets)->name, sets, 0))
-	as_fatal (_("duplicate %s"), (*sets)->name);
+      if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
+	as_fatal (_("duplicate %s"), insn_name (*sets));
   }
 
   /* Initialize reg_hash hash table.  */
@@ -3809,7 +3814,7 @@ get_broadcast_bytes (const insn_template
 
   if (diag)
     as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
-	     t->name, bytes * 8);
+	     insn_name (t), bytes * 8);
 
   return bytes;
 }
@@ -4030,7 +4035,7 @@ check_hle (void)
     case PrefixNoTrack:
     case PrefixRep:
       as_bad (_("invalid instruction `%s' after `%s'"),
-	      i.tm.name, i.hle_prefix);
+	      insn_name (&i.tm), i.hle_prefix);
       return 0;
     case PrefixHLELock:
       if (i.prefix[LOCK_PREFIX])
@@ -4043,13 +4048,13 @@ check_hle (void)
       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
 	{
 	  as_bad (_("instruction `%s' after `xacquire' not allowed"),
-		  i.tm.name);
+		  insn_name (&i.tm));
 	  return 0;
 	}
       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
 	{
 	  as_bad (_("memory destination needed for instruction `%s'"
-		    " after `xrelease'"), i.tm.name);
+		    " after `xrelease'"), insn_name (&i.tm));
 	  return 0;
 	}
       return 1;
@@ -4544,7 +4549,7 @@ load_insn_p (void)
 	return 0;
 
       /* pop.   */
-      if (strcmp (i.tm.name, "pop") == 0)
+      if (strcmp (insn_name (&i.tm), "pop") == 0)
 	return 1;
     }
 
@@ -4723,7 +4728,7 @@ insert_lfence_after (void)
 	  && i.prefix[REP_PREFIX])
 	{
 	    as_warn (_("`%s` changes flags which would affect control flow behavior"),
-		     i.tm.name);
+		     insn_name (&i.tm));
 	}
       char *p = frag_more (3);
       *p++ = 0xf;
@@ -4765,7 +4770,7 @@ insert_lfence_before (void)
 	       && lfence_before_indirect_branch != lfence_branch_register)
 	{
 	  as_warn (_("indirect `%s` with memory operand should be avoided"),
-		   i.tm.name);
+		   insn_name (&i.tm));
 	  return;
 	}
       else
@@ -4776,7 +4781,7 @@ insert_lfence_before (void)
 	{
 	  as_warn_where (last_insn.file, last_insn.line,
 			 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
-			 last_insn.name, i.tm.name);
+			 last_insn.name, insn_name (&i.tm));
 	  return;
 	}
 
@@ -4797,7 +4802,7 @@ insert_lfence_before (void)
 	{
 	  as_warn_where (last_insn.file, last_insn.line,
 			 _("`%s` skips -mlfence-before-ret on `%s`"),
-			 last_insn.name, i.tm.name);
+			 last_insn.name, insn_name (&i.tm));
 	  return;
 	}
 
@@ -5014,7 +5019,7 @@ md_assemble (char *line)
 	  copy = NULL;
   no_match:
 	  pass1_err = i.error;
-	  pass1_mnem = current_templates->start->name;
+	  pass1_mnem = insn_name (current_templates->start);
 	  goto retry;
 	}
 
@@ -5057,11 +5062,11 @@ md_assemble (char *line)
 	  break;
 	case unsupported:
 	  as_bad (_("unsupported instruction `%s'"),
-		  pass1_mnem ? pass1_mnem : current_templates->start->name);
+		  pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
 	  return;
 	case unsupported_on_arch:
 	  as_bad (_("`%s' is not supported on `%s%s'"),
-		  pass1_mnem ? pass1_mnem : current_templates->start->name,
+		  pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
 		  cpu_arch_name ? cpu_arch_name : default_arch,
 		  cpu_sub_arch_name ? cpu_sub_arch_name : "");
 	  return;
@@ -5070,23 +5075,22 @@ md_assemble (char *line)
 	    {
 	      if (flag_code == CODE_64BIT)
 		as_bad (_("`%s%c' is not supported in 64-bit mode"),
-			pass1_mnem ? pass1_mnem : current_templates->start->name,
+			pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
 			mnem_suffix);
 	      else
 		as_bad (_("`%s%c' is only supported in 64-bit mode"),
-			pass1_mnem ? pass1_mnem : current_templates->start->name,
+			pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
 			mnem_suffix);
 	    }
 	  else
 	    {
 	      if (flag_code == CODE_64BIT)
 		as_bad (_("`%s' is not supported in 64-bit mode"),
-			pass1_mnem ? pass1_mnem : current_templates->start->name);
+			pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
 	      else
 		as_bad (_("`%s' is only supported in 64-bit mode"),
-			pass1_mnem ? pass1_mnem : current_templates->start->name);
+			pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
 	    }
-		  
 	  return;
 	case invalid_sib_address:
 	  err_msg = _("invalid SIB address");
@@ -5129,7 +5133,7 @@ md_assemble (char *line)
 	  break;
 	}
       as_bad (_("%s for `%s'"), err_msg,
-	      pass1_mnem ? pass1_mnem : current_templates->start->name);
+	      pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
       return;
     }
 
@@ -5156,7 +5160,7 @@ md_assemble (char *line)
       if (j >= t->operands && simd)
 	(sse_check == check_warning
 	 ? as_warn
-	 : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
+	 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
     }
 
   if (i.tm.opcode_modifier.fwait)
@@ -5167,7 +5171,7 @@ md_assemble (char *line)
   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
     {
       as_bad (_("invalid instruction `%s' after `%s'"),
-		i.tm.name, i.rep_prefix);
+		insn_name (&i.tm), i.rep_prefix);
       return;
     }
 
@@ -5190,7 +5194,7 @@ md_assemble (char *line)
       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
       if (i.prefix[DATA_PREFIX])
 	{
-	  as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
+	  as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
 	  return;
 	}
 
@@ -5202,7 +5206,7 @@ md_assemble (char *line)
 	  case BFD_RELOC_386_TLS_LE_32:
 	  case BFD_RELOC_X86_64_GOTTPOFF:
 	  case BFD_RELOC_X86_64_TLSLD:
-	    as_bad (_("TLS relocation cannot be used with `%s'"), i.tm.name);
+	    as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
 	    return;
 	  default:
 	    break;
@@ -5259,7 +5263,7 @@ md_assemble (char *line)
 	  || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
     {
       as_bad (_("input/output port address isn't allowed with `%s'"),
-	      i.tm.name);
+	      insn_name (&i.tm));
       return;
     }
 
@@ -5275,7 +5279,7 @@ md_assemble (char *line)
   /* Check if IP-relative addressing requirements can be satisfied.  */
   if (i.tm.cpu_flags.bitfield.cpuprefetchi
       && !(i.base_reg && i.base_reg->reg_num == RegIP))
-    as_warn (_("'%s' only supports RIP-relative address"), i.tm.name);
+    as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
 
   /* Update operand types and check extended states.  */
   for (j = 0; j < i.operands; j++)
@@ -5329,7 +5333,7 @@ md_assemble (char *line)
   else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
     {
       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
-      as_warn (_("translating to `%sp'"), i.tm.name);
+      as_warn (_("translating to `%sp'"), insn_name (&i.tm));
     }
 
   if (is_any_vex_encoding (&i.tm))
@@ -5337,14 +5341,14 @@ md_assemble (char *line)
       if (!cpu_arch_flags.bitfield.cpui286)
 	{
 	  as_bad (_("instruction `%s' isn't supported outside of protected mode."),
-		  i.tm.name);
+		  insn_name (&i.tm));
 	  return;
 	}
 
       /* Check for explicit REX prefix.  */
       if (i.prefix[REX_PREFIX] || i.rex_encoding)
 	{
-	  as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
+	  as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
 	  return;
 	}
 
@@ -5454,7 +5458,7 @@ md_assemble (char *line)
   if (i.tm.opcode_modifier.isprefix)
     {
       last_insn.kind = last_insn_prefix;
-      last_insn.name = i.tm.name;
+      last_insn.name = insn_name (&i.tm);
       last_insn.file = as_where (&last_insn.line);
     }
   else
@@ -5535,7 +5539,7 @@ parse_insn (const char *line, char *mnem
 	      as_bad ((flag_code != CODE_64BIT
 		       ? _("`%s' is only supported in 64-bit mode")
 		       : _("`%s' is not supported in 64-bit mode")),
-		      current_templates->start->name);
+		      insn_name (current_templates->start));
 	      return NULL;
 	    }
 	  /* If we are in 16-bit mode, do not allow addr16 or data16.
@@ -5547,7 +5551,7 @@ parse_insn (const char *line, char *mnem
 		  ^ (flag_code == CODE_16BIT)))
 	    {
 	      as_bad (_("redundant %s prefix"),
-		      current_templates->start->name);
+		      insn_name (current_templates->start));
 	      return NULL;
 	    }
 
@@ -5609,15 +5613,15 @@ parse_insn (const char *line, char *mnem
 		  return NULL;
 		case PREFIX_DS:
 		  if (current_templates->start->cpu_flags.bitfield.cpuibt)
-		    i.notrack_prefix = current_templates->start->name;
+		    i.notrack_prefix = insn_name (current_templates->start);
 		  break;
 		case PREFIX_REP:
 		  if (current_templates->start->cpu_flags.bitfield.cpuhle)
-		    i.hle_prefix = current_templates->start->name;
+		    i.hle_prefix = insn_name (current_templates->start);
 		  else if (current_templates->start->cpu_flags.bitfield.cpumpx)
-		    i.bnd_prefix = current_templates->start->name;
+		    i.bnd_prefix = insn_name (current_templates->start);
 		  else
-		    i.rep_prefix = current_templates->start->name;
+		    i.rep_prefix = insn_name (current_templates->start);
 		  break;
 		default:
 		  break;
@@ -6792,8 +6796,8 @@ match_template (char mnem_suffix)
 	  && !cpu_arch_flags.bitfield.cpui386
 	  && (intel_syntax
 	      ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
-		 && !intel_float_operand (t->name))
-	      : intel_float_operand (t->name) != 2)
+		 && !intel_float_operand (insn_name (t)))
+	      : intel_float_operand (insn_name (t)) != 2)
 	  && (t->operands == i.imm_operands
 	      || (operand_types[i.imm_operands].bitfield.class != RegMMX
 	       && operand_types[i.imm_operands].bitfield.class != RegSIMD
@@ -7103,14 +7107,14 @@ match_template (char mnem_suffix)
     {
       if (!intel_syntax
 	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
-	as_warn (_("indirect %s without `*'"), t->name);
+	as_warn (_("indirect %s without `*'"), insn_name (t));
 
       if (t->opcode_modifier.isprefix
 	  && t->opcode_modifier.mnemonicsize == IGNORESIZE)
 	{
 	  /* Warn them that a data or address size prefix doesn't
 	     affect assembly of the next line of code.  */
-	  as_warn (_("stand-alone `%s' prefix"), t->name);
+	  as_warn (_("stand-alone `%s' prefix"), insn_name (t));
 	}
     }
 
@@ -7170,7 +7174,7 @@ check_string (void)
   if (i.seg[op] != NULL && i.seg[op] != reg_es)
     {
       as_bad (_("`%s' operand %u must use `%ses' segment"),
-	      i.tm.name,
+	      insn_name (&i.tm),
 	      intel_syntax ? i.tm.operands - es_op : es_op + 1,
 	      register_prefix);
       return 0;
@@ -7309,7 +7313,7 @@ process_suffix (void)
 	  /* Warn about changed behavior for segment register push/pop.  */
 	  else if ((i.tm.base_opcode | 1) == 0x07)
 	    as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
-		     i.tm.name);
+		     insn_name (&i.tm));
 	}
     }
   else if (!i.suffix
@@ -7425,13 +7429,13 @@ process_suffix (void)
 	      && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
 		  || operand_check == check_error))
 	    {
-	      as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
+	      as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
 	      return 0;
 	    }
 	  if (operand_check == check_error)
 	    {
 	      as_bad (_("no instruction mnemonic suffix given and "
-			"no register operands; can't size `%s'"), i.tm.name);
+			"no register operands; can't size `%s'"), insn_name (&i.tm));
 	      return 0;
 	    }
 	  if (operand_check == check_warning)
@@ -7440,7 +7444,7 @@ process_suffix (void)
 		       ? _("ambiguous operand size")
 		       : _("no instruction mnemonic suffix given and "
 			   "no register operands"),
-		       i.tm.name);
+		       insn_name (&i.tm));
 
 	  if (i.tm.opcode_modifier.floatmf)
 	    i.suffix = SHORT_MNEM_SUFFIX;
@@ -7578,7 +7582,7 @@ process_suffix (void)
 	      && i.op[0].regs->reg_type.bitfield.word)
 	    {
 	      as_bad (_("16-bit addressing unavailable for `%s'"),
-		      i.tm.name);
+		      insn_name (&i.tm));
 	      return 0;
 	    }
 
@@ -7646,7 +7650,7 @@ process_suffix (void)
 		}
 
 	      as_bad (_("invalid register operand size for `%s'"),
-		      i.tm.name);
+		      insn_name (&i.tm));
 	      return 0;
 	    }
 	}
@@ -7687,7 +7691,7 @@ check_byte_reg (void)
       /* Any other register is bad.  */
       as_bad (_("`%s%s' not allowed with `%s%c'"),
 	      register_prefix, i.op[op].regs->reg_name,
-	      i.tm.name, i.suffix);
+	      insn_name (&i.tm), i.suffix);
       return 0;
     }
   return 1;
@@ -7713,7 +7717,7 @@ check_long_reg (void)
 	as_bad (_("`%s%s' not allowed with `%s%c'"),
 		register_prefix,
 		i.op[op].regs->reg_name,
-		i.tm.name,
+		insn_name (&i.tm),
 		i.suffix);
 	return 0;
       }
@@ -7761,7 +7765,7 @@ check_qword_reg (void)
 	as_bad (_("`%s%s' not allowed with `%s%c'"),
 		register_prefix,
 		i.op[op].regs->reg_name,
-		i.tm.name,
+		insn_name (&i.tm),
 		i.suffix);
 	return 0;
       }
@@ -7800,7 +7804,7 @@ check_word_reg (void)
 	as_bad (_("`%s%s' not allowed with `%s%c'"),
 		register_prefix,
 		i.op[op].regs->reg_name,
-		i.tm.name,
+		insn_name (&i.tm),
 		i.suffix);
 	return 0;
       }
@@ -8057,7 +8061,7 @@ process_operands (void)
 		 register_prefix, i.op[1].regs->reg_name,
 		 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
 		 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
-		 i.tm.name);
+		 insn_name (&i.tm));
     }
   else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
     {
@@ -8097,7 +8101,7 @@ process_operands (void)
 	    && i.op[0].regs->reg_num < 4)
 	{
 	  as_bad (_("you can't `%s %s%s'"),
-		  i.tm.name, register_prefix, i.op[0].regs->reg_name);
+		  insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
 	  return 0;
 	}
       if (i.op[0].regs->reg_num > 3
@@ -8138,13 +8142,13 @@ process_operands (void)
 	  if (i.operands != 2)
 	    {
 	      /* Extraneous `l' suffix on fp insn.  */
-	      as_warn (_("translating to `%s %s%s'"), i.tm.name,
+	      as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
 		       register_prefix, i.op[0].regs->reg_name);
 	    }
 	  else if (i.op[0].regs->reg_type.bitfield.instance != Accum)
 	    {
 	      /* Reversed arguments on faddp or fmulp.  */
-	      as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
+	      as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
 		       register_prefix, i.op[!intel_syntax].regs->reg_name,
 		       register_prefix, i.op[intel_syntax].regs->reg_name);
 	    }
@@ -8157,7 +8161,7 @@ process_operands (void)
       && !is_any_vex_encoding(&i.tm))
     {
       if (!quiet_warnings)
-	as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
+	as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
       if (optimize)
 	{
 	  i.seg[0] = NULL;
@@ -8889,7 +8893,7 @@ output_branch (void)
     }
 
   if (i.prefixes != 0)
-    as_warn (_("skipping prefixes on `%s'"), i.tm.name);
+    as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
 
   /* It's always a symbol;  End frag & setup for relax.
      Make sure there is enough room in this frag for the largest
@@ -9037,7 +9041,7 @@ output_jump (void)
     }
 
   if (i.prefixes != 0)
-    as_warn (_("skipping prefixes on `%s'"), i.tm.name);
+    as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
 
   if (now_seg == absolute_section)
     {
@@ -9119,7 +9123,7 @@ output_interseg_jump (void)
     size = 2;
 
   if (i.prefixes != 0)
-    as_warn (_("skipping prefixes on `%s'"), i.tm.name);
+    as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
 
   if (now_seg == absolute_section)
     {
@@ -9438,7 +9442,7 @@ add_fused_jcc_padding_frag_p (enum mf_cm
       if (flag_debug)
 	as_warn_where (last_insn.file, last_insn.line,
 		       _("`%s` skips -malign-branch-boundary on `%s`"),
-		       last_insn.name, i.tm.name);
+		       last_insn.name, insn_name (&i.tm));
     }
 
   return 0;
@@ -9470,7 +9474,7 @@ add_branch_prefix_frag_p (void)
   if (flag_debug)
     as_warn_where (last_insn.file, last_insn.line,
 		   _("`%s` skips -malign-branch-boundary on `%s`"),
-		   last_insn.name, i.tm.name);
+		   last_insn.name, insn_name (&i.tm));
 
   return 0;
 }
@@ -9559,7 +9563,7 @@ add_branch_padding_frag_p (enum align_br
       if (flag_debug)
 	as_warn_where (last_insn.file, last_insn.line,
 		       _("`%s` skips -malign-branch-boundary on `%s`"),
-		       last_insn.name, i.tm.name);
+		       last_insn.name, insn_name (&i.tm));
       return 0;
     }
 
@@ -9759,10 +9763,10 @@ output_insn (void)
 	  /* Encode lfence, mfence, and sfence as
 	     f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
 	  if (flag_code == CODE_16BIT)
-	    as_bad (_("Cannot convert `%s' in 16-bit mode"), i.tm.name);
+	    as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
 	  else if (omit_lock_prefix)
 	    as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
-		    i.tm.name);
+		    insn_name (&i.tm));
 	  else if (now_seg != absolute_section)
 	    {
 	      offsetT val = 0x240483f0ULL;
@@ -11673,7 +11677,7 @@ i386_att_operand (char *operand_string)
 	    if (i.rounding.type == RC_NamesTable[j].type)
 	      break;
 	  as_bad (_("`%s': misplaced `{%s}'"),
-		  current_templates->start->name, RC_NamesTable[j].name);
+		  insn_name (current_templates->start), RC_NamesTable[j].name);
 	  return 0;
 	}
     }
@@ -11695,7 +11699,7 @@ i386_att_operand (char *operand_string)
       if (i.rounding.type != rc_none)
 	{
 	  as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
-		  current_templates->start->name);
+		  insn_name (current_templates->start));
 	  return 0;
 	}
     }
@@ -11708,7 +11712,7 @@ i386_att_operand (char *operand_string)
 	      && i.op[0].regs->reg_type.bitfield.class != Reg))
 	{
 	  as_bad (_("`%s': misplaced `%s'"),
-		  current_templates->start->name, operand_string);
+		  insn_name (current_templates->start), operand_string);
 	  return 0;
 	}
     }
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -609,7 +609,7 @@ i386_intel_operand (char *operand_string
       if (i.imm_operands)
 	{
 	  as_bad (_("`%s': RC/SAE operand must precede immediate operands"),
-		  current_templates->start->name);
+		  insn_name (current_templates->start));
 	  return 0;
 	}
 
@@ -705,9 +705,9 @@ i386_intel_operand (char *operand_string
 
 	case O_dword_ptr:
 	  i.types[this_operand].bitfield.dword = 1;
-	  if ((current_templates->start->name[0] == 'l'
-	       && current_templates->start->name[2] == 's'
-	       && current_templates->start->name[3] == 0)
+	  if ((insn_name (current_templates->start)[0] == 'l'
+	       && insn_name (current_templates->start)[2] == 's'
+	       && insn_name (current_templates->start)[3] == 0)
 	      || (current_templates->start->opcode_modifier.opcodespace == SPACE_BASE
 		  && current_templates->start->base_opcode == 0x62 /* bound */))
 	    suffix = WORD_MNEM_SUFFIX;
@@ -727,9 +727,9 @@ i386_intel_operand (char *operand_string
 
 	case O_fword_ptr:
 	  i.types[this_operand].bitfield.fword = 1;
-	  if (current_templates->start->name[0] == 'l'
-	      && current_templates->start->name[2] == 's'
-	      && current_templates->start->name[3] == 0)
+	  if (insn_name (current_templates->start)[0] == 'l'
+	      && insn_name (current_templates->start)[2] == 's'
+	      && insn_name (current_templates->start)[3] == 0)
 	    suffix = LONG_MNEM_SUFFIX;
 	  else if (!got_a_float)
 	    {
@@ -990,7 +990,7 @@ i386_intel_operand (char *operand_string
 	    if (i.rounding.type == RC_NamesTable[j].type)
 	      break;
 	  as_bad (_("`%s': misplaced `{%s}'"),
-		  current_templates->start->name, RC_NamesTable[j].name);
+		  insn_name (current_templates->start), RC_NamesTable[j].name);
 	  return 0;
 	}
     }


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/8] x86: move insn mnemonics to a separate table
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
  2023-01-13 11:05 ` [PATCH 1/8] x86: abstract out obtaining of a template's mnemonic Jan Beulich
@ 2023-01-13 11:06 ` Jan Beulich
  2023-01-13 11:12   ` Jan Beulich
  2023-01-13 11:07 ` [PATCH 3/8] x86: re-use insn mnemonic strings as much as possible Jan Beulich
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:06 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Using full pointers to reference the insn mnemonic strings is not very
efficient. With overall string size presently just slightly over 20k,
even a 16-bit value would suffice. Use "unsigned int" for now, as
there's no good use we could presently make of the otherwise saved 16
bits.

For 64-bit builds this reduces table size by 6.25% (prior to the recent
ISA extension additions it would have been 12.5%), with a similar effect
on cache occupation of table entries accessed. For PIE builds of gas
this also reduces the number of base relocations quite a bit (obviously
independent of bitness).
---
An alternative to introducing i386-mnem.h would of course be to put the
#define-s in i386-init.h. That would look like an abuse of the file to
me, but I'd be okay switching to such an approach.

As to further shrinking mnem_off (to 16 bits): I'm intending to drop
i386_opcode_modifier as a separate struct, embedding the fields directly
in insn_template. i386_opcode_modifier presently using only 6 bits from
its 3rd word will allow to shrink insn_template by another word then.
(This would also benefit readabilty of tc-i386*.c, as all the uses of
"opcode_modifier." would go away. This would additionally reduce the
apparent discrepancy between e.g. opcode_modifier.opcode_space and
opcode_modifier.opcode_prefix vs base_opcode and extension_opcode.)

--- a/gas/Makefile.am
+++ b/gas/Makefile.am
@@ -453,7 +453,7 @@ i386_tbl_deps = $(srcdir)/../opcodes/i38
 	$(srcdir)/../opcodes/i386-reg.tbl \
 	$(srcdir)/../opcodes/i386-gen.c $(srcdir)/../opcodes/i386-opc.h
 
-$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h: @MAINT@ $(i386_tbl_deps)
+$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h $(srcdir)/../opcodes/i386%mnem.h: @MAINT@ $(i386_tbl_deps)
 	@echo '"$@" is outdated wrt "$?"' >&2
 	@echo 'Please rebuild from the top level or in $(CURDIR)/../opcodes/' >&2
 	@false
--- a/gas/Makefile.in
+++ b/gas/Makefile.in
@@ -2070,7 +2070,7 @@ development.exp: $(BFDDIR)/development.s
 
 config/tc-i386.o: $(srcdir)/../opcodes/i386-init.h $(srcdir)/../opcodes/i386-tbl.h
 
-$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h: @MAINT@ $(i386_tbl_deps)
+$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h $(srcdir)/../opcodes/i386%mnem.h: @MAINT@ $(i386_tbl_deps)
 	@echo '"$@" is outdated wrt "$?"' >&2
 	@echo 'Please rebuild from the top level or in $(CURDIR)/../opcodes/' >&2
 	@false
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -34,6 +34,7 @@
 #include "sframe.h"
 #include "elf/x86-64.h"
 #include "opcodes/i386-init.h"
+#include "opcodes/i386-mnem.h"
 #include <limits.h>
 
 #ifndef INFER_ADDR_PREFIX
@@ -2428,7 +2429,7 @@ offset_in_range (offsetT val, int size)
 
 static INLINE const char *insn_name (const insn_template *t)
 {
-  return t->name;
+  return &i386_mnemonics[t->mnem_off];
 }
 
 enum PREFIX_GROUP
--- a/opcodes/Makefile.am
+++ b/opcodes/Makefile.am
@@ -523,7 +523,8 @@ MOSTLYCLEANFILES = aarch64-gen$(EXEEXT_F
 	z8kgen$(EXEEXT_FOR_BUILD) opc2c$(EXEEXT_FOR_BUILD)
 
 MAINTAINERCLEANFILES = $(srcdir)/aarch64-asm-2.c $(srcdir)/aarch64-dis-2.c \
-	$(srcdir)/aarch64-opc-2.c $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h \
+	$(srcdir)/aarch64-opc-2.c \
+	$(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h \
 	$(srcdir)/ia64-asmtab.c $(srcdir)/z8k-opc.h \
 	$(srcdir)/msp430-decode.c \
 	$(srcdir)/rl78-decode.c \
@@ -552,16 +553,17 @@ i386-gen.o: i386-gen.c i386-opc.h $(srcd
 	config.h sysdep.h
 	$(COMPILE_FOR_BUILD) -c $(srcdir)/i386-gen.c
 
-# i386-gen will generate both headers in one go.  Use a pattern rule to properly
+# i386-gen will generate all headers in one go.  Use a pattern rule to properly
 # express this, with the inner dash ('-') arbitrarily chosen to be the stem.
-$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h: @MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
+$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h $(srcdir)/i386%mnem.h: \
+		@MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
 	$(AM_V_GEN)$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) - \
 		< $(srcdir)/i386-opc.tbl \
 		| ./i386-gen$(EXEEXT_FOR_BUILD) --srcdir $(srcdir)
 
-# While not really dependencies, specify i386-{init,tbl}.h here as well to
-# make sure they are re-generated as necessary.
-i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h
+# While not really dependencies, specify other generated i386-*.h here as well
+# to make sure they are re-generated as necessary.
+i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h
 
 ia64-gen$(EXEEXT_FOR_BUILD): ia64-gen.o $(BUILD_LIB_DEPS)
 	$(AM_V_CCLD)$(LINK_FOR_BUILD) ia64-gen.o $(BUILD_LIBS)
--- a/opcodes/Makefile.in
+++ b/opcodes/Makefile.in
@@ -758,7 +758,8 @@ MOSTLYCLEANFILES = aarch64-gen$(EXEEXT_F
 	z8kgen$(EXEEXT_FOR_BUILD) opc2c$(EXEEXT_FOR_BUILD)
 
 MAINTAINERCLEANFILES = $(srcdir)/aarch64-asm-2.c $(srcdir)/aarch64-dis-2.c \
-	$(srcdir)/aarch64-opc-2.c $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h \
+	$(srcdir)/aarch64-opc-2.c \
+	$(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h \
 	$(srcdir)/ia64-asmtab.c $(srcdir)/z8k-opc.h \
 	$(srcdir)/msp430-decode.c \
 	$(srcdir)/rl78-decode.c \
@@ -1526,16 +1527,17 @@ i386-gen.o: i386-gen.c i386-opc.h $(srcd
 	config.h sysdep.h
 	$(COMPILE_FOR_BUILD) -c $(srcdir)/i386-gen.c
 
-# i386-gen will generate both headers in one go.  Use a pattern rule to properly
+# i386-gen will generate all headers in one go.  Use a pattern rule to properly
 # express this, with the inner dash ('-') arbitrarily chosen to be the stem.
-$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h: @MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
+$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h $(srcdir)/i386%mnem.h: \
+		@MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
 	$(AM_V_GEN)$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) - \
 		< $(srcdir)/i386-opc.tbl \
 		| ./i386-gen$(EXEEXT_FOR_BUILD) --srcdir $(srcdir)
 
-# While not really dependencies, specify i386-{init,tbl}.h here as well to
-# make sure they are re-generated as necessary.
-i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h
+# While not really dependencies, specify other generated i386-*.h here as well
+# to make sure they are re-generated as necessary.
+i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h
 
 ia64-gen$(EXEEXT_FOR_BUILD): ia64-gen.o $(BUILD_LIB_DEPS)
 	$(AM_V_CCLD)$(LINK_FOR_BUILD) ia64-gen.o $(BUILD_LIBS)
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1146,12 +1146,26 @@ process_i386_operand_type (FILE *table,
 		       stage, indent);
 }
 
+static char *mkident (const char *mnem)
+{
+  char *ident = xstrdup (mnem), *p = ident;
+
+  do
+    {
+      if (!ISALNUM (*p))
+	*p = '_';
+    }
+  while (*++p);
+
+  return ident;
+}
+
 static void
 output_i386_opcode (FILE *table, const char *name, char *str,
 		    char *last, int lineno)
 {
   unsigned int i, length, prefix = 0, space = 0;
-  char *base_opcode, *extension_opcode, *end;
+  char *base_opcode, *extension_opcode, *end, *ident;
   char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
   unsigned long long opcode;
 
@@ -1245,9 +1259,11 @@ output_i386_opcode (FILE *table, const c
     fail (_("%s:%d: %s: residual opcode (0x%0*llx) too large\n"),
 	  filename, lineno, name, 2 * length, opcode);
 
-  fprintf (table, "  { \"%s\", 0x%0*llx%s, %lu, %s,\n",
-	   name, 2 * (int)length, opcode, end, i,
+  ident = mkident (name);
+  fprintf (table, "  { MN_%s, 0x%0*llx%s, %lu, %s,\n",
+	   ident, 2 * (int)length, opcode, end, i,
 	   extension_opcode ? extension_opcode : "None");
+  free (ident);
 
   process_i386_opcode_modifier (table, opcode_modifier, space, prefix,
 				operand_types, lineno);
@@ -1565,7 +1581,7 @@ process_i386_opcodes (FILE *table)
 {
   FILE *fp;
   char buf[2048];
-  unsigned int i, j, nr;
+  unsigned int i, j, nr, offs;
   char *str, *p, *last, *name;
   htab_t opcode_hash_table;
   struct opcode_hash_entry **opcode_array = NULL;
@@ -1579,6 +1595,7 @@ process_i386_opcodes (FILE *table)
 					 opcode_hash_eq, NULL,
 					 xcalloc, free);
 
+  fprintf (table, "\n#include \"i386-mnem.h\"\n");
   fprintf (table, "\n/* i386 opcode table.  */\n\n");
   fprintf (table, "static const insn_template i386_optab[] =\n{\n");
 
@@ -1701,6 +1718,32 @@ process_i386_opcodes (FILE *table)
     }
 
   fprintf (table, "};\n");
+
+  /* Emit mnemonics and associated #define-s.  */
+  fp = fopen ("i386-mnem.h", "w");
+  if (fp == NULL)
+    fail (_("can't create i386-mnem.h, errno = %s\n"),
+	  xstrerror (errno));
+
+  process_copyright (fp);
+
+  fprintf (table, "\n/* i386 mnemonics table.  */\n\n");
+  fprintf (table, "const char i386_mnemonics[] =\n");
+  fprintf (fp, "\nextern const char i386_mnemonics[];\n\n");
+
+  for (offs = j = 0; j < i; j++)
+    {
+      name = opcode_array[j]->name;
+      fprintf (table, "  \"\\0\"\"%s\"\n", name);
+      str = mkident (name);
+      fprintf (fp, "#define MN_%s %#x\n", str, offs + 1);
+      free (str);
+      offs += strlen (name) + 1;
+    }
+
+  fprintf (table, ";\n");
+
+  fclose (fp);
 }
 
 static void
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -918,7 +918,7 @@ typedef union i386_operand_type
 typedef struct insn_template
 {
   /* instruction name sans width suffix ("mov" for movl insns) */
-  const char *name;
+  unsigned int mnem_off;
 
   /* Bitfield arrangement is such that individual fields can be easily
      extracted (in native builds at least) - either by at most a masking


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 3/8] x86: re-use insn mnemonic strings as much as possible
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
  2023-01-13 11:05 ` [PATCH 1/8] x86: abstract out obtaining of a template's mnemonic Jan Beulich
  2023-01-13 11:06 ` [PATCH 1/8] x86: move insn mnemonics to a separate table Jan Beulich
@ 2023-01-13 11:07 ` Jan Beulich
  2023-01-13 11:07 ` [PATCH 4/8] x86: absorb allocation in i386-gen Jan Beulich
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:07 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Compact the mnemonic string table such that the tails of longer
mnemonics are re-used for shorter ones, going beyond what compilers
would typically do, but matching what ELF linkers may do when processing
SHF_MERGE|SHF_STRINGS sections. This reduces table size by about 12.5%.

--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1576,12 +1576,29 @@ expand_templates (char *name, const char
   return idx;
 }
 
+static int mnemonic_cmp(const void *p1, const void *p2)
+{
+  const struct opcode_hash_entry *const *e1 = p1, *const *e2 = p2;
+  const char *s1 = (*e1)->name, *s2 = (*e2)->name;
+  unsigned int i;
+  size_t l1 = strlen (s1), l2 = strlen (s2);
+
+  for (i = 1; i <= l1 && i <= l2; ++i)
+    {
+      if (s1[l1 - i] != s2[l2 - i])
+	return (unsigned char)s1[l1 - i] - (unsigned char)s2[l2 - i];
+    }
+
+  return (int)(l1 - l2);
+}
+
 static void
 process_i386_opcodes (FILE *table)
 {
   FILE *fp;
   char buf[2048];
   unsigned int i, j, nr, offs;
+  size_t l;
   char *str, *p, *last, *name;
   htab_t opcode_hash_table;
   struct opcode_hash_entry **opcode_array = NULL;
@@ -1720,6 +1737,8 @@ process_i386_opcodes (FILE *table)
   fprintf (table, "};\n");
 
   /* Emit mnemonics and associated #define-s.  */
+  qsort (opcode_array, i, sizeof (*opcode_array), mnemonic_cmp);
+
   fp = fopen ("i386-mnem.h", "w");
   if (fp == NULL)
     fail (_("can't create i386-mnem.h, errno = %s\n"),
@@ -1731,14 +1750,28 @@ process_i386_opcodes (FILE *table)
   fprintf (table, "const char i386_mnemonics[] =\n");
   fprintf (fp, "\nextern const char i386_mnemonics[];\n\n");
 
-  for (offs = j = 0; j < i; j++)
+  for (l = strlen (opcode_array[offs = j = 0]->name); j < i; j++)
     {
+      const char *next = NULL;
+      size_t l1 = j + 1 < i ? strlen(next = opcode_array[j + 1]->name) : 0;
+
       name = opcode_array[j]->name;
-      fprintf (table, "  \"\\0\"\"%s\"\n", name);
       str = mkident (name);
-      fprintf (fp, "#define MN_%s %#x\n", str, offs + 1);
+      if (l < l1 && !strcmp(name, next + l1 - l))
+	{
+	  fprintf (fp, "#define MN_%s ", str);
+	  free (str);
+	  str = mkident (next);
+	  fprintf (fp, "(MN_%s + %u)\n", str, l1 - l);
+	}
+      else
+	{
+	  fprintf (table, "  \"\\0\"\"%s\"\n", name);
+	  fprintf (fp, "#define MN_%s %#x\n", str, offs + 1);
+	  offs += strlen (name) + 1;
+	}
       free (str);
-      offs += strlen (name) + 1;
+      l = l1;
     }
 
   fprintf (table, ";\n");


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 4/8] x86: absorb allocation in i386-gen
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
                   ` (2 preceding siblings ...)
  2023-01-13 11:07 ` [PATCH 3/8] x86: re-use insn mnemonic strings as much as possible Jan Beulich
@ 2023-01-13 11:07 ` Jan Beulich
  2023-01-13 11:08 ` [PATCH 5/8] x86: avoid strcmp() in a few places Jan Beulich
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:07 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

When generating the mnemonic string table we already set up an
identifier for the following entry in a number of cases. Re-use that on
the next loop iteration rather than re-doing allocation and conversion.
---
Could also be folded directly into "x86: re-use insn mnemonic strings as
much as possible"; separate for now to keep the earlier change a little
more simple.

--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1750,13 +1750,15 @@ process_i386_opcodes (FILE *table)
   fprintf (table, "const char i386_mnemonics[] =\n");
   fprintf (fp, "\nextern const char i386_mnemonics[];\n\n");
 
+  str = NULL;
   for (l = strlen (opcode_array[offs = j = 0]->name); j < i; j++)
     {
       const char *next = NULL;
       size_t l1 = j + 1 < i ? strlen(next = opcode_array[j + 1]->name) : 0;
 
       name = opcode_array[j]->name;
-      str = mkident (name);
+      if (str == NULL)
+	str = mkident (name);
       if (l < l1 && !strcmp(name, next + l1 - l))
 	{
 	  fprintf (fp, "#define MN_%s ", str);
@@ -1769,8 +1771,9 @@ process_i386_opcodes (FILE *table)
 	  fprintf (table, "  \"\\0\"\"%s\"\n", name);
 	  fprintf (fp, "#define MN_%s %#x\n", str, offs + 1);
 	  offs += strlen (name) + 1;
+	  free (str);
+	  str = NULL;
 	}
-      free (str);
       l = l1;
     }
 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 5/8] x86: avoid strcmp() in a few places
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
                   ` (3 preceding siblings ...)
  2023-01-13 11:07 ` [PATCH 4/8] x86: absorb allocation in i386-gen Jan Beulich
@ 2023-01-13 11:08 ` Jan Beulich
  2023-01-13 11:10 ` [PATCH 6/8] x86: embed register names in reg_entry Jan Beulich
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:08 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Now that we have identifiers for the mnemonic strings we can avoid
strcmp() in a number of places, comparing the offsets into the mnemonic
string table instead. While doing this also
- convert a leftover strncmp() to startswith() (apparently an oversight
  when rebasing the original patch introducing the startswith() uses),
- use the new shorthand for current_templates->start also elsewhere in
  md_assemble() (valid up to the point where match_template() is
  called).
---
There are likely further opportunities for improvement, in particular
when currently comparing opcode space + opcode: E.g. in want_disp32()
we could instead compare against MN_lea, at once improving readability.
Thoughts?

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4550,7 +4550,7 @@ load_insn_p (void)
 	return 0;
 
       /* pop.   */
-      if (strcmp (insn_name (&i.tm), "pop") == 0)
+      if (i.tm.mnem_off == MN_pop)
 	return 1;
     }
 
@@ -4919,7 +4919,8 @@ md_assemble (char *line)
 	}
       return;
     }
-  if (may_need_pass2 (current_templates->start))
+  t = current_templates->start;
+  if (may_need_pass2 (t))
     {
       /* Make a copy of the full line in case we need to retry.  */
       copy = xstrdup (line);
@@ -4946,14 +4947,14 @@ md_assemble (char *line)
      AT&T modes.  */
   if (intel_syntax
       && i.operands > 1
-      && (strcmp (mnemonic, "bound") != 0)
-      && (strncmp (mnemonic, "invlpg", 6) != 0)
+      && (t->mnem_off != MN_bound)
+      && !startswith (mnemonic, "invlpg")
       && !startswith (mnemonic, "monitor")
       && !startswith (mnemonic, "mwait")
-      && (strcmp (mnemonic, "pvalidate") != 0)
+      && (t->mnem_off != MN_pvalidate)
       && !startswith (mnemonic, "rmp")
-      && (strcmp (mnemonic, "tpause") != 0)
-      && (strcmp (mnemonic, "umwait") != 0)
+      && (t->mnem_off != MN_tpause)
+      && (t->mnem_off != MN_umwait)
       && !(i.operands == 2
 	   && operand_type_check (i.types[0], imm)
 	   && operand_type_check (i.types[1], imm)))
@@ -4962,15 +4963,14 @@ md_assemble (char *line)
   /* The order of the immediates should be reversed
      for 2 immediates extrq and insertq instructions */
   if (i.imm_operands == 2
-      && (strcmp (mnemonic, "extrq") == 0
-	  || strcmp (mnemonic, "insertq") == 0))
+      && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
       swap_2_operands (0, 1);
 
   if (i.imm_operands)
     optimize_imm ();
 
-  if (i.disp_operands && !want_disp32 (current_templates->start)
-      && (!current_templates->start->opcode_modifier.jump
+  if (i.disp_operands && !want_disp32 (t)
+      && (!t->opcode_modifier.jump
 	  || i.jumpabsolute || i.types[0].bitfield.baseindex))
     {
       for (j = 0; j < i.operands; ++j)


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 6/8] x86: embed register names in reg_entry
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
                   ` (4 preceding siblings ...)
  2023-01-13 11:08 ` [PATCH 5/8] x86: avoid strcmp() in a few places Jan Beulich
@ 2023-01-13 11:10 ` Jan Beulich
  2023-01-13 11:11 ` [PATCH 7/8] x86: embed register and alike names in disassembler Jan Beulich
  2023-01-13 11:11 ` [PATCH 8/8] x86: split i386-gen's opcode hash entry struct Jan Beulich
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:10 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Register names are (including their nul terminators) on average almost 4
bytes long. Otoh no register name is longer than 7 bytes. Hence even for
32-bit builds using a pointer is only slightly more space efficient than
embedding the strings. A level of indirection can be also avoided by
embedding the names as an array of 8 characters directly in the struct,
and the number of base relocations in PIE builds of gas goes down as
well.

--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -987,7 +987,7 @@ insn_template;
 /* these are for register name --> number & type hash lookup */
 typedef struct
 {
-  const char *reg_name;
+  char reg_name[8];
   i386_operand_type reg_type;
   unsigned char reg_flags;
 #define RegRex	    0x1  /* Extended register.  */


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 7/8] x86: embed register and alike names in disassembler
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
                   ` (5 preceding siblings ...)
  2023-01-13 11:10 ` [PATCH 6/8] x86: embed register names in reg_entry Jan Beulich
@ 2023-01-13 11:11 ` Jan Beulich
  2023-01-13 11:11 ` [PATCH 8/8] x86: split i386-gen's opcode hash entry struct Jan Beulich
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:11 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Register names are (including their nul terminators) on average almost 4
bytes long. Otoh no register name is longer than 8 bytes. Hence even for
32-bit builds using a pointer is only slightly more space efficient than
embedding the strings. A level of indirection can be also avoided by
embedding the names as an array of 8 characters directly in the arrays,
and the number of base relocations in libopcodes.so (or PIE builds of
statically linked executables) goes down as well.

To amortize for the otherwise reduced folding of string literals by the
linker, use att_names_seg[] in place of string literals in append_seg()
and OP_ESreg().

--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -2462,48 +2462,48 @@ struct op
    need to update onebyte_has_modrm or twobyte_has_modrm.  */
 #define MODRM_CHECK  if (!ins->need_modrm) abort ()
 
-static const char *const intel_index16[] = {
+static const char intel_index16[][6] = {
   "bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"
 };
 
-static const char *const att_names64[] = {
+static const char att_names64[][8] = {
   "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
   "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
 };
-static const char *const att_names32[] = {
+static const char att_names32[][8] = {
   "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
   "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d"
 };
-static const char *const att_names16[] = {
+static const char att_names16[][8] = {
   "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
   "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w"
 };
-static const char *const att_names8[] = {
+static const char att_names8[][8] = {
   "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh",
 };
-static const char *const att_names8rex[] = {
+static const char att_names8rex[][8] = {
   "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
   "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b"
 };
-static const char *const att_names_seg[] = {
+static const char att_names_seg[][4] = {
   "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "%?", "%?",
 };
 static const char att_index64[] = "%riz";
 static const char att_index32[] = "%eiz";
-static const char *const att_index16[] = {
+static const char att_index16[][8] = {
   "%bx,%si", "%bx,%di", "%bp,%si", "%bp,%di", "%si", "%di", "%bp", "%bx"
 };
 
-static const char *const att_names_mm[] = {
+static const char att_names_mm[][8] = {
   "%mm0", "%mm1", "%mm2", "%mm3",
   "%mm4", "%mm5", "%mm6", "%mm7"
 };
 
-static const char *const att_names_bnd[] = {
+static const char att_names_bnd[][8] = {
   "%bnd0", "%bnd1", "%bnd2", "%bnd3"
 };
 
-static const char *const att_names_xmm[] = {
+static const char att_names_xmm[][8] = {
   "%xmm0", "%xmm1", "%xmm2", "%xmm3",
   "%xmm4", "%xmm5", "%xmm6", "%xmm7",
   "%xmm8", "%xmm9", "%xmm10", "%xmm11",
@@ -2514,7 +2514,7 @@ static const char *const att_names_xmm[]
   "%xmm28", "%xmm29", "%xmm30", "%xmm31"
 };
 
-static const char *const att_names_ymm[] = {
+static const char att_names_ymm[][8] = {
   "%ymm0", "%ymm1", "%ymm2", "%ymm3",
   "%ymm4", "%ymm5", "%ymm6", "%ymm7",
   "%ymm8", "%ymm9", "%ymm10", "%ymm11",
@@ -2525,7 +2525,7 @@ static const char *const att_names_ymm[]
   "%ymm28", "%ymm29", "%ymm30", "%ymm31"
 };
 
-static const char *const att_names_zmm[] = {
+static const char att_names_zmm[][8] = {
   "%zmm0", "%zmm1", "%zmm2", "%zmm3",
   "%zmm4", "%zmm5", "%zmm6", "%zmm7",
   "%zmm8", "%zmm9", "%zmm10", "%zmm11",
@@ -2536,12 +2536,12 @@ static const char *const att_names_zmm[]
   "%zmm28", "%zmm29", "%zmm30", "%zmm31"
 };
 
-static const char *const att_names_tmm[] = {
+static const char att_names_tmm[][8] = {
   "%tmm0", "%tmm1", "%tmm2", "%tmm3",
   "%tmm4", "%tmm5", "%tmm6", "%tmm7"
 };
 
-static const char *const att_names_mask[] = {
+static const char att_names_mask[][8] = {
   "%k0", "%k1", "%k2", "%k3", "%k4", "%k5", "%k6", "%k7"
 };
 
@@ -11291,22 +11291,22 @@ append_seg (instr_info *ins)
   switch (ins->active_seg_prefix)
     {
     case PREFIX_CS:
-      oappend_register (ins, "%cs");
+      oappend_register (ins, att_names_seg[1]);
       break;
     case PREFIX_DS:
-      oappend_register (ins, "%ds");
+      oappend_register (ins, att_names_seg[3]);
       break;
     case PREFIX_SS:
-      oappend_register (ins, "%ss");
+      oappend_register (ins, att_names_seg[2]);
       break;
     case PREFIX_ES:
-      oappend_register (ins, "%es");
+      oappend_register (ins, att_names_seg[0]);
       break;
     case PREFIX_FS:
-      oappend_register (ins, "%fs");
+      oappend_register (ins, att_names_seg[4]);
       break;
     case PREFIX_GS:
-      oappend_register (ins, "%gs");
+      oappend_register (ins, att_names_seg[5]);
       break;
     default:
       break;
@@ -11649,7 +11649,7 @@ static void
 print_register (instr_info *ins, unsigned int reg, unsigned int rexmask,
 		int bytemode, int sizeflag)
 {
-  const char *const *names;
+  const char (*names)[8];
 
   USED_REX (rexmask);
   if (ins->rex & rexmask)
@@ -11888,7 +11888,7 @@ OP_E_memory (instr_info *ins, int bytemo
 			 || bytemode == bnd_mode
 			 || bytemode == bnd_swap_mode);
       bool check_gather = false;
-      const char *const *indexes = NULL;
+      const char (*indexes)[8] = NULL;
 
       havebase = 1;
       base = ins->modrm.rm;
@@ -12177,8 +12177,8 @@ OP_E_memory (instr_info *ins, int bytemo
       if (ins->modrm.mod != 0 || ins->modrm.rm != 6)
 	{
 	  oappend_char (ins, ins->open_char);
-	  oappend (ins, (ins->intel_syntax ? intel_index16
-			 : att_index16)[ins->modrm.rm]);
+	  oappend (ins, ins->intel_syntax ? intel_index16[ins->modrm.rm]
+					  : att_index16[ins->modrm.rm]);
 	  if (ins->intel_syntax
 	      && (disp || ins->modrm.mod != 0 || ins->modrm.rm == 6))
 	    {
@@ -12801,7 +12801,7 @@ OP_ESreg (instr_info *ins, int code, int
 	  intel_operand_size (ins, b_mode, sizeflag);
 	}
     }
-  oappend_register (ins, "%es");
+  oappend_register (ins, att_names_seg[0]);
   oappend_char (ins, ':');
   ptr_reg (ins, code, sizeflag);
 }
@@ -12898,7 +12898,7 @@ OP_MMX (instr_info *ins, int bytemode AT
 	int sizeflag ATTRIBUTE_UNUSED)
 {
   int reg = ins->modrm.reg;
-  const char *const *names;
+  const char (*names)[8];
 
   ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
   if (ins->prefixes & PREFIX_DATA)
@@ -12916,7 +12916,7 @@ OP_MMX (instr_info *ins, int bytemode AT
 static void
 print_vector_reg (instr_info *ins, unsigned int reg, int bytemode)
 {
-  const char *const *names;
+  const char (*names)[8];
 
   if (bytemode == xmmq_mode
       || bytemode == evex_half_bcst_xmmqh_mode
@@ -13014,7 +13014,7 @@ static void
 OP_EM (instr_info *ins, int bytemode, int sizeflag)
 {
   int reg;
-  const char *const *names;
+  const char (*names)[8];
 
   if (ins->modrm.mod != 3)
     {
@@ -13370,8 +13370,8 @@ OP_Monitor (instr_info *ins, int bytemod
   /* monitor %{e,r,}ax,%ecx,%edx"  */
   if (!ins->intel_syntax)
     {
-      const char *const *names = (ins->address_mode == mode_64bit
-				  ? att_names64 : att_names32);
+      const char (*names)[8] = (ins->address_mode == mode_64bit
+				? att_names64 : att_names32);
 
       if (ins->prefixes & PREFIX_ADDR)
 	{
@@ -13547,7 +13547,7 @@ CMPXCHG8B_Fixup (instr_info *ins, int by
 static void
 XMM_Fixup (instr_info *ins, int reg, int sizeflag ATTRIBUTE_UNUSED)
 {
-  const char *const *names = att_names_xmm;
+  const char (*names)[8] = att_names_xmm;
 
   if (ins->need_vex)
     {
@@ -13588,7 +13588,7 @@ static void
 OP_VEX (instr_info *ins, int bytemode, int sizeflag ATTRIBUTE_UNUSED)
 {
   int reg, modrm_reg, sib_index = -1;
-  const char *const *names;
+  const char (*names)[8];
 
   if (!ins->need_vex)
     abort ();
@@ -13763,7 +13763,7 @@ static void
 OP_REG_VexI4 (instr_info *ins, int bytemode, int sizeflag ATTRIBUTE_UNUSED)
 {
   int reg;
-  const char *const *names = att_names_xmm;
+  const char (*names)[8] = att_names_xmm;
 
   FETCH_DATA (ins->info, ins->codep + 1);
   reg = *ins->codep++;


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 8/8] x86: split i386-gen's opcode hash entry struct
  2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
                   ` (6 preceding siblings ...)
  2023-01-13 11:11 ` [PATCH 7/8] x86: embed register and alike names in disassembler Jan Beulich
@ 2023-01-13 11:11 ` Jan Beulich
  7 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:11 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

All glibc malloc() implementations I've checked have a smallest
allocation size worth of 3 pointers, with an increment worth of 2
pointers. Hence mnemonics with multiple templates can be stored more
efficiently when maintaining the shared "name" field only in the actual
hash entry. (To express the shared nature, also convert "name" to by
pointer-to-const.)

While doing the conversation also pull out common code from the involved
if/else construct in expand_templates().

--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1293,10 +1293,13 @@ output_i386_opcode (FILE *table, const c
 
 struct opcode_hash_entry
 {
-  struct opcode_hash_entry *next;
-  char *name;
-  char *opcode;
-  int lineno;
+  const char *name;
+  struct opcode_entry
+  {
+    struct opcode_entry *next;
+    char *opcode;
+    int lineno;
+  } entry;
 };
 
 /* Calculate the hash value of an opcode hash entry P.  */
@@ -1432,7 +1435,8 @@ expand_templates (char *name, const char
 {
   static unsigned int idx, opcode_array_size;
   struct opcode_hash_entry **opcode_array = *opcode_array_p;
-  struct opcode_hash_entry **hash_slot, **entry;
+  struct opcode_hash_entry **hash_slot;
+  struct opcode_entry *entry;
   char *ptr1 = strchr(name, '<'), *ptr2;
 
   if (ptr1 == NULL)
@@ -1458,26 +1462,25 @@ expand_templates (char *name, const char
 
 	  opcode_array[idx] = (struct opcode_hash_entry *)
 	    xmalloc (sizeof (struct opcode_hash_entry));
-	  opcode_array[idx]->next = NULL;
 	  opcode_array[idx]->name = xstrdup (name);
-	  opcode_array[idx]->opcode = xstrdup (str);
-	  opcode_array[idx]->lineno = lineno;
 	  *hash_slot = opcode_array[idx];
+	  entry = &opcode_array[idx]->entry;
 	  idx++;
 	}
       else
 	{
 	  /* Append it to the existing one.  */
-	  entry = hash_slot;
-	  while ((*entry) != NULL)
-	    entry = &(*entry)->next;
-	  *entry = (struct opcode_hash_entry *)
-	    xmalloc (sizeof (struct opcode_hash_entry));
-	  (*entry)->next = NULL;
-	  (*entry)->name = (*hash_slot)->name;
-	  (*entry)->opcode = xstrdup (str);
-	  (*entry)->lineno = lineno;
+	  struct opcode_entry **entryp = &(*hash_slot)->entry.next;
+
+	  while (*entryp != NULL)
+	    entryp = &(*entryp)->next;
+	  entry = (struct opcode_entry *)xmalloc (sizeof (struct opcode_entry));
+	  *entryp = entry;
 	}
+
+      entry->next = NULL;
+      entry->opcode = xstrdup (str);
+      entry->lineno = lineno;
     }
   else if ((ptr2 = strchr(ptr1 + 1, '>')) == NULL)
     fail ("%s: %d: missing '>'\n", filename, lineno);
@@ -1599,7 +1602,7 @@ process_i386_opcodes (FILE *table)
   char buf[2048];
   unsigned int i, j, nr, offs;
   size_t l;
-  char *str, *p, *last, *name;
+  char *str, *p, *last;
   htab_t opcode_hash_table;
   struct opcode_hash_entry **opcode_array = NULL;
   int lineno = 0, marker = 0;
@@ -1619,6 +1622,8 @@ process_i386_opcodes (FILE *table)
   /* Put everything on opcode array.  */
   while (!feof (fp))
     {
+      char *name;
+
       if (fgets (buf, sizeof (buf), fp) == NULL)
 	break;
 
@@ -1700,11 +1705,11 @@ process_i386_opcodes (FILE *table)
   /* Process opcode array.  */
   for (j = 0; j < i; j++)
     {
-      struct opcode_hash_entry *next;
+      const char *name = opcode_array[j]->name;
+      struct opcode_entry *next;
 
-      for (next = opcode_array[j]; next; next = next->next)
+      for (next = &opcode_array[j]->entry; next; next = next->next)
 	{
-	  name = next->name;
 	  str = next->opcode;
 	  lineno = next->lineno;
 	  last = str + strlen (str);
@@ -1723,7 +1728,7 @@ process_i386_opcodes (FILE *table)
 
   for (nr = j = 0; j < i; j++)
     {
-      struct opcode_hash_entry *next = opcode_array[j];
+      struct opcode_entry *next = &opcode_array[j]->entry;
 
       do
 	{
@@ -1753,10 +1758,10 @@ process_i386_opcodes (FILE *table)
   str = NULL;
   for (l = strlen (opcode_array[offs = j = 0]->name); j < i; j++)
     {
+      const char *name = opcode_array[j]->name;
       const char *next = NULL;
       size_t l1 = j + 1 < i ? strlen(next = opcode_array[j + 1]->name) : 0;
 
-      name = opcode_array[j]->name;
       if (str == NULL)
 	str = mkident (name);
       if (l < l1 && !strcmp(name, next + l1 - l))


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/8] x86: move insn mnemonics to a separate table
  2023-01-13 11:06 ` [PATCH 1/8] x86: move insn mnemonics to a separate table Jan Beulich
@ 2023-01-13 11:12   ` Jan Beulich
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2023-01-13 11:12 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

On 13.01.2023 12:06, Jan Beulich via Binutils wrote:
> Using full pointers to reference the insn mnemonic strings is not very
> efficient. With overall string size presently just slightly over 20k,
> even a 16-bit value would suffice. Use "unsigned int" for now, as
> there's no good use we could presently make of the otherwise saved 16
> bits.
> 
> For 64-bit builds this reduces table size by 6.25% (prior to the recent
> ISA extension additions it would have been 12.5%), with a similar effect
> on cache occupation of table entries accessed. For PIE builds of gas
> this also reduces the number of base relocations quite a bit (obviously
> independent of bitness).
> ---
> An alternative to introducing i386-mnem.h would of course be to put the
> #define-s in i386-init.h. That would look like an abuse of the file to
> me, but I'd be okay switching to such an approach.
> 
> As to further shrinking mnem_off (to 16 bits): I'm intending to drop
> i386_opcode_modifier as a separate struct, embedding the fields directly
> in insn_template. i386_opcode_modifier presently using only 6 bits from
> its 3rd word will allow to shrink insn_template by another word then.
> (This would also benefit readabilty of tc-i386*.c, as all the uses of
> "opcode_modifier." would go away. This would additionally reduce the
> apparent discrepancy between e.g. opcode_modifier.opcode_space and
> opcode_modifier.opcode_prefix vs base_opcode and extension_opcode.)

And of course this is patch 2/8; I'm sorry for the typo.

Jan

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-01-13 11:12 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-13 11:04 [PATCH 0/8] x86: mnemonic and register string literals Jan Beulich
2023-01-13 11:05 ` [PATCH 1/8] x86: abstract out obtaining of a template's mnemonic Jan Beulich
2023-01-13 11:06 ` [PATCH 1/8] x86: move insn mnemonics to a separate table Jan Beulich
2023-01-13 11:12   ` Jan Beulich
2023-01-13 11:07 ` [PATCH 3/8] x86: re-use insn mnemonic strings as much as possible Jan Beulich
2023-01-13 11:07 ` [PATCH 4/8] x86: absorb allocation in i386-gen Jan Beulich
2023-01-13 11:08 ` [PATCH 5/8] x86: avoid strcmp() in a few places Jan Beulich
2023-01-13 11:10 ` [PATCH 6/8] x86: embed register names in reg_entry Jan Beulich
2023-01-13 11:11 ` [PATCH 7/8] x86: embed register and alike names in disassembler Jan Beulich
2023-01-13 11:11 ` [PATCH 8/8] x86: split i386-gen's opcode hash entry struct Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).