public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] x86: pack CPU flags in opcode table
@ 2023-08-04 11:48 Jan Beulich
  0 siblings, 0 replies; only message in thread
From: Jan Beulich @ 2023-08-04 11:48 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

The table constantly growing in two dimensions (number of table entries
times number of ISA extension flags) doesn't scale very well. Use a more
compact representation: Only identifiers which need to combine with
other identifiers retain individual flag bits. All others are combined
into an enum, with a new helper added to transform the table entries
into the original i386_cpu_flags layout. This way the table in the final
binary shrinks by almost a third (the generated source code shrinks by
about half), and isn't likely to grow again in that dimension any time
soon.

While moving the 3DNow! fields, drop the stray inner 'a' from their
names.
---
If this is of concern, the 2nd invocation of cpu_flags_from_attr() could be
avoided by having the 1st one latch its result into a new field of "i".
---
v2: Have is_cpu() handle the flag-encoded enumerators as well. Convert
    remaining checks on templates.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1672,6 +1672,51 @@ operand_type_equal (const union i386_ope
     }
 }
 
+static INLINE bool
+is_cpu (const insn_template *t, enum i386_cpu cpu)
+{
+  switch (cpu)
+    {
+    case Cpu287:      return t->cpu.bitfield.cpu287;
+    case Cpu387:      return t->cpu.bitfield.cpu387;
+    case Cpu3dnow:    return t->cpu.bitfield.cpu3dnow;
+    case Cpu3dnowA:   return t->cpu.bitfield.cpu3dnowa;
+    case CpuAVX:      return t->cpu.bitfield.cpuavx;
+    case CpuHLE:      return t->cpu.bitfield.cpuhle;
+    case CpuAVX512F:  return t->cpu.bitfield.cpuavx512f;
+    case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
+    case Cpu64:       return t->cpu.bitfield.cpu64;
+    case CpuNo64:     return t->cpu.bitfield.cpuno64;
+    default:
+      gas_assert (cpu < CpuAttrEnums);
+    }
+  return t->cpu.bitfield.isa == cpu + 1u;
+}
+
+static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
+{
+  const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
+  i386_cpu_flags f = { .array[0] = 0 };
+
+  switch (ARRAY_SIZE(a.array))
+    {
+    case 1:
+      f.array[CpuAttrEnums / bps]
+        |= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
+      if (CpuAttrEnums % bps > CpuIsaBits)
+	f.array[CpuAttrEnums / bps + 1]
+	  = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
+      break;
+    default:
+      abort ();
+    }
+
+  if (a.bitfield.isa)
+    f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
+
+  return f;
+}
+
 static INLINE int
 cpu_flags_all_zero (const union i386_cpu_flags *x)
 {
@@ -1731,10 +1776,10 @@ cpu_flags_equal (const union i386_cpu_fl
 }
 
 static INLINE int
-cpu_flags_check_cpu64 (i386_cpu_flags f)
+cpu_flags_check_cpu64 (i386_cpu_attr a)
 {
-  return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
-	   || (flag_code != CODE_64BIT && f.bitfield.cpu64));
+  return !((flag_code == CODE_64BIT && a.bitfield.cpuno64)
+	   || (flag_code != CODE_64BIT && a.bitfield.cpu64));
 }
 
 static INLINE i386_cpu_flags
@@ -1828,8 +1873,8 @@ static const i386_cpu_flags avx512 = CPU
 static int
 cpu_flags_match (const insn_template *t)
 {
-  i386_cpu_flags x = t->cpu_flags;
-  int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
+  i386_cpu_flags x = cpu_flags_from_attr (t->cpu);
+  int match = cpu_flags_check_cpu64 (t->cpu) ? CPU_FLAGS_64BIT_MATCH : 0;
 
   x.bitfield.cpu64 = 0;
   x.bitfield.cpuno64 = 0;
@@ -2171,7 +2216,7 @@ operand_size_match (const insn_template
 
       /* For FMA4 and XOP insns VEX.W controls just the first two
 	 register operands.  */
-      if (t->cpu_flags.bitfield.cpufma4 || t->cpu_flags.bitfield.cpuxop)
+      if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
 	given = j < 2 ? 1 - j : j;
 
       if (t->operand_types[j].bitfield.class == Reg
@@ -4416,7 +4461,7 @@ optimize_encoding (void)
 		   && is_evex_encoding (&i.tm)
 		   && (i.vec_encoding != vex_encoding_evex
 		       || cpu_arch_isa_flags.bitfield.cpuavx512vl
-		       || i.tm.cpu_flags.bitfield.cpuavx512vl
+		       || is_cpu (&i.tm, CpuAVX512VL)
 		       || (i.tm.operand_types[2].bitfield.zmmword
 			   && i.types[2].bitfield.ymmword))))
 	   && i.tm.opcode_space == SPACE_0F
@@ -5222,7 +5267,7 @@ md_assemble (char *line)
 	 bypass the logic below when easily possible.  */
       && t->opcode_space >= SPACE_0F
       && t->opcode_space <= SPACE_0F3A
-      && !i.tm.cpu_flags.bitfield.cpusse4a
+      && !is_cpu (&i.tm, CpuSSE4a)
       && !is_any_vex_encoding (t))
     {
       bool simd = false;
@@ -5309,7 +5354,7 @@ md_assemble (char *line)
   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
     as_bad (_("expecting indirect branch instruction after `notrack'"));
 
-  if (i.tm.cpu_flags.bitfield.cpumpx)
+  if (is_cpu (&i.tm, CpuMPX))
     {
       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
 	as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
@@ -5361,7 +5406,7 @@ md_assemble (char *line)
     return;
 
   /* Check if IP-relative addressing requirements can be satisfied.  */
-  if (i.tm.cpu_flags.bitfield.cpuprefetchi
+  if (is_cpu (&i.tm, CpuPREFETCHI)
       && !(i.base_reg && i.base_reg->reg_num == RegIP))
     as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
 
@@ -5624,7 +5669,7 @@ parse_insn (const char *line, char *mnem
 	  && current_templates
 	  && current_templates->start->opcode_modifier.isprefix)
 	{
-	  if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
+	  if (!cpu_flags_check_cpu64 (current_templates->start->cpu))
 	    {
 	      as_bad ((flag_code != CODE_64BIT
 		       ? _("`%s' is only supported in 64-bit mode")
@@ -5702,13 +5747,13 @@ parse_insn (const char *line, char *mnem
 		case PREFIX_EXIST:
 		  return NULL;
 		case PREFIX_DS:
-		  if (current_templates->start->cpu_flags.bitfield.cpuibt)
+		  if (is_cpu (current_templates->start, CpuIBT))
 		    i.notrack_prefix = insn_name (current_templates->start);
 		  break;
 		case PREFIX_REP:
-		  if (current_templates->start->cpu_flags.bitfield.cpuhle)
+		  if (is_cpu (current_templates->start, CpuHLE))
 		    i.hle_prefix = insn_name (current_templates->start);
-		  else if (current_templates->start->cpu_flags.bitfield.cpumpx)
+		  else if (is_cpu (current_templates->start, CpuMPX))
 		    i.bnd_prefix = insn_name (current_templates->start);
 		  else
 		    i.rep_prefix = insn_name (current_templates->start);
@@ -6379,9 +6424,9 @@ check_VecOperands (const insn_template *
      operand size is YMMword or XMMword.  Since this function runs after
      template matching, there's no need to check for YMMword/XMMword in
      the template.  */
-  cpu = cpu_flags_and (t->cpu_flags, avx512);
+  cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
   if (!cpu_flags_all_zero (&cpu)
-      && !t->cpu_flags.bitfield.cpuavx512vl
+      && !is_cpu (t, CpuAVX512VL)
       && !cpu_arch_flags.bitfield.cpuavx512vl)
     {
       for (op = 0; op < t->operands; ++op)
@@ -6398,8 +6443,7 @@ check_VecOperands (const insn_template *
 
   /* Somewhat similarly, templates specifying both AVX and AVX2 are
      requiring AVX2 support if the actual operand size is YMMword.  */
-  if (t->cpu_flags.bitfield.cpuavx
-      && t->cpu_flags.bitfield.cpuavx2
+  if (is_cpu (t, CpuAVX) && is_cpu (t, CpuAVX2)
       && !cpu_arch_flags.bitfield.cpuavx2)
     {
       for (op = 0; op < t->operands; ++op)
@@ -6675,7 +6719,7 @@ check_VecOperands (const insn_template *
     }
 
   /* Check the special Imm4 cases; must be the first operand.  */
-  if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
+  if (is_cpu (t, CpuXOP) && t->operands == 5)
     {
       if (i.op[0].imms->X_op != O_constant
 	  || !fits_in_imm4 (i.op[0].imms->X_add_number))
@@ -7143,8 +7187,8 @@ match_template (char mnem_suffix)
 	      if (!(size_match & MATCH_REVERSE))
 		continue;
 	      /* Try reversing direction of operands.  */
-	      j = t->cpu_flags.bitfield.cpufma4
-		  || t->cpu_flags.bitfield.cpuxop ? 1 : i.operands - 1;
+	      j = is_cpu (t, CpuFMA4)
+		  || is_cpu (t, CpuXOP) ? 1 : i.operands - 1;
 	      overlap0 = operand_type_and (i.types[0], operand_types[j]);
 	      overlap1 = operand_type_and (i.types[j], operand_types[0]);
 	      overlap2 = operand_type_and (i.types[1], operand_types[1]);
@@ -7178,8 +7222,7 @@ match_template (char mnem_suffix)
 		      && (intel_syntax || intel_mnemonic))
 		    found_reverse_match |= Opcode_FloatR;
 		}
-	      else if (t->cpu_flags.bitfield.cpufma4
-		       || t->cpu_flags.bitfield.cpuxop)
+	      else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
 		{
 		  found_reverse_match = Opcode_VexW;
 		  goto check_operands_345;
@@ -7399,7 +7442,7 @@ process_suffix (void)
 		 && (i.tm.base_opcode | 8) == 0xbe)
 		|| (i.tm.opcode_space == SPACE_BASE
 		    && i.tm.base_opcode == 0x63
-		    && i.tm.cpu_flags.bitfield.cpu64);
+		    && is_cpu (&i.tm, Cpu64));
 
       /* movsx/movzx want only their source operand considered here, for the
 	 ambiguity checking below.  The suffix will be replaced afterwards
@@ -9401,7 +9444,7 @@ maybe_fused_with_jcc_p (enum mf_cmp_kind
     }
 
   /* inc, dec without inc/dec m.   */
-  if ((i.tm.cpu_flags.bitfield.cpuno64
+  if ((is_cpu (&i.tm, CpuNo64)
        && (i.tm.base_opcode | 0xf) == 0x4f)
       || ((i.tm.base_opcode | 1) == 0xff
 	  && i.tm.extension_opcode <= 0x1))
@@ -9449,7 +9492,7 @@ add_branch_prefix_frag_p (void)
   if (!align_branch_power
       || !align_branch_prefix_size
       || now_seg == absolute_section
-      || i.tm.cpu_flags.bitfield.cpupadlock
+      || is_cpu (&i.tm, CpuPadLock)
       || !cpu_arch_flags.bitfield.cpui386)
     return 0;
 
@@ -9577,14 +9620,14 @@ output_insn (void)
   if (IS_ELF && x86_used_note && now_seg != absolute_section)
     {
       if ((i.xstate & xstate_tmm) == xstate_tmm
-	  || i.tm.cpu_flags.bitfield.cpuamx_tile)
+	  || is_cpu (&i.tm, CpuAMX_TILE))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
 
-      if (i.tm.cpu_flags.bitfield.cpu8087
-	  || i.tm.cpu_flags.bitfield.cpu287
-	  || i.tm.cpu_flags.bitfield.cpu387
-	  || i.tm.cpu_flags.bitfield.cpu687
-	  || i.tm.cpu_flags.bitfield.cpufisttp)
+      if (is_cpu (&i.tm, Cpu8087)
+	  || is_cpu (&i.tm, Cpu287)
+	  || is_cpu (&i.tm, Cpu387)
+	  || is_cpu (&i.tm, Cpu687)
+	  || is_cpu (&i.tm, CpuFISTTP))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
 
       if ((i.xstate & xstate_mmx)
@@ -9603,16 +9646,16 @@ output_insn (void)
 	}
 
       /* vzeroall / vzeroupper */
-      if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
+      if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
 	i.xstate |= xstate_ymm;
 
       if ((i.xstate & xstate_xmm)
 	  /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
 	  || (i.tm.base_opcode == 0xae
-	      && (i.tm.cpu_flags.bitfield.cpusse
-		  || i.tm.cpu_flags.bitfield.cpuavx))
-	  || i.tm.cpu_flags.bitfield.cpuwidekl
-	  || i.tm.cpu_flags.bitfield.cpukl)
+	      && (is_cpu (&i.tm, CpuSSE)
+		  || is_cpu (&i.tm, CpuAVX)))
+	  || is_cpu (&i.tm, CpuWideKL)
+	  || is_cpu (&i.tm, CpuKL))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
 
       if ((i.xstate & xstate_ymm) == xstate_ymm)
@@ -9621,65 +9664,65 @@ output_insn (void)
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
-      if (i.tm.cpu_flags.bitfield.cpufxsr)
+      if (is_cpu (&i.tm, CpuFXSR))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
-      if (i.tm.cpu_flags.bitfield.cpuxsave)
+      if (is_cpu (&i.tm, CpuXsave))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
-      if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
+      if (is_cpu (&i.tm, CpuXsaveopt))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
-      if (i.tm.cpu_flags.bitfield.cpuxsavec)
+      if (is_cpu (&i.tm, CpuXSAVEC))
 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
 
       if (x86_feature_2_used
-	  || i.tm.cpu_flags.bitfield.cpucmov
-	  || i.tm.cpu_flags.bitfield.cpusyscall
+	  || is_cpu (&i.tm, CpuCMOV)
+	  || is_cpu (&i.tm, CpuSYSCALL)
 	  || i.tm.mnem_off == MN_cmpxchg8b)
 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
-      if (i.tm.cpu_flags.bitfield.cpusse3
-	  || i.tm.cpu_flags.bitfield.cpussse3
-	  || i.tm.cpu_flags.bitfield.cpusse4_1
-	  || i.tm.cpu_flags.bitfield.cpusse4_2
-	  || i.tm.cpu_flags.bitfield.cpucx16
-	  || i.tm.cpu_flags.bitfield.cpupopcnt
+      if (is_cpu (&i.tm, CpuSSE3)
+	  || is_cpu (&i.tm, CpuSSSE3)
+	  || is_cpu (&i.tm, CpuSSE4_1)
+	  || is_cpu (&i.tm, CpuSSE4_2)
+	  || is_cpu (&i.tm, CpuCX16)
+	  || is_cpu (&i.tm, CpuPOPCNT)
 	  /* LAHF-SAHF insns in 64-bit mode.  */
 	  || (flag_code == CODE_64BIT
 	      && (i.tm.base_opcode | 1) == 0x9f
 	      && i.tm.opcode_space == SPACE_BASE))
 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
-      if (i.tm.cpu_flags.bitfield.cpuavx
-	  || i.tm.cpu_flags.bitfield.cpuavx2
+      if (is_cpu (&i.tm, CpuAVX)
+	  || is_cpu (&i.tm, CpuAVX2)
 	  /* Any VEX encoded insns execpt for AVX512F, AVX512BW, AVX512DQ,
 	     XOP, FMA4, LPW, TBM, and AMX.  */
 	  || (i.tm.opcode_modifier.vex
-	      && !i.tm.cpu_flags.bitfield.cpuavx512f
-	      && !i.tm.cpu_flags.bitfield.cpuavx512bw
-	      && !i.tm.cpu_flags.bitfield.cpuavx512dq
-	      && !i.tm.cpu_flags.bitfield.cpuxop
-	      && !i.tm.cpu_flags.bitfield.cpufma4
-	      && !i.tm.cpu_flags.bitfield.cpulwp
-	      && !i.tm.cpu_flags.bitfield.cputbm
+	      && !is_cpu (&i.tm, CpuAVX512F)
+	      && !is_cpu (&i.tm, CpuAVX512BW)
+	      && !is_cpu (&i.tm, CpuAVX512DQ)
+	      && !is_cpu (&i.tm, CpuXOP)
+	      && !is_cpu (&i.tm, CpuFMA4)
+	      && !is_cpu (&i.tm, CpuLWP)
+	      && !is_cpu (&i.tm, CpuTBM)
 	      && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
-	  || i.tm.cpu_flags.bitfield.cpuf16c
-	  || i.tm.cpu_flags.bitfield.cpufma
-	  || i.tm.cpu_flags.bitfield.cpulzcnt
-	  || i.tm.cpu_flags.bitfield.cpumovbe
-	  || i.tm.cpu_flags.bitfield.cpuxsaves
+	  || is_cpu (&i.tm, CpuF16C)
+	  || is_cpu (&i.tm, CpuFMA)
+	  || is_cpu (&i.tm, CpuLZCNT)
+	  || is_cpu (&i.tm, CpuMovbe)
+	  || is_cpu (&i.tm, CpuXSAVES)
 	  || (x86_feature_2_used
 	      & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
 		 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
 		 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
-      if (i.tm.cpu_flags.bitfield.cpuavx512f
-	  || i.tm.cpu_flags.bitfield.cpuavx512bw
-	  || i.tm.cpu_flags.bitfield.cpuavx512dq
-	  || i.tm.cpu_flags.bitfield.cpuavx512vl
+      if (is_cpu (&i.tm, CpuAVX512F)
+	  || is_cpu (&i.tm, CpuAVX512BW)
+	  || is_cpu (&i.tm, CpuAVX512DQ)
+	  || is_cpu (&i.tm, CpuAVX512VL)
 	  /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
 	     AVX512-4FMAPS, and AVX512-4VNNIW.  */
 	  || (i.tm.opcode_modifier.evex
-	      && !i.tm.cpu_flags.bitfield.cpuavx512er
-	      && !i.tm.cpu_flags.bitfield.cpuavx512pf
-	      && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
-	      && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
+	      && !is_cpu (&i.tm, CpuAVX512ER)
+	      && !is_cpu (&i.tm, CpuAVX512PF)
+	      && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
+	      && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
     }
 #endif
@@ -9824,7 +9867,7 @@ output_insn (void)
 	      add_prefix (0xf2);
 	      break;
 	    case PREFIX_0XF3:
-	      if (!i.tm.cpu_flags.bitfield.cpupadlock
+	      if (!is_cpu (&i.tm, CpuPadLock)
 		  || (i.prefix[REP_PREFIX] != 0xf3))
 		add_prefix (0xf3);
 	      break;
@@ -12127,7 +12170,7 @@ i386_addressing_mode (void)
   if (i.prefix[ADDR_PREFIX])
     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
   else if (flag_code == CODE_16BIT
-	   && current_templates->start->cpu_flags.bitfield.cpumpx
+	   && is_cpu (current_templates->start, CpuMPX)
 	   /* Avoid replacing the "16-bit addressing not allowed" diagnostic
 	      from md_assemble() by "is not a valid base/index expression"
 	      when there is a base and/or index.  */
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -774,21 +774,41 @@ add_isa_dependencies (bitfield *flags, c
 
 static void
 output_cpu_flags (FILE *table, bitfield *flags, unsigned int size,
-		  int macro, const char *comma, const char *indent)
+		  int macro, const char *comma, const char *indent, int lineno)
 {
-  unsigned int i;
+  unsigned int i = 0, j = 0;
 
   memset (&active_cpu_flags, 0, sizeof(active_cpu_flags));
 
   fprintf (table, "%s{ { ", indent);
 
-  for (i = 0; i < size - 1; i++)
+  if (!macro)
     {
-      if (((i + 1) % 20) != 0)
+      for (j = ~0u; i < CpuAttrEnums; i++)
+	{
+	  if (!flags[i].value)
+	    continue;
+
+	  if (j < ~0u)
+	    fail ("%s: %d: invalid combination of CPU identifiers\n",
+		  filename, lineno);
+	  j = i;
+	  active_cpu_flags.array[i / 32] |= 1U << (i % 32);
+	}
+
+	/* Write the 1-based enumerator; 0 indicates "no associated flag".  */
+	fprintf (table, "%u, ", j + 1);
+
+	j = 1;
+    }
+
+  for (; i < size - 1; i++, j++)
+    {
+      if (((j + 1) % 20) != 0)
 	fprintf (table, "%d, ", flags[i].value);
       else
 	fprintf (table, "%d,", flags[i].value);
-      if (((i + 1) % 20) == 0)
+      if (((j + 1) % 20) == 0)
 	{
 	  /* We need \\ for macro.  */
 	  if (macro)
@@ -899,7 +919,7 @@ process_i386_cpu_flag (FILE *table, char
     }
 
   output_cpu_flags (table, flags, ARRAY_SIZE (flags), name != NULL,
-		    comma, indent);
+		    comma, indent, lineno);
 }
 
 static void
@@ -2073,6 +2093,16 @@ main (int argc, char **argv)
     fail ("%d unused bits in i386_cpu_flags.\n", c);
 #endif
 
+  /* If this triggers, CpuIsaBits needs to be increased.  */
+  static_assert (CpuAttrEnums <= (1u << CpuIsaBits));
+
+  /* Check the unused bitfield in i386_cpu_attr.  */
+#ifndef CpuAttrUnused
+  c = CpuAttrNumOfBits - (CpuIsaBits + CpuMax + 1 - CpuAttrEnums);
+  if (c)
+    fail ("%d unused bits in i386_cpu_attr.\n", c);
+#endif
+
   static_assert (ARRAY_SIZE (opcode_modifiers) == Opcode_Modifier_Num);
 
   /* Check the unused bitfield in i386_operand_type.  */
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -26,7 +26,7 @@
 
 /* Position of cpu flags bitfiled.  */
 
-enum
+enum i386_cpu
 {
   /* i186 or better required */
   Cpu186 = 0,
@@ -52,10 +52,6 @@ enum
   CpuSYSCALL,
   /* Floating point support required */
   Cpu8087,
-  /* i287 support required */
-  Cpu287,
-  /* i387 support required */
-  Cpu387,
   /* i686 and floating point support required */
   Cpu687,
   /* SSE3 and floating point support required */
@@ -66,10 +62,6 @@ enum
   CpuSSE,
   /* SSE2 support required */
   CpuSSE2,
-  /* 3dnow! support required */
-  Cpu3dnow,
-  /* 3dnow! Extensions support required */
-  Cpu3dnowA,
   /* SSE3 support required */
   CpuSSE3,
   /* VIA PadLock required */
@@ -94,12 +86,8 @@ enum
   CpuSSE4_1,
   /* SSE4.2 support required */
   CpuSSE4_2,
-  /* AVX support required */
-  CpuAVX,
   /* AVX2 support required */
   CpuAVX2,
-  /* Intel AVX-512 Foundation Instructions support required */
-  CpuAVX512F,
   /* Intel AVX-512 Conflict Detection Instructions support required */
   CpuAVX512CD,
   /* Intel AVX-512 Exponential and Reciprocal Instructions support
@@ -107,8 +95,6 @@ enum
   CpuAVX512ER,
   /* Intel AVX-512 Prefetch Instructions support required */
   CpuAVX512PF,
-  /* Intel AVX-512 VL Instructions support required.  */
-  CpuAVX512VL,
   /* Intel AVX-512 DQ Instructions support required.  */
   CpuAVX512DQ,
   /* Intel AVX-512 BW Instructions support required.  */
@@ -153,8 +139,6 @@ enum
   CpuF16C,
   /* Intel BMI2 support required */
   CpuBMI2,
-  /* HLE support required */
-  CpuHLE,
   /* RTM support required */
   CpuRTM,
   /* INVPCID Instructions required */
@@ -313,12 +297,33 @@ enum
   /* RMPQUERY instruction required */
   CpuRMPQUERY,
 
-  /* NOTE: These last three items need to remain last and in this order. */
+  /* NOTE: These items, which can be combined with other ISA flags above, need
+     to stay directly ahead of CpuMax, in the order of CPU_FLAGS_COMMON. */
 
+  /* i287 support required */
+  Cpu287,
+  CpuAttrEnums = Cpu287,
+  /* i387 support required */
+  Cpu387,
+  /* 3dnow! support required */
+  Cpu3dnow,
+  /* 3dnow! Extensions support required */
+  Cpu3dnowA,
+  /* AVX support required */
+  CpuAVX,
+  /* HLE support required */
+  CpuHLE,
+  /* Intel AVX-512 Foundation Instructions support required */
+  CpuAVX512F,
+  /* Intel AVX-512 VL Instructions support required.  */
+  CpuAVX512VL,
   /* 64bit support required  */
   Cpu64,
   /* Not supported in the 64bit mode  */
   CpuNo64,
+
+  /* NOTE: This item needs to remain last. */
+
   /* The last bitfield in i386_cpu_flags.  */
   CpuMax = CpuNo64
 };
@@ -328,9 +333,42 @@ enum
 #define CpuNumOfBits \
   (CpuNumOfUints * sizeof (unsigned int) * CHAR_BIT)
 
-/* If you get a compiler error for zero width of the unused field,
-   comment it out.  */
+#define CpuIsaBits 8
+#define CpuAttrNumOfUints \
+  ((CpuIsaBits + CpuMax - CpuAttrEnums) / sizeof (unsigned int) / CHAR_BIT + 1)
+#define CpuAttrNumOfBits \
+  (CpuAttrNumOfUints * sizeof (unsigned int) * CHAR_BIT)
+
+/* If you get a compiler error for zero width of an unused field,
+   comment the respective one out.  */
 #define CpuUnused	(CpuMax + 1)
+#define CpuAttrUnused	(CpuIsaBits + CpuMax + 1 - CpuAttrEnums)
+
+#define CPU_FLAGS_COMMON \
+      unsigned int cpu287:1, \
+		   cpu387:1, \
+		   cpu3dnow:1, \
+		   cpu3dnowa:1, \
+		   cpuavx:1, \
+		   cpuhle:1, \
+		   cpuavx512f:1, \
+		   cpuavx512vl:1, \
+      /* NOTE: These two fields need to remain last and in this order. */ \
+		   cpu64:1, \
+		   cpuno64:1
+
+typedef union i386_cpu_attr
+{
+  struct
+    {
+      unsigned int isa:CpuIsaBits;
+      CPU_FLAGS_COMMON;
+#ifdef CpuAttrUnused
+      unsigned int unused:(CpuAttrNumOfBits - CpuAttrUnused);
+#endif
+    } bitfield;
+  unsigned int array[CpuAttrNumOfUints];
+} i386_cpu_attr;
 
 /* We can check if an instruction is available with array instead
    of bitfield. */
@@ -350,15 +388,11 @@ typedef union i386_cpu_flags
       unsigned int cpunop:1;
       unsigned int cpusyscall:1;
       unsigned int cpu8087:1;
-      unsigned int cpu287:1;
-      unsigned int cpu387:1;
       unsigned int cpu687:1;
       unsigned int cpufisttp:1;
       unsigned int cpummx:1;
       unsigned int cpusse:1;
       unsigned int cpusse2:1;
-      unsigned int cpua3dnow:1;
-      unsigned int cpua3dnowa:1;
       unsigned int cpusse3:1;
       unsigned int cpupadlock:1;
       unsigned int cpusvme:1;
@@ -371,13 +405,10 @@ typedef union i386_cpu_flags
       unsigned int cpumonitor:1;
       unsigned int cpusse4_1:1;
       unsigned int cpusse4_2:1;
-      unsigned int cpuavx:1;
       unsigned int cpuavx2:1;
-      unsigned int cpuavx512f:1;
       unsigned int cpuavx512cd:1;
       unsigned int cpuavx512er:1;
       unsigned int cpuavx512pf:1;
-      unsigned int cpuavx512vl:1;
       unsigned int cpuavx512dq:1;
       unsigned int cpuavx512bw:1;
       unsigned int cpuiamcu:1;
@@ -400,7 +431,6 @@ typedef union i386_cpu_flags
       unsigned int cpurdrnd:1;
       unsigned int cpuf16c:1;
       unsigned int cpubmi2:1;
-      unsigned int cpuhle:1;
       unsigned int cpurtm:1;
       unsigned int cpuinvpcid:1;
       unsigned int cpuvmfunc:1;
@@ -480,9 +510,7 @@ typedef union i386_cpu_flags
       unsigned int cputlbsync:1;
       unsigned int cpusnp:1;
       unsigned int cpurmpquery:1;
-      /* NOTE: These last three fields need to remain last and in this order. */
-      unsigned int cpu64:1;
-      unsigned int cpuno64:1;
+      CPU_FLAGS_COMMON;
 #ifdef CpuUnused
       unsigned int unused:(CpuNumOfBits - CpuUnused);
 #endif
@@ -972,8 +1000,8 @@ typedef struct insn_template
      the same instruction */
   i386_opcode_modifier opcode_modifier;
 
-  /* cpu feature flags */
-  i386_cpu_flags cpu_flags;
+  /* cpu feature attributes */
+  i386_cpu_attr cpu;
 
   /* operand_types[i] describes the type of operand i.  This is made
      by OR'ing together all of the possible type masks.  (e.g.

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-04 11:48 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-04 11:48 [PATCH v2] x86: pack CPU flags in opcode table Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).