[PATCH 0/4] x86/APX: misc adjustments

public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed

* [PATCH 0/4] x86/APX: misc adjustments
@ 2024-02-16  9:56 Jan Beulich
  2024-02-16  9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Jan Beulich @ 2024-02-16  9:56 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu, Lili Cui

In part doing what was originally asked for during review.

1: rename vec_encoding and vex_encoding_*
2: APX: respect {vex}/{vex3}
3: APX: correct .insn opcode space determination when REX2 is needed
4: APX: optimize certain XOR and SUB forms

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/4] x86: rename vec_encoding and vex_encoding_*
  2024-02-16  9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
@ 2024-02-16  9:57 ` Jan Beulich
  2024-02-18  5:59   ` Cui, Lili
  2024-02-16  9:58 ` [PATCH 2/4] x86/APX: respect {vex}/{vex3} Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2024-02-16  9:57 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu, Lili Cui

Even with just VEX these weren't limited to vector insns. With APX the
set of non-vector ones covered has greatly increased. Drop the vec_
prefix. Also drop the vex_ ones off of the enumerators, as they weren't
appropriate anyway: Should have been vec_ then, too.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -445,16 +445,16 @@ struct _i386_insn
     /* Disable instruction size optimization.  */
     bool no_optimize;
 
-    /* How to encode vector instructions.  */
+    /* How to encode instructions.  */
     enum
       {
-	vex_encoding_default = 0,
-	vex_encoding_vex,
-	vex_encoding_vex3,
-	vex_encoding_evex,
-	vex_encoding_evex512,
-	vex_encoding_error
-      } vec_encoding;
+	encoding_default = 0,
+	encoding_vex,
+	encoding_vex3,
+	encoding_evex,
+	encoding_evex512,
+	encoding_error
+      } encoding;
 
     /* REP prefix.  */
     const char *rep_prefix;
@@ -1885,8 +1885,8 @@ static const i386_cpu_flags avx512 = CPU
 
 static INLINE bool need_evex_encoding (const insn_template *t)
 {
-  return i.vec_encoding == vex_encoding_evex
-	|| i.vec_encoding == vex_encoding_evex512
+  return i.encoding == encoding_evex
+	|| i.encoding == encoding_evex512
 	|| (t->opcode_modifier.vex && i.has_egpr)
 	|| i.mask.reg;
 }
@@ -2489,7 +2489,7 @@ static INLINE int
 fits_in_imm4 (offsetT num)
 {
   /* Despite the name, check for imm3 if we're dealing with EVEX.  */
-  return (num & (i.vec_encoding != vex_encoding_evex ? 0xf : 7)) == num;
+  return (num & (i.encoding != encoding_evex ? 0xf : 7)) == num;
 }
 
 static i386_operand_type
@@ -3800,7 +3800,7 @@ build_vex_prefix (const insn_template *t
   /* Use 2-byte VEX prefix by swapping destination and source operand
      if there are more than 1 register operand.  */
   if (i.reg_operands > 1
-      && i.vec_encoding != vex_encoding_vex3
+      && i.encoding != encoding_vex3
       && i.dir_encoding == dir_encoding_default
       && i.operands == i.reg_operands
       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
@@ -3829,7 +3829,7 @@ build_vex_prefix (const insn_template *t
   /* Use 2-byte VEX prefix by swapping commutative source operands if there
      are no memory operands and at least 3 register ones.  */
   if (i.reg_operands >= 3
-      && i.vec_encoding != vex_encoding_vex3
+      && i.encoding != encoding_vex3
       && i.reg_operands == i.operands - i.imm_operands
       && i.tm.opcode_modifier.vex
       && i.tm.opcode_modifier.commutative
@@ -3892,7 +3892,7 @@ build_vex_prefix (const insn_template *t
 
   /* Use 2-byte VEX prefix if possible.  */
   if (w == 0
-      && i.vec_encoding != vex_encoding_vex3
+      && i.encoding != encoding_vex3
       && i.tm.opcode_space == SPACE_0F
       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
     {
@@ -4758,7 +4758,7 @@ optimize_encoding (void)
 	   && (i.tm.opcode_modifier.vex
 	       || ((!i.mask.reg || i.mask.zeroing)
 		   && i.tm.opcode_modifier.evex
-		   && (i.vec_encoding != vex_encoding_evex
+		   && (i.encoding != encoding_evex
 		       || cpu_arch_isa_flags.bitfield.cpuavx512vl
 		       || is_cpu (&i.tm, CpuAVX512VL)
 		       || (i.tm.operand_types[2].bitfield.zmmword
@@ -4808,12 +4808,12 @@ optimize_encoding (void)
        */
       if (i.tm.opcode_modifier.evex)
 	{
-	  if (i.vec_encoding != vex_encoding_evex)
+	  if (i.encoding != encoding_evex)
 	    {
 	      i.tm.opcode_modifier.vex = VEX128;
 	      i.tm.opcode_modifier.vexw = VEXW0;
 	      i.tm.opcode_modifier.evex = 0;
-	      i.vec_encoding = vex_encoding_vex;
+	      i.encoding = encoding_vex;
 	      i.mask.reg = NULL;
 	    }
 	  else if (optimize > 1)
@@ -4836,7 +4836,7 @@ optimize_encoding (void)
 	    i.types[j].bitfield.ymmword = 0;
 	  }
     }
-  else if (i.vec_encoding != vex_encoding_evex
+  else if (i.encoding != encoding_evex
 	   && !i.types[0].bitfield.zmmword
 	   && !i.types[1].bitfield.zmmword
 	   && !i.mask.reg
@@ -4978,7 +4978,7 @@ optimize_encoding (void)
 	   && i.tm.opcode_modifier.vex
 	   && !(i.op[0].regs->reg_flags & RegRex)
 	   && i.op[0].regs->reg_type.bitfield.xmmword
-	   && i.vec_encoding != vex_encoding_vex3)
+	   && i.encoding != encoding_vex3)
     {
       /* Optimize: -Os:
          vpbroadcastq %xmmN, %xmmM  -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
@@ -6839,10 +6839,10 @@ md_assemble (char *line)
   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
     optimize_encoding ();
 
-  /* Past optimization there's no need to distinguish vex_encoding_evex and
-     vex_encoding_evex512 anymore.  */
-  if (i.vec_encoding == vex_encoding_evex512)
-    i.vec_encoding = vex_encoding_evex;
+  /* Past optimization there's no need to distinguish encoding_evex and
+     encoding_evex512 anymore.  */
+  if (i.encoding == encoding_evex512)
+    i.encoding = encoding_evex;
 
   if (use_unaligned_vector_move)
     encode_with_unaligned_vector_move ();
@@ -7136,15 +7136,15 @@ parse_insn (const char *line, char *mnem
 		  break;
 		case Prefix_VEX:
 		  /* {vex} */
-		  i.vec_encoding = vex_encoding_vex;
+		  i.encoding = encoding_vex;
 		  break;
 		case Prefix_VEX3:
 		  /* {vex3} */
-		  i.vec_encoding = vex_encoding_vex3;
+		  i.encoding = encoding_vex3;
 		  break;
 		case Prefix_EVEX:
 		  /* {evex} */
-		  i.vec_encoding = vex_encoding_evex;
+		  i.encoding = encoding_evex;
 		  break;
 		case Prefix_REX:
 		  /* {rex} */
@@ -8260,7 +8260,7 @@ check_VecOperands (const insn_template *
 static int
 VEX_check_encoding (const insn_template *t)
 {
-  if (i.vec_encoding == vex_encoding_error)
+  if (i.encoding == encoding_error)
     {
       i.error = unsupported;
       return 1;
@@ -8277,8 +8277,8 @@ VEX_check_encoding (const insn_template
       return 1;
     }
 
-  if (i.vec_encoding == vex_encoding_evex
-      || i.vec_encoding == vex_encoding_evex512)
+  if (i.encoding == encoding_evex
+      || i.encoding == encoding_evex512)
     {
       /* This instruction must be encoded with EVEX prefix.  */
       if (!t->opcode_modifier.evex)
@@ -8292,7 +8292,7 @@ VEX_check_encoding (const insn_template
   if (!t->opcode_modifier.vex)
     {
       /* This instruction template doesn't have VEX prefix.  */
-      if (i.vec_encoding != vex_encoding_default)
+      if (i.encoding != encoding_default)
 	{
 	  i.error = no_vex_encoding;
 	  return 1;
@@ -8967,7 +8967,7 @@ match_template (char mnem_suffix)
 	 Note that the semantics have not been changed.  */
       if (optimize
 	  && !i.no_optimize
-	  && i.vec_encoding != vex_encoding_evex
+	  && i.encoding != encoding_evex
 	  && ((t + 1 < current_templates.end
 	       && !t[1].opcode_modifier.evex
 	       && t[1].opcode_space <= SPACE_0F38
@@ -10157,7 +10157,7 @@ process_operands (void)
       if (dot_insn () && i.reg_operands == 2)
 	{
 	  gas_assert (is_any_vex_encoding (&i.tm)
-		      || i.vec_encoding != vex_encoding_default);
+		      || i.encoding != encoding_default);
 	  i.vex.register_specifier = i.op[i.operands - 1].regs;
 	}
     }
@@ -10167,7 +10167,7 @@ process_operands (void)
 	      == InstanceNone)
     {
       gas_assert (is_any_vex_encoding (&i.tm)
-		  || i.vec_encoding != vex_encoding_default);
+		  || i.encoding != encoding_default);
       i.vex.register_specifier = i.op[i.operands - 1].regs;
     }
 
@@ -10275,7 +10275,7 @@ build_modrm_byte (void)
 	}
       exp->X_add_number |= register_number (i.op[reg_slot].regs)
 			   << (3 + !(i.tm.opcode_modifier.evex
-				     || i.vec_encoding == vex_encoding_evex));
+				     || i.encoding == encoding_evex));
     }
 
   if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
@@ -12548,20 +12548,20 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
     }
 
   if (vex || xop
-      ? i.vec_encoding == vex_encoding_evex
+      ? i.encoding == encoding_evex
       : evex
-	? i.vec_encoding == vex_encoding_vex
-	  || i.vec_encoding == vex_encoding_vex3
-	: i.vec_encoding != vex_encoding_default)
+	? i.encoding == encoding_vex
+	  || i.encoding == encoding_vex3
+	: i.encoding != encoding_default)
     {
       as_bad (_("pseudo-prefix conflicts with encoding specifier"));
       goto bad;
     }
 
-  if (line > end && i.vec_encoding == vex_encoding_default)
-    i.vec_encoding = evex ? vex_encoding_evex : vex_encoding_vex;
+  if (line > end && i.encoding == encoding_default)
+    i.encoding = evex ? encoding_evex : encoding_vex;
 
-  if (i.vec_encoding != vex_encoding_default)
+  if (i.encoding != encoding_default)
     {
       /* Only address size and segment override prefixes are permitted with
          VEX/XOP/EVEX encodings.  */
@@ -12892,20 +12892,20 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	  goto done;
 	}
 
-      /* No need to distinguish vex_encoding_evex and vex_encoding_evex512.  */
-      if (i.vec_encoding == vex_encoding_evex512)
-	i.vec_encoding = vex_encoding_evex;
+      /* No need to distinguish encoding_evex and encoding_evex512.  */
+      if (i.encoding == encoding_evex512)
+	i.encoding = encoding_evex;
 
       /* Are we to emit ModR/M encoding?  */
       if (!i.short_form
 	  && (i.mem_operands
-	      || i.reg_operands > (i.vec_encoding != vex_encoding_default)
+	      || i.reg_operands > (i.encoding != encoding_default)
 	      || i.tm.extension_opcode != None))
 	i.tm.opcode_modifier.modrm = 1;
 
       if (!i.tm.opcode_modifier.modrm
 	  && (i.reg_operands
-	      > i.short_form + 0U + (i.vec_encoding != vex_encoding_default)
+	      > i.short_form + 0U + (i.encoding != encoding_default)
 	      || i.mem_operands))
 	{
 	  as_bad (_("too many register/memory operands"));
@@ -12944,7 +12944,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	    }
 	  /* Fall through.  */
 	case 3:
-	  if (i.vec_encoding != vex_encoding_default)
+	  if (i.encoding != encoding_default)
 	    {
 	      i.tm.opcode_modifier.vexvvvv = 1;
 	      break;
@@ -13000,13 +13000,13 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	      || i.index_reg->reg_type.bitfield.ymmword
 	      || i.index_reg->reg_type.bitfield.zmmword))
 	{
-	  if (i.vec_encoding == vex_encoding_default)
+	  if (i.encoding == encoding_default)
 	    {
 	      as_bad (_("VSIB unavailable with legacy encoding"));
 	      goto done;
 	    }
 
-	  if (i.vec_encoding == vex_encoding_evex
+	  if (i.encoding == encoding_evex
 	      && i.reg_operands > 1)
 	    {
 	      /* We could allow two register operands, encoding the 2nd one in
@@ -13026,7 +13026,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
       for (j = i.imm_operands; j < i.operands; ++j)
 	{
 	  /* Look for 8-bit operands that use old registers.  */
-	  if (i.vec_encoding != vex_encoding_default
+	  if (i.encoding != encoding_default
 	      && flag_code == CODE_64BIT
 	      && i.types[j].bitfield.class == Reg
 	      && i.types[j].bitfield.byte
@@ -13085,7 +13085,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	case  4: combined.bitfield.dword = 1; break;
 	}
 
-      if (i.vec_encoding == vex_encoding_default)
+      if (i.encoding == encoding_default)
 	{
 	  if (flag_code == CODE_64BIT && combined.bitfield.qword)
 	    i.rex |= REX_W;
@@ -13162,7 +13162,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
       if (i.memshift >= 32)
 	i.memshift = 0;
       else if (!evex)
-	i.vec_encoding = vex_encoding_error;
+	i.encoding = encoding_error;
 
       if (i.disp_operands && !optimize_disp (&i.tm))
 	goto done;
@@ -13228,8 +13228,8 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
      potential special casing there.  */
   i.tm.base_opcode |= val;
 
-  if (i.vec_encoding == vex_encoding_error
-      || (i.vec_encoding != vex_encoding_evex
+  if (i.encoding == encoding_error
+      || (i.encoding != encoding_evex
 	  ? i.broadcast.type || i.broadcast.bytes
 	    || i.rounding.type != rc_none
 	    || i.mask.reg
@@ -13340,10 +13340,10 @@ RC_SAE_specifier (const char *pstr)
 	      return NULL;
 	    }
 
-	  if (i.vec_encoding == vex_encoding_default)
-	    i.vec_encoding = vex_encoding_evex512;
-	  else if (i.vec_encoding != vex_encoding_evex
-		   && i.vec_encoding != vex_encoding_evex512)
+	  if (i.encoding == encoding_default)
+	    i.encoding = encoding_evex512;
+	  else if (i.encoding != encoding_evex
+		   && i.encoding != encoding_evex512)
 	    return NULL;
 
 	  i.rounding.type = RC_NamesTable[j].type;
@@ -13405,10 +13405,10 @@ check_VecOperations (char *op_string)
 		}
 	      op_string++;
 
-	      if (i.vec_encoding == vex_encoding_default)
-		i.vec_encoding = vex_encoding_evex;
-	      else if (i.vec_encoding != vex_encoding_evex
-		       && i.vec_encoding != vex_encoding_evex512)
+	      if (i.encoding == encoding_default)
+		i.encoding = encoding_evex;
+	      else if (i.encoding != encoding_evex
+		       && i.encoding != encoding_evex512)
 		goto unknown_vec_op;
 
 	      i.broadcast.type = bcst_type;
@@ -15676,11 +15676,11 @@ static bool check_register (const reg_en
       if (vector_size < VSZ512)
 	return false;
 
-      if (i.vec_encoding == vex_encoding_default)
-	i.vec_encoding = vex_encoding_evex512;
-      else if (i.vec_encoding != vex_encoding_evex
-	       && i.vec_encoding != vex_encoding_evex512)
-	i.vec_encoding = vex_encoding_error;
+      if (i.encoding == encoding_default)
+	i.encoding = encoding_evex512;
+      else if (i.encoding != encoding_evex
+	       && i.encoding != encoding_evex512)
+	i.encoding = encoding_error;
     }
 
   if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
@@ -15706,11 +15706,11 @@ static bool check_register (const reg_en
 	  || flag_code != CODE_64BIT)
 	return false;
 
-      if (i.vec_encoding == vex_encoding_default
-	  || i.vec_encoding == vex_encoding_evex512)
-	i.vec_encoding = vex_encoding_evex;
-      else if (i.vec_encoding != vex_encoding_evex)
-	i.vec_encoding = vex_encoding_error;
+      if (i.encoding == encoding_default
+	  || i.encoding == encoding_evex512)
+	i.encoding = encoding_evex;
+      else if (i.encoding != encoding_evex)
+	i.encoding = encoding_error;
     }
 
   if (r->reg_flags & RegRex2)
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -209,10 +209,10 @@ operatorT i386_operator (const char *nam
 	      || i386_types[j].sz[0] > 8
 	      || (i386_types[j].sz[0] & (i386_types[j].sz[0] - 1)))
 	    return O_illegal;
-	  if (i.vec_encoding == vex_encoding_default)
-	    i.vec_encoding = vex_encoding_evex;
-	  else if (i.vec_encoding != vex_encoding_evex
-		   && i.vec_encoding != vex_encoding_evex512)
+	  if (i.encoding == encoding_default)
+	    i.encoding = encoding_evex;
+	  else if (i.encoding != encoding_evex
+		   && i.encoding != encoding_evex512)
 	    return O_illegal;
 	  if (!i.broadcast.bytes && !i.broadcast.type)
 	    {


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-16  9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
  2024-02-16  9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
@ 2024-02-16  9:58 ` Jan Beulich
  2024-02-18  7:55   ` Cui, Lili
  2024-02-16  9:58 ` [PATCH 3/4] x86/APX: correct .insn opcode space determination when REX2 is needed Jan Beulich
  2024-02-16  9:59 ` [PATCH 4/4] x86/APX: optimize certain XOR and SUB forms Jan Beulich
  3 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2024-02-16  9:58 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu, Lili Cui

Even when an EVEX encoding is available, use of such a prefix ought to
be respected (resulting in an error) rather than ignored. As requested
during review already, introduce a new encoding enumerator to record use
of eGPR-s, and update state transitions accordingly.

The optimize_encoding() change also addresses an internal assembler
error that was previously raised when respective memory operands used
eGPR-s for addressing.

While this results in a change of diagnostic issued for VEX-encoded
insns, the new one is at least no worse than the prior one.
---
Question is whether for the state transitions we want to introduce a
couple of helper functions: check_register() has duplicates each of
what RC_SAE_specifier() and check_VecOperations() also do.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -439,9 +439,6 @@ struct _i386_insn
     /* Prefer the REX2 prefix in encoding.  */
     bool rex2_encoding;
 
-    /* Need to use an Egpr capable encoding (REX2 or EVEX).  */
-    bool has_egpr;
-
     /* Disable instruction size optimization.  */
     bool no_optimize;
 
@@ -451,6 +448,7 @@ struct _i386_insn
 	encoding_default = 0,
 	encoding_vex,
 	encoding_vex3,
+	encoding_egpr, /* REX2 or EVEX.  */
 	encoding_evex,
 	encoding_evex512,
 	encoding_error
@@ -1887,7 +1885,7 @@ static INLINE bool need_evex_encoding (c
 {
   return i.encoding == encoding_evex
 	|| i.encoding == encoding_evex512
-	|| (t->opcode_modifier.vex && i.has_egpr)
+	|| (t->opcode_modifier.vex && i.encoding == encoding_egpr)
 	|| i.mask.reg;
 }
 
@@ -2489,7 +2487,8 @@ static INLINE int
 fits_in_imm4 (offsetT num)
 {
   /* Despite the name, check for imm3 if we're dealing with EVEX.  */
-  return (num & (i.encoding != encoding_evex ? 0xf : 7)) == num;
+  return (num & (i.encoding != encoding_evex
+		 && i.encoding != encoding_egpr ? 0xf : 7)) == num;
 }
 
 static i386_operand_type
@@ -4837,6 +4836,7 @@ optimize_encoding (void)
 	  }
     }
   else if (i.encoding != encoding_evex
+	   && i.encoding != encoding_egpr
 	   && !i.types[0].bitfield.zmmword
 	   && !i.types[1].bitfield.zmmword
 	   && !i.mask.reg
@@ -6839,10 +6839,13 @@ md_assemble (char *line)
   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
     optimize_encoding ();
 
-  /* Past optimization there's no need to distinguish encoding_evex and
-     encoding_evex512 anymore.  */
+  /* Past optimization there's no need to distinguish encoding_evex,
+     encoding_evex512, and encoding_egpr anymore.  */
   if (i.encoding == encoding_evex512)
     i.encoding = encoding_evex;
+  else if (i.encoding == encoding_egpr)
+    i.encoding = is_any_vex_encoding (&i.tm) ? encoding_evex
+					     : encoding_default;
 
   if (use_unaligned_vector_move)
     encode_with_unaligned_vector_move ();
@@ -8277,27 +8280,42 @@ VEX_check_encoding (const insn_template
       return 1;
     }
 
-  if (i.encoding == encoding_evex
-      || i.encoding == encoding_evex512)
+  switch (i.encoding)
     {
+    case encoding_default:
+      break;
+
+    case encoding_vex:
+    case encoding_vex3:
+      /* This instruction must be encoded with VEX prefix.  */
+      if (!t->opcode_modifier.vex)
+	{
+	  i.error = no_vex_encoding;
+	  return 1;
+	}
+      break;
+
+    case encoding_evex:
+    case encoding_evex512:
       /* This instruction must be encoded with EVEX prefix.  */
       if (!t->opcode_modifier.evex)
 	{
 	  i.error = no_evex_encoding;
 	  return 1;
 	}
-      return 0;
-    }
+      break;
 
-  if (!t->opcode_modifier.vex)
-    {
-      /* This instruction template doesn't have VEX prefix.  */
-      if (i.encoding != encoding_default)
+    case encoding_egpr:
+      /* This instruction must be encoded with REX2 or EVEX prefix.  */
+      if (t->opcode_modifier.vex && !t->opcode_modifier.evex)
 	{
-	  i.error = no_vex_encoding;
+	  i.error = no_evex_encoding;
 	  return 1;
 	}
-      return 0;
+      break;
+
+    default:
+      abort ();
     }
 
   return 0;
@@ -12896,6 +12914,19 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
       if (i.encoding == encoding_evex512)
 	i.encoding = encoding_evex;
 
+      if (i.encoding == encoding_egpr)
+	{
+	  if (vex || xop)
+	    {
+	      as_bad (_("eGPR use conflicts with encoding specifier"));
+	      goto done;
+	    }
+	  if (evex)
+	    i.encoding = encoding_evex;
+	  else
+	    i.encoding = encoding_default;
+	}
+
       /* Are we to emit ModR/M encoding?  */
       if (!i.short_form
 	  && (i.mem_operands
@@ -13340,11 +13371,18 @@ RC_SAE_specifier (const char *pstr)
 	      return NULL;
 	    }
 
-	  if (i.encoding == encoding_default)
-	    i.encoding = encoding_evex512;
-	  else if (i.encoding != encoding_evex
-		   && i.encoding != encoding_evex512)
-	    return NULL;
+	  switch (i.encoding)
+	    {
+	    case encoding_default:
+	    case encoding_egpr:
+	      i.encoding = encoding_evex512;
+	      break;
+	    case encoding_evex:
+	    case encoding_evex512:
+	      break;
+	    default:
+	      return NULL;
+	    }
 
 	  i.rounding.type = RC_NamesTable[j].type;
 
@@ -13405,11 +13443,18 @@ check_VecOperations (char *op_string)
 		}
 	      op_string++;
 
-	      if (i.encoding == encoding_default)
-		i.encoding = encoding_evex;
-	      else if (i.encoding != encoding_evex
-		       && i.encoding != encoding_evex512)
-		goto unknown_vec_op;
+	      switch (i.encoding)
+		{
+		case encoding_default:
+		case encoding_egpr:
+		  i.encoding = encoding_evex;
+		  break;
+		case encoding_evex:
+		case encoding_evex512:
+		  break;
+		default:
+		  goto unknown_vec_op;
+		}
 
 	      i.broadcast.type = bcst_type;
 	      i.broadcast.operand = this_operand;
@@ -15676,11 +15721,19 @@ static bool check_register (const reg_en
       if (vector_size < VSZ512)
 	return false;
 
-      if (i.encoding == encoding_default)
-	i.encoding = encoding_evex512;
-      else if (i.encoding != encoding_evex
-	       && i.encoding != encoding_evex512)
-	i.encoding = encoding_error;
+      switch (i.encoding)
+	{
+	case encoding_default:
+	case encoding_egpr:
+	  i.encoding = encoding_evex512;
+	  break;
+	case encoding_evex:
+	case encoding_evex512:
+	  break;
+	default:
+	  i.encoding = encoding_error;
+	  break;
+	}
     }
 
   if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
@@ -15706,11 +15759,19 @@ static bool check_register (const reg_en
 	  || flag_code != CODE_64BIT)
 	return false;
 
-      if (i.encoding == encoding_default
-	  || i.encoding == encoding_evex512)
-	i.encoding = encoding_evex;
-      else if (i.encoding != encoding_evex)
-	i.encoding = encoding_error;
+      switch (i.encoding)
+	{
+	  case encoding_default:
+	  case encoding_egpr:
+	  case encoding_evex512:
+	    i.encoding = encoding_evex;
+	    break;
+	  case encoding_evex:
+	    break;
+	  default:
+	    i.encoding = encoding_error;
+	    break;
+	}
     }
 
   if (r->reg_flags & RegRex2)
@@ -15719,7 +15780,19 @@ static bool check_register (const reg_en
 	  || flag_code != CODE_64BIT)
 	return false;
 
-      i.has_egpr = true;
+      switch (i.encoding)
+	{
+	case encoding_default:
+	  i.encoding = encoding_egpr;
+	  break;
+	case encoding_egpr:
+	case encoding_evex:
+	case encoding_evex512:
+	  break;
+	default:
+	  i.encoding = encoding_error;
+	  break;
+	}
     }
 
   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -209,11 +209,18 @@ operatorT i386_operator (const char *nam
 	      || i386_types[j].sz[0] > 8
 	      || (i386_types[j].sz[0] & (i386_types[j].sz[0] - 1)))
 	    return O_illegal;
-	  if (i.encoding == encoding_default)
-	    i.encoding = encoding_evex;
-	  else if (i.encoding != encoding_evex
-		   && i.encoding != encoding_evex512)
-	    return O_illegal;
+	  switch (i.encoding)
+	    {
+	    case encoding_default:
+	    case encoding_egpr:
+	      i.encoding = encoding_evex;
+	      break;
+	    case encoding_evex:
+	    case encoding_evex512:
+	      break;
+	    default:
+	      return O_illegal;
+	    }
 	  if (!i.broadcast.bytes && !i.broadcast.type)
 	    {
 	      i.broadcast.bytes = i386_types[j].sz[0];
--- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.l
+++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.l
@@ -101,102 +101,109 @@
 .*:108: Error: extended GPR cannot be used as base/index for `gf2p8affineinvqb'
 .*:109: Error: extended GPR cannot be used as base/index for `gf2p8affineqb'
 .*:110: Error: extended GPR cannot be used as base/index for `gf2p8mulb'
-.*:112: Error: extended GPR cannot be used as base/index for `vaesimc'
-.*:113: Error: extended GPR cannot be used as base/index for `vaeskeygenassist'
-.*:114: Error: extended GPR cannot be used as base/index for `vblendpd'
-.*:115: Error: extended GPR cannot be used as base/index for `vblendpd'
-.*:116: Error: extended GPR cannot be used as base/index for `vblendps'
-.*:117: Error: extended GPR cannot be used as base/index for `vblendps'
-.*:118: Error: extended GPR cannot be used as base/index for `vblendvpd'
-.*:119: Error: extended GPR cannot be used as base/index for `vblendvpd'
-.*:120: Error: extended GPR cannot be used as base/index for `vblendvps'
-.*:121: Error: extended GPR cannot be used as base/index for `vblendvps'
-.*:122: Error: extended GPR cannot be used as base/index for `vdppd'
-.*:123: Error: extended GPR cannot be used as base/index for `vdpps'
-.*:124: Error: extended GPR cannot be used as base/index for `vdpps'
-.*:125: Error: extended GPR cannot be used as base/index for `vhaddpd'
-.*:126: Error: extended GPR cannot be used as base/index for `vhaddpd'
-.*:127: Error: extended GPR cannot be used as base/index for `vhsubps'
-.*:128: Error: extended GPR cannot be used as base/index for `vhsubps'
-.*:129: Error: extended GPR cannot be used as base/index for `vlddqu'
-.*:130: Error: extended GPR cannot be used as base/index for `vlddqu'
-.*:131: Error: extended GPR cannot be used as base/index for `vldmxcsr'
-.*:132: Error: extended GPR cannot be used as base/index for `vmaskmovpd'
-.*:133: Error: extended GPR cannot be used as base/index for `vmaskmovpd'
-.*:134: Error: extended GPR cannot be used as base/index for `vmaskmovpd'
-.*:135: Error: extended GPR cannot be used as base/index for `vmaskmovpd'
-.*:136: Error: extended GPR cannot be used as base/index for `vmaskmovps'
-.*:137: Error: extended GPR cannot be used as base/index for `vmaskmovps'
-.*:138: Error: extended GPR cannot be used as base/index for `vmaskmovps'
-.*:139: Error: extended GPR cannot be used as base/index for `vmaskmovps'
-.*:140: Error: register type mismatch for `vmovmskpd'
-.*:141: Error: register type mismatch for `vmovmskpd'
-.*:142: Error: register type mismatch for `vmovmskps'
-.*:143: Error: register type mismatch for `vmovmskps'
-.*:144: Error: extended GPR cannot be used as base/index for `vpblendd'
-.*:145: Error: extended GPR cannot be used as base/index for `vpblendd'
-.*:146: Error: extended GPR cannot be used as base/index for `vpblendvb'
-.*:147: Error: extended GPR cannot be used as base/index for `vpblendvb'
-.*:148: Error: extended GPR cannot be used as base/index for `vpblendw'
-.*:149: Error: extended GPR cannot be used as base/index for `vpblendw'
-.*:150: Error: extended GPR cannot be used as base/index for `vpcmpeqb'
-.*:151: Error: extended GPR cannot be used as base/index for `vpcmpeqd'
-.*:152: Error: extended GPR cannot be used as base/index for `vpcmpeqq'
-.*:153: Error: extended GPR cannot be used as base/index for `vpcmpeqw'
-.*:154: Error: extended GPR cannot be used as base/index for `vpcmpestri'
-.*:155: Error: extended GPR cannot be used as base/index for `vpcmpestrm'
-.*:156: Error: extended GPR cannot be used as base/index for `vpcmpgtb'
-.*:157: Error: extended GPR cannot be used as base/index for `vpcmpgtd'
-.*:158: Error: extended GPR cannot be used as base/index for `vpcmpgtq'
-.*:159: Error: extended GPR cannot be used as base/index for `vpcmpgtw'
-.*:160: Error: extended GPR cannot be used as base/index for `vpcmpistri'
-.*:161: Error: extended GPR cannot be used as base/index for `vpcmpistrm'
-.*:162: Error: extended GPR cannot be used as base/index for `vperm2f128'
-.*:163: Error: extended GPR cannot be used as base/index for `vperm2i128'
-.*:164: Error: extended GPR cannot be used as base/index for `vphaddd'
-.*:165: Error: extended GPR cannot be used as base/index for `vphaddd'
-.*:166: Error: extended GPR cannot be used as base/index for `vphaddsw'
-.*:167: Error: extended GPR cannot be used as base/index for `vphaddsw'
-.*:168: Error: extended GPR cannot be used as base/index for `vphaddw'
-.*:169: Error: extended GPR cannot be used as base/index for `vphaddw'
-.*:170: Error: extended GPR cannot be used as base/index for `vphminposuw'
-.*:171: Error: extended GPR cannot be used as base/index for `vphsubd'
-.*:172: Error: extended GPR cannot be used as base/index for `vphsubd'
-.*:173: Error: extended GPR cannot be used as base/index for `vphsubsw'
-.*:174: Error: extended GPR cannot be used as base/index for `vphsubsw'
-.*:175: Error: extended GPR cannot be used as base/index for `vphsubw'
-.*:176: Error: extended GPR cannot be used as base/index for `vphsubw'
-.*:177: Error: extended GPR cannot be used as base/index for `vpmaskmovd'
-.*:178: Error: extended GPR cannot be used as base/index for `vpmaskmovd'
-.*:179: Error: extended GPR cannot be used as base/index for `vpmaskmovd'
-.*:180: Error: extended GPR cannot be used as base/index for `vpmaskmovd'
-.*:181: Error: extended GPR cannot be used as base/index for `vpmaskmovq'
-.*:182: Error: extended GPR cannot be used as base/index for `vpmaskmovq'
-.*:183: Error: extended GPR cannot be used as base/index for `vpmaskmovq'
-.*:184: Error: extended GPR cannot be used as base/index for `vpmaskmovq'
-.*:185: Error: register type mismatch for `vpmovmskb'
-.*:186: Error: register type mismatch for `vpmovmskb'
-.*:187: Error: extended GPR cannot be used as base/index for `vpsignb'
-.*:188: Error: extended GPR cannot be used as base/index for `vpsignb'
-.*:189: Error: extended GPR cannot be used as base/index for `vpsignd'
-.*:190: Error: extended GPR cannot be used as base/index for `vpsignd'
-.*:191: Error: extended GPR cannot be used as base/index for `vpsignw'
-.*:192: Error: extended GPR cannot be used as base/index for `vpsignw'
-.*:193: Error: extended GPR cannot be used as base/index for `vptest'
-.*:194: Error: extended GPR cannot be used as base/index for `vptest'
-.*:195: Error: extended GPR cannot be used as base/index for `vrcpps'
-.*:196: Error: extended GPR cannot be used as base/index for `vrcpps'
-.*:197: Error: extended GPR cannot be used as base/index for `vrcpss'
+.*:112: Error: no EVEX encoding for `vaesimc'
+.*:113: Error: no EVEX encoding for `vaeskeygenassist'
+.*:114: Error: no EVEX encoding for `vblendpd'
+.*:115: Error: no EVEX encoding for `vblendpd'
+.*:116: Error: no EVEX encoding for `vblendps'
+.*:117: Error: no EVEX encoding for `vblendps'
+.*:118: Error: no EVEX encoding for `vblendvpd'
+.*:119: Error: no EVEX encoding for `vblendvpd'
+.*:120: Error: no EVEX encoding for `vblendvps'
+.*:121: Error: no EVEX encoding for `vblendvps'
+.*:122: Error: no EVEX encoding for `vdppd'
+.*:123: Error: no EVEX encoding for `vdpps'
+.*:124: Error: no EVEX encoding for `vdpps'
+.*:125: Error: no EVEX encoding for `vhaddpd'
+.*:126: Error: no EVEX encoding for `vhaddpd'
+.*:127: Error: no EVEX encoding for `vhsubps'
+.*:128: Error: no EVEX encoding for `vhsubps'
+.*:129: Error: no EVEX encoding for `vlddqu'
+.*:130: Error: no EVEX encoding for `vlddqu'
+.*:131: Error: no EVEX encoding for `vldmxcsr'
+.*:132: Error: no EVEX encoding for `vmaskmovpd'
+.*:133: Error: no EVEX encoding for `vmaskmovpd'
+.*:134: Error: no EVEX encoding for `vmaskmovpd'
+.*:135: Error: no EVEX encoding for `vmaskmovpd'
+.*:136: Error: no EVEX encoding for `vmaskmovps'
+.*:137: Error: no EVEX encoding for `vmaskmovps'
+.*:138: Error: no EVEX encoding for `vmaskmovps'
+.*:139: Error: no EVEX encoding for `vmaskmovps'
+.*:140: Error: no EVEX encoding for `vmovmskpd'
+.*:141: Error: no EVEX encoding for `vmovmskpd'
+.*:142: Error: no EVEX encoding for `vmovmskps'
+.*:143: Error: no EVEX encoding for `vmovmskps'
+.*:144: Error: no EVEX encoding for `vpblendd'
+.*:145: Error: no EVEX encoding for `vpblendd'
+.*:146: Error: no EVEX encoding for `vpblendvb'
+.*:147: Error: no EVEX encoding for `vpblendvb'
+.*:148: Error: no EVEX encoding for `vpblendw'
+.*:149: Error: no EVEX encoding for `vpblendw'
+.*:150: Error: no EVEX encoding for `vpcmpeqb'
+.*:151: Error: no EVEX encoding for `vpcmpeqd'
+.*:152: Error: no EVEX encoding for `vpcmpeqq'
+.*:153: Error: no EVEX encoding for `vpcmpeqw'
+.*:154: Error: no EVEX encoding for `vpcmpestri'
+.*:155: Error: no EVEX encoding for `vpcmpestrm'
+.*:156: Error: no EVEX encoding for `vpcmpgtb'
+.*:157: Error: no EVEX encoding for `vpcmpgtd'
+.*:158: Error: no EVEX encoding for `vpcmpgtq'
+.*:159: Error: no EVEX encoding for `vpcmpgtw'
+.*:160: Error: no EVEX encoding for `vpcmpistri'
+.*:161: Error: no EVEX encoding for `vpcmpistrm'
+.*:162: Error: no EVEX encoding for `vperm2f128'
+.*:163: Error: no EVEX encoding for `vperm2i128'
+.*:164: Error: no EVEX encoding for `vphaddd'
+.*:165: Error: no EVEX encoding for `vphaddd'
+.*:166: Error: no EVEX encoding for `vphaddsw'
+.*:167: Error: no EVEX encoding for `vphaddsw'
+.*:168: Error: no EVEX encoding for `vphaddw'
+.*:169: Error: no EVEX encoding for `vphaddw'
+.*:170: Error: no EVEX encoding for `vphminposuw'
+.*:171: Error: no EVEX encoding for `vphsubd'
+.*:172: Error: no EVEX encoding for `vphsubd'
+.*:173: Error: no EVEX encoding for `vphsubsw'
+.*:174: Error: no EVEX encoding for `vphsubsw'
+.*:175: Error: no EVEX encoding for `vphsubw'
+.*:176: Error: no EVEX encoding for `vphsubw'
+.*:177: Error: no EVEX encoding for `vpmaskmovd'
+.*:178: Error: no EVEX encoding for `vpmaskmovd'
+.*:179: Error: no EVEX encoding for `vpmaskmovd'
+.*:180: Error: no EVEX encoding for `vpmaskmovd'
+.*:181: Error: no EVEX encoding for `vpmaskmovq'
+.*:182: Error: no EVEX encoding for `vpmaskmovq'
+.*:183: Error: no EVEX encoding for `vpmaskmovq'
+.*:184: Error: no EVEX encoding for `vpmaskmovq'
+.*:185: Error: no EVEX encoding for `vpmovmskb'
+.*:186: Error: no EVEX encoding for `vpmovmskb'
+.*:187: Error: no EVEX encoding for `vpsignb'
+.*:188: Error: no EVEX encoding for `vpsignb'
+.*:189: Error: no EVEX encoding for `vpsignd'
+.*:190: Error: no EVEX encoding for `vpsignd'
+.*:191: Error: no EVEX encoding for `vpsignw'
+.*:192: Error: no EVEX encoding for `vpsignw'
+.*:193: Error: no EVEX encoding for `vptest'
+.*:194: Error: no EVEX encoding for `vptest'
+.*:195: Error: no EVEX encoding for `vrcpps'
+.*:196: Error: no EVEX encoding for `vrcpps'
+.*:197: Error: no EVEX encoding for `vrcpss'
 .*:198: Error: .* 4 bits for `vroundpd'
 .*:199: Error: .* 4 bits for `vroundps'
 .*:200: Error: .* 4 bits for `vroundsd'
 .*:201: Error: .* 4 bits for `vroundss'
-.*:202: Error: extended GPR cannot be used as base/index for `vrsqrtps'
-.*:203: Error: extended GPR cannot be used as base/index for `vrsqrtps'
-.*:204: Error: extended GPR cannot be used as base/index for `vrsqrtss'
-.*:205: Error: extended GPR cannot be used as base/index for `vstmxcsr'
-.*:206: Error: extended GPR cannot be used as base/index for `vtestpd'
-.*:207: Error: extended GPR cannot be used as base/index for `vtestpd'
-.*:208: Error: extended GPR cannot be used as base/index for `vtestps'
-.*:209: Error: extended GPR cannot be used as base/index for `vtestps'
+.*:202: Error: no EVEX encoding for `vrsqrtps'
+.*:203: Error: no EVEX encoding for `vrsqrtps'
+.*:204: Error: no EVEX encoding for `vrsqrtss'
+.*:205: Error: no EVEX encoding for `vstmxcsr'
+.*:206: Error: no EVEX encoding for `vtestpd'
+.*:207: Error: no EVEX encoding for `vtestpd'
+.*:208: Error: no EVEX encoding for `vtestps'
+.*:209: Error: no EVEX encoding for `vtestps'
+.*:211: Error: no VEX/XOP encoding for `and'
+.*:212: Error: no VEX/XOP encoding for `and'
+.*:213: Error: .* `and'
+.*:214: Error: no VEX/XOP encoding for `and'
+.*:215: Error: no VEX/XOP encoding for `and'
+.*:216: Error: .* `and'
+.*:219: Error: .* `andn'
 #pass
--- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
@@ -207,3 +207,13 @@
 	vtestpd (%r27),%ymm6
 	vtestps (%r27),%xmm6
 	vtestps (%r27),%ymm6
+# {vex}
+	{vex} and %eax, %eax
+	{vex} and %r8, %r8
+	{vex} and %r16, %r16
+	{vex} and %eax, %eax, %eax
+	{vex} and %r8, %r8, %r8
+	{vex} and %r16, %r16, %r16
+	{vex} andn %eax, %eax, %eax
+	{vex} andn %r8, %r8, %r8
+	{vex} andn %r16, %r16, %r16


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/4] x86/APX: correct .insn opcode space determination when REX2 is needed
  2024-02-16  9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
  2024-02-16  9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
  2024-02-16  9:58 ` [PATCH 2/4] x86/APX: respect {vex}/{vex3} Jan Beulich
@ 2024-02-16  9:58 ` Jan Beulich
  2024-02-16  9:59 ` [PATCH 4/4] x86/APX: optimize certain XOR and SUB forms Jan Beulich
  3 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2024-02-16  9:58 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu, Lili Cui

In this case spaces 0f38 and 0f3a may not be put in place. To achieve
the intended effect, operand parsing (but not operand processing) needs
pulling ahead, so we know whether eGRP-s are in use.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -12849,13 +12849,43 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	}
     }
 
+  /* Parse operands, if any, before evaluating encoding space.  */
+  if (*line == ',')
+    {
+      i.memshift = -1;
+
+      ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
+      this_operand = -1;
+      if (!ptr)
+	goto bad;
+      line = ptr;
+
+      if (!i.operands)
+	{
+	  as_bad (_("expecting operand after ','; got nothing"));
+	  goto done;
+	}
+
+      if (i.mem_operands > 1)
+	{
+	  as_bad (_("too many memory references for `%s'"),
+		  &i386_mnemonics[MN__insn]);
+	  goto done;
+	}
+
+      /* No need to distinguish encoding_evex and encoding_evex512.  */
+      if (i.encoding == encoding_evex512)
+	i.encoding = encoding_evex;
+    }
+
   /* Trim off encoding space.  */
   if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
     {
       uint8_t byte = val >> ((--j - 1) * 8);
 
       i.insn_opcode_space = SPACE_0F;
-      switch (byte & -(j > 1))
+      switch (byte & -(j > 1 && !i.rex2_encoding
+		       && (i.encoding != encoding_egpr || evex)))
 	{
 	case 0x38:
 	  i.insn_opcode_space = SPACE_0F38;
@@ -12878,42 +12908,17 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
   if (j > 2)
     {
       as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
-      goto bad;
+      goto done;
     }
   i.opcode_length = j;
 
   /* Handle operands, if any.  */
-  if (*line == ',')
+  if (i.operands)
     {
       i386_operand_type combined;
       expressionS *disp_exp = NULL;
       bool changed;
 
-      i.memshift = -1;
-
-      ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
-      this_operand = -1;
-      if (!ptr)
-	goto bad;
-      line = ptr;
-
-      if (!i.operands)
-	{
-	  as_bad (_("expecting operand after ','; got nothing"));
-	  goto done;
-	}
-
-      if (i.mem_operands > 1)
-	{
-	  as_bad (_("too many memory references for `%s'"),
-		  &i386_mnemonics[MN__insn]);
-	  goto done;
-	}
-
-      /* No need to distinguish encoding_evex and encoding_evex512.  */
-      if (i.encoding == encoding_evex512)
-	i.encoding = encoding_evex;
-
       if (i.encoding == encoding_egpr)
 	{
 	  if (vex || xop)
--- /dev/null
+++ b/gas/testsuite/gas/i386/insn-rex2.l
@@ -0,0 +1,38 @@
+[ 	]*[0-9]+[ 	]+\.text
+[ 	]*[0-9]+[ 	]+insn_rex2:
+[ 	]*[0-9]+ .... D58001C0[ 	]+\.insn \{rex2\} 0x0f01/0, %eax
+[ 	]*[0-9]+ .... D58038C0[ 	]+\.insn \{rex2\} 0x0f38/0, %eax
+[ 	]*[0-9]+ .... D5803801[ 	]+\.insn \{rex2\} 0x0f3801/0, %eax
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5803901[ 	]+\.insn \{rex2\} 0x0f3901/0, %eax
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5803A01[ 	]+\.insn \{rex2\} 0x0f3a01/0, \$0xCC, %eax
+[ 	]*[0-9]+ +C0CC
+[ 	]*[0-9]+[ 	]+
+[ 	]*[0-9]+ .... D58801C0[ 	]+\.insn \{rex2\} 0x0f01/0, %rax
+[ 	]*[0-9]+ .... D58838C0[ 	]+\.insn \{rex2\} 0x0f38/0, %rax
+[ 	]*[0-9]+ .... D5883801[ 	]+\.insn \{rex2\} 0x0f3801/0, %rax
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5883901[ 	]+\.insn \{rex2\} 0x0f3901/0, %rax
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5883A01[ 	]+\.insn \{rex2\} 0x0f3a01/0, \$0xCC, %rax
+[ 	]*[0-9]+ +C0CC
+[ 	]*[0-9]+[ 	]+
+[ 	]*[0-9]+ .... D58901C0[ 	]+\.insn \{rex2\} 0x0f01/0, %r8
+[ 	]*[0-9]+ .... D58938C0[ 	]+\.insn \{rex2\} 0x0f38/0, %r8
+[ 	]*[0-9]+ .... D5893801[ 	]+\.insn \{rex2\} 0x0f3801/0, %r8
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5893901[ 	]+\.insn \{rex2\} 0x0f3901/0, %r8
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5893A01[ 	]+\.insn \{rex2\} 0x0f3a01/0, \$0xCC, %r8
+[ 	]*[0-9]+ +C0CC
+[ 	]*[0-9]+[ 	]+
+[ 	]*[0-9]+ .... D59801C0[ 	]+\.insn 0x0f01/0, %r16
+[ 	]*[0-9]+ .... D59838C0[ 	]+\.insn 0x0f38/0, %r16
+[ 	]*[0-9]+ .... D5983801[ 	]+\.insn 0x0f3801/0, %r16
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5983901[ 	]+\.insn 0x0f3901/0, %r16
+[ 	]*[0-9]+ +C0
+[ 	]*[0-9]+ .... D5983A01[ 	]+\.insn 0x0f3a01/0, \$0xCC, %r16
+[ 	]*[0-9]+[ 	]+C0CC
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/insn-rex2.s
@@ -0,0 +1,25 @@
+	.text
+insn_rex2:
+	.insn {rex2} 0x0f01/0, %eax
+	.insn {rex2} 0x0f38/0, %eax
+	.insn {rex2} 0x0f3801/0, %eax
+	.insn {rex2} 0x0f3901/0, %eax
+	.insn {rex2} 0x0f3a01/0, $0xCC, %eax
+
+	.insn {rex2} 0x0f01/0, %rax
+	.insn {rex2} 0x0f38/0, %rax
+	.insn {rex2} 0x0f3801/0, %rax
+	.insn {rex2} 0x0f3901/0, %rax
+	.insn {rex2} 0x0f3a01/0, $0xCC, %rax
+
+	.insn {rex2} 0x0f01/0, %r8
+	.insn {rex2} 0x0f38/0, %r8
+	.insn {rex2} 0x0f3801/0, %r8
+	.insn {rex2} 0x0f3901/0, %r8
+	.insn {rex2} 0x0f3a01/0, $0xCC, %r8
+
+	.insn 0x0f01/0, %r16
+	.insn 0x0f38/0, %r16
+	.insn 0x0f3801/0, %r16
+	.insn 0x0f3901/0, %r16
+	.insn 0x0f3a01/0, $0xCC, %r16
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -126,6 +126,7 @@ run_dump_test "x86-64-sysenter-mixed"
 run_dump_test "x86-64-sysenter-amd"
 run_list_test "x86-64-sysenter-amd" "-mamd64"
 run_dump_test "insn-64"
+run_list_test "insn-rex2" "-aln"
 run_dump_test "noreg64"
 run_list_test "noreg64"
 run_dump_test "noreg64-data16"


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/4] x86/APX: optimize certain XOR and SUB forms
  2024-02-16  9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
                   ` (2 preceding siblings ...)
  2024-02-16  9:58 ` [PATCH 3/4] x86/APX: correct .insn opcode space determination when REX2 is needed Jan Beulich
@ 2024-02-16  9:59 ` Jan Beulich
  3 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2024-02-16  9:59 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu, Lili Cui

While most logic in optimize_encoding() is already covering APX by way
of the earlier NDD->REX2 conversion, there's a remaining set of cases
which wants handling separately.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4693,6 +4693,34 @@ optimize_encoding (void)
 	    }
 	}
     }
+  else if (i.reg_operands == 3
+	   && i.op[0].regs == i.op[1].regs
+	   && i.encoding != encoding_evex
+	   && (i.tm.mnem_off == MN_xor
+	       || i.tm.mnem_off == MN_sub))
+    {
+      /* Optimize: -O:
+	   xorb %rNb, %rNb, %rMb  -> xorl %rMd, %rMd
+	   xorw %rNw, %rNw, %rMw  -> xorl %rMd, %rMd
+	   xorl %rNd, %rNd, %rMd  -> xorl %rMd, %rMd
+	   xorq %rN,  %rN,  %rM   -> xorl %rMd, %rMd
+	   subb %rNb, %rNb, %rMb  -> subl %rMd, %rMd
+	   subw %rNw, %rNw, %rMw  -> subl %rMd, %rMd
+	   subl %rNd, %rNd, %rMd  -> subl %rMd, %rMd
+	   subq %rN,  %rN,  %rM   -> subl %rMd, %rMd
+        */
+      i.tm.opcode_space = SPACE_BASE;
+      i.tm.opcode_modifier.evex = 0;
+      i.tm.opcode_modifier.size = SIZE32;
+      i.types[0].bitfield.byte = 0;
+      i.types[0].bitfield.word = 0;
+      i.types[0].bitfield.dword = 1;
+      i.types[0].bitfield.qword = 0;
+      i.op[0].regs = i.op[2].regs;
+      i.types[1] = i.types[0];
+      i.op[1].regs = i.op[2].regs;
+      i.reg_operands = 2;
+    }
   else if (optimize > 1
 	   && !optimize_for_space
 	   && i.reg_operands == 2
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.d
@@ -71,4 +71,28 @@ Disassembly of section .text:
  +[a-f0-9]+:	48 0f ba f0 1f       	btr    \$0x1f,%rax
  +[a-f0-9]+:	66 0f ba e8 0f       	bts    \$0xf,%ax
  +[a-f0-9]+:	48 0f ba e8 1f       	bts    \$0x1f,%rax
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	48 31 d1             	xor    %rdx,%rcx
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	48 29 d1             	sub    %rdx,%rcx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	d5 58 31 d1          	xor    %r18,%r17
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
+ +[a-f0-9]+:	d5 58 29 d1          	sub    %r18,%r17
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	62 f4 75 18 31 d1    	xor    %dx,%cx,%cx
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	62 f4 75 18 29 d1    	sub    %dx,%cx,%cx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	62 ec 74 10 30 d1    	xor    %r18b,%r17b,%r17b
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
+ +[a-f0-9]+:	62 ec 74 10 28 d1    	sub    %r18b,%r17b,%r17b
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
 #pass
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.s
@@ -65,3 +65,27 @@ _start:
 	btr	$31, %rax
 	bts	$15, %ax
 	bts	$31, %rax
+	xor	%rcx, %rcx, %rcx
+	xor	%rdx, %rcx, %rcx
+	xor	%rdx, %rdx, %rcx
+	sub	%rcx, %rcx, %rcx
+	sub	%rdx, %rcx, %rcx
+	sub	%rdx, %rdx, %rcx
+	xor	%r17, %r17, %r17
+	xor	%r18, %r17, %r17
+	xor	%r18, %r18, %r17
+	sub	%r17, %r17, %r17
+	sub	%r18, %r17, %r17
+	sub	%r18, %r18, %r17
+	xor	%cx, %cx, %cx
+	xor	%dx, %cx, %cx
+	xor	%dx, %dx, %cx
+	sub	%cx, %cx, %cx
+	sub	%dx, %cx, %cx
+	sub	%dx, %dx, %cx
+	xor	%r17b, %r17b, %r17b
+	xor	%r18b, %r17b, %r17b
+	xor	%r18b, %r18b, %r17b
+	sub	%r17b, %r17b, %r17b
+	sub	%r18b, %r17b, %r17b
+	sub	%r18b, %r18b, %r17b
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -320,7 +320,7 @@ inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf
 inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
 inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 
-sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
+sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
 sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
@@ -366,7 +366,7 @@ or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm3
 or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
 or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 
-xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
 xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }


^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 1/4] x86: rename vec_encoding and vex_encoding_*
  2024-02-16  9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
@ 2024-02-18  5:59   ` Cui, Lili
  2024-02-19  7:54     ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: Cui, Lili @ 2024-02-18  5:59 UTC (permalink / raw)
  To: Beulich, Jan, Binutils; +Cc: H.J. Lu

> Even with just VEX these weren't limited to vector insns. With APX the set of
> non-vector ones covered has greatly increased. Drop the vec_ prefix. Also drop
> the vex_ ones off of the enumerators, as they weren't appropriate anyway:
> Should have been vec_ then, too.
> 
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -445,16 +445,16 @@ struct _i386_insn
>      /* Disable instruction size optimization.  */
>      bool no_optimize;
> 
> -    /* How to encode vector instructions.  */
> +    /* How to encode instructions.  */
>      enum
>        {
> -	vex_encoding_default = 0,
> -	vex_encoding_vex,
> -	vex_encoding_vex3,
> -	vex_encoding_evex,
> -	vex_encoding_evex512,
> -	vex_encoding_error
> -      } vec_encoding;
> +	encoding_default = 0,
> +	encoding_vex,
> +	encoding_vex3,
> +	encoding_evex,
> +	encoding_evex512,

"encoding_evex512" seems redundant, duplicating with encoding_evex, do you know the reason for keeping it? Could you add some comments for " encoding_evex512", thanks.

Lili.

> +	encoding_error
> +      } encoding;
> 
>      /* REP prefix.  */
>      const char *rep_prefix;
> @@ -1885,8 +1885,8 @@ static const i386_cpu_flags avx512 = CPU
> 
>  static INLINE bool need_evex_encoding (const insn_template *t)  {
> -  return i.vec_encoding == vex_encoding_evex
> -	|| i.vec_encoding == vex_encoding_evex512
> +  return i.encoding == encoding_evex
> +	|| i.encoding == encoding_evex512
>  	|| (t->opcode_modifier.vex && i.has_egpr)
>  	|| i.mask.reg;
>  }
> @@ -2489,7 +2489,7 @@ static INLINE int
>  fits_in_imm4 (offsetT num)
>  {
>    /* Despite the name, check for imm3 if we're dealing with EVEX.  */
> -  return (num & (i.vec_encoding != vex_encoding_evex ? 0xf : 7)) == num;
> +  return (num & (i.encoding != encoding_evex ? 0xf : 7)) == num;
>  }
> 
>  static i386_operand_type
> @@ -3800,7 +3800,7 @@ build_vex_prefix (const insn_template *t
>    /* Use 2-byte VEX prefix by swapping destination and source operand
>       if there are more than 1 register operand.  */
>    if (i.reg_operands > 1
> -      && i.vec_encoding != vex_encoding_vex3
> +      && i.encoding != encoding_vex3
>        && i.dir_encoding == dir_encoding_default
>        && i.operands == i.reg_operands
>        && operand_type_equal (&i.types[0], &i.types[i.operands - 1]) @@ -
> 3829,7 +3829,7 @@ build_vex_prefix (const insn_template *t
>    /* Use 2-byte VEX prefix by swapping commutative source operands if there
>       are no memory operands and at least 3 register ones.  */
>    if (i.reg_operands >= 3
> -      && i.vec_encoding != vex_encoding_vex3
> +      && i.encoding != encoding_vex3
>        && i.reg_operands == i.operands - i.imm_operands
>        && i.tm.opcode_modifier.vex
>        && i.tm.opcode_modifier.commutative @@ -3892,7 +3892,7 @@
> build_vex_prefix (const insn_template *t
> 
>    /* Use 2-byte VEX prefix if possible.  */
>    if (w == 0
> -      && i.vec_encoding != vex_encoding_vex3
> +      && i.encoding != encoding_vex3
>        && i.tm.opcode_space == SPACE_0F
>        && (i.rex & (REX_W | REX_X | REX_B)) == 0)
>      {
> @@ -4758,7 +4758,7 @@ optimize_encoding (void)
>  	   && (i.tm.opcode_modifier.vex
>  	       || ((!i.mask.reg || i.mask.zeroing)
>  		   && i.tm.opcode_modifier.evex
> -		   && (i.vec_encoding != vex_encoding_evex
> +		   && (i.encoding != encoding_evex
>  		       || cpu_arch_isa_flags.bitfield.cpuavx512vl
>  		       || is_cpu (&i.tm, CpuAVX512VL)
>  		       || (i.tm.operand_types[2].bitfield.zmmword
> @@ -4808,12 +4808,12 @@ optimize_encoding (void)
>         */
>        if (i.tm.opcode_modifier.evex)
>  	{
> -	  if (i.vec_encoding != vex_encoding_evex)
> +	  if (i.encoding != encoding_evex)
>  	    {
>  	      i.tm.opcode_modifier.vex = VEX128;
>  	      i.tm.opcode_modifier.vexw = VEXW0;
>  	      i.tm.opcode_modifier.evex = 0;
> -	      i.vec_encoding = vex_encoding_vex;
> +	      i.encoding = encoding_vex;
>  	      i.mask.reg = NULL;
>  	    }
>  	  else if (optimize > 1)
> @@ -4836,7 +4836,7 @@ optimize_encoding (void)
>  	    i.types[j].bitfield.ymmword = 0;
>  	  }
>      }
> -  else if (i.vec_encoding != vex_encoding_evex
> +  else if (i.encoding != encoding_evex
>  	   && !i.types[0].bitfield.zmmword
>  	   && !i.types[1].bitfield.zmmword
>  	   && !i.mask.reg
> @@ -4978,7 +4978,7 @@ optimize_encoding (void)
>  	   && i.tm.opcode_modifier.vex
>  	   && !(i.op[0].regs->reg_flags & RegRex)
>  	   && i.op[0].regs->reg_type.bitfield.xmmword
> -	   && i.vec_encoding != vex_encoding_vex3)
> +	   && i.encoding != encoding_vex3)
>      {
>        /* Optimize: -Os:
>           vpbroadcastq %xmmN, %xmmM  ->
> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8) @@ -6839,10 +6839,10
> @@ md_assemble (char *line)
>    if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
>      optimize_encoding ();
> 
> -  /* Past optimization there's no need to distinguish vex_encoding_evex and
> -     vex_encoding_evex512 anymore.  */
> -  if (i.vec_encoding == vex_encoding_evex512)
> -    i.vec_encoding = vex_encoding_evex;
> +  /* Past optimization there's no need to distinguish encoding_evex and
> +     encoding_evex512 anymore.  */
> +  if (i.encoding == encoding_evex512)
> +    i.encoding = encoding_evex;
> 
>    if (use_unaligned_vector_move)
>      encode_with_unaligned_vector_move (); @@ -7136,15 +7136,15 @@
> parse_insn (const char *line, char *mnem
>  		  break;
>  		case Prefix_VEX:
>  		  /* {vex} */
> -		  i.vec_encoding = vex_encoding_vex;
> +		  i.encoding = encoding_vex;
>  		  break;
>  		case Prefix_VEX3:
>  		  /* {vex3} */
> -		  i.vec_encoding = vex_encoding_vex3;
> +		  i.encoding = encoding_vex3;
>  		  break;
>  		case Prefix_EVEX:
>  		  /* {evex} */
> -		  i.vec_encoding = vex_encoding_evex;
> +		  i.encoding = encoding_evex;
>  		  break;
>  		case Prefix_REX:
>  		  /* {rex} */
> @@ -8260,7 +8260,7 @@ check_VecOperands (const insn_template *  static
> int  VEX_check_encoding (const insn_template *t)  {
> -  if (i.vec_encoding == vex_encoding_error)
> +  if (i.encoding == encoding_error)
>      {
>        i.error = unsupported;
>        return 1;
> @@ -8277,8 +8277,8 @@ VEX_check_encoding (const insn_template
>        return 1;
>      }
> 
> -  if (i.vec_encoding == vex_encoding_evex
> -      || i.vec_encoding == vex_encoding_evex512)
> +  if (i.encoding == encoding_evex
> +      || i.encoding == encoding_evex512)
>      {
>        /* This instruction must be encoded with EVEX prefix.  */
>        if (!t->opcode_modifier.evex)
> @@ -8292,7 +8292,7 @@ VEX_check_encoding (const insn_template
>    if (!t->opcode_modifier.vex)
>      {
>        /* This instruction template doesn't have VEX prefix.  */
> -      if (i.vec_encoding != vex_encoding_default)
> +      if (i.encoding != encoding_default)
>  	{
>  	  i.error = no_vex_encoding;
>  	  return 1;
> @@ -8967,7 +8967,7 @@ match_template (char mnem_suffix)
>  	 Note that the semantics have not been changed.  */
>        if (optimize
>  	  && !i.no_optimize
> -	  && i.vec_encoding != vex_encoding_evex
> +	  && i.encoding != encoding_evex
>  	  && ((t + 1 < current_templates.end
>  	       && !t[1].opcode_modifier.evex
>  	       && t[1].opcode_space <= SPACE_0F38 @@ -10157,7 +10157,7
> @@ process_operands (void)
>        if (dot_insn () && i.reg_operands == 2)
>  	{
>  	  gas_assert (is_any_vex_encoding (&i.tm)
> -		      || i.vec_encoding != vex_encoding_default);
> +		      || i.encoding != encoding_default);
>  	  i.vex.register_specifier = i.op[i.operands - 1].regs;
>  	}
>      }
> @@ -10167,7 +10167,7 @@ process_operands (void)
>  	      == InstanceNone)
>      {
>        gas_assert (is_any_vex_encoding (&i.tm)
> -		  || i.vec_encoding != vex_encoding_default);
> +		  || i.encoding != encoding_default);
>        i.vex.register_specifier = i.op[i.operands - 1].regs;
>      }
> 
> @@ -10275,7 +10275,7 @@ build_modrm_byte (void)
>  	}
>        exp->X_add_number |= register_number (i.op[reg_slot].regs)
>  			   << (3 + !(i.tm.opcode_modifier.evex
> -				     || i.vec_encoding == vex_encoding_evex));
> +				     || i.encoding == encoding_evex));
>      }
> 
>    if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST) @@ -12548,20
> +12548,20 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>      }
> 
>    if (vex || xop
> -      ? i.vec_encoding == vex_encoding_evex
> +      ? i.encoding == encoding_evex
>        : evex
> -	? i.vec_encoding == vex_encoding_vex
> -	  || i.vec_encoding == vex_encoding_vex3
> -	: i.vec_encoding != vex_encoding_default)
> +	? i.encoding == encoding_vex
> +	  || i.encoding == encoding_vex3
> +	: i.encoding != encoding_default)
>      {
>        as_bad (_("pseudo-prefix conflicts with encoding specifier"));
>        goto bad;
>      }
> 
> -  if (line > end && i.vec_encoding == vex_encoding_default)
> -    i.vec_encoding = evex ? vex_encoding_evex : vex_encoding_vex;
> +  if (line > end && i.encoding == encoding_default)
> +    i.encoding = evex ? encoding_evex : encoding_vex;
> 
> -  if (i.vec_encoding != vex_encoding_default)
> +  if (i.encoding != encoding_default)
>      {
>        /* Only address size and segment override prefixes are permitted with
>           VEX/XOP/EVEX encodings.  */
> @@ -12892,20 +12892,20 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>  	  goto done;
>  	}
> 
> -      /* No need to distinguish vex_encoding_evex and vex_encoding_evex512.
> */
> -      if (i.vec_encoding == vex_encoding_evex512)
> -	i.vec_encoding = vex_encoding_evex;
> +      /* No need to distinguish encoding_evex and encoding_evex512.  */
> +      if (i.encoding == encoding_evex512)
> +	i.encoding = encoding_evex;
> 
>        /* Are we to emit ModR/M encoding?  */
>        if (!i.short_form
>  	  && (i.mem_operands
> -	      || i.reg_operands > (i.vec_encoding != vex_encoding_default)
> +	      || i.reg_operands > (i.encoding != encoding_default)
>  	      || i.tm.extension_opcode != None))
>  	i.tm.opcode_modifier.modrm = 1;
> 
>        if (!i.tm.opcode_modifier.modrm
>  	  && (i.reg_operands
> -	      > i.short_form + 0U + (i.vec_encoding != vex_encoding_default)
> +	      > i.short_form + 0U + (i.encoding != encoding_default)
>  	      || i.mem_operands))
>  	{
>  	  as_bad (_("too many register/memory operands")); @@ -12944,7
> +12944,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>  	    }
>  	  /* Fall through.  */
>  	case 3:
> -	  if (i.vec_encoding != vex_encoding_default)
> +	  if (i.encoding != encoding_default)
>  	    {
>  	      i.tm.opcode_modifier.vexvvvv = 1;
>  	      break;
> @@ -13000,13 +13000,13 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>  	      || i.index_reg->reg_type.bitfield.ymmword
>  	      || i.index_reg->reg_type.bitfield.zmmword))
>  	{
> -	  if (i.vec_encoding == vex_encoding_default)
> +	  if (i.encoding == encoding_default)
>  	    {
>  	      as_bad (_("VSIB unavailable with legacy encoding"));
>  	      goto done;
>  	    }
> 
> -	  if (i.vec_encoding == vex_encoding_evex
> +	  if (i.encoding == encoding_evex
>  	      && i.reg_operands > 1)
>  	    {
>  	      /* We could allow two register operands, encoding the 2nd one in
> @@ -13026,7 +13026,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>        for (j = i.imm_operands; j < i.operands; ++j)
>  	{
>  	  /* Look for 8-bit operands that use old registers.  */
> -	  if (i.vec_encoding != vex_encoding_default
> +	  if (i.encoding != encoding_default
>  	      && flag_code == CODE_64BIT
>  	      && i.types[j].bitfield.class == Reg
>  	      && i.types[j].bitfield.byte
> @@ -13085,7 +13085,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>  	case  4: combined.bitfield.dword = 1; break;
>  	}
> 
> -      if (i.vec_encoding == vex_encoding_default)
> +      if (i.encoding == encoding_default)
>  	{
>  	  if (flag_code == CODE_64BIT && combined.bitfield.qword)
>  	    i.rex |= REX_W;
> @@ -13162,7 +13162,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>        if (i.memshift >= 32)
>  	i.memshift = 0;
>        else if (!evex)
> -	i.vec_encoding = vex_encoding_error;
> +	i.encoding = encoding_error;
> 
>        if (i.disp_operands && !optimize_disp (&i.tm))
>  	goto done;
> @@ -13228,8 +13228,8 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
>       potential special casing there.  */
>    i.tm.base_opcode |= val;
> 
> -  if (i.vec_encoding == vex_encoding_error
> -      || (i.vec_encoding != vex_encoding_evex
> +  if (i.encoding == encoding_error
> +      || (i.encoding != encoding_evex
>  	  ? i.broadcast.type || i.broadcast.bytes
>  	    || i.rounding.type != rc_none
>  	    || i.mask.reg
> @@ -13340,10 +13340,10 @@ RC_SAE_specifier (const char *pstr)
>  	      return NULL;
>  	    }
> 
> -	  if (i.vec_encoding == vex_encoding_default)
> -	    i.vec_encoding = vex_encoding_evex512;
> -	  else if (i.vec_encoding != vex_encoding_evex
> -		   && i.vec_encoding != vex_encoding_evex512)
> +	  if (i.encoding == encoding_default)
> +	    i.encoding = encoding_evex512;
> +	  else if (i.encoding != encoding_evex
> +		   && i.encoding != encoding_evex512)
>  	    return NULL;
> 
>  	  i.rounding.type = RC_NamesTable[j].type; @@ -13405,10 +13405,10
> @@ check_VecOperations (char *op_string)
>  		}
>  	      op_string++;
> 
> -	      if (i.vec_encoding == vex_encoding_default)
> -		i.vec_encoding = vex_encoding_evex;
> -	      else if (i.vec_encoding != vex_encoding_evex
> -		       && i.vec_encoding != vex_encoding_evex512)
> +	      if (i.encoding == encoding_default)
> +		i.encoding = encoding_evex;
> +	      else if (i.encoding != encoding_evex
> +		       && i.encoding != encoding_evex512)
>  		goto unknown_vec_op;
> 
>  	      i.broadcast.type = bcst_type;
> @@ -15676,11 +15676,11 @@ static bool check_register (const reg_en
>        if (vector_size < VSZ512)
>  	return false;
> 
> -      if (i.vec_encoding == vex_encoding_default)
> -	i.vec_encoding = vex_encoding_evex512;
> -      else if (i.vec_encoding != vex_encoding_evex
> -	       && i.vec_encoding != vex_encoding_evex512)
> -	i.vec_encoding = vex_encoding_error;
> +      if (i.encoding == encoding_default)
> +	i.encoding = encoding_evex512;
> +      else if (i.encoding != encoding_evex
> +	       && i.encoding != encoding_evex512)
> +	i.encoding = encoding_error;
>      }
> 
>    if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword) @@ -15706,11
> +15706,11 @@ static bool check_register (const reg_en
>  	  || flag_code != CODE_64BIT)
>  	return false;
> 
> -      if (i.vec_encoding == vex_encoding_default
> -	  || i.vec_encoding == vex_encoding_evex512)
> -	i.vec_encoding = vex_encoding_evex;
> -      else if (i.vec_encoding != vex_encoding_evex)
> -	i.vec_encoding = vex_encoding_error;
> +      if (i.encoding == encoding_default
> +	  || i.encoding == encoding_evex512)
> +	i.encoding = encoding_evex;
> +      else if (i.encoding != encoding_evex)
> +	i.encoding = encoding_error;
>      }
> 
>    if (r->reg_flags & RegRex2)
> --- a/gas/config/tc-i386-intel.c
> +++ b/gas/config/tc-i386-intel.c
> @@ -209,10 +209,10 @@ operatorT i386_operator (const char *nam
>  	      || i386_types[j].sz[0] > 8
>  	      || (i386_types[j].sz[0] & (i386_types[j].sz[0] - 1)))
>  	    return O_illegal;
> -	  if (i.vec_encoding == vex_encoding_default)
> -	    i.vec_encoding = vex_encoding_evex;
> -	  else if (i.vec_encoding != vex_encoding_evex
> -		   && i.vec_encoding != vex_encoding_evex512)
> +	  if (i.encoding == encoding_default)
> +	    i.encoding = encoding_evex;
> +	  else if (i.encoding != encoding_evex
> +		   && i.encoding != encoding_evex512)
>  	    return O_illegal;
>  	  if (!i.broadcast.bytes && !i.broadcast.type)
>  	    {


^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-16  9:58 ` [PATCH 2/4] x86/APX: respect {vex}/{vex3} Jan Beulich
@ 2024-02-18  7:55   ` Cui, Lili
  2024-02-19  8:00     ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: Cui, Lili @ 2024-02-18  7:55 UTC (permalink / raw)
  To: Beulich, Jan, Binutils; +Cc: H.J. Lu

> Even when an EVEX encoding is available, use of such a prefix ought to be
> respected (resulting in an error) rather than ignored. As requested during
> review already, introduce a new encoding enumerator to record use of eGPR-
> s, and update state transitions accordingly.
> 

Yes, we have such issue for dual VEX/EVEX templates.

> The optimize_encoding() change also addresses an internal assembler error
> that was previously raised when respective memory operands used eGPR-s for
> addressing.
> 
> While this results in a change of diagnostic issued for VEX-encoded insns, the
> new one is at least no worse than the prior one.
> ---
> Question is whether for the state transitions we want to introduce a couple of
> helper functions: check_register() has duplicates each of what
> RC_SAE_specifier() and check_VecOperations() also do.
> 
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -439,9 +439,6 @@ struct _i386_insn
>      /* Prefer the REX2 prefix in encoding.  */
>      bool rex2_encoding;
> 
> -    /* Need to use an Egpr capable encoding (REX2 or EVEX).  */
> -    bool has_egpr;
> -
>      /* Disable instruction size optimization.  */
>      bool no_optimize;
> 
> @@ -451,6 +448,7 @@ struct _i386_insn
>  	encoding_default = 0,
>  	encoding_vex,
>  	encoding_vex3,
> +	encoding_egpr, /* REX2 or EVEX.  */

I find it difficult to understand putting egpr here.  Although this area can be further optimized, it is difficult to say that this solution is clearer than the current one.

1. We have separated vex/evex and rex/rex2, and put the state containing both rex2 and evex in vex/evex encoding, so that the logic becomes confusing.
2. In this enumeration, each enumeration represents an encoding format, and only encoding_egpr describes the register of the operand.
3. encoding_egpr is not the final encoding expression, but an intermediate state that ultimately needs to be converted into other expressions of the same level.

If this patch just wants to report an error to vex prefix, maybe we could handle it like this. Or create a separate branch.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8322,7 +8322,8 @@ VEX_check_encoding (const insn_template *t)
       return 0;
     }

-  if (!t->opcode_modifier.vex)
+  if (!t->opcode_modifier.vex
+      || ((i.vec_encoding == vex_encoding_vex) && i.has_egpr))
     {
       /* This instruction template doesn't have VEX prefix.  */
       if (i.vec_encoding != vex_encoding_default)


>  	encoding_evex,
>  	encoding_evex512,
>  	encoding_error
> @@ -1887,7 +1885,7 @@ static INLINE bool need_evex_encoding (c  {
>    return i.encoding == encoding_evex
>  	|| i.encoding == encoding_evex512
> -	|| (t->opcode_modifier.vex && i.has_egpr)
> +	|| (t->opcode_modifier.vex && i.encoding == encoding_egpr)
>  	|| i.mask.reg;
>  }
> 
> +.*:211: Error: no VEX/XOP encoding for `and'
> +.*:212: Error: no VEX/XOP encoding for `and'
> +.*:213: Error: .* `and'
> +.*:214: Error: no VEX/XOP encoding for `and'
> +.*:215: Error: no VEX/XOP encoding for `and'
> +.*:216: Error: .* `and'
> +.*:219: Error: .* `andn'
>  #pass
> --- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> @@ -207,3 +207,13 @@
>  	vtestpd (%r27),%ymm6
>  	vtestps (%r27),%xmm6
>  	vtestps (%r27),%ymm6
> +# {vex}
> +	{vex} and %eax, %eax
> +	{vex} and %r8, %r8
> +	{vex} and %r16, %r16
> +	{vex} and %eax, %eax, %eax
> +	{vex} and %r8, %r8, %r8
> +	{vex} and %r16, %r16, %r16
> +	{vex} andn %eax, %eax, %eax
> +	{vex} andn %r8, %r8, %r8

These two test cases are valid.

> +	{vex} andn %r16, %r16, %r16

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] x86: rename vec_encoding and vex_encoding_*
  2024-02-18  5:59   ` Cui, Lili
@ 2024-02-19  7:54     ` Jan Beulich
  2024-02-20  9:19       ` Cui, Lili
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2024-02-19  7:54 UTC (permalink / raw)
  To: Cui, Lili; +Cc: H.J. Lu, Binutils

On 18.02.2024 06:59, Cui, Lili wrote:
>> Even with just VEX these weren't limited to vector insns. With APX the set of
>> non-vector ones covered has greatly increased. Drop the vec_ prefix. Also drop
>> the vex_ ones off of the enumerators, as they weren't appropriate anyway:
>> Should have been vec_ then, too.
>>
>> --- a/gas/config/tc-i386.c
>> +++ b/gas/config/tc-i386.c
>> @@ -445,16 +445,16 @@ struct _i386_insn
>>      /* Disable instruction size optimization.  */
>>      bool no_optimize;
>>
>> -    /* How to encode vector instructions.  */
>> +    /* How to encode instructions.  */
>>      enum
>>        {
>> -	vex_encoding_default = 0,
>> -	vex_encoding_vex,
>> -	vex_encoding_vex3,
>> -	vex_encoding_evex,
>> -	vex_encoding_evex512,
>> -	vex_encoding_error
>> -      } vec_encoding;
>> +	encoding_default = 0,
>> +	encoding_vex,
>> +	encoding_vex3,
>> +	encoding_evex,
>> +	encoding_evex512,
> 
> "encoding_evex512" seems redundant, duplicating with encoding_evex, do you know the reason for keeping it?

Well, did you look at how it's used, or at the commit that introduced
it?

> Could you add some comments for " encoding_evex512", thanks.

If any comments are to be added here, then for every one of the
enumerators.

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-18  7:55   ` Cui, Lili
@ 2024-02-19  8:00     ` Jan Beulich
  2024-02-20 10:12       ` Cui, Lili
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2024-02-19  8:00 UTC (permalink / raw)
  To: Cui, Lili; +Cc: H.J. Lu, Binutils

On 18.02.2024 08:55, Cui, Lili wrote:
>> Even when an EVEX encoding is available, use of such a prefix ought to be
>> respected (resulting in an error) rather than ignored. As requested during
>> review already, introduce a new encoding enumerator to record use of eGPR-
>> s, and update state transitions accordingly.
>>
> 
> Yes, we have such issue for dual VEX/EVEX templates.
> 
>> The optimize_encoding() change also addresses an internal assembler error
>> that was previously raised when respective memory operands used eGPR-s for
>> addressing.
>>
>> While this results in a change of diagnostic issued for VEX-encoded insns, the
>> new one is at least no worse than the prior one.
>> ---
>> Question is whether for the state transitions we want to introduce a couple of
>> helper functions: check_register() has duplicates each of what
>> RC_SAE_specifier() and check_VecOperations() also do.
>>
>> --- a/gas/config/tc-i386.c
>> +++ b/gas/config/tc-i386.c
>> @@ -439,9 +439,6 @@ struct _i386_insn
>>      /* Prefer the REX2 prefix in encoding.  */
>>      bool rex2_encoding;
>>
>> -    /* Need to use an Egpr capable encoding (REX2 or EVEX).  */
>> -    bool has_egpr;
>> -
>>      /* Disable instruction size optimization.  */
>>      bool no_optimize;
>>
>> @@ -451,6 +448,7 @@ struct _i386_insn
>>  	encoding_default = 0,
>>  	encoding_vex,
>>  	encoding_vex3,
>> +	encoding_egpr, /* REX2 or EVEX.  */
> 
> I find it difficult to understand putting egpr here.  Although this area can be further optimized, it is difficult to say that this solution is clearer than the current one.
> 
> 1. We have separated vex/evex and rex/rex2, and put the state containing both rex2 and evex in vex/evex encoding, so that the logic becomes confusing.
> 2. In this enumeration, each enumeration represents an encoding format, and only encoding_egpr describes the register of the operand.

I don't view it like this: This enumerator indicates "need an encoding
which can represent eGPR-s, i.e. REX2 or EVEX". To me encoding_rex2_or_evex
would be pretty clearly worse a name for it.

> 3. encoding_egpr is not the final encoding expression, but an intermediate state that ultimately needs to be converted into other expressions of the same level.

Not much different from encoding_evex512.

> If this patch just wants to report an error to vex prefix, maybe we could handle it like this. Or create a separate branch.
> 
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -8322,7 +8322,8 @@ VEX_check_encoding (const insn_template *t)
>        return 0;
>      }
> 
> -  if (!t->opcode_modifier.vex)
> +  if (!t->opcode_modifier.vex
> +      || ((i.vec_encoding == vex_encoding_vex) && i.has_egpr))
>      {
>        /* This instruction template doesn't have VEX prefix.  */
>        if (i.vec_encoding != vex_encoding_default)

That's indeed a possibility, I think. Yet already when reviewing the
original work of yours I indicated that I'd like the encoding
restrictions all be represented by a single enum. To me it is more
difficult to follow when there are two separate entities which need
to be consulted in order to reflect all constraints.

>> +.*:211: Error: no VEX/XOP encoding for `and'
>> +.*:212: Error: no VEX/XOP encoding for `and'
>> +.*:213: Error: .* `and'
>> +.*:214: Error: no VEX/XOP encoding for `and'
>> +.*:215: Error: no VEX/XOP encoding for `and'
>> +.*:216: Error: .* `and'
>> +.*:219: Error: .* `andn'

Please pay attention to the gap in line numbers here; that ...

>> --- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
>> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
>> @@ -207,3 +207,13 @@
>>  	vtestpd (%r27),%ymm6
>>  	vtestps (%r27),%xmm6
>>  	vtestps (%r27),%ymm6
>> +# {vex}
>> +	{vex} and %eax, %eax
>> +	{vex} and %r8, %r8
>> +	{vex} and %r16, %r16
>> +	{vex} and %eax, %eax, %eax
>> +	{vex} and %r8, %r8, %r8
>> +	{vex} and %r16, %r16, %r16
>> +	{vex} andn %eax, %eax, %eax
>> +	{vex} andn %r8, %r8, %r8
> 
> These two test cases are valid.

... reflects exactly this fact.

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 1/4] x86: rename vec_encoding and vex_encoding_*
  2024-02-19  7:54     ` Jan Beulich
@ 2024-02-20  9:19       ` Cui, Lili
  0 siblings, 0 replies; 14+ messages in thread
From: Cui, Lili @ 2024-02-20  9:19 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: H.J. Lu, Binutils

> On 18.02.2024 06:59, Cui, Lili wrote:
> >> Even with just VEX these weren't limited to vector insns. With APX
> >> the set of non-vector ones covered has greatly increased. Drop the
> >> vec_ prefix. Also drop the vex_ ones off of the enumerators, as they weren't
> appropriate anyway:
> >> Should have been vec_ then, too.
> >>
> >> --- a/gas/config/tc-i386.c
> >> +++ b/gas/config/tc-i386.c
> >> @@ -445,16 +445,16 @@ struct _i386_insn
> >>      /* Disable instruction size optimization.  */
> >>      bool no_optimize;
> >>
> >> -    /* How to encode vector instructions.  */
> >> +    /* How to encode instructions.  */
> >>      enum
> >>        {
> >> -	vex_encoding_default = 0,
> >> -	vex_encoding_vex,
> >> -	vex_encoding_vex3,
> >> -	vex_encoding_evex,
> >> -	vex_encoding_evex512,
> >> -	vex_encoding_error
> >> -      } vec_encoding;
> >> +	encoding_default = 0,
> >> +	encoding_vex,
> >> +	encoding_vex3,
> >> +	encoding_evex,
> >> +	encoding_evex512,
> >
> > "encoding_evex512" seems redundant, duplicating with encoding_evex, do
> you know the reason for keeping it?
> 
> Well, did you look at how it's used, or at the commit that introduced it?
> 
> > Could you add some comments for " encoding_evex512", thanks.
> 
> If any comments are to be added here, then for every one of the enumerators.
> 

Well, you used evex512 to mark out "sae" and "zmm" for optimization. Maybe it's because you're so familiar with this area, but it's special to me. Of course, you are free not to annotate it.

Lili.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-19  8:00     ` Jan Beulich
@ 2024-02-20 10:12       ` Cui, Lili
  2024-02-20 10:30         ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: Cui, Lili @ 2024-02-20 10:12 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: H.J. Lu, Binutils

> On 18.02.2024 08:55, Cui, Lili wrote:
> >> Even when an EVEX encoding is available, use of such a prefix ought
> >> to be respected (resulting in an error) rather than ignored. As
> >> requested during review already, introduce a new encoding enumerator
> >> to record use of eGPR- s, and update state transitions accordingly.
> >>
> >
> > Yes, we have such issue for dual VEX/EVEX templates.
> >
> >> The optimize_encoding() change also addresses an internal assembler
> >> error that was previously raised when respective memory operands used
> >> eGPR-s for addressing.
> >>
> >> While this results in a change of diagnostic issued for VEX-encoded
> >> insns, the new one is at least no worse than the prior one.
> >> ---
> >> Question is whether for the state transitions we want to introduce a
> >> couple of helper functions: check_register() has duplicates each of
> >> what
> >> RC_SAE_specifier() and check_VecOperations() also do.
> >>
> >> --- a/gas/config/tc-i386.c
> >> +++ b/gas/config/tc-i386.c
> >> @@ -439,9 +439,6 @@ struct _i386_insn
> >>      /* Prefer the REX2 prefix in encoding.  */
> >>      bool rex2_encoding;
> >>
> >> -    /* Need to use an Egpr capable encoding (REX2 or EVEX).  */
> >> -    bool has_egpr;
> >> -
> >>      /* Disable instruction size optimization.  */
> >>      bool no_optimize;
> >>
> >> @@ -451,6 +448,7 @@ struct _i386_insn
> >>  	encoding_default = 0,
> >>  	encoding_vex,
> >>  	encoding_vex3,
> >> +	encoding_egpr, /* REX2 or EVEX.  */
> >
> > I find it difficult to understand putting egpr here.  Although this area can be
> further optimized, it is difficult to say that this solution is clearer than the
> current one.
> >
> > 1. We have separated vex/evex and rex/rex2, and put the state containing
> both rex2 and evex in vex/evex encoding, so that the logic becomes confusing.
> > 2. In this enumeration, each enumeration represents an encoding format,
> and only encoding_egpr describes the register of the operand.
> 
> I don't view it like this: This enumerator indicates "need an encoding which can
> represent eGPR-s, i.e. REX2 or EVEX". To me encoding_rex2_or_evex would be
> pretty clearly worse a name for it.
> 

I think the essential problem is that it's strange to put it in this enumeration.

> > 3. encoding_egpr is not the final encoding expression, but an intermediate
> state that ultimately needs to be converted into other expressions of the same
> level.
> 
> Not much different from encoding_evex512.

Yes, encoding_evex512 is also an intermediate state, at least it chooses one of the states in this enumeration. 

    /* Prefer the REX byte in encoding.  */
    bool rex_encoding;

    /* Prefer the REX2 prefix in encoding.  */
    bool rex2_encoding;

    /* Need to use an Egpr capable encoding (REX2 or EVEX).  */
    bool has_egpr;

    /* How to encode vector instructions.  */
    enum
      {
        vex_encoding_default = 0,
        vex_encoding_vex,
        vex_encoding_vex3,
        vex_encoding_evex,
        vex_encoding_evex512,
        vex_encoding_error
      } vec_encoding;


> 
> > If this patch just wants to report an error to vex prefix, maybe we could
> handle it like this. Or create a separate branch.
> >
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -8322,7 +8322,8 @@ VEX_check_encoding (const insn_template *t)
> >        return 0;
> >      }
> >
> > -  if (!t->opcode_modifier.vex)
> > +  if (!t->opcode_modifier.vex
> > +      || ((i.vec_encoding == vex_encoding_vex) && i.has_egpr))
> >      {
> >        /* This instruction template doesn't have VEX prefix.  */
> >        if (i.vec_encoding != vex_encoding_default)
> 
> That's indeed a possibility, I think. Yet already when reviewing the original
> work of yours I indicated that I'd like the encoding restrictions all be
> represented by a single enum. To me it is more difficult to follow when there
> are two separate entities which need to be consulted in order to reflect all
> constraints.
> 

Egpr does conflict with the existing architecture, making implementation awkward.

> >> +.*:211: Error: no VEX/XOP encoding for `and'
> >> +.*:212: Error: no VEX/XOP encoding for `and'
> >> +.*:213: Error: .* `and'
> >> +.*:214: Error: no VEX/XOP encoding for `and'
> >> +.*:215: Error: no VEX/XOP encoding for `and'
> >> +.*:216: Error: .* `and'
> >> +.*:219: Error: .* `andn'
> 
> Please pay attention to the gap in line numbers here; that ...
> 
> >> --- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> >> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> >> @@ -207,3 +207,13 @@
> >>  	vtestpd (%r27),%ymm6
> >>  	vtestps (%r27),%xmm6
> >>  	vtestps (%r27),%ymm6
> >> +# {vex}
> >> +	{vex} and %eax, %eax
> >> +	{vex} and %r8, %r8
> >> +	{vex} and %r16, %r16
> >> +	{vex} and %eax, %eax, %eax
> >> +	{vex} and %r8, %r8, %r8
> >> +	{vex} and %r16, %r16, %r16
> >> +	{vex} andn %eax, %eax, %eax
> >> +	{vex} andn %r8, %r8, %r8
> >
> > These two test cases are valid.
> 
> ... reflects exactly this fact.
> 

Normally we don't put valid test cases into invalid test case file, right?

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-20 10:12       ` Cui, Lili
@ 2024-02-20 10:30         ` Jan Beulich
  2024-02-20 15:59           ` Michael Matz
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2024-02-20 10:30 UTC (permalink / raw)
  To: Cui, Lili; +Cc: H.J. Lu, Binutils

On 20.02.2024 11:12, Cui, Lili wrote:
>> On 18.02.2024 08:55, Cui, Lili wrote:
>>>> --- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
>>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
>>>> @@ -207,3 +207,13 @@
>>>>  	vtestpd (%r27),%ymm6
>>>>  	vtestps (%r27),%xmm6
>>>>  	vtestps (%r27),%ymm6
>>>> +# {vex}
>>>> +	{vex} and %eax, %eax
>>>> +	{vex} and %r8, %r8
>>>> +	{vex} and %r16, %r16
>>>> +	{vex} and %eax, %eax, %eax
>>>> +	{vex} and %r8, %r8, %r8
>>>> +	{vex} and %r16, %r16, %r16
>>>> +	{vex} andn %eax, %eax, %eax
>>>> +	{vex} andn %r8, %r8, %r8
>>>
>>> These two test cases are valid.
>>
>> ... reflects exactly this fact.
>>
> 
> Normally we don't put valid test cases into invalid test case file, right?

Depends, I would say (and I think you'll find other examples). I view it as
pretty relevant here.

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-20 10:30         ` Jan Beulich
@ 2024-02-20 15:59           ` Michael Matz
  2024-02-20 16:52             ` H.J. Lu
  0 siblings, 1 reply; 14+ messages in thread
From: Michael Matz @ 2024-02-20 15:59 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Cui, Lili, H.J. Lu, Binutils

Hello,

On Tue, 20 Feb 2024, Jan Beulich wrote:

> On 20.02.2024 11:12, Cui, Lili wrote:
> >> On 18.02.2024 08:55, Cui, Lili wrote:
> >>>> --- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> >>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> >>>> @@ -207,3 +207,13 @@
> >>>>  	vtestpd (%r27),%ymm6
> >>>>  	vtestps (%r27),%xmm6
> >>>>  	vtestps (%r27),%ymm6
> >>>> +# {vex}
> >>>> +	{vex} and %eax, %eax
> >>>> +	{vex} and %r8, %r8
> >>>> +	{vex} and %r16, %r16
> >>>> +	{vex} and %eax, %eax, %eax
> >>>> +	{vex} and %r8, %r8, %r8
> >>>> +	{vex} and %r16, %r16, %r16
> >>>> +	{vex} andn %eax, %eax, %eax
> >>>> +	{vex} andn %r8, %r8, %r8
> >>>
> >>> These two test cases are valid.
> >>
> >> ... reflects exactly this fact.
> >>
> > 
> > Normally we don't put valid test cases into invalid test case file, right?
> 
> Depends, I would say (and I think you'll find other examples). I view it as
> pretty relevant here.

I think the test filename should be different then, also for other cases 
where this is the case.  That, or at least a comment next to the insns 
that those are _not_ invalid (referring to the .d file to see that is not 
self-describing).  Without any other indication I'd say instructions in a 
file named "*inval.s" should _all_ be invalid, always.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] x86/APX: respect {vex}/{vex3}
  2024-02-20 15:59           ` Michael Matz
@ 2024-02-20 16:52             ` H.J. Lu
  0 siblings, 0 replies; 14+ messages in thread
From: H.J. Lu @ 2024-02-20 16:52 UTC (permalink / raw)
  To: Michael Matz; +Cc: Jan Beulich, Cui, Lili, Binutils

On Tue, Feb 20, 2024 at 8:00 AM Michael Matz <matz@suse.de> wrote:
>
> Hello,
>
> On Tue, 20 Feb 2024, Jan Beulich wrote:
>
> > On 20.02.2024 11:12, Cui, Lili wrote:
> > >> On 18.02.2024 08:55, Cui, Lili wrote:
> > >>>> --- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> > >>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> > >>>> @@ -207,3 +207,13 @@
> > >>>>          vtestpd (%r27),%ymm6
> > >>>>          vtestps (%r27),%xmm6
> > >>>>          vtestps (%r27),%ymm6
> > >>>> +# {vex}
> > >>>> +        {vex} and %eax, %eax
> > >>>> +        {vex} and %r8, %r8
> > >>>> +        {vex} and %r16, %r16
> > >>>> +        {vex} and %eax, %eax, %eax
> > >>>> +        {vex} and %r8, %r8, %r8
> > >>>> +        {vex} and %r16, %r16, %r16
> > >>>> +        {vex} andn %eax, %eax, %eax
> > >>>> +        {vex} andn %r8, %r8, %r8
> > >>>
> > >>> These two test cases are valid.
> > >>
> > >> ... reflects exactly this fact.
> > >>
> > >
> > > Normally we don't put valid test cases into invalid test case file, right?
> >
> > Depends, I would say (and I think you'll find other examples). I view it as
> > pretty relevant here.
>
> I think the test filename should be different then, also for other cases
> where this is the case.  That, or at least a comment next to the insns
> that those are _not_ invalid (referring to the .d file to see that is not
> self-describing).  Without any other indication I'd say instructions in a
> file named "*inval.s" should _all_ be invalid, always.

Agreed.

-- 
H.J.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-02-20 16:53 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-16  9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
2024-02-16  9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
2024-02-18  5:59   ` Cui, Lili
2024-02-19  7:54     ` Jan Beulich
2024-02-20  9:19       ` Cui, Lili
2024-02-16  9:58 ` [PATCH 2/4] x86/APX: respect {vex}/{vex3} Jan Beulich
2024-02-18  7:55   ` Cui, Lili
2024-02-19  8:00     ` Jan Beulich
2024-02-20 10:12       ` Cui, Lili
2024-02-20 10:30         ` Jan Beulich
2024-02-20 15:59           ` Michael Matz
2024-02-20 16:52             ` H.J. Lu
2024-02-16  9:58 ` [PATCH 3/4] x86/APX: correct .insn opcode space determination when REX2 is needed Jan Beulich
2024-02-16  9:59 ` [PATCH 4/4] x86/APX: optimize certain XOR and SUB forms Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).