public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 0/3] x86: improve encoding selection and prereq tidying
@ 2023-09-15  8:58 Jan Beulich
  2023-09-15  8:59 ` [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Jan Beulich @ 2023-09-15  8:58 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

While perhaps seemingly unrelated, the first two changes here are
kind of fallout from preparing the 3rd patch. That 3rd patch also
depends on the earlier submitted "x86: fold certain VEX and EVEX
templates", at least for its use of need_evex_encoding(). The
first two patches should be pretty much independent of the earlier
series.

1: correct cpu_arch_isa_flags maintenance
2: drop cpu_arch_tune_flags
3: prefer VEX encodings over EVEX ones when possible

Jan

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance
  2023-09-15  8:58 [PATCH 0/3] x86: improve encoding selection and prereq tidying Jan Beulich
@ 2023-09-15  8:59 ` Jan Beulich
  2023-09-15  8:59 ` [PATCH 2/3] x86: drop cpu_arch_tune_flags Jan Beulich
  2023-09-15  9:00 ` [PATCH 3/3] x86: prefer VEX encodings over EVEX ones when possible Jan Beulich
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-09-15  8:59 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

These may not be set from a value derived from cpu_arch_flags: That
starts with (almost) all functionality enabled, while cpu_arch_isa_flags
is supposed to track features that were explicitly enabled (and perhaps
later disabled) by the user.

To avoid needing to do any such adjustment in two places (each),
introduce helper functions used by both command line handling and
directive processing.
---
While setting of vector_size could be moved into isa_disable() (further
reducing code duplication), the same isn't true for isa_enable().
Because of the asymmetry I didn't do so. Thoughts / opinions?

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -2804,13 +2804,41 @@ check_cpu_arch_compatible (const char *n
 }
 
 static void
-extend_cpu_sub_arch_name (const char *name)
+extend_cpu_sub_arch_name (const char *pfx, const char *name)
 {
   if (cpu_sub_arch_name)
     cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
-				  ".", name, (const char *) NULL);
+				  pfx, name, (const char *) NULL);
   else
-    cpu_sub_arch_name = concat (".", name, (const char *) NULL);
+    cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
+}
+
+static void isa_enable (unsigned int idx)
+{
+  i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
+
+  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+    {
+      extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
+      cpu_arch_flags = flags;
+    }
+
+  cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
+}
+
+static void isa_disable (unsigned int idx)
+{
+  i386_cpu_flags flags
+    = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
+
+  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+    {
+      extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
+      cpu_arch_flags = flags;
+    }
+
+  cpu_arch_isa_flags
+    = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
 }
 
 static void
@@ -2834,7 +2862,6 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
   int e;
   const char *string;
   unsigned int j = 0;
-  i386_cpu_flags flags;
 
   SKIP_WHITESPACE ();
 
@@ -2987,17 +3014,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
 	  if (cpu_flags_all_zero (&cpu_arch[j].enable))
 	    continue;
 
-	  flags = cpu_flags_or (cpu_arch_flags, cpu_arch[j].enable);
-
-	  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
-	    {
-	      extend_cpu_sub_arch_name (string + 1);
-	      cpu_arch_flags = flags;
-	      cpu_arch_isa_flags = flags;
-	    }
-	  else
-	    cpu_arch_isa_flags
-	      = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[j].enable);
+	  isa_enable (j);
 
 	  (void) restore_line_pointer (e);
 
@@ -3044,13 +3061,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
 	if (cpu_arch[j].type == PROCESSOR_NONE
 	    && strcmp (string + 3, cpu_arch[j].name) == 0)
 	  {
-	    flags = cpu_flags_and_not (cpu_arch_flags, cpu_arch[j].disable);
-	    if (!cpu_flags_equal (&flags, &cpu_arch_flags))
-	      {
-		extend_cpu_sub_arch_name (string + 1);
-		cpu_arch_flags = flags;
-		cpu_arch_isa_flags = flags;
-	      }
+	    isa_disable (j);
 
 	    if (cpu_arch[j].vsz == vsz_set)
 	      vector_size = VSZ_DEFAULT;
@@ -14598,21 +14609,7 @@ md_parse_option (int c, const char *arg)
 		       && !cpu_flags_all_zero (&cpu_arch[j].enable))
 		{
 		  /* ISA extension.  */
-		  i386_cpu_flags flags;
-
-		  flags = cpu_flags_or (cpu_arch_flags,
-					cpu_arch[j].enable);
-
-		  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
-		    {
-		      extend_cpu_sub_arch_name (arch);
-		      cpu_arch_flags = flags;
-		      cpu_arch_isa_flags = flags;
-		    }
-		  else
-		    cpu_arch_isa_flags
-		      = cpu_flags_or (cpu_arch_isa_flags,
-				      cpu_arch[j].enable);
+		  isa_enable (j);
 
 		  switch (cpu_arch[j].vsz)
 		    {
@@ -14655,16 +14652,7 @@ md_parse_option (int c, const char *arg)
 		if (cpu_arch[j].type == PROCESSOR_NONE
 		    && strcmp (arch + 2, cpu_arch[j].name) == 0)
 		  {
-		    i386_cpu_flags flags;
-
-		    flags = cpu_flags_and_not (cpu_arch_flags,
-					       cpu_arch[j].disable);
-		    if (!cpu_flags_equal (&flags, &cpu_arch_flags))
-		      {
-			extend_cpu_sub_arch_name (arch);
-			cpu_arch_flags = flags;
-			cpu_arch_isa_flags = flags;
-		      }
+		    isa_disable (j);
 		    if (cpu_arch[j].vsz == vsz_set)
 		      vector_size = VSZ_DEFAULT;
 		    break;
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -147,6 +147,7 @@ if [gas_32_check] then {
     run_dump_test "nops-6"
     run_dump_test "nops-7"
     run_dump_test "nops-8"
+    run_dump_test "nops-9"
     run_dump_test "noreg16"
     run_list_test "noreg16"
     run_dump_test "noreg16-data32"
--- /dev/null
+++ b/gas/testsuite/gas/i386/nops-9.d
@@ -0,0 +1,28 @@
+#objdump: -drw
+#name: i386 nops 9
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <default>:
+[ 	]*[a-f0-9]+:	0f be f0             	movsbl %al,%esi
+[ 	]*[a-f0-9]+:	8d b4 26 00 00 00 00 	lea    0x0\(%esi,%eiz,1\),%esi
+[ 	]*[a-f0-9]+:	8d b6 00 00 00 00    	lea    0x0\(%esi\),%esi
+
+0+10 <nopopcnt>:
+[ 	]*[a-f0-9]+:	0f be f0             	movsbl %al,%esi
+[ 	]*[a-f0-9]+:	8d b4 26 00 00 00 00 	lea    0x0\(%esi,%eiz,1\),%esi
+[ 	]*[a-f0-9]+:	8d b6 00 00 00 00    	lea    0x0\(%esi\),%esi
+
+0+20 <popcnt>:
+[ 	]*[a-f0-9]+:	f3 0f b8 f0          	popcnt %eax,%esi
+[ 	]*[a-f0-9]+:	8d b4 26 00 00 00 00 	lea    0x0\(%esi,%eiz,1\),%esi
+[ 	]*[a-f0-9]+:	8d 74 26 00          	lea    0x0\(%esi,%eiz,1\),%esi
+[ 	]*[a-f0-9]+:	90                   	nop
+
+0+30 <nop>:
+[ 	]*[a-f0-9]+:	0f be f0             	movsbl %al,%esi
+[ 	]*[a-f0-9]+:	66 66 2e 0f 1f 84 00 00 00 00 00 	data16 nopw %cs:0x0\(%eax,%eax,1\)
+[ 	]*[a-f0-9]+:	66 90                	xchg   %ax,%ax
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/nops-9.s
@@ -0,0 +1,19 @@
+	.text
+default:
+	movsbl %al,%esi
+	.p2align 4
+
+	.arch .nopopcnt
+nopopcnt:
+	movsbl %al,%esi
+	.p2align 4
+
+	.arch .popcnt
+popcnt:
+	popcnt %eax,%esi
+	.p2align 4
+
+	.arch .nop
+nop:
+	movsbl %al,%esi
+	.p2align 4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/3] x86: drop cpu_arch_tune_flags
  2023-09-15  8:58 [PATCH 0/3] x86: improve encoding selection and prereq tidying Jan Beulich
  2023-09-15  8:59 ` [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance Jan Beulich
@ 2023-09-15  8:59 ` Jan Beulich
  2023-09-15  9:00 ` [PATCH 3/3] x86: prefer VEX encodings over EVEX ones when possible Jan Beulich
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-09-15  8:59 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Apparently from its introduction the variable was only ever written (the
only read is merely to determine whether to write it with another value).
(Since, due to the need to re-indent, the adjacent lines setting
cpu_arch_tune need touching anyway, switch to using PROCESSOR_*
constants where applicable, to make more obvious what the resulting
state is going to be.)

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -816,9 +816,6 @@ static int cpu_arch_tune_set = 0;
 /* Cpu we are generating instructions for.  */
 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 
-/* CPU feature flags of cpu we are generating instructions for.  */
-static i386_cpu_flags cpu_arch_tune_flags;
-
 /* CPU instruction set architecture used.  */
 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 
@@ -2955,10 +2952,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
 	  cpu_arch_isa = PROCESSOR_UNKNOWN;
 	  cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
 	  if (!cpu_arch_tune_set)
-	    {
-	      cpu_arch_tune = cpu_arch_isa;
-	      cpu_arch_tune_flags = cpu_arch_isa_flags;
-	    }
+	    cpu_arch_tune = PROCESSOR_UNKNOWN;
 
 	  vector_size = VSZ_DEFAULT;
 
@@ -3000,10 +2994,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
 	      cpu_arch_isa = cpu_arch[j].type;
 	      cpu_arch_isa_flags = cpu_arch[j].enable;
 	      if (!cpu_arch_tune_set)
-		{
-		  cpu_arch_tune = cpu_arch_isa;
-		  cpu_arch_tune_flags = cpu_arch_isa_flags;
-		}
+		cpu_arch_tune = cpu_arch_isa;
 
 	      vector_size = VSZ_DEFAULT;
 
@@ -14597,10 +14588,7 @@ md_parse_option (int c, const char *arg)
 		  cpu_arch_isa = cpu_arch[j].type;
 		  cpu_arch_isa_flags = cpu_arch[j].enable;
 		  if (!cpu_arch_tune_set)
-		    {
-		      cpu_arch_tune = cpu_arch_isa;
-		      cpu_arch_tune_flags = cpu_arch_isa_flags;
-		    }
+		    cpu_arch_tune = cpu_arch_isa;
 		  vector_size = VSZ_DEFAULT;
 		  break;
 		}
@@ -14678,7 +14666,6 @@ md_parse_option (int c, const char *arg)
 	    {
 	      cpu_arch_tune_set = 1;
 	      cpu_arch_tune = cpu_arch [j].type;
-	      cpu_arch_tune_flags = cpu_arch[j].enable;
 	      break;
 	    }
 	}
@@ -15284,10 +15271,7 @@ i386_target_format (void)
 	  cpu_arch_isa = PROCESSOR_IAMCU;
 	  cpu_arch_isa_flags = iamcu_flags;
 	  if (!cpu_arch_tune_set)
-	    {
-	      cpu_arch_tune = cpu_arch_isa;
-	      cpu_arch_tune_flags = cpu_arch_isa_flags;
-	    }
+	    cpu_arch_tune = PROCESSOR_IAMCU;
 	}
       else if (cpu_arch_isa != PROCESSOR_IAMCU)
 	as_fatal (_("Intel MCU doesn't support `%s' architecture"),
@@ -15298,8 +15282,6 @@ i386_target_format (void)
 
   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
-  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
-    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
 
   switch (OUTPUT_FLAVOR)
     {


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 3/3] x86: prefer VEX encodings over EVEX ones when possible
  2023-09-15  8:58 [PATCH 0/3] x86: improve encoding selection and prereq tidying Jan Beulich
  2023-09-15  8:59 ` [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance Jan Beulich
  2023-09-15  8:59 ` [PATCH 2/3] x86: drop cpu_arch_tune_flags Jan Beulich
@ 2023-09-15  9:00 ` Jan Beulich
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-09-15  9:00 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

AVX-* features / insns paralleling earlier introduced AVX512* ones can
be encoded more compactly when the respective feature was explicitly
enabled by the user.
---
TBD: The non-xy forms of vcvtneps2bf16 don't fit the pattern, for having
     several more variants.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -255,6 +255,7 @@ enum i386_error
     no_default_mask,
     unsupported_rc_sae,
     invalid_register_operand,
+    internal_error,
   };
 
 struct _i386_insn
@@ -5359,6 +5360,9 @@ md_assemble (char *line)
 	case invalid_register_operand:
 	  err_msg = _("invalid register operand");
 	  break;
+	case internal_error:
+	  err_msg = _("internal error");
+	  break;
 	}
       as_bad (_("%s for `%s'"), err_msg,
 	      pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
@@ -7447,6 +7451,33 @@ match_template (char mnem_suffix)
 	  continue;
 	}
 
+      /* Check whether to use the shorter VEX encoding for certain insns where
+	 the EVEX encoding comes first in the table.  This requires the respective
+	 AVX-* feature to be explicitly enabled.  */
+      if (t == current_templates->start
+	  && t->opcode_modifier.disp8memshift
+	  && !t->opcode_modifier.vex
+	  && !need_evex_encoding ()
+	  && t + 1 < current_templates->end
+	  && t[1].opcode_modifier.vex)
+	{
+	  i386_cpu_flags cpu;
+	  unsigned int memshift = i.memshift;
+
+	  i.memshift = 0;
+	  cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags);
+	  if (!cpu_flags_all_zero (&cpu)
+	      && (!i.types[0].bitfield.disp8
+		  || !operand_type_check (i.types[0], disp)
+		  || i.op[0].disps->X_op != O_constant
+		  || fits_in_disp8 (i.op[0].disps->X_add_number)))
+	    {
+	      specific_error = progress (internal_error);
+	      continue;
+	    }
+	  i.memshift = memshift;
+	}
+
       /* We've found a match; break out of loop.  */
       break;
     }
--- a/gas/testsuite/gas/i386/avx-ifma-intel.d
+++ b/gas/testsuite/gas/i386/avx-ifma-intel.d
@@ -32,6 +32,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 f9 b5 c0[ 	]*\{vex\} vpmadd52huq xmm0,xmm0,xmm0
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 fd b5 c0[ 	]*\{vex\} vpmadd52huq ymm0,ymm0,ymm0
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 f9 b5 c0[ 	]*\{vex\} vpmadd52huq xmm0,xmm0,xmm0
-[ 	]*[a-f0-9]+:[ 	]*62 f2 dd 08 b5 d2[ 	]*vpmadd52huq xmm2,xmm4,xmm2
-[ 	]*[a-f0-9]+:[ 	]*62 f2 dd 28 b5 d2[ 	]*vpmadd52huq ymm2,ymm4,ymm2
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 d9 b5 d2[ 	]*\{vex\} vpmadd52huq xmm2,xmm4,xmm2
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 dd b5 d2[ 	]*\{vex\} vpmadd52huq ymm2,ymm4,ymm2
 #pass
--- a/gas/testsuite/gas/i386/avx-ifma.d
+++ b/gas/testsuite/gas/i386/avx-ifma.d
@@ -32,6 +32,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 f9 b5 c0[ 	]*\{vex\} vpmadd52huq %xmm0,%xmm0,%xmm0
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 fd b5 c0[ 	]*\{vex\} vpmadd52huq %ymm0,%ymm0,%ymm0
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 f9 b5 c0[ 	]*\{vex\} vpmadd52huq %xmm0,%xmm0,%xmm0
-[ 	]*[a-f0-9]+:[ 	]*62 f2 dd 08 b5 d2[ 	]*vpmadd52huq %xmm2,%xmm4,%xmm2
-[ 	]*[a-f0-9]+:[ 	]*62 f2 dd 28 b5 d2[ 	]*vpmadd52huq %ymm2,%ymm4,%ymm2
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 d9 b5 d2[ 	]*\{vex\} vpmadd52huq %xmm2,%xmm4,%xmm2
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 dd b5 d2[ 	]*\{vex\} vpmadd52huq %ymm2,%ymm4,%ymm2
 #pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx-vex.l
@@ -0,0 +1,181 @@
+.*: Assembler messages:
+.*:15: Error: .* `vpmadd52luq'
+.*:21: Error: .* `vcvtneps2bf16y'
+.*:15: Error: .* `vpmadd52luq'
+.*:21: Error: .* `vcvtneps2bf16y'
+#...
+[ 	]*[0-9]+[ 	]+\.irp isa, default, .*
+#...
+[ 	]*[0-9]+[ 	]+\.endr
+#...
+[ 	]*[0-9]+[ 	]+>  \.arch default
+[ 	]*[0-9]+[ 	]+>  \.arch default
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+50D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505001
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505008
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B4D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45001
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45008
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F538 	>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B410
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+>.*
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y %ymm0,%xmm1
+[ 	]*[0-9]+[ 	]+72C8
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724801
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724808
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E38 	>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ 	]*[0-9]+[ 	]+7208
+#...
+[ 	]*[0-9]+[ 	]+>  \.arch \.noavx512vl
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+5020
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+90000100 *
+[ 	]*[0-9]+[ 	]+00
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+5020
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+90000100 *
+[ 	]*[0-9]+[ 	]+00
+[ 	]*[0-9]+[ 	]+>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+>.*
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y %ymm0,%xmm1
+[ 	]*[0-9]+[ 	]+C8
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+4820
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+88000100 *
+[ 	]*[0-9]+[ 	]+00
+[ 	]*[0-9]+[ 	]+>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+#...
+[ 	]*[0-9]+[ 	]+>  \.arch \.noavx512f
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+5020
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+90000100 *
+[ 	]*[0-9]+[ 	]+00
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+5020
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+90000100 *
+[ 	]*[0-9]+[ 	]+00
+[ 	]*[0-9]+[ 	]+>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+>.*
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y %ymm0,%xmm1
+[ 	]*[0-9]+[ 	]+C8
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+4820
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+88000100 *
+[ 	]*[0-9]+[ 	]+00
+[ 	]*[0-9]+[ 	]+>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+#...
+[ 	]*[0-9]+[ 	]+>  \.arch \.avx_vnni
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27550 	>  vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+5020
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505008
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B4D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45001
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45008
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F538 	>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B410
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+>.*
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y %ymm0,%xmm1
+[ 	]*[0-9]+[ 	]+72C8
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724801
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724808
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E38 	>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ 	]*[0-9]+[ 	]+7208
+#...
+[ 	]*[0-9]+[ 	]+>  \.arch \.avx_ifma
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+50D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505001
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505008
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E2F5B4 	>  vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+5020
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45008
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F538 	>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B410
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+>.*
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y %ymm0,%xmm1
+[ 	]*[0-9]+[ 	]+72C8
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724801
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724808
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E38 	>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ 	]*[0-9]+[ 	]+7208
+#...
+[ 	]*[0-9]+[ 	]+>  \.arch \.avx_ne_convert
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+50D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505001
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27528 	>  vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+505008
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq %ymm0,%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B4D0
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45001
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F528 	>  vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B45008
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F2F538 	>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ 	]*[0-9]+[ 	]+B410
+[ 	]*[0-9]+[ 	]+> *
+[ 	]*[0-9]+[ 	]+>.*
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y %ymm0,%xmm1
+[ 	]*[0-9]+[ 	]+C8
+[ 	]*[0-9]+[ 	]+\?\?\?\? C4E27E72 	>  vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+4820
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E28 	>  vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ 	]*[0-9]+[ 	]+724808
+[ 	]*[0-9]+[ 	]+\?\?\?\? 62F27E38 	>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ 	]*[0-9]+[ 	]+7208
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx-vex.s
@@ -0,0 +1,23 @@
+	.text
+
+	.irp isa, default, .noavx512vl, .noavx512f, .avx_vnni, .avx_ifma, .avx_ne_convert
+
+	.arch default
+	.arch \isa
+
+	vpdpbusd	%ymm0, %ymm1, %ymm2
+	vpdpbusd	0x20(%eax), %ymm1, %ymm2
+	vpdpbusd	0x100(%eax), %ymm1, %ymm2
+
+	vpmadd52luq	%ymm0, %ymm1, %ymm2
+	vpmadd52luq	0x20(%eax), %ymm1, %ymm2
+	vpmadd52luq	0x100(%eax), %ymm1, %ymm2
+	vpmadd52luq	(%eax){1to4}, %ymm1, %ymm2
+
+#	vcvtneps2bf16	%ymm0, %xmm1
+	vcvtneps2bf16y	%ymm0, %xmm1
+	vcvtneps2bf16y	0x20(%eax), %xmm1
+	vcvtneps2bf16y	0x100(%eax), %xmm1
+	vcvtneps2bf16y	(%eax){1to8}, %xmm1
+
+	.endr
--- a/gas/testsuite/gas/i386/avx-vnni.d
+++ b/gas/testsuite/gas/i386/avx-vnni.d
@@ -38,6 +38,6 @@ Disassembly of section .text:
  +[a-f0-9]+:	c4 e2 79 50 c0       	\{vex\} vpdpbusd %xmm0,%xmm0,%xmm0
  +[a-f0-9]+:	c4 e2 7d 50 c0       	\{vex\} vpdpbusd %ymm0,%ymm0,%ymm0
  +[a-f0-9]+:	c4 e2 79 50 c0       	\{vex\} vpdpbusd %xmm0,%xmm0,%xmm0
- +[a-f0-9]+:	62 f2 5d 08 50 d2    	vpdpbusd %xmm2,%xmm4,%xmm2
+ +[a-f0-9]+:	c4 e2 59 50 d2       	\{vex\} vpdpbusd %xmm2,%xmm4,%xmm2
  +[a-f0-9]+:	c4 e2 59 50 91 f0 07 00 00 	\{vex\} vpdpbusd 0x7f0\(%ecx\),%xmm4,%xmm2
 #pass
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -495,6 +495,7 @@ if [gas_32_check] then {
     run_list_test "msrlist-inval"
     run_dump_test "avx-ne-convert"
     run_dump_test "avx-ne-convert-intel"
+    run_list_test "avx-vex" "-almn"
     run_dump_test "raoint"
     run_dump_test "raoint-intel"
     run_list_test "amx-complex-inval"
--- a/gas/testsuite/gas/i386/x86-64-avx-ifma-intel.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-ifma-intel.d
@@ -29,6 +29,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:[ 	]*c4 c2 dd b4 d4[ 	]*\{vex\} vpmadd52luq ymm2,ymm4,ymm12
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 dd b4 11[ 	]*\{vex\} vpmadd52luq ymm2,ymm4,YMMWORD PTR \[rcx\]
 [ 	]*[a-f0-9]+:[ 	]*62 b2 dd 28 b4 d6[ 	]*vpmadd52luq ymm2,ymm4,ymm22
-[ 	]*[a-f0-9]+:[ 	]*62 d2 dd 08 b5 d4[ 	]*vpmadd52huq xmm2,xmm4,xmm12
-[ 	]*[a-f0-9]+:[ 	]*62 d2 dd 28 b5 d4[ 	]*vpmadd52huq ymm2,ymm4,ymm12
+[ 	]*[a-f0-9]+:[ 	]*c4 c2 d9 b5 d4[ 	]*\{vex\} vpmadd52huq xmm2,xmm4,xmm12
+[ 	]*[a-f0-9]+:[ 	]*c4 c2 dd b5 d4[ 	]*\{vex\} vpmadd52huq ymm2,ymm4,ymm12
 #pass
--- a/gas/testsuite/gas/i386/x86-64-avx-ifma.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-ifma.d
@@ -29,6 +29,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:[ 	]*c4 c2 dd b4 d4[ 	]*\{vex\} vpmadd52luq %ymm12,%ymm4,%ymm2
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 dd b4 11[ 	]*\{vex\} vpmadd52luq \(%rcx\),%ymm4,%ymm2
 [ 	]*[a-f0-9]+:[ 	]*62 b2 dd 28 b4 d6[ 	]*vpmadd52luq %ymm22,%ymm4,%ymm2
-[ 	]*[a-f0-9]+:[ 	]*62 d2 dd 08 b5 d4[ 	]*vpmadd52huq %xmm12,%xmm4,%xmm2
-[ 	]*[a-f0-9]+:[ 	]*62 d2 dd 28 b5 d4[ 	]*vpmadd52huq %ymm12,%ymm4,%ymm2
+[ 	]*[a-f0-9]+:[ 	]*c4 c2 d9 b5 d4[ 	]*\{vex\} vpmadd52huq %xmm12,%xmm4,%xmm2
+[ 	]*[a-f0-9]+:[ 	]*c4 c2 dd b5 d4[ 	]*\{vex\} vpmadd52huq %ymm12,%ymm4,%ymm2
 #pass
--- a/gas/testsuite/gas/i386/x86-64-avx-vnni.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-vnni.d
@@ -35,6 +35,6 @@ Disassembly of section .text:
  +[a-f0-9]+:	c4 e2 59 53 11       	\{vex\} vpdpwssds \(%rcx\),%xmm4,%xmm2
  +[a-f0-9]+:	c4 e2 59 53 11       	\{vex\} vpdpwssds \(%rcx\),%xmm4,%xmm2
  +[a-f0-9]+:	62 b2 5d 08 53 d6    	vpdpwssds %xmm22,%xmm4,%xmm2
- +[a-f0-9]+:	62 d2 5d 08 50 d4    	vpdpbusd %xmm12,%xmm4,%xmm2
+ +[a-f0-9]+:	c4 c2 59 50 d4       	\{vex\} vpdpbusd %xmm12,%xmm4,%xmm2
  +[a-f0-9]+:	c4 e2 59 50 91 f0 07 00 00 	\{vex\} vpdpbusd 0x7f0\(%rcx\),%xmm4,%xmm2
 #pass


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-09-15  9:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-15  8:58 [PATCH 0/3] x86: improve encoding selection and prereq tidying Jan Beulich
2023-09-15  8:59 ` [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance Jan Beulich
2023-09-15  8:59 ` [PATCH 2/3] x86: drop cpu_arch_tune_flags Jan Beulich
2023-09-15  9:00 ` [PATCH 3/3] x86: prefer VEX encodings over EVEX ones when possible Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).