* [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance
2023-09-15 8:58 [PATCH 0/3] x86: improve encoding selection and prereq tidying Jan Beulich
@ 2023-09-15 8:59 ` Jan Beulich
2023-09-15 8:59 ` [PATCH 2/3] x86: drop cpu_arch_tune_flags Jan Beulich
2023-09-15 9:00 ` [PATCH 3/3] x86: prefer VEX encodings over EVEX ones when possible Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-09-15 8:59 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu
These must not be set from a value derived from cpu_arch_flags: the latter
starts with (almost) all functionality enabled, while cpu_arch_isa_flags
is supposed to track features that were explicitly enabled (and perhaps
later disabled) by the user.
To avoid needing to do any such adjustment in two places (each),
introduce helper functions used by both command line handling and
directive processing.
---
While setting of vector_size could be moved into isa_disable() (further
reducing code duplication), the same isn't true for isa_enable().
Because of the asymmetry I didn't do so. Thoughts / opinions?
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -2804,13 +2804,41 @@ check_cpu_arch_compatible (const char *n
}
static void
-extend_cpu_sub_arch_name (const char *name)
+extend_cpu_sub_arch_name (const char *pfx, const char *name)
{
if (cpu_sub_arch_name)
cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
- ".", name, (const char *) NULL);
+ pfx, name, (const char *) NULL);
else
- cpu_sub_arch_name = concat (".", name, (const char *) NULL);
+ cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
+}
+
+static void isa_enable (unsigned int idx)
+{
+ i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
+
+ if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+ {
+ extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
+ cpu_arch_flags = flags;
+ }
+
+ cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
+}
+
+static void isa_disable (unsigned int idx)
+{
+ i386_cpu_flags flags
+ = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
+
+ if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+ {
+ extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
+ cpu_arch_flags = flags;
+ }
+
+ cpu_arch_isa_flags
+ = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
}
static void
@@ -2834,7 +2862,6 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
int e;
const char *string;
unsigned int j = 0;
- i386_cpu_flags flags;
SKIP_WHITESPACE ();
@@ -2987,17 +3014,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
if (cpu_flags_all_zero (&cpu_arch[j].enable))
continue;
- flags = cpu_flags_or (cpu_arch_flags, cpu_arch[j].enable);
-
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (string + 1);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
- else
- cpu_arch_isa_flags
- = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[j].enable);
+ isa_enable (j);
(void) restore_line_pointer (e);
@@ -3044,13 +3061,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
if (cpu_arch[j].type == PROCESSOR_NONE
&& strcmp (string + 3, cpu_arch[j].name) == 0)
{
- flags = cpu_flags_and_not (cpu_arch_flags, cpu_arch[j].disable);
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (string + 1);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
+ isa_disable (j);
if (cpu_arch[j].vsz == vsz_set)
vector_size = VSZ_DEFAULT;
@@ -14598,21 +14609,7 @@ md_parse_option (int c, const char *arg)
&& !cpu_flags_all_zero (&cpu_arch[j].enable))
{
/* ISA extension. */
- i386_cpu_flags flags;
-
- flags = cpu_flags_or (cpu_arch_flags,
- cpu_arch[j].enable);
-
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (arch);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
- else
- cpu_arch_isa_flags
- = cpu_flags_or (cpu_arch_isa_flags,
- cpu_arch[j].enable);
+ isa_enable (j);
switch (cpu_arch[j].vsz)
{
@@ -14655,16 +14652,7 @@ md_parse_option (int c, const char *arg)
if (cpu_arch[j].type == PROCESSOR_NONE
&& strcmp (arch + 2, cpu_arch[j].name) == 0)
{
- i386_cpu_flags flags;
-
- flags = cpu_flags_and_not (cpu_arch_flags,
- cpu_arch[j].disable);
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (arch);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
+ isa_disable (j);
if (cpu_arch[j].vsz == vsz_set)
vector_size = VSZ_DEFAULT;
break;
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -147,6 +147,7 @@ if [gas_32_check] then {
run_dump_test "nops-6"
run_dump_test "nops-7"
run_dump_test "nops-8"
+ run_dump_test "nops-9"
run_dump_test "noreg16"
run_list_test "noreg16"
run_dump_test "noreg16-data32"
--- /dev/null
+++ b/gas/testsuite/gas/i386/nops-9.d
@@ -0,0 +1,28 @@
+#objdump: -drw
+#name: i386 nops 9
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <default>:
+[ ]*[a-f0-9]+: 0f be f0 movsbl %al,%esi
+[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%esi,%eiz,1\),%esi
+[ ]*[a-f0-9]+: 8d b6 00 00 00 00 lea 0x0\(%esi\),%esi
+
+0+10 <nopopcnt>:
+[ ]*[a-f0-9]+: 0f be f0 movsbl %al,%esi
+[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%esi,%eiz,1\),%esi
+[ ]*[a-f0-9]+: 8d b6 00 00 00 00 lea 0x0\(%esi\),%esi
+
+0+20 <popcnt>:
+[ ]*[a-f0-9]+: f3 0f b8 f0 popcnt %eax,%esi
+[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%esi,%eiz,1\),%esi
+[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%esi,%eiz,1\),%esi
+[ ]*[a-f0-9]+: 90 nop
+
+0+30 <nop>:
+[ ]*[a-f0-9]+: 0f be f0 movsbl %al,%esi
+[ ]*[a-f0-9]+: 66 66 2e 0f 1f 84 00 00 00 00 00 data16 nopw %cs:0x0\(%eax,%eax,1\)
+[ ]*[a-f0-9]+: 66 90 xchg %ax,%ax
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/nops-9.s
@@ -0,0 +1,19 @@
+ .text
+default:
+ movsbl %al,%esi
+ .p2align 4
+
+ .arch .nopopcnt
+nopopcnt:
+ movsbl %al,%esi
+ .p2align 4
+
+ .arch .popcnt
+popcnt:
+ popcnt %eax,%esi
+ .p2align 4
+
+ .arch .nop
+nop:
+ movsbl %al,%esi
+ .p2align 4
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 3/3] x86: prefer VEX encodings over EVEX ones when possible
2023-09-15 8:58 [PATCH 0/3] x86: improve encoding selection and prereq tidying Jan Beulich
2023-09-15 8:59 ` [PATCH 1/3] x86: correct cpu_arch_isa_flags maintenance Jan Beulich
2023-09-15 8:59 ` [PATCH 2/3] x86: drop cpu_arch_tune_flags Jan Beulich
@ 2023-09-15 9:00 ` Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-09-15 9:00 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu
AVX-* features / insns paralleling earlier introduced AVX512* ones can
be encoded more compactly when the respective feature was explicitly
enabled by the user.
---
TBD: The non-xy forms of vcvtneps2bf16 don't fit the pattern, because they
have several more variants.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -255,6 +255,7 @@ enum i386_error
no_default_mask,
unsupported_rc_sae,
invalid_register_operand,
+ internal_error,
};
struct _i386_insn
@@ -5359,6 +5360,9 @@ md_assemble (char *line)
case invalid_register_operand:
err_msg = _("invalid register operand");
break;
+ case internal_error:
+ err_msg = _("internal error");
+ break;
}
as_bad (_("%s for `%s'"), err_msg,
pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
@@ -7447,6 +7451,33 @@ match_template (char mnem_suffix)
continue;
}
+ /* Check whether to use the shorter VEX encoding for certain insns where
+ the EVEX encoding comes first in the table. This requires the respective
+ AVX-* feature to be explicitly enabled. */
+ if (t == current_templates->start
+ && t->opcode_modifier.disp8memshift
+ && !t->opcode_modifier.vex
+ && !need_evex_encoding ()
+ && t + 1 < current_templates->end
+ && t[1].opcode_modifier.vex)
+ {
+ i386_cpu_flags cpu;
+ unsigned int memshift = i.memshift;
+
+ i.memshift = 0;
+ cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags);
+ if (!cpu_flags_all_zero (&cpu)
+ && (!i.types[0].bitfield.disp8
+ || !operand_type_check (i.types[0], disp)
+ || i.op[0].disps->X_op != O_constant
+ || fits_in_disp8 (i.op[0].disps->X_add_number)))
+ {
+ specific_error = progress (internal_error);
+ continue;
+ }
+ i.memshift = memshift;
+ }
+
/* We've found a match; break out of loop. */
break;
}
--- a/gas/testsuite/gas/i386/avx-ifma-intel.d
+++ b/gas/testsuite/gas/i386/avx-ifma-intel.d
@@ -32,6 +32,6 @@ Disassembly of section .text:
[ ]*[a-f0-9]+:[ ]*c4 e2 f9 b5 c0[ ]*\{vex\} vpmadd52huq xmm0,xmm0,xmm0
[ ]*[a-f0-9]+:[ ]*c4 e2 fd b5 c0[ ]*\{vex\} vpmadd52huq ymm0,ymm0,ymm0
[ ]*[a-f0-9]+:[ ]*c4 e2 f9 b5 c0[ ]*\{vex\} vpmadd52huq xmm0,xmm0,xmm0
-[ ]*[a-f0-9]+:[ ]*62 f2 dd 08 b5 d2[ ]*vpmadd52huq xmm2,xmm4,xmm2
-[ ]*[a-f0-9]+:[ ]*62 f2 dd 28 b5 d2[ ]*vpmadd52huq ymm2,ymm4,ymm2
+[ ]*[a-f0-9]+:[ ]*c4 e2 d9 b5 d2[ ]*\{vex\} vpmadd52huq xmm2,xmm4,xmm2
+[ ]*[a-f0-9]+:[ ]*c4 e2 dd b5 d2[ ]*\{vex\} vpmadd52huq ymm2,ymm4,ymm2
#pass
--- a/gas/testsuite/gas/i386/avx-ifma.d
+++ b/gas/testsuite/gas/i386/avx-ifma.d
@@ -32,6 +32,6 @@ Disassembly of section .text:
[ ]*[a-f0-9]+:[ ]*c4 e2 f9 b5 c0[ ]*\{vex\} vpmadd52huq %xmm0,%xmm0,%xmm0
[ ]*[a-f0-9]+:[ ]*c4 e2 fd b5 c0[ ]*\{vex\} vpmadd52huq %ymm0,%ymm0,%ymm0
[ ]*[a-f0-9]+:[ ]*c4 e2 f9 b5 c0[ ]*\{vex\} vpmadd52huq %xmm0,%xmm0,%xmm0
-[ ]*[a-f0-9]+:[ ]*62 f2 dd 08 b5 d2[ ]*vpmadd52huq %xmm2,%xmm4,%xmm2
-[ ]*[a-f0-9]+:[ ]*62 f2 dd 28 b5 d2[ ]*vpmadd52huq %ymm2,%ymm4,%ymm2
+[ ]*[a-f0-9]+:[ ]*c4 e2 d9 b5 d2[ ]*\{vex\} vpmadd52huq %xmm2,%xmm4,%xmm2
+[ ]*[a-f0-9]+:[ ]*c4 e2 dd b5 d2[ ]*\{vex\} vpmadd52huq %ymm2,%ymm4,%ymm2
#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx-vex.l
@@ -0,0 +1,181 @@
+.*: Assembler messages:
+.*:15: Error: .* `vpmadd52luq'
+.*:21: Error: .* `vcvtneps2bf16y'
+.*:15: Error: .* `vpmadd52luq'
+.*:21: Error: .* `vcvtneps2bf16y'
+#...
+[ ]*[0-9]+[ ]+\.irp isa, default, .*
+#...
+[ ]*[0-9]+[ ]+\.endr
+#...
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+50D0
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505001
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505008
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B4D0
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45001
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45008
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B410
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1
+[ ]*[0-9]+[ ]+72C8
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724801
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724808
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ ]*[0-9]+[ ]+7208
+#...
+[ ]*[0-9]+[ ]+> \.arch \.noavx512vl
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+D0
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+5020
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+90000100 *
+[ ]*[0-9]+[ ]+00
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+D0
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+5020
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+90000100 *
+[ ]*[0-9]+[ ]+00
+[ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1
+[ ]*[0-9]+[ ]+C8
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+4820
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+88000100 *
+[ ]*[0-9]+[ ]+00
+[ ]*[0-9]+[ ]+> vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+#...
+[ ]*[0-9]+[ ]+> \.arch \.noavx512f
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+D0
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+5020
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+90000100 *
+[ ]*[0-9]+[ ]+00
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+D0
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+5020
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+90000100 *
+[ ]*[0-9]+[ ]+00
+[ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1
+[ ]*[0-9]+[ ]+C8
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+4820
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+88000100 *
+[ ]*[0-9]+[ ]+00
+[ ]*[0-9]+[ ]+> vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+#...
+[ ]*[0-9]+[ ]+> \.arch \.avx_vnni
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+D0
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27550 > vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+5020
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505008
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B4D0
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45001
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45008
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B410
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1
+[ ]*[0-9]+[ ]+72C8
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724801
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724808
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ ]*[0-9]+[ ]+7208
+#...
+[ ]*[0-9]+[ ]+> \.arch \.avx_ifma
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+50D0
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505001
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505008
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+D0
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2F5B4 > vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+5020
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45008
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B410
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1
+[ ]*[0-9]+[ ]+72C8
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724801
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724808
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ ]*[0-9]+[ ]+7208
+#...
+[ ]*[0-9]+[ ]+> \.arch \.avx_ne_convert
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+50D0
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505001
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27528 > vpdpbusd 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+505008
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq %ymm0,%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B4D0
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x20\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45001
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F528 > vpmadd52luq 0x100\(%eax\),%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B45008
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
+[ ]*[0-9]+[ ]+B410
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1
+[ ]*[0-9]+[ ]+C8
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+4820
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x100\(%eax\),%xmm1
+[ ]*[0-9]+[ ]+724808
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ ]*[0-9]+[ ]+7208
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx-vex.s
@@ -0,0 +1,23 @@
+ .text
+
+ .irp isa, default, .noavx512vl, .noavx512f, .avx_vnni, .avx_ifma, .avx_ne_convert
+
+ .arch default
+ .arch \isa
+
+ vpdpbusd %ymm0, %ymm1, %ymm2
+ vpdpbusd 0x20(%eax), %ymm1, %ymm2
+ vpdpbusd 0x100(%eax), %ymm1, %ymm2
+
+ vpmadd52luq %ymm0, %ymm1, %ymm2
+ vpmadd52luq 0x20(%eax), %ymm1, %ymm2
+ vpmadd52luq 0x100(%eax), %ymm1, %ymm2
+ vpmadd52luq (%eax){1to4}, %ymm1, %ymm2
+
+# vcvtneps2bf16 %ymm0, %xmm1
+ vcvtneps2bf16y %ymm0, %xmm1
+ vcvtneps2bf16y 0x20(%eax), %xmm1
+ vcvtneps2bf16y 0x100(%eax), %xmm1
+ vcvtneps2bf16y (%eax){1to8}, %xmm1
+
+ .endr
--- a/gas/testsuite/gas/i386/avx-vnni.d
+++ b/gas/testsuite/gas/i386/avx-vnni.d
@@ -38,6 +38,6 @@ Disassembly of section .text:
+[a-f0-9]+: c4 e2 79 50 c0 \{vex\} vpdpbusd %xmm0,%xmm0,%xmm0
+[a-f0-9]+: c4 e2 7d 50 c0 \{vex\} vpdpbusd %ymm0,%ymm0,%ymm0
+[a-f0-9]+: c4 e2 79 50 c0 \{vex\} vpdpbusd %xmm0,%xmm0,%xmm0
- +[a-f0-9]+: 62 f2 5d 08 50 d2 vpdpbusd %xmm2,%xmm4,%xmm2
+ +[a-f0-9]+: c4 e2 59 50 d2 \{vex\} vpdpbusd %xmm2,%xmm4,%xmm2
+[a-f0-9]+: c4 e2 59 50 91 f0 07 00 00 \{vex\} vpdpbusd 0x7f0\(%ecx\),%xmm4,%xmm2
#pass
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -495,6 +495,7 @@ if [gas_32_check] then {
run_list_test "msrlist-inval"
run_dump_test "avx-ne-convert"
run_dump_test "avx-ne-convert-intel"
+ run_list_test "avx-vex" "-almn"
run_dump_test "raoint"
run_dump_test "raoint-intel"
run_list_test "amx-complex-inval"
--- a/gas/testsuite/gas/i386/x86-64-avx-ifma-intel.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-ifma-intel.d
@@ -29,6 +29,6 @@ Disassembly of section .text:
[ ]*[a-f0-9]+:[ ]*c4 c2 dd b4 d4[ ]*\{vex\} vpmadd52luq ymm2,ymm4,ymm12
[ ]*[a-f0-9]+:[ ]*c4 e2 dd b4 11[ ]*\{vex\} vpmadd52luq ymm2,ymm4,YMMWORD PTR \[rcx\]
[ ]*[a-f0-9]+:[ ]*62 b2 dd 28 b4 d6[ ]*vpmadd52luq ymm2,ymm4,ymm22
-[ ]*[a-f0-9]+:[ ]*62 d2 dd 08 b5 d4[ ]*vpmadd52huq xmm2,xmm4,xmm12
-[ ]*[a-f0-9]+:[ ]*62 d2 dd 28 b5 d4[ ]*vpmadd52huq ymm2,ymm4,ymm12
+[ ]*[a-f0-9]+:[ ]*c4 c2 d9 b5 d4[ ]*\{vex\} vpmadd52huq xmm2,xmm4,xmm12
+[ ]*[a-f0-9]+:[ ]*c4 c2 dd b5 d4[ ]*\{vex\} vpmadd52huq ymm2,ymm4,ymm12
#pass
--- a/gas/testsuite/gas/i386/x86-64-avx-ifma.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-ifma.d
@@ -29,6 +29,6 @@ Disassembly of section .text:
[ ]*[a-f0-9]+:[ ]*c4 c2 dd b4 d4[ ]*\{vex\} vpmadd52luq %ymm12,%ymm4,%ymm2
[ ]*[a-f0-9]+:[ ]*c4 e2 dd b4 11[ ]*\{vex\} vpmadd52luq \(%rcx\),%ymm4,%ymm2
[ ]*[a-f0-9]+:[ ]*62 b2 dd 28 b4 d6[ ]*vpmadd52luq %ymm22,%ymm4,%ymm2
-[ ]*[a-f0-9]+:[ ]*62 d2 dd 08 b5 d4[ ]*vpmadd52huq %xmm12,%xmm4,%xmm2
-[ ]*[a-f0-9]+:[ ]*62 d2 dd 28 b5 d4[ ]*vpmadd52huq %ymm12,%ymm4,%ymm2
+[ ]*[a-f0-9]+:[ ]*c4 c2 d9 b5 d4[ ]*\{vex\} vpmadd52huq %xmm12,%xmm4,%xmm2
+[ ]*[a-f0-9]+:[ ]*c4 c2 dd b5 d4[ ]*\{vex\} vpmadd52huq %ymm12,%ymm4,%ymm2
#pass
--- a/gas/testsuite/gas/i386/x86-64-avx-vnni.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-vnni.d
@@ -35,6 +35,6 @@ Disassembly of section .text:
+[a-f0-9]+: c4 e2 59 53 11 \{vex\} vpdpwssds \(%rcx\),%xmm4,%xmm2
+[a-f0-9]+: c4 e2 59 53 11 \{vex\} vpdpwssds \(%rcx\),%xmm4,%xmm2
+[a-f0-9]+: 62 b2 5d 08 53 d6 vpdpwssds %xmm22,%xmm4,%xmm2
- +[a-f0-9]+: 62 d2 5d 08 50 d4 vpdpbusd %xmm12,%xmm4,%xmm2
+ +[a-f0-9]+: c4 c2 59 50 d4 \{vex\} vpdpbusd %xmm12,%xmm4,%xmm2
+[a-f0-9]+: c4 e2 59 50 91 f0 07 00 00 \{vex\} vpdpbusd 0x7f0\(%rcx\),%xmm4,%xmm2
#pass
^ permalink raw reply [flat|nested] 4+ messages in thread