* [PATCH v2 2/4] x86/APX: extend SSE2AVX coverage
2024-04-19 9:36 [PATCH v2 0/4] x86/APX: respect -msse2avx Jan Beulich
2024-04-19 9:37 ` [PATCH v2 1/4] x86: zap value-less Disp8MemShift from non-EVEX templates Jan Beulich
@ 2024-04-19 9:37 ` Jan Beulich
2024-04-25 6:09 ` Cui, Lili
2024-04-19 9:38 ` [PATCH v2 3/4] x86/APX: further " Jan Beulich
2024-04-19 9:38 ` [PATCH v2 4/4] x86: tidy <sse*> templates Jan Beulich
3 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2024-04-19 9:37 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu, Lili Cui
Legacy encoded SIMD insns are converted to AVX ones in that mode. When
eGPR-s are in use, i.e. with APX, convert to AVX10 insns (where
available; there are quite a few which can't be converted).
Note that LDDQU is represented as VMOVDQU32 (and the prior use of the
sse3 template there needs dropping, to get the order right).
Note further that in a few cases, due to the use of templates, AVX512VL
is used when AVX512F would suffice. Since AVX10 is the main reference,
this shouldn't be too much of a problem.
---
To preempt the question: If we weren't to do this (i.e. leave legacy-
encoded SIMD insns using eGPR-s alone), I'd raise the counter question
of why these insns are supported by APX then in the first place.
By using a mask register (which supposedly shouldn't be used by legacy
SIMD code) we could likely convert further insns (by emitting a pair of
replacement ones).
---
v2: Correct MOVSD. Also deal with RCP{P,S}S and RSQRT{P,S}S. Re-work
<gfni>. Re-base.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4133,7 +4133,7 @@ build_evex_prefix (void)
/* Check the REX.W bit and VEXW. */
if (i.tm.opcode_modifier.vexw == VEXWIG)
w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
- else if (i.tm.opcode_modifier.vexw)
+ else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
else
w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
@@ -8278,7 +8278,12 @@ check_VecOperands (const insn_template *
if ((is_cpu (t, CpuXOP) && t->operands == 5)
|| (t->opcode_space == SPACE_0F3A
&& (t->base_opcode | 3) == 0x0b
- && is_cpu (t, CpuAPX_F)))
+ && (is_cpu (t, CpuAPX_F)
+ || (t->opcode_modifier.sse2avx && t->opcode_modifier.evex
+ && (!t->opcode_modifier.vex
+ || (i.encoding != encoding_default
+ && i.encoding != encoding_vex
+ && i.encoding != encoding_vex3))))))
{
if (i.op[0].imms->X_op != O_constant
|| !fits_in_imm4 (i.op[0].imms->X_add_number))
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -312,7 +312,9 @@ when respective checks fail.
@cindex @samp{-msse2avx} option, x86-64
@item -msse2avx
This option specifies that the assembler should encode SSE instructions
-with VEX prefix.
+with VEX prefix, requiring AVX to be available. SSE instructions using
+extended GPRs will be encoded with EVEX prefix, requiring AVX512 or AVX10 to
+be available.
@cindex @samp{-muse-unaligned-vector-move} option, i386
@cindex @samp{-muse-unaligned-vector-move} option, x86-64
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse2avx-apx.d
@@ -0,0 +1,261 @@
+#as: -msse2avx
+#objdump: -dw
+#name: x86-64 SSE+ with APX encoding
+
+.*: file format .*
+
+Disassembly of section .text:
+
+0+ <sse2avx>:
+[ ]*[a-f0-9]+: 62 f9 7c 08 5b 60 01 vcvtdq2ps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 ff 08 e6 60 01 vcvtpd2dqx 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 5a 60 01 vcvtpd2psx 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7d 08 5b 60 01 vcvtps2dq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 e6 60 01 vcvttpd2dqx 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 5b 60 01 vcvttps2dq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 6f 60 01 vmovdqu32 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 28 60 01 vmovapd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 28 60 01 vmovaps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7d 08 6f 60 01 vmovdqa32 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 6f 60 01 vmovdqu32 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 2a 60 01 vmovntdqa 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 16 60 01 vmovshdup 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 12 60 01 vmovsldup 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 10 60 01 vmovupd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 10 60 01 vmovups 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 1c 60 01 vpabsb 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 1d 60 01 vpabsw 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 1e 60 01 vpabsd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 4c 60 01 vrcp14ps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 4e 60 01 vrsqrt14ps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 51 60 01 vsqrtpd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 51 60 01 vsqrtps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 29 60 01 vmovapd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 29 60 01 vmovaps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 7f 60 01 vmovdqa32 %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7e 08 7f 60 01 vmovdqu32 %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 e7 60 01 vmovntdq %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fd 08 2b 60 01 vmovntpd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 2b 60 01 vmovntps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fd 08 11 60 01 vmovupd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 11 60 01 vmovups %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 cd 08 58 70 01 vaddpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 58 70 01 vaddps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 dc 70 01 vaesenc 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 dd 70 01 vaesenclast 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 de 70 01 vaesdec 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 df 70 01 vaesdeclast 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 55 70 01 vandnpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 55 70 01 vandnps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 54 70 01 vandpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 54 70 01 vandps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5e 70 01 vdivpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5e 70 01 vdivps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 cf 70 01 vgf2p8mulb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5f 70 01 vmaxpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5f 70 01 vmaxps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5d 70 01 vminpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5d 70 01 vminps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 59 70 01 vmulpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 59 70 01 vmulps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 56 70 01 vorpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 56 70 01 vorps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 63 70 01 vpacksswb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 6b 70 01 vpackssdw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 67 70 01 vpackuswb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 2b 70 01 vpackusdw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fc 70 01 vpaddb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fd 70 01 vpaddw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fe 70 01 vpaddd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 d4 70 01 vpaddq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ec 70 01 vpaddsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ed 70 01 vpaddsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 dc 70 01 vpaddusb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 dd 70 01 vpaddusw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 db 70 01 vpandd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 df 70 01 vpandnd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e0 70 01 vpavgb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e3 70 01 vpavgw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 00 vpclmullqlqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 01 vpclmulhqlqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 10 vpclmullqhqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 11 vpclmulhqhqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f5 70 01 vpmaddwd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 04 70 01 vpmaddubsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3c 70 01 vpmaxsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ee 70 01 vpmaxsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3d 70 01 vpmaxsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 de 70 01 vpmaxub 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3e 70 01 vpmaxuw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3f 70 01 vpmaxud 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 38 70 01 vpminsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ea 70 01 vpminsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 39 70 01 vpminsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 da 70 01 vpminub 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3a 70 01 vpminuw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3b 70 01 vpminud 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa cd 08 28 70 01 vpmuldq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e4 70 01 vpmulhuw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 0b 70 01 vpmulhrsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e5 70 01 vpmulhw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d5 70 01 vpmullw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 40 70 01 vpmulld 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 f4 70 01 vpmuludq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 eb 70 01 vpord 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f6 70 01 vpsadbw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 00 70 01 vpshufb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f1 70 01 vpsllw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f2 70 01 vpslld 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 f3 70 01 vpsllq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e1 70 01 vpsraw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e2 70 01 vpsrad 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d1 70 01 vpsrlw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d2 70 01 vpsrld 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 d3 70 01 vpsrlq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f8 70 01 vpsubb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f9 70 01 vpsubw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fa 70 01 vpsubd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 fb 70 01 vpsubq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e8 70 01 vpsubsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e9 70 01 vpsubsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d8 70 01 vpsubusb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d9 70 01 vpsubusw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 68 70 01 vpunpckhbw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 69 70 01 vpunpckhwd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 6a 70 01 vpunpckhdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 6d 70 01 vpunpckhqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 60 70 01 vpunpcklbw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 61 70 01 vpunpcklwd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 62 70 01 vpunpckldq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 6c 70 01 vpunpcklqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ef 70 01 vpxord 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5c 70 01 vsubpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5c 70 01 vsubps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 15 70 01 vunpckhpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 15 70 01 vunpckhps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 14 70 01 vunpcklpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 14 70 01 vunpcklps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 57 70 01 vxorpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 57 70 01 vxorps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 7d 08 70 70 01 64 vpshufd \$0x64,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 f9 7e 08 70 70 01 64 vpshufhw \$0x64,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 f9 7f 08 70 70 01 64 vpshuflw \$0x64,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 fb fd 08 09 70 01 04 vrndscalepd \$(0x)?4,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 fb 7d 08 08 70 01 04 vrndscaleps \$(0x)?4,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 fb cd 08 ce 70 01 64 vgf2p8affineqb \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb cd 08 cf 70 01 64 vgf2p8affineinvqb \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 0f 70 01 64 vpalignr \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 64 vpclmulqdq \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 c6 70 01 64 vshufpd \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 c6 70 01 64 vshufps \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 fd 08 2f 60 02 vcomisd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 e6 60 02 vcvtdq2pd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 e6 60 02 vcvtdq2pd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 5a 60 02 vcvtps2pd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 ff 08 12 60 02 vmovddup 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 ff 08 10 60 02 vmovsd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 20 60 02 vpmovsxbw 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 23 60 02 vpmovsxwd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 25 60 02 vpmovsxdq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 30 60 02 vpmovzxbw 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 33 60 02 vpmovzxwd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 35 60 02 vpmovzxdq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 2e 60 02 vucomisd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 13 60 02 vmovlpd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 13 60 02 vmovlps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fd 08 17 60 02 vmovhpd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 17 60 02 vmovhps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 ff 08 11 60 02 vmovsd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 dd 08 12 60 02 vmovlpd 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5c 08 12 60 02 vmovlps 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 dd 08 16 60 02 vmovhpd 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5c 08 16 60 02 vmovhps 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 7e e0 vmovq %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fd 08 6e e0 vmovq %r16,%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 7e e0 vmovq %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fd 08 6e e0 vmovq %r16,%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 d6 60 02 vmovq %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fe 08 7e 60 02 vmovq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 e1 7f 08 2d c4 vcvtsd2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7f 08 2d 48 02 vcvtsd2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 7f 08 2c c4 vcvttsd2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7f 08 2c 48 02 vcvttsd2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 ff 08 2d c4 vcvtsd2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 ff 08 2d 48 02 vcvtsd2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 e1 ff 08 2c c4 vcvttsd2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 ff 08 2c 48 02 vcvttsd2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 f9 df 08 2a e0 vcvtsi2sd %r16,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 df 08 2a 60 02 vcvtsi2sdq 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 de 08 2a e0 vcvtsi2ss %r16,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 de 08 2a 60 02 vcvtsi2ssq 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb dd 08 22 e0 64 vpinsrq \$0x64,%r16,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb dd 08 22 60 02 64 vpinsrq \$0x64,0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb fd 08 16 e0 64 vpextrq \$0x64,%xmm4,%r16
+[ ]*[a-f0-9]+: 62 fb fd 08 16 60 02 64 vpextrq \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb cd 08 0b 70 02 04 vrndscalesd \$(0x)?4,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 58 70 02 vaddsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5a 70 02 vcvtsd2ss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5e 70 02 vdivsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5f 70 02 vmaxsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5d 70 02 vminsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 59 70 02 vmulsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 51 70 02 vsqrtsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5c 70 02 vsubsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 58 70 04 vaddss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5a 70 04 vcvtss2sd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5e 70 04 vdivss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5f 70 04 vmaxss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5d 70 04 vminss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 59 70 04 vmulss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 4d 70 04 vrcp14ss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 4f 70 04 vrsqrt14ss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 51 70 04 vsqrtss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5c 70 04 vsubss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 7c 08 2f 60 04 vcomiss 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 10 60 04 vmovss 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 21 60 04 vpmovsxbd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 24 60 04 vpmovsxwq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 31 60 04 vpmovzxbd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 34 60 04 vpmovzxwq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 2e 60 04 vucomiss 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 11 60 04 vmovss %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e e0 vmovd %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e 60 04 vmovd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 6e e0 vmovd %r16d,%xmm4
+[ ]*[a-f0-9]+: 62 f9 7d 08 6e 60 04 vmovd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 e1 7e 08 2d c4 vcvtss2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7e 08 2d 48 04 vcvtss2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 7e 08 2c c4 vcvttss2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7e 08 2c 48 04 vcvttss2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 fe 08 2d c4 vcvtss2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fe 08 2d 48 04 vcvtss2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 e1 fe 08 2c c4 vcvttss2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fe 08 2c 48 04 vcvttss2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 fb fd 08 17 e0 64 vextractps \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 16 e0 64 vpextrd \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 16 60 04 64 vpextrd \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb 7d 08 17 e0 64 vextractps \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 17 60 04 64 vextractps \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb 5d 08 22 e0 64 vpinsrd \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 5d 08 22 60 04 64 vpinsrd \$0x64,0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5f 08 2a e0 vcvtsi2sd %r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5f 08 2a 60 04 vcvtsi2sdl 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5e 08 2a e0 vcvtsi2ss %r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5e 08 2a 60 04 vcvtsi2ssl 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 4d 08 21 70 04 64 vinsertps \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 0a 70 04 04 vrndscaless \$(0x)?4,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 7d 08 22 60 08 vpmovsxbq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 32 60 08 vpmovzxbq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 e1 7d 08 c5 c4 64 vpextrw \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 e1 7d 08 c5 c4 64 vpextrw \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 15 60 08 64 vpextrw \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 5d 08 c4 e0 64 vpinsrw \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5d 08 c4 e0 64 vpinsrw \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5d 08 c4 60 08 64 vpinsrw \$0x64,0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 7d 08 14 e0 64 vpextrb \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 14 e0 64 vpextrb \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 14 60 10 64 vpextrb \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb 5d 08 20 e0 64 vpinsrb \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 5d 08 20 e0 64 vpinsrb \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 5d 08 20 60 10 64 vpinsrb \$0x64,0x10\(%r16\),%xmm4,%xmm4
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse2avx-apx.s
@@ -0,0 +1,320 @@
+ .text
+ .sse_check warning
+sse2avx:
+
+# Tests for op mem128, xmm
+ cvtdq2ps 16(%r16),%xmm4
+ cvtpd2dq 16(%r16),%xmm4
+ cvtpd2ps 16(%r16),%xmm4
+ cvtps2dq 16(%r16),%xmm4
+ cvttpd2dq 16(%r16),%xmm4
+ cvttps2dq 16(%r16),%xmm4
+ lddqu 16(%r16),%xmm4
+ movapd 16(%r16),%xmm4
+ movaps 16(%r16),%xmm4
+ movdqa 16(%r16),%xmm4
+ movdqu 16(%r16),%xmm4
+ movntdqa 16(%r16),%xmm4
+ movshdup 16(%r16),%xmm4
+ movsldup 16(%r16),%xmm4
+ movupd 16(%r16),%xmm4
+ movups 16(%r16),%xmm4
+ pabsb 16(%r16),%xmm4
+ pabsw 16(%r16),%xmm4
+ pabsd 16(%r16),%xmm4
+ rcpps 16(%r16),%xmm4
+ rsqrtps 16(%r16),%xmm4
+ sqrtpd 16(%r16),%xmm4
+ sqrtps 16(%r16),%xmm4
+
+# Tests for op xmm, mem128
+ movapd %xmm4,16(%r16)
+ movaps %xmm4,16(%r16)
+ movdqa %xmm4,16(%r16)
+ movdqu %xmm4,16(%r16)
+ movntdq %xmm4,16(%r16)
+ movntpd %xmm4,16(%r16)
+ movntps %xmm4,16(%r16)
+ movupd %xmm4,16(%r16)
+ movups %xmm4,16(%r16)
+
+# Tests for op mem128, xmm[, xmm]
+ addpd 16(%r16),%xmm6
+ addps 16(%r16),%xmm6
+ aesenc 16(%r16),%xmm6
+ aesenclast 16(%r16),%xmm6
+ aesdec 16(%r16),%xmm6
+ aesdeclast 16(%r16),%xmm6
+ andnpd 16(%r16),%xmm6
+ andnps 16(%r16),%xmm6
+ andpd 16(%r16),%xmm6
+ andps 16(%r16),%xmm6
+ divpd 16(%r16),%xmm6
+ divps 16(%r16),%xmm6
+ gf2p8mulb 16(%r16),%xmm6
+ maxpd 16(%r16),%xmm6
+ maxps 16(%r16),%xmm6
+ minpd 16(%r16),%xmm6
+ minps 16(%r16),%xmm6
+ mulpd 16(%r16),%xmm6
+ mulps 16(%r16),%xmm6
+ orpd 16(%r16),%xmm6
+ orps 16(%r16),%xmm6
+ packsswb 16(%r16),%xmm6
+ packssdw 16(%r16),%xmm6
+ packuswb 16(%r16),%xmm6
+ packusdw 16(%r16),%xmm6
+ paddb 16(%r16),%xmm6
+ paddw 16(%r16),%xmm6
+ paddd 16(%r16),%xmm6
+ paddq 16(%r16),%xmm6
+ paddsb 16(%r16),%xmm6
+ paddsw 16(%r16),%xmm6
+ paddusb 16(%r16),%xmm6
+ paddusw 16(%r16),%xmm6
+ pand 16(%r16),%xmm6
+ pandn 16(%r16),%xmm6
+ pavgb 16(%r16),%xmm6
+ pavgw 16(%r16),%xmm6
+ pclmullqlqdq 16(%r16),%xmm6
+ pclmulhqlqdq 16(%r16),%xmm6
+ pclmullqhqdq 16(%r16),%xmm6
+ pclmulhqhqdq 16(%r16),%xmm6
+ pmaddwd 16(%r16),%xmm6
+ pmaddubsw 16(%r16),%xmm6
+ pmaxsb 16(%r16),%xmm6
+ pmaxsw 16(%r16),%xmm6
+ pmaxsd 16(%r16),%xmm6
+ pmaxub 16(%r16),%xmm6
+ pmaxuw 16(%r16),%xmm6
+ pmaxud 16(%r16),%xmm6
+ pminsb 16(%r16),%xmm6
+ pminsw 16(%r16),%xmm6
+ pminsd 16(%r16),%xmm6
+ pminub 16(%r16),%xmm6
+ pminuw 16(%r16),%xmm6
+ pminud 16(%r16),%xmm6
+ pmuldq 16(%r16),%xmm6
+ pmulhuw 16(%r16),%xmm6
+ pmulhrsw 16(%r16),%xmm6
+ pmulhw 16(%r16),%xmm6
+ pmullw 16(%r16),%xmm6
+ pmulld 16(%r16),%xmm6
+ pmuludq 16(%r16),%xmm6
+ por 16(%r16),%xmm6
+ psadbw 16(%r16),%xmm6
+ pshufb 16(%r16),%xmm6
+ psllw 16(%r16),%xmm6
+ pslld 16(%r16),%xmm6
+ psllq 16(%r16),%xmm6
+ psraw 16(%r16),%xmm6
+ psrad 16(%r16),%xmm6
+ psrlw 16(%r16),%xmm6
+ psrld 16(%r16),%xmm6
+ psrlq 16(%r16),%xmm6
+ psubb 16(%r16),%xmm6
+ psubw 16(%r16),%xmm6
+ psubd 16(%r16),%xmm6
+ psubq 16(%r16),%xmm6
+ psubsb 16(%r16),%xmm6
+ psubsw 16(%r16),%xmm6
+ psubusb 16(%r16),%xmm6
+ psubusw 16(%r16),%xmm6
+ punpckhbw 16(%r16),%xmm6
+ punpckhwd 16(%r16),%xmm6
+ punpckhdq 16(%r16),%xmm6
+ punpckhqdq 16(%r16),%xmm6
+ punpcklbw 16(%r16),%xmm6
+ punpcklwd 16(%r16),%xmm6
+ punpckldq 16(%r16),%xmm6
+ punpcklqdq 16(%r16),%xmm6
+ pxor 16(%r16),%xmm6
+ subpd 16(%r16),%xmm6
+ subps 16(%r16),%xmm6
+ unpckhpd 16(%r16),%xmm6
+ unpckhps 16(%r16),%xmm6
+ unpcklpd 16(%r16),%xmm6
+ unpcklps 16(%r16),%xmm6
+ xorpd 16(%r16),%xmm6
+ xorps 16(%r16),%xmm6
+
+# Tests for op imm8, mem128, xmm
+ pshufd $100,16(%r16),%xmm6
+ pshufhw $100,16(%r16),%xmm6
+ pshuflw $100,16(%r16),%xmm6
+ roundpd $4,16(%r16),%xmm6
+ roundps $4,16(%r16),%xmm6
+
+# Tests for op imm8, mem128, xmm[, xmm]
+ gf2p8affineqb $100,16(%r16),%xmm6
+ gf2p8affineinvqb $100,16(%r16),%xmm6
+ palignr $100,16(%r16),%xmm6
+ pclmulqdq $100,16(%r16),%xmm6
+ shufpd $100,16(%r16),%xmm6
+ shufps $100,16(%r16),%xmm6
+
+# Tests for op mem64, xmm
+ comisd 16(%r16),%xmm4
+ cvtdq2pd 16(%r16),%xmm4
+ cvtpi2pd 16(%r16),%xmm4
+ cvtps2pd 16(%r16),%xmm4
+ movddup 16(%r16),%xmm4
+ movsd 16(%r16),%xmm4
+ pmovsxbw 16(%r16),%xmm4
+ pmovsxwd 16(%r16),%xmm4
+ pmovsxdq 16(%r16),%xmm4
+ pmovzxbw 16(%r16),%xmm4
+ pmovzxwd 16(%r16),%xmm4
+ pmovzxdq 16(%r16),%xmm4
+ ucomisd 16(%r16),%xmm4
+
+# Tests for op xmm, mem64
+ movlpd %xmm4,16(%r16)
+ movlps %xmm4,16(%r16)
+ movhpd %xmm4,16(%r16)
+ movhps %xmm4,16(%r16)
+ movsd %xmm4,16(%r16)
+
+# Tests for op mem64, xmm[, xmm]
+ movlpd 16(%r16),%xmm4
+ movlps 16(%r16),%xmm4
+ movhpd 16(%r16),%xmm4
+ movhps 16(%r16),%xmm4
+
+# Tests for op xmm, regq/mem64
+# Tests for op regq/mem64, xmm
+ movd %xmm4,%r16
+ movd %r16,%xmm4
+ movq %xmm4,%r16
+ movq %r16,%xmm4
+ movq %xmm4,16(%r16)
+ movq 16(%r16),%xmm4
+
+# Tests for op xmm/mem64, regl
+ cvtsd2si %xmm4,%r16d
+ cvtsd2si 16(%r16),%ecx
+ cvttsd2si %xmm4,%r16d
+ cvttsd2si 16(%r16),%ecx
+
+# Tests for op xmm/mem64, regq
+ cvtsd2si %xmm4,%r16
+ cvtsd2si 16(%r16),%rcx
+ cvttsd2si %xmm4,%r16
+ cvttsd2si 16(%r16),%rcx
+
+# Tests for op regq/mem64, xmm[, xmm]
+ cvtsi2sdq %r16,%xmm4
+ cvtsi2sdq 16(%r16),%xmm4
+ cvtsi2ssq %r16,%xmm4
+ cvtsi2ssq 16(%r16),%xmm4
+
+# Tests for op imm8, regq/mem64, xmm[, xmm]
+ pinsrq $100,%r16,%xmm4
+ pinsrq $100,16(%r16),%xmm4
+
+# Tests for op imm8, xmm, regq/mem64
+ pextrq $100,%xmm4,%r16
+ pextrq $100,%xmm4,16(%r16)
+
+# Tests for op imm8, mem64, xmm[, xmm]
+ roundsd $4,16(%r16),%xmm6
+
+# Tests for op mem64, xmm[, xmm]
+ addsd 16(%r16),%xmm6
+ cvtsd2ss 16(%r16),%xmm6
+ divsd 16(%r16),%xmm6
+ maxsd 16(%r16),%xmm6
+ minsd 16(%r16),%xmm6
+ mulsd 16(%r16),%xmm6
+ sqrtsd 16(%r16),%xmm6
+ subsd 16(%r16),%xmm6
+
+# Tests for op mem32, xmm[, xmm]
+ addss 16(%r16),%xmm6
+ cvtss2sd 16(%r16),%xmm6
+ divss 16(%r16),%xmm6
+ maxss 16(%r16),%xmm6
+ minss 16(%r16),%xmm6
+ mulss 16(%r16),%xmm6
+ rcpss 16(%r16),%xmm6
+ rsqrtss 16(%r16),%xmm6
+ sqrtss 16(%r16),%xmm6
+ subss 16(%r16),%xmm6
+
+# Tests for op mem32, xmm
+ comiss 16(%r16),%xmm4
+ movss 16(%r16),%xmm4
+ pmovsxbd 16(%r16),%xmm4
+ pmovsxwq 16(%r16),%xmm4
+ pmovzxbd 16(%r16),%xmm4
+ pmovzxwq 16(%r16),%xmm4
+ ucomiss 16(%r16),%xmm4
+
+# Tests for op xmm, mem32
+ movss %xmm4,16(%r16)
+
+# Tests for op xmm, regl/mem32
+# Tests for op regl/mem32, xmm
+ movd %xmm4,%r16d
+ movd %xmm4,16(%r16)
+ movd %r16d,%xmm4
+ movd 16(%r16),%xmm4
+
+# Tests for op xmm/mem32, regl
+ cvtss2si %xmm4,%r16d
+ cvtss2si 16(%r16),%ecx
+ cvttss2si %xmm4,%r16d
+ cvttss2si 16(%r16),%ecx
+
+# Tests for op xmm/mem32, regq
+ cvtss2si %xmm4,%r16
+ cvtss2si 16(%r16),%rcx
+ cvttss2si %xmm4,%r16
+ cvttss2si 16(%r16),%rcx
+
+# Tests for op imm8, xmm, regq/mem32
+ extractps $100,%xmm4,%r16
+
+# Tests for op imm8, xmm, regl/mem32
+ pextrd $100,%xmm4,%r16d
+ pextrd $100,%xmm4,16(%r16)
+ extractps $100,%xmm4,%r16d
+ extractps $100,%xmm4,16(%r16)
+
+# Tests for op imm8, regl/mem32, xmm[, xmm]
+ pinsrd $100,%r16d,%xmm4
+ pinsrd $100,16(%r16),%xmm4
+
+# Tests for op regl/mem32, xmm[, xmm]
+ cvtsi2sd %r16d,%xmm4
+ cvtsi2sd 16(%r16),%xmm4
+ cvtsi2ss %r16d,%xmm4
+ cvtsi2ss 16(%r16),%xmm4
+
+# Tests for op imm8, mem32, xmm[, xmm]
+ insertps $100,16(%r16),%xmm6
+ roundss $4,16(%r16),%xmm6
+
+# Tests for op mem16, xmm
+ pmovsxbq 16(%r16),%xmm4
+ pmovzxbq 16(%r16),%xmm4
+
+# Tests for op imm8, xmm, regl/mem16
+ pextrw $100,%xmm4,%r16d
+ pextrw $100,%xmm4,%r16
+ pextrw $100,%xmm4,16(%r16)
+
+# Tests for op imm8, regl/mem16, xmm[, xmm]
+ pinsrw $100,%r16d,%xmm4
+ pinsrw $100,%r16,%xmm4
+ pinsrw $100,16(%r16),%xmm4
+
+# Tests for op imm8, xmm, regl/mem8
+ pextrb $100,%xmm4,%r16d
+ pextrb $100,%xmm4,%r16
+ pextrb $100,%xmm4,16(%r16)
+
+# Tests for op imm8, regl/mem8, xmm[, xmm]
+ pinsrb $100,%r16d,%xmm4
+ pinsrb $100,%r16,%xmm4
+ pinsrb $100,16(%r16),%xmm4
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -390,6 +390,7 @@ run_dump_test "x86-64-apx-jmpabs-inval"
run_dump_test "x86-64-apx-nf"
run_dump_test "x86-64-apx-nf-intel"
run_dump_test "x86-64-apx_f-evex"
+run_dump_test "sse2avx-apx"
run_dump_test "x86-64-avx512f-rcigrz-intel"
run_dump_test "x86-64-avx512f-rcigrz"
run_dump_test "x86-64-clwb"
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.d
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.d
@@ -746,9 +746,19 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: c5 79 7e c8 vmovd %xmm9,%eax
[ ]*[a-f0-9]+: c4 a1 79 7e c8 vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c4 e1 f9 7e c8 vmovq %xmm1,%rax
+[ ]*[a-f0-9]+: 62 f1 7d 08 7e c8 \{evex\} vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c5 f9 7e c8 vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c5 f9 7e c8 vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c4 e1 79 7e c8 vmovd %xmm1,%eax
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 d9 7d 08 7e c8 vmovd %xmm1,%r24d
+[ ]*[a-f0-9]+: 62 79 7d 08 7e c8 vmovd %xmm9,%r16d
+[ ]*[a-f0-9]+: 62 b9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 fd 08 7e c8 vmovq %xmm1,%r16
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
[ ]*[a-f0-9]+: c5 f8 ae 11 vldmxcsr \(%rcx\)
[ ]*[a-f0-9]+: c5 f8 ae 19 vstmxcsr \(%rcx\)
[ ]*[a-f0-9]+: c5 f8 5b f4 vcvtdq2ps %xmm4,%xmm6
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.s
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.s
@@ -847,10 +847,21 @@ _start:
rex.r movd %xmm1, %eax
rex.x movd %xmm1, %eax
rex.w movd %xmm1, %eax
+ {evex} movd %xmm1, %eax
{rex} movd %xmm1, %eax
{rex2} movd %xmm1, %eax
{vex3} movd %xmm1, %eax
+ movd %xmm1, %r16d
+ rex movd %xmm1, %r16d
+ rex.b movd %xmm1, %r16d
+ rex.r movd %xmm1, %r16d
+ rex.x movd %xmm1, %r16d
+ rex.w movd %xmm1, %r16d
+ {evex} movd %xmm1, %r16d
+ {rex} movd %xmm1, %r16d
+ {rex2} movd %xmm1, %r16d
+
.intel_syntax noprefix
# Tests for op mem64
ldmxcsr DWORD PTR [rcx]
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1004,10 +1004,40 @@ pause, 0xf390, i186, NoSuf, {}
$avx:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
$sse:SSE2:66::RegXMM:Xmmword, +
$mmx:MMX:::RegMMX:Qword>
+// As above, but also allowing AVX512 (EVEX) encoding, to transform
+// in particular insns using eGPR-s.
+<MMX:cpu:pfx:attr:reg:mem, +
+ $avx:AVX|AVX512VL:66:Vex128|EVex128|VexVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ $sse:SSE2:66::RegXMM:Xmmword, +
+ $mmx:MMX:::RegMMX:Qword>
+<MMXdq:opc:cpu:pfx:attr:reg:mem, +
+ d:0:AVX|AVX512VL:66:Vex128|EVex128|VexVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ d:0:SSE2:66::RegXMM:Xmmword, +
+ d:0:MMX:::RegMMX:Qword, +
+ q:1:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
+ q:1:AVX512VL:66:EVex128|VexVVVV|VexW1|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ q:1:SSE2:66::RegXMM:Xmmword, +
+ q:1:MMX:::RegMMX:Qword>
+<MMXBW:cpu:pfx:attr:reg:mem, +
+ $avx:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
+ $apx:AVX512BW&AVX512VL:66:EVex128|VexVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ $sse:SSE2:66::RegXMM:Xmmword, +
+ $mmx:MMX:::RegMMX:Qword>
<sse2:cpu:attr:scal:vvvv, +
$avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
$sse:SSE2:::>
+<SSE2BW:cpu:attr:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512BW&AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV, +
+ $sse:SSE2::>
+<SSE2D:cpu:attr:scal:vvvv, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexLIG|EVexLIG|VexW0|Disp8MemShift=2|SSE2AVX:VexVVVV, +
+ $sse:SSE2:::>
+<SSE2Q:cpu:attr:scal:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512VL:EVex128|VexW1|Disp8MemShift=4|SSE2AVX:EVexLIG|VexW1|Disp8MemShift=3|SSE2AVX:VexVVVV, +
+ $sse:SSE2:::>
<bw:opc:vexw:elem:kcpu:kpfx:cpubmi, +
b:0:VexW0:Byte:AVX512DQ:66:AVX512VBMI, +
@@ -1022,8 +1052,8 @@ emms, 0xf77, MMX, NoSuf, {}
// copying between Reg64/Mem64 and RegXMM/RegMMX, as is mandated by Intel's
// spec). AMD's spec, having been in existence for much longer, failed to
// recognize that and specified movd for 32- and 64-bit operations.
-movd, 0x666e, AVX, D|Modrm|Vex128|Space0F|VexW0|NoSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM }
-movd, 0x666e, AVX&x64, D|Modrm|Vex=1|Space0F|VexW1|NoSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM }
+movd, 0x666e, AVX|AVX512F, D|Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM }
+movd, 0x666e, x64&(AVX|AVX512F), D|Modrm|Vex128|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM }
movd, 0x660f6e, SSE2, D|Modrm|IgnoreSize|NoSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
movd, 0x660f6e, SSE2&x64, D|Modrm|NoSuf|Size64, { Reg64|BaseIndex, RegXMM }
// The MMX templates have to remain after at least the SSE2AVX ones.
@@ -1031,247 +1061,280 @@ movd, 0xf6e, MMX, D|Modrm|IgnoreSize|NoS
movd, 0xf6e, MMX&x64, D|Modrm|NoSuf|Size64, { Reg64|BaseIndex, RegMMX }
movq, 0xf37e, AVX, Load|Modrm|Vex128|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
movq, 0x66d6, AVX, Modrm|Vex128|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
-movq, 0x666e, AVX&x64, D|Modrm|Vex=1|Space0F|VexW1|NoSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }
+movq, 0xf37e, AVX512F, Load|Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movq, 0x66d6, AVX512F, Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
+movq, 0x666e, x64&(AVX|AVX512F), D|Modrm|Vex128|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }
movq, 0xf30f7e, SSE2, Load|Modrm|NoSuf, { Unspecified|Qword|BaseIndex|RegXMM, RegXMM }
movq, 0x660fd6, SSE2, Modrm|NoSuf, { RegXMM, Unspecified|Qword|BaseIndex|RegXMM }
movq, 0x660f6e, SSE2&x64, D|Modrm|NoSuf|Size64, { Reg64|Unspecified|BaseIndex, RegXMM }
// The MMX templates have to remain after at least the SSE2AVX ones.
movq, 0xf6f, MMX, D|Modrm|NoSuf, { Unspecified|Qword|BaseIndex|RegMMX, RegMMX }
movq, 0xf6e, MMX&x64, D|Modrm|NoSuf|Size64, { Reg64|Unspecified|BaseIndex, RegMMX }
-packssdw<mmx>, 0x<mmx:pfx>0f6b, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-packsswb<mmx>, 0x<mmx:pfx>0f63, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-packuswb<mmx>, 0x<mmx:pfx>0f67, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-padd<bw><mmx>, 0x<mmx:pfx>0ffc | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-paddd<mmx>, 0x<mmx:pfx>0ffe, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-paddq<sse2>, 0x660fd4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+packssdw<MMXBW>, 0x<MMXBW:pfx>0f6b, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+packsswb<MMXBW>, 0x<MMXBW:pfx>0f63, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+packuswb<MMXBW>, 0x<MMXBW:pfx>0f67, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+padd<bw><MMXBW>, 0x<MMXBW:pfx>0ffc | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+paddd<MMX>, 0x<MMX:pfx>0ffe, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+paddq<SSE2Q>, 0x660fd4, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
paddq, 0xfd4, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-padds<bw><mmx>, 0x<mmx:pfx>0fec | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-paddus<bw><mmx>, 0x<mmx:pfx>0fdc | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pand<mmx>, 0x<mmx:pfx>0fdb, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pandn<mmx>, 0x<mmx:pfx>0fdf, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
+padds<bw><MMXBW>, 0x<MMXBW:pfx>0fec | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+paddus<bw><MMXBW>, 0x<MMXBW:pfx>0fdc | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+pand<MMX>, 0x<MMX:pfx>0fdb, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+pandn<MMX>, 0x<MMX:pfx>0fdf, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
pcmpeq<bw><mmx>, 0x<mmx:pfx>0f74 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
pcmpeqd<mmx>, 0x<mmx:pfx>0f76, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
pcmpgt<bw><mmx>, 0x<mmx:pfx>0f64 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf|Optimize, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
pcmpgtd<mmx>, 0x<mmx:pfx>0f66, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf|Optimize, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pmaddwd<mmx>, 0x<mmx:pfx>0ff5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pmulhw<mmx>, 0x<mmx:pfx>0fe5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pmullw<mmx>, 0x<mmx:pfx>0fd5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-por<mmx>, 0x<mmx:pfx>0feb, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psllw<mmx>, 0x<mmx:pfx>0ff1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psllw<mmx>, 0x<mmx:pfx>0f71/6, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psll<dq><mmx>, 0x<mmx:pfx>0ff2 | <dq:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psll<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/6, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psraw<mmx>, 0x<mmx:pfx>0fe1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psraw<mmx>, 0x<mmx:pfx>0f71/4, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psrad<mmx>, 0x<mmx:pfx>0fe2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrad<mmx>, 0x<mmx:pfx>0f72/4, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psrlw<mmx>, 0x<mmx:pfx>0fd1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrlw<mmx>, 0x<mmx:pfx>0f71/2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psrl<dq><mmx>, 0x<mmx:pfx>0fd2 | <dq:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrl<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psub<bw><mmx>, 0x<mmx:pfx>0ff8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psubd<mmx>, 0x<mmx:pfx>0ffa, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psubq<sse2>, 0x660ffb, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaddwd<MMXBW>, 0x<MMXBW:pfx>0ff5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+pmulhw<MMXBW>, 0x<MMXBW:pfx>0fe5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+pmullw<MMXBW>, 0x<MMXBW:pfx>0fd5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+por<MMX>, 0x<MMX:pfx>0feb, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+psllw<MMXBW>, 0x<MMXBW:pfx>0ff1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psllw<MMXBW>, 0x<MMXBW:pfx>0f71/6, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { Imm8, <MMXBW:reg> }
+psll<MMXdq>, 0x<MMXdq:pfx>0ff2 | <MMXdq:opc>, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { <MMXdq:reg>|<MMXdq:mem>|Unspecified|BaseIndex, <MMXdq:reg> }
+psll<MMXdq>, 0x<MMXdq:pfx>0f72 | <MMXdq:opc>/6, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { Imm8, <MMXdq:reg> }
+psraw<MMXBW>, 0x<MMXBW:pfx>0fe1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psraw<MMXBW>, 0x<MMXBW:pfx>0f71/4, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { Imm8, <MMXBW:reg> }
+psrad<MMX>, 0x<MMX:pfx>0fe2, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+psrad<MMX>, 0x<MMX:pfx>0f72/4, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { Imm8, <MMX:reg> }
+psrlw<MMXBW>, 0x<MMXBW:pfx>0fd1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psrlw<MMXBW>, 0x<MMXBW:pfx>0f71/2, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { Imm8, <MMXBW:reg> }
+psrl<MMXdq>, 0x<MMXdq:pfx>0fd2 | <MMXdq:opc>, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { <MMXdq:reg>|<MMXdq:mem>|Unspecified|BaseIndex, <MMXdq:reg> }
+psrl<MMXdq>, 0x<MMXdq:pfx>0f72 | <MMXdq:opc>/2, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { Imm8, <MMXdq:reg> }
+psub<bw><MMXBW>, 0x<MMXBW:pfx>0ff8 | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psubd<MMX>, 0x<MMX:pfx>0ffa, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+psubq<SSE2Q>, 0x660ffb, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
psubq, 0xffb, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-psubs<bw><mmx>, 0x<mmx:pfx>0fe8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psubus<bw><mmx>, 0x<mmx:pfx>0fd8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpckhbw<mmx>, 0x<mmx:pfx>0f68, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpckhwd<mmx>, 0x<mmx:pfx>0f69, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpckhdq<mmx>, 0x<mmx:pfx>0f6a, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpcklbw<sse2>, 0x660f60, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+psubs<bw><MMXBW>, 0x<MMXBW:pfx>0fe8 | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psubus<bw><MMXBW>, 0x<MMXBW:pfx>0fd8 | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+punpckhbw<MMXBW>, 0x<MMXBW:pfx>0f68, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+punpckhwd<MMXBW>, 0x<MMXBW:pfx>0f69, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+punpckhdq<MMX>, 0x<MMX:pfx>0f6a, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+punpcklbw<SSE2BW>, 0x660f60, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpcklbw, 0xf60, MMX, Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegMMX, RegMMX }
-punpcklwd<sse2>, 0x660f61, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpcklwd<SSE2BW>, 0x660f61, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpcklwd, 0xf61, MMX, Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegMMX, RegMMX }
-punpckldq<sse2>, 0x660f62, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpckldq<SSE2D>, 0x660f62, <SSE2D:cpu>, Modrm|<SSE2D:attr>|<SSE2D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpckldq, 0xf62, MMX, Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pxor<mmx>, 0x<mmx:pfx>0fef, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
+pxor<MMX>, 0x<MMX:pfx>0fef, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
// SSE instructions.
<sse:cpu:attr:scal:vvvv, +
$avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
$sse:SSE:::>
+<SSE:cpu:attr:scal:vvvv, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexLIG|EVexLIG|VexW0|Disp8MemShift=2|SSE2AVX:VexVVVV, +
+ $sse:SSE:::>
+<SSEDQ:cpu:attr, +
+ $avx:AVX:Vex128|VexW0|VexVVVV|SSE2AVX, +
+ $apx:AVX512DQ&AVX512VL:EVex128|VexW0|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse:SSE:>
+<SSERP:recip:rsqrt:cpu:attr, +
+ $avx:0x0f53:0x0f52:AVX:Vex128|VexW0|SSE2AVX, +
+ $apx:0x660f384c:0x660f384e:AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX, +
+ $sse:0x0f53:0x0f52:SSE:::>
+<SSERS:recip:rsqrt:cpu:attr, +
+ $avx:0xf30f53:0xf30f52:AVX:VexLIG|VexW0|VexVVVV|SSE2AVX, +
+ $apx:0x660f384d:0x660f384f:AVX512F:EVexLIG|VexW0|VexVVVV|Disp8MemShift=2|SSE2AVX, +
+ $sse:0xf30f53:0xf30f52:SSE:::>
<frel:imm:comm, eq:0:C, lt:1:, le:2:, unord:3:C, neq:4:C, nlt:5:, nle:6:, ord:7:C>
-addps<sse>, 0x0f58, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-addss<sse>, 0xf30f58, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-andnps<sse>, 0x0f55, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-andps<sse>, 0x0f54, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+addps<SSE>, 0x0f58, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+addss<SSE>, 0xf30f58, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+andnps<SSEDQ>, 0x0f55, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+andps<SSEDQ>, 0x0f54, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>ps<sse>, 0x0fc2/<frel:imm>, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>ss<sse>, 0xf30fc2/<frel:imm>, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
cmpps<sse>, 0x0fc2, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
cmpss<sse>, 0xf30fc2, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-comiss<sse>, 0x0f2f, <sse:cpu>, Modrm|<sse:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+comiss<SSE>, 0x0f2f, <SSE:cpu>, Modrm|<SSE:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
cvtpi2ps, 0xf2a, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegXMM }
cvtps2pi, 0xf2d, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegMMX }
cvtsi2ss<sse>, 0xf30f2a, <sse:cpu>&No64, Modrm|<sse:scal>|<sse:vvvv>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2ss, 0xf32a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|IgnoreSize|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2ss, 0xf32a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2ss, 0xf30f2a, SSE&x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2ss, 0xf30f2a, SSE&x64, Modrm|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtss2si, 0xf32d, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvtss2si, 0xf32d, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvtss2si, 0xf30f2d, SSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvttps2pi, 0xf2c, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegMMX }
-cvttss2si, 0xf32c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvttss2si, 0xf32c, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvttss2si, 0xf30f2c, SSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-divps<sse>, 0x0f5e, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-divss<sse>, 0xf30f5e, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+divps<SSE>, 0x0f5e, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+divss<SSE>, 0xf30f5e, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
ldmxcsr<sse>, 0x0fae/2, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { Dword|Unspecified|BaseIndex }
maskmovq, 0xff7, SSE|3dnowA, Modrm|NoSuf, { RegMMX, RegMMX }
-maxps<sse>, 0x0f5f, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-maxss<sse>, 0xf30f5f, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-minps<sse>, 0x0f5d, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-minss<sse>, 0xf30f5d, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movaps<sse>, 0x0f28, <sse:cpu>, D|Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+maxps<SSE>, 0x0f5f, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+maxss<SSE>, 0xf30f5f, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+minps<SSE>, 0x0f5d, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+minss<SSE>, 0xf30f5d, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movaps<SSE>, 0x0f28, <SSE:cpu>, D|Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
movhlps<sse>, 0x0f12, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
-movhps, 0x16, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
-movhps, 0x17, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movhps, 0x16, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexVVVV|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movhps, 0x17, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movhps, 0xf16, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movlhps<sse>, 0x0f16, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
-movlps, 0x12, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
-movlps, 0x13, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movlps, 0x12, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexVVVV|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movlps, 0x13, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movlps, 0xf12, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movmskps<sse>, 0x0f50, <sse:cpu>, Modrm|<sse:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
-movntps<sse>, 0x0f2b, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
+movntps<SSE>, 0x0f2b, <SSE:cpu>, Modrm|<SSE:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
movntq, 0xfe7, SSE|3dnowA, Modrm|NoSuf, { RegMMX, Qword|Unspecified|BaseIndex }
-movntdq<sse2>, 0x660fe7, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
-movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Dword|Unspecified|BaseIndex, RegXMM }
+movntdq<SSE2D>, 0x660fe7, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
+movss, 0xf310, AVX|AVX512F, D|Modrm|VexLIG|EVexLIG|Space0F|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Dword|Unspecified|BaseIndex, RegXMM }
movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
movss, 0xf30f10, SSE, D|Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movups<sse>, 0x0f10, <sse:cpu>, D|Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulps<sse>, 0x0f59, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulss<sse>, 0xf30f59, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-orps<sse>, 0x0f56, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movups<SSE>, 0x0f10, <SSE:cpu>, D|Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulps<SSE>, 0x0f59, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulss<SSE>, 0xf30f59, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+orps<SSEDQ>, 0x0f56, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pavg<bw>, 0xfe0 | (3 * <bw:opc>), SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pavg<bw><sse2>, 0x660fe0 | (3 * <bw:opc>), <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pextrw<sse2>, 0x660fc5, <sse2:cpu>, Load|Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
+pavg<bw><SSE2BW>, 0x660fe0 | (3 * <bw:opc>), <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pextrw<SSE2D>, 0x660fc5, <SSE2D:cpu>, Load|Modrm|<SSE2D:attr>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
pextrw, 0xfc5, SSE|3dnowA, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Imm8, RegMMX, Reg32|Reg64 }
-pinsrw<sse2>, 0x660fc4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
-pinsrw<sse2>, 0x660fc4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM }
+pinsrw<SSE2D>, 0x660fc4, <SSE2D:cpu>, Modrm|<SSE2D:attr>|<SSE2D:vvvv>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
+pinsrw<SSE2D>, 0x660fc4, <SSE2D:cpu>, Modrm|<SSE2D:attr>|<SSE2D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM }
pinsrw, 0xfc4, SSE|3dnowA, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Imm8, Reg32|Reg64, RegMMX }
pinsrw, 0xfc4, SSE|3dnowA, Modrm|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegMMX }
-pmaxsw<sse2>, 0x660fee, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxsw<SSE2BW>, 0x660fee, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmaxsw, 0xfee, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pmaxub<sse2>, 0x660fde, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxub<SSE2BW>, 0x660fde, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmaxub, 0xfde, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pminsw<sse2>, 0x660fea, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminsw<SSE2BW>, 0x660fea, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pminsw, 0xfea, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pminub<sse2>, 0x660fda, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminub<SSE2BW>, 0x660fda, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pminub, 0xfda, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
pmovmskb<sse2>, 0x660fd7, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
pmovmskb, 0xfd7, SSE|3dnowA, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegMMX, Reg32|Reg64 }
-pmulhuw<sse2>, 0x660fe4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmulhuw<SSE2BW>, 0x660fe4, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmulhuw, 0xfe4, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
prefetchnta, 0xf18/0, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
prefetcht0, 0xf18/1, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
prefetcht1, 0xf18/2, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
prefetcht2, 0xf18/3, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
psadbw, 0xff6, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-psadbw<sse2>, 0x660ff6, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+psadbw<SSE2BW>, 0x660ff6, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pshufw, 0xf70, SSE|3dnowA, Modrm|NoSuf, { Imm8|Imm8S, Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-rcpps<sse>, 0x0f53, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-rcpss<sse>, 0xf30f53, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-rsqrtps<sse>, 0x0f52, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-rsqrtss<sse>, 0xf30f52, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+rcpps<SSERP>, <SSERP:recip>, <SSERP:cpu>, Modrm|<SSERP:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+rcpss<SSERS>, <SSERS:recip>, <SSERS:cpu>, Modrm|<SSERS:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+rsqrtps<SSERP>, <SSERP:rsqrt>, <SSERP:cpu>, Modrm|<SSERP:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+rsqrtss<SSERS>, <SSERS:rsqrt>, <SSERS:cpu>, Modrm|<SSERS:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
sfence, 0xfaef8, SSE|3dnowA, NoSuf, {}
-shufps<sse>, 0x0fc6, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtps<sse>, 0x0f51, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtss<sse>, 0xf30f51, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+shufps<SSE>, 0x0fc6, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtps<SSE>, 0x0f51, <SSE:cpu>, Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtss<SSE>, 0xf30f51, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
stmxcsr<sse>, 0x0fae/3, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { Dword|Unspecified|BaseIndex }
-subps<sse>, 0x0f5c, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-subss<sse>, 0xf30f5c, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-ucomiss<sse>, 0x0f2e, <sse:cpu>, Modrm|<sse:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-unpckhps<sse>, 0x0f15, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-unpcklps<sse>, 0x0f14, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-xorps<sse>, 0x0f57, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+subps<SSE>, 0x0f5c, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+subss<SSE>, 0xf30f5c, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+ucomiss<SSE>, 0x0f2e, <SSE:cpu>, Modrm|<SSE:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+unpckhps<SSE>, 0x0f15, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+unpcklps<SSE>, 0x0f14, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+xorps<SSEDQ>, 0x0f57, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
// SSE2 instructions.
-addpd<sse2>, 0x660f58, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-addsd<sse2>, 0xf20f58, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-andnpd<sse2>, 0x660f55, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-andpd<sse2>, 0x660f54, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<SSE2DQ:cpu:attr, +
+ $avx:AVX:Vex128|VexW0|VexVVVV|SSE2AVX, +
+ $apx:AVX512DQ&AVX512VL:EVex128|VexW1|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse:SSE2:>
+
+addpd<SSE2Q>, 0x660f58, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+addsd<SSE2Q>, 0xf20f58, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+andnpd<SSE2DQ>, 0x660f55, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+andpd<SSE2DQ>, 0x660f54, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>pd<sse2>, 0x660fc2/<frel:imm>, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>sd<sse2>, 0xf20fc2/<frel:imm>, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
cmppd<sse2>, 0x660fc2, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
cmpsd<sse2>, 0xf20fc2, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-comisd<sse2>, 0x660f2f, <sse2:cpu>, Modrm|<sse2:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+comisd<SSE2Q>, 0x660f2f, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
cvtpi2pd, 0x660f2a, SSE2, Modrm|NoSuf, { RegMMX, RegXMM }
-cvtpi2pd, 0xf3e6, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+cvtpi2pd, 0xf3e6, AVX|AVX512VL, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
cvtpi2pd, 0x660f2a, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
cvtsi2sd<sse2>, 0xf20f2a, <sse2:cpu>&No64, Modrm|IgnoreSize|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2sd, 0xf22a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2sd, 0xf22a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2sd, 0xf20f2a, SSE2&x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2sd, 0xf20f2a, SSE2&x64, Modrm|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-divpd<sse2>, 0x660f5e, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-divsd<sse2>, 0xf20f5e, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-maxpd<sse2>, 0x660f5f, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-maxsd<sse2>, 0xf20f5f, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-minpd<sse2>, 0x660f5d, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-minsd<sse2>, 0xf20f5d, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movapd<sse2>, 0x660f28, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+divpd<SSE2Q>, 0x660f5e, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+divsd<SSE2Q>, 0xf20f5e, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+maxpd<SSE2Q>, 0x660f5f, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+maxsd<SSE2Q>, 0xf20f5f, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+minpd<SSE2Q>, 0x660f5d, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+minsd<SSE2Q>, 0xf20f5d, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movapd<SSE2Q>, 0x660f28, <SSE2Q:cpu>, D|Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
movhpd, 0x6616, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movhpd, 0x6616, AVX512F, Modrm|EVex128|Space0F|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movhpd, 0x6617, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movhpd, 0x6617, AVX512F, Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movhpd, 0x660f16, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movlpd, 0x6612, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movlpd, 0x6612, AVX512F, Modrm|EVex128|Space0F|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movlpd, 0x6613, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movlpd, 0x6613, AVX512F, Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movlpd, 0x660f12, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movmskpd<sse2>, 0x660f50, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
-movntpd<sse2>, 0x660f2b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
+movntpd<SSE2Q>, 0x660f2b, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movsd, 0xf210, AVX512F, D|Modrm|EVexLIG|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
movsd, 0xf20f10, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movupd<sse2>, 0x660f10, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulpd<sse2>, 0x660f59, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulsd<sse2>, 0xf20f59, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-orpd<sse2>, 0x660f56, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-shufpd<sse2>, 0x660fc6, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtpd<sse2>, 0x660f51, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtsd<sse2>, 0xf20f51, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-subpd<sse2>, 0x660f5c, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-subsd<sse2>, 0xf20f5c, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-ucomisd<sse2>, 0x660f2e, <sse2:cpu>, Modrm|<sse2:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-unpckhpd<sse2>, 0x660f15, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-unpcklpd<sse2>, 0x660f14, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-xorpd<sse2>, 0x660f57, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtdq2pd<sse2>, 0xf30fe6, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-cvtpd2dq<sse2>, 0xf20fe6, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtdq2ps<sse2>, 0x0f5b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movupd<SSE2Q>, 0x660f10, <SSE2Q:cpu>, D|Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulpd<SSE2Q>, 0x660f59, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulsd<SSE2Q>, 0xf20f59, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+orpd<SSE2DQ>, 0x660f56, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+shufpd<SSE2Q>, 0x660fc6, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtpd<SSE2Q>, 0x660f51, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtsd<SSE2Q>, 0xf20f51, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+subpd<SSE2Q>, 0x660f5c, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+subsd<SSE2Q>, 0xf20f5c, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+ucomisd<SSE2Q>, 0x660f2e, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+unpckhpd<SSE2Q>, 0x660f15, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+unpcklpd<SSE2Q>, 0x660f14, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+xorpd<SSE2DQ>, 0x660f57, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtdq2pd<SSE2D>, 0xf30fe6, <SSE2D:cpu>, Modrm|<SSE2D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+cvtpd2dq<SSE2Q>, 0xf20fe6, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtdq2ps<SSE2D>, 0x0f5b, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
cvtpd2pi, 0x660f2d, SSE2, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegMMX }
-cvtpd2ps<sse2>, 0x660f5a, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtps2pd<sse2>, 0x0f5a, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-cvtps2dq<sse2>, 0x660f5b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtsd2si, 0xf22d, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvtpd2ps<SSE2Q>, 0x660f5a, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtps2pd<SSE2D>, 0x0f5a, <SSE2D:cpu>, Modrm|<SSE2D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+cvtps2dq<SSE2D>, 0x660f5b, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtsd2si, 0xf22d, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=3|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvtsd2si, 0xf20f2d, SSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-cvtsd2ss<sse2>, 0xf20f5a, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-cvtss2sd<sse2>, 0xf30f5a, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-
+cvtsd2ss<SSE2Q>, 0xf20f5a, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+cvtss2sd<SSE2D>, 0xf30f5a, <SSE2D:cpu>, Modrm|<SSE2D:scal>|<SSE2D:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
cvttpd2pi, 0x660f2c, SSE2, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegMMX }
-cvttsd2si, 0xf22c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvttsd2si, 0xf22c, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=3|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvttsd2si, 0xf20f2c, SSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-cvttpd2dq<sse2>, 0x660fe6, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvttps2dq<sse2>, 0xf30f5b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvttpd2dq<SSE2Q>, 0x660fe6, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvttps2dq<SSE2D>, 0xf30f5b, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
maskmovdqu<sse2>, 0x660ff7, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, RegXMM }
-movdqa<sse2>, 0x660f6f, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-movdqu<sse2>, 0xf30f6f, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movdqa<SSE2D>, 0x660f6f, <SSE2D:cpu>, D|Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movdqu<SSE2D>, 0xf30f6f, <SSE2D:cpu>, D|Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
movdq2q, 0xf20fd6, SSE2, Modrm|NoSuf, { RegXMM, RegMMX }
movq2dq, 0xf30fd6, SSE2, Modrm|NoSuf, { RegMMX, RegXMM }
-pmuludq<sse2>, 0x660ff4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmuludq<SSE2Q>, 0x660ff4, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmuludq, 0xff4, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pshufd<sse2>, 0x660f70, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pshufhw<sse2>, 0xf30f70, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pshuflw<sse2>, 0xf20f70, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pshufd<SSE2D>, 0x660f70, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pshufhw<SSE2BW>, 0xf30f70, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pshuflw<SSE2BW>, 0xf20f70, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
pslldq<sse2>, 0x660f73/7, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM }
psrldq<sse2>, 0x660f73/3, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM }
-punpckhqdq<sse2>, 0x660f6d, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-punpcklqdq<sse2>, 0x660f6c, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpckhqdq<SSE2Q>, 0x660f6d, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpcklqdq<SSE2Q>, 0x660f6c, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
<frel>
// SSE3 instructions.
<sse3:cpu:attr:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:SSE3::>
+<SSE3D:cpu:attr, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX, +
+ $sse:SSE3:>
+<SSE3Q:cpu:attr, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512VL:EVex128|VexW1|Disp8MemShift=3|SSE2AVX, +
+ $sse:SSE3:>
addsubpd<sse3>, 0x660fd0, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
addsubps<sse3>, 0xf20fd0, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1279,10 +1342,13 @@ haddpd<sse3>, 0x660f7c, <sse3:cpu>, Modr
haddps<sse3>, 0xf20f7c, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
hsubpd<sse3>, 0x660f7d, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
hsubps<sse3>, 0xf20f7d, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-lddqu<sse3>, 0xf20ff0, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
-movddup<sse3>, 0xf20f12, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movshdup<sse3>, 0xf30f16, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-movsldup<sse3>, 0xf30f12, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+lddqu, 0xf20ff0, AVX, Modrm|Vex128|VexW0|SSE2AVX|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+// For use with eGPR-s in the memory operand, utilize VMOVDQU32.
+lddqu, 0xf30f6f, AVX512VL, Modrm|EVex128|VexW0|Disp8MemShift=4|SSE2AVX|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+lddqu, 0xf20ff0, SSE3, Modrm|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+movddup<SSE3Q>, 0xf20f12, <SSE3Q:cpu>, Modrm|<SSE3Q:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movshdup<SSE3D>, 0xf30f16, <SSE3D:cpu>, Modrm|<SSE3D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movsldup<SSE3D>, 0xf30f12, <SSE3D:cpu>, Modrm|<SSE3D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
// FPU instructions also covered by SSE3 CPUID flag.
@@ -1352,6 +1418,15 @@ invpcid, 0xf3f2, INVPCID&APX_F, Modrm|No
$avx:AVX:66:Vex128|VexW0|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
$sse:SSSE3:66:::RegXMM:Xmmword, +
$mmx:SSSE3::::RegMMX:Qword>
+<SSSE3BW:cpu:pfx:attr:vvvv:reg:mem, +
+ $avx:AVX:66:Vex128|VexW0|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
+ $apx:AVX512BW&AVX512VL:66:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
+ $sse:SSSE3:66:::RegXMM:Xmmword, +
+ $mmx:SSSE3::::RegMMX:Qword>
+<SSSE3D:cpu:pfx:attr:reg:mem, +
+ $avx:AVX|AVX512VL:66:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ $sse:SSSE3:66::RegXMM:Xmmword, +
+ $mmx:SSSE3:::RegMMX:Qword>
phaddw<ssse3>, 0x<ssse3:pfx>0f3801, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
phaddd<ssse3>, 0x<ssse3:pfx>0f3802, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
@@ -1359,18 +1434,34 @@ phaddsw<ssse3>, 0x<ssse3:pfx>0f3803, <ss
phsubw<ssse3>, 0x<ssse3:pfx>0f3805, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
phsubd<ssse3>, 0x<ssse3:pfx>0f3806, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
phsubsw<ssse3>, 0x<ssse3:pfx>0f3807, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pmaddubsw<ssse3>, 0x<ssse3:pfx>0f3804, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pmulhrsw<ssse3>, 0x<ssse3:pfx>0f380b, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pshufb<ssse3>, 0x<ssse3:pfx>0f3800, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
+pmaddubsw<SSSE3BW>, 0x<SSSE3BW:pfx>0f3804, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pmulhrsw<SSSE3BW>, 0x<SSSE3BW:pfx>0f380b, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pshufb<SSSE3BW>, 0x<SSSE3BW:pfx>0f3800, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
psign<bw><ssse3>, 0x<ssse3:pfx>0f3808 | <bw:opc>, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
psignd<ssse3>, 0x<ssse3:pfx>0f380a, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-palignr<ssse3>, 0x<ssse3:pfx>0f3a0f, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { Imm8, <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pabs<bw><ssse3>, 0x<ssse3:pfx>0f381c | <bw:opc>, <ssse3:cpu>, Modrm|<ssse3:attr>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pabsd<ssse3>, 0x<ssse3:pfx>0f381e, <ssse3:cpu>, Modrm|<ssse3:attr>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
+palignr<SSSE3BW>, 0x<SSSE3BW:pfx>0f3a0f, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { Imm8, <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pabs<bw><SSSE3BW>, 0x<SSSE3BW:pfx>0f381c | <bw:opc>, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pabsd<SSSE3D>, 0x<SSSE3D:pfx>0f381e, <SSSE3D:cpu>, Modrm|<SSSE3D:attr>|NoSuf, { <SSSE3D:reg>|<SSSE3D:mem>|Unspecified|BaseIndex, <SSSE3D:reg> }
// SSE4.1 instructions.
<sse41:cpu:attr:scal:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, $sse:SSE4_1:::>
+<SSE41BW:cpu:attr:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512BW&AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1::>
+<SSE41DQ:cpu:attr:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512DQ&AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1::>
+<SSE41D:cpu:attr:scal:vvvv, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexLIG|EVexLIG|VexW0|Disp8MemShift=2|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1:::>
+<SSE41Q:cpu:attr:scal:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512VL:EVex128|VexW1|Disp8MemShift=4|SSE2AVX:EVexLIG|VexW1|Disp8MemShift=3|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1:::>
+
<sd:ppfx:spfx:opc:vexw:elem, s::f3:0:VexW0:Dword, d:66:f2:1:VexW1:Qword>
blendp<sd><sse41>, 0x660f3a0c | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1379,56 +1470,60 @@ blendvp<sd>, 0x664a | <sd:opc>, AVX, Mod
blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
dpp<sd><sse41>, 0x660f3a40 | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-extractps, 0x6617, AVX, Modrm|Vex128|Space0F3A|VexW0|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
-extractps, 0x6617, AVX&x64, RegMem|Vex128|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64 }
+extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64 }
extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|NoRex64, { Imm8, RegXMM, Reg64 }
-insertps<sse41>, 0x660f3a21, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movntdqa<sse41>, 0x660f382a, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movntdqa<SSE41D>, 0x660f382a, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
mpsadbw<sse41>, 0x660f3a42, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-packusdw<sse41>, 0x660f382b, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+packusdw<SSE41BW>, 0x660f382b, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|SSE2AVX, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm0|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendw<sse41>, 0x660f3a0e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, RegMem|<sse41:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
-pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
-pextrd<sse41>, 0x660f3a16, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+pextr<bw><SSE41BW>, 0x660f3a14 | <bw:opc>, <SSE41BW:cpu>, RegMem|<SSE41BW:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
+pextr<bw><SSE41BW>, 0x660f3a14 | <bw:opc>, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
+pextrd<SSE41DQ>, 0x660f3a16, <SSE41DQ:cpu>, Modrm|<SSE41DQ:attr>|Disp8MemShift|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
pextrq, 0x6616, AVX&x64, Modrm|Vex|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
+pextrq, 0x6616, AVX512DQ&AVX512VL&x64, Modrm|EVex128|Space0F3A|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
pextrq, 0x660f3a16, SSE4_1&x64, Modrm|Size64|NoSuf, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
phminposuw<sse41>, 0x660f3841, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pinsrb<sse41>, 0x660f3a20, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
-pinsrb<sse41>, 0x660f3a20, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM }
-pinsrd<sse41>, 0x660f3a22, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|IgnoreSize, { Imm8, Reg32|Unspecified|BaseIndex, RegXMM }
+pinsrb<SSE41BW>, 0x660f3a20, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
+pinsrb<SSE41BW>, 0x660f3a20, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|Disp8MemShift|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM }
+pinsrd<SSE41DQ>, 0x660f3a22, <SSE41DQ:cpu>, Modrm|<SSE41DQ:attr>|<SSE41DQ:vvvv>|Disp8MemShift|NoSuf|IgnoreSize, { Imm8, Reg32|Unspecified|BaseIndex, RegXMM }
pinsrq, 0x6622, AVX&x64, Modrm|Vex|Space0F3A|VexVVVV|VexW1|NoSuf|SSE2AVX, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
+pinsrq, 0x6622, AVX512DQ&AVX512VL&x64, Modrm|EVex128|Space0F3A|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
pinsrq, 0x660f3a22, SSE4_1&x64, Modrm|Size64|NoSuf, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
-pmaxsb<sse41>, 0x660f383c, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmaxsd<sse41>, 0x660f383d, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmaxud<sse41>, 0x660f383f, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmaxuw<sse41>, 0x660f383e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminsb<sse41>, 0x660f3838, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminsd<sse41>, 0x660f3839, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminud<sse41>, 0x660f383b, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminuw<sse41>, 0x660f383a, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmovsxbw<sse41>, 0x660f3820, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxbd<sse41>, 0x660f3821, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxbq<sse41>, 0x660f3822, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxwd<sse41>, 0x660f3823, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxwq<sse41>, 0x660f3824, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxdq<sse41>, 0x660f3825, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxbw<sse41>, 0x660f3830, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxbd<sse41>, 0x660f3831, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxbq<sse41>, 0x660f3832, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxwd<sse41>, 0x660f3833, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxwq<sse41>, 0x660f3834, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxdq<sse41>, 0x660f3835, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmuldq<sse41>, 0x660f3828, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmulld<sse41>, 0x660f3840, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxsb<SSE41BW>, 0x660f383c, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxsd<SSE41D>, 0x660f383d, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxud<SSE41D>, 0x660f383f, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxuw<SSE41BW>, 0x660f383e, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminsb<SSE41BW>, 0x660f3838, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminsd<SSE41D>, 0x660f3839, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminud<SSE41D>, 0x660f383b, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminuw<SSE41BW>, 0x660f383a, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmovsxbw<SSE41BW>, 0x660f3820, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxbd<SSE41D>, 0x660f3821, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxbq<SSE41D>, 0x660f3822, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxwd<SSE41D>, 0x660f3823, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxwq<SSE41D>, 0x660f3824, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxdq<SSE41D>, 0x660f3825, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxbw<SSE41BW>, 0x660f3830, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxbd<SSE41D>, 0x660f3831, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxbq<SSE41D>, 0x660f3832, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxwd<SSE41D>, 0x660f3833, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxwq<SSE41D>, 0x660f3834, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxdq<SSE41D>, 0x660f3835, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmuldq<SSE41Q>, 0x660f3828, <SSE41Q:cpu>, Modrm|<SSE41Q:attr>|<SSE41Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmulld<SSE41D>, 0x660f3840, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
ptest<sse41>, 0x660f3817, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-roundp<sd><sse41>, 0x660f3a08 | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
-rounds<sd><sse41>, 0x660f3a0a | <sd:opc>, <sse41:cpu>, Modrm|<sse41:scal>|<sse41:vvvv>|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM }
+roundpd<SSE41Q>, 0x660f3a09, <SSE41Q:cpu>, Modrm|<SSE41Q:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+roundps<SSE41D>, 0x660f3a08, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+roundsd<SSE41Q>, 0x660f3a0b, <SSE41Q:cpu>, Modrm|<SSE41Q:scal>|<SSE41Q:vvvv>|NoSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+roundss<SSE41D>, 0x660f3a0a, <SSE41D:cpu>, Modrm|<SSE41D:scal>|<SSE41D:vvvv>|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
// SSE4.2 instructions.
@@ -1465,31 +1560,38 @@ xsaveopt64, 0xfae/6, Xsaveopt&x64, Modrm
// AES instructions.
<aes:cpu:attr:vvvv, $avx:AVX&:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:::>
-
-aesdec<aes>, 0x660f38de, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-aesdeclast<aes>, 0x660f38df, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-aesenc<aes>, 0x660f38dc, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-aesenclast<aes>, 0x660f38dd, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<AES:cpu:attr, +
+ $avx:&(AVX|AVX512VL):Vex128|EVex128|VexW0|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse::>
+
+aesdec<AES>, 0x660f38de, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+aesdeclast<AES>, 0x660f38df, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+aesenc<AES>, 0x660f38dc, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+aesenclast<AES>, 0x660f38dd, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
aesimc<aes>, 0x660f38db, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
aeskeygenassist<aes>, 0x660f3adf, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
// PCLMULQDQ
-<pclmul:cpu:attr, $avx:AVX&:Vex128|VexW0|SSE2AVX|VexVVVV, $sse::>
-
-pclmulqdq<pclmul>, 0x660f3a44, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmullqlqdq<pclmul>, 0x660f3a44/0x00, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmulhqlqdq<pclmul>, 0x660f3a44/0x01, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmullqhqdq<pclmul>, 0x660f3a44/0x10, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmulhqhqdq<pclmul>, 0x660f3a44/0x11, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<pclmul:cpu:attr, +
+ $avx:&(AVX|AVX512VL):Vex128|EVex128|VexW0|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse::>
+
+pclmulqdq<pclmul>, 0x660f3a44, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmullqlqdq<pclmul>, 0x660f3a44/0x00, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmulhqlqdq<pclmul>, 0x660f3a44/0x01, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmullqhqdq<pclmul>, 0x660f3a44/0x10, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmulhqhqdq<pclmul>, 0x660f3a44/0x11, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
// GFNI
-<gfni:cpu:w0:w1, $avx:AVX&:Vex128|VexW0|SSE2AVX|VexVVVV:Vex128|VexW1|SSE2AVX|VexVVVV, $sse:::>
-
-gf2p8affineqb<gfni>, 0x660f3ace, <gfni:cpu>GFNI, Modrm|<gfni:w1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
-gf2p8affineinvqb<gfni>, 0x660f3acf, <gfni:cpu>GFNI, Modrm|<gfni:w1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
-gf2p8mulb<gfni>, 0x660f38cf, <gfni:cpu>GFNI, Modrm|<gfni:w0>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<gfni:cpu:attr:vexw0:vexw1, +
+ $avx:&(AVX|AVX512VL):Vex128|EVex128|VexVVVV|Disp8MemShift=4|SSE2AVX:VexW0:VexW1, +
+ $sse::::>
+
+gf2p8affineqb<gfni>, 0x660f3ace, GFNI<gfni:cpu>, Modrm|<gfni:attr>|<gfni:vexw1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+gf2p8affineinvqb<gfni>, 0x660f3acf, GFNI<gfni:cpu>, Modrm|<gfni:attr>|<gfni:vexw1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+gf2p8mulb<gfni>, 0x660f38cf, GFNI<gfni:cpu>, Modrm|<gfni:attr>|<gfni:vexw0>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
// AVX instructions.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 3/4] x86/APX: further extend SSE2AVX coverage
2024-04-19 9:36 [PATCH v2 0/4] x86/APX: respect -msse2avx Jan Beulich
2024-04-19 9:37 ` [PATCH v2 1/4] x86: zap value-less Disp8MemShift from non-EVEX templates Jan Beulich
2024-04-19 9:37 ` [PATCH v2 2/4] x86/APX: extend SSE2AVX coverage Jan Beulich
@ 2024-04-19 9:38 ` Jan Beulich
2024-04-19 9:38 ` [PATCH v2 4/4] x86: tidy <sse*> templates Jan Beulich
3 siblings, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2024-04-19 9:38 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu, Lili Cui
Since {vex}/{vex3} are respected on legacy mnemonics when -msse2avx is
in use, {evex} should be respected, too. So far this is the case only
for insns where eGPR-s can come into play. Extend coverage to insns with
only %xmm register and possibly immediate operands.
---
This could be folded into the earlier patch, but as that one's large
enough already, I think it's better to keep this separate.
---
v2: New.
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse2avx-evex.d
@@ -0,0 +1,451 @@
+#as: -msse2avx
+#objdump: -dw
+#name: x86-64 SSE+ with \{evex\} prefix
+
+.*: file format .*
+
+Disassembly of section .text:
+
+0+ <sse2avx>:
+[ ]*[a-f0-9]+: 62 f1 7c 08 5b f4 \{evex\} vcvtdq2ps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 5b 21 \{evex\} vcvtdq2ps \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 ff 08 e6 f4 \{evex\} vcvtpd2dq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 ff 08 e6 21 \{evex\} vcvtpd2dqx \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 5a f4 \{evex\} vcvtpd2ps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 5a 21 \{evex\} vcvtpd2psx \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7d 08 5b f4 \{evex\} vcvtps2dq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7d 08 5b 21 \{evex\} vcvtps2dq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 e6 f4 \{evex\} vcvttpd2dq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 e6 21 \{evex\} vcvttpd2dqx \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 5b f4 \{evex\} vcvttps2dq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 5b 21 \{evex\} vcvttps2dq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 28 f4 \{evex\} vmovapd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 28 21 \{evex\} vmovapd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7c 08 28 f4 \{evex\} vmovaps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 28 21 \{evex\} vmovaps \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7d 08 6f f4 vmovdqa32 %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7d 08 6f 21 vmovdqa32 \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 6f f4 vmovdqu32 %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 6f 21 vmovdqu32 \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 16 f4 \{evex\} vmovshdup %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 16 21 \{evex\} vmovshdup \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 12 f4 \{evex\} vmovsldup %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 12 21 \{evex\} vmovsldup \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 10 f4 \{evex\} vmovupd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 10 21 \{evex\} vmovupd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7c 08 10 f4 \{evex\} vmovups %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 10 21 \{evex\} vmovups \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 1c f4 \{evex\} vpabsb %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 1c 21 \{evex\} vpabsb \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 1d f4 \{evex\} vpabsw %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 1d 21 \{evex\} vpabsw \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 1e f4 \{evex\} vpabsd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 1e 21 \{evex\} vpabsd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 4c f4 vrcp14ps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 4c 21 vrcp14ps \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 4e f4 vrsqrt14ps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 4e 21 vrsqrt14ps \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 51 f4 \{evex\} vsqrtpd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 51 21 \{evex\} vsqrtpd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7c 08 51 f4 \{evex\} vsqrtps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 51 21 \{evex\} vsqrtps \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 28 f4 \{evex\} vmovapd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 29 21 \{evex\} vmovapd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7c 08 28 f4 \{evex\} vmovaps %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 29 21 \{evex\} vmovaps %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7d 08 6f f4 vmovdqa32 %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7d 08 7f 21 vmovdqa32 %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7e 08 6f f4 vmovdqu32 %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 7f 21 vmovdqu32 %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 fd 08 10 f4 \{evex\} vmovupd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 11 21 \{evex\} vmovupd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7c 08 10 f4 \{evex\} vmovups %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 11 21 \{evex\} vmovups %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7e 08 6f 21 vmovdqu32 \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 2a 21 \{evex\} vmovntdqa \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7d 08 e7 21 \{evex\} vmovntdq %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 fd 08 2b 21 \{evex\} vmovntpd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7c 08 2b 21 \{evex\} vmovntps %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 cd 08 58 f4 \{evex\} vaddpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 58 31 \{evex\} vaddpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 58 f4 \{evex\} vaddps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 58 31 \{evex\} vaddps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 dc f4 \{evex\} vaesenc %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 dc 31 \{evex\} vaesenc \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 dd f4 \{evex\} vaesenclast %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 dd 31 \{evex\} vaesenclast \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 de f4 \{evex\} vaesdec %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 de 31 \{evex\} vaesdec \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 df f4 \{evex\} vaesdeclast %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 df 31 \{evex\} vaesdeclast \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 55 f4 \{evex\} vandnpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 55 31 \{evex\} vandnpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 55 f4 \{evex\} vandnps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 55 31 \{evex\} vandnps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 54 f4 \{evex\} vandpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 54 31 \{evex\} vandpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 54 f4 \{evex\} vandps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 54 31 \{evex\} vandps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5e f4 \{evex\} vdivpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5e 31 \{evex\} vdivpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5e f4 \{evex\} vdivps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5e 31 \{evex\} vdivps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 cf f4 \{evex\} vgf2p8mulb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 cf 31 \{evex\} vgf2p8mulb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5f f4 \{evex\} vmaxpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5f 31 \{evex\} vmaxpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5f f4 \{evex\} vmaxps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5f 31 \{evex\} vmaxps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5d f4 \{evex\} vminpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5d 31 \{evex\} vminpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5d f4 \{evex\} vminps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5d 31 \{evex\} vminps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 59 f4 \{evex\} vmulpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 59 31 \{evex\} vmulpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 59 f4 \{evex\} vmulps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 59 31 \{evex\} vmulps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 56 f4 \{evex\} vorpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 56 31 \{evex\} vorpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 56 f4 \{evex\} vorps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 56 31 \{evex\} vorps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 63 f4 \{evex\} vpacksswb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 63 31 \{evex\} vpacksswb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 6b f4 \{evex\} vpackssdw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 6b 31 \{evex\} vpackssdw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 67 f4 \{evex\} vpackuswb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 67 31 \{evex\} vpackuswb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 2b f4 \{evex\} vpackusdw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 2b 31 \{evex\} vpackusdw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fc f4 \{evex\} vpaddb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fc 31 \{evex\} vpaddb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fd f4 \{evex\} vpaddw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fd 31 \{evex\} vpaddw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fe f4 \{evex\} vpaddd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fe 31 \{evex\} vpaddd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 d4 f4 \{evex\} vpaddq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 d4 31 \{evex\} vpaddq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ec f4 \{evex\} vpaddsb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ec 31 \{evex\} vpaddsb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ed f4 \{evex\} vpaddsw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ed 31 \{evex\} vpaddsw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 dc f4 \{evex\} vpaddusb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 dc 31 \{evex\} vpaddusb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 dd f4 \{evex\} vpaddusw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 dd 31 \{evex\} vpaddusw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 db f4 vpandd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 db 31 vpandd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 df f4 vpandnd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 df 31 vpandnd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e0 f4 \{evex\} vpavgb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e0 31 \{evex\} vpavgb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e3 f4 \{evex\} vpavgw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e3 31 \{evex\} vpavgw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 f4 00 \{evex\} vpclmullqlqdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 31 00 \{evex\} vpclmullqlqdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 f4 01 \{evex\} vpclmulhqlqdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 31 01 \{evex\} vpclmulhqlqdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 f4 10 \{evex\} vpclmullqhqdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 31 10 \{evex\} vpclmullqhqdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 f4 11 \{evex\} vpclmulhqhqdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 31 11 \{evex\} vpclmulhqhqdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f5 f4 \{evex\} vpmaddwd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f5 31 \{evex\} vpmaddwd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 04 f4 \{evex\} vpmaddubsw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 04 31 \{evex\} vpmaddubsw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3c f4 \{evex\} vpmaxsb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3c 31 \{evex\} vpmaxsb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ee f4 \{evex\} vpmaxsw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ee 31 \{evex\} vpmaxsw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3d f4 \{evex\} vpmaxsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3d 31 \{evex\} vpmaxsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 de f4 \{evex\} vpmaxub %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 de 31 \{evex\} vpmaxub \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3e f4 \{evex\} vpmaxuw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3e 31 \{evex\} vpmaxuw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3f f4 \{evex\} vpmaxud %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3f 31 \{evex\} vpmaxud \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 38 f4 \{evex\} vpminsb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 38 31 \{evex\} vpminsb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ea f4 \{evex\} vpminsw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ea 31 \{evex\} vpminsw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 39 f4 \{evex\} vpminsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 39 31 \{evex\} vpminsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 da f4 \{evex\} vpminub %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 da 31 \{evex\} vpminub \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3a f4 \{evex\} vpminuw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3a 31 \{evex\} vpminuw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3b f4 \{evex\} vpminud %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 3b 31 \{evex\} vpminud \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e4 f4 \{evex\} vpmulhuw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e4 31 \{evex\} vpmulhuw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 0b f4 \{evex\} vpmulhrsw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 0b 31 \{evex\} vpmulhrsw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e5 f4 \{evex\} vpmulhw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e5 31 \{evex\} vpmulhw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d5 f4 \{evex\} vpmullw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d5 31 \{evex\} vpmullw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 40 f4 \{evex\} vpmulld %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 40 31 \{evex\} vpmulld \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 f4 f4 \{evex\} vpmuludq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 f4 31 \{evex\} vpmuludq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 cd 08 28 f4 \{evex\} vpmuldq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 cd 08 28 31 \{evex\} vpmuldq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 eb f4 vpord %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 eb 31 vpord \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f6 f4 \{evex\} vpsadbw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f6 31 \{evex\} vpsadbw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 00 f4 \{evex\} vpshufb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 00 31 \{evex\} vpshufb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f1 f4 \{evex\} vpsllw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f1 31 \{evex\} vpsllw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f2 f4 \{evex\} vpslld %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f2 31 \{evex\} vpslld \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 f3 f4 \{evex\} vpsllq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 f3 31 \{evex\} vpsllq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e1 f4 \{evex\} vpsraw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e1 31 \{evex\} vpsraw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e2 f4 \{evex\} vpsrad %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e2 31 \{evex\} vpsrad \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d1 f4 \{evex\} vpsrlw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d1 31 \{evex\} vpsrlw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d2 f4 \{evex\} vpsrld %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d2 31 \{evex\} vpsrld \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 d3 f4 \{evex\} vpsrlq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 d3 31 \{evex\} vpsrlq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f8 f4 \{evex\} vpsubb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f8 31 \{evex\} vpsubb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f9 f4 \{evex\} vpsubw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 f9 31 \{evex\} vpsubw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fa f4 \{evex\} vpsubd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 fa 31 \{evex\} vpsubd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 fb f4 \{evex\} vpsubq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 fb 31 \{evex\} vpsubq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e8 f4 \{evex\} vpsubsb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e8 31 \{evex\} vpsubsb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e9 f4 \{evex\} vpsubsw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 e9 31 \{evex\} vpsubsw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d8 f4 \{evex\} vpsubusb %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d8 31 \{evex\} vpsubusb \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d9 f4 \{evex\} vpsubusw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 d9 31 \{evex\} vpsubusw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 68 f4 \{evex\} vpunpckhbw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 68 31 \{evex\} vpunpckhbw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 69 f4 \{evex\} vpunpckhwd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 69 31 \{evex\} vpunpckhwd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 6a f4 \{evex\} vpunpckhdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 6a 31 \{evex\} vpunpckhdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 6d f4 \{evex\} vpunpckhqdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 6d 31 \{evex\} vpunpckhqdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 60 f4 \{evex\} vpunpcklbw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 60 31 \{evex\} vpunpcklbw \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 61 f4 \{evex\} vpunpcklwd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 61 31 \{evex\} vpunpcklwd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 62 f4 \{evex\} vpunpckldq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 62 31 \{evex\} vpunpckldq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 6c f4 \{evex\} vpunpcklqdq %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 6c 31 \{evex\} vpunpcklqdq \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ef f4 vpxord %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4d 08 ef 31 vpxord \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5c f4 \{evex\} vsubpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 5c 31 \{evex\} vsubpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5c f4 \{evex\} vsubps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 5c 31 \{evex\} vsubps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 15 f4 \{evex\} vunpckhpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 15 31 \{evex\} vunpckhpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 15 f4 \{evex\} vunpckhps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 15 31 \{evex\} vunpckhps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 14 f4 \{evex\} vunpcklpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 14 31 \{evex\} vunpcklpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 14 f4 \{evex\} vunpcklps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 14 31 \{evex\} vunpcklps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 57 f4 \{evex\} vxorpd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 57 31 \{evex\} vxorpd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 57 f4 \{evex\} vxorps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 57 31 \{evex\} vxorps \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7d 08 70 f4 64 \{evex\} vpshufd \$0x64,%xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7d 08 70 31 64 \{evex\} vpshufd \$0x64,\(%rcx\),%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 70 f4 64 \{evex\} vpshufhw \$0x64,%xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 70 31 64 \{evex\} vpshufhw \$0x64,\(%rcx\),%xmm6
+[ ]*[a-f0-9]+: 62 f1 7f 08 70 f4 64 \{evex\} vpshuflw \$0x64,%xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7f 08 70 31 64 \{evex\} vpshuflw \$0x64,\(%rcx\),%xmm6
+[ ]*[a-f0-9]+: 62 f3 fd 08 09 f4 04 vrndscalepd \$0x4,%xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f3 fd 08 09 31 04 vrndscalepd \$0x4,\(%rcx\),%xmm6
+[ ]*[a-f0-9]+: 62 f3 7d 08 08 f4 04 vrndscaleps \$0x4,%xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f3 7d 08 08 31 04 vrndscaleps \$0x4,\(%rcx\),%xmm6
+[ ]*[a-f0-9]+: 62 f3 cd 08 ce f4 64 \{evex\} vgf2p8affineqb \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 cd 08 ce 31 64 \{evex\} vgf2p8affineqb \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 cd 08 cf f4 64 \{evex\} vgf2p8affineinvqb \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 cd 08 cf 31 64 \{evex\} vgf2p8affineinvqb \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 0f f4 64 \{evex\} vpalignr \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 0f 31 64 \{evex\} vpalignr \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 f4 64 \{evex\} vpclmulqdq \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 44 31 64 \{evex\} vpclmulqdq \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 c6 f4 64 \{evex\} vshufpd \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cd 08 c6 31 64 \{evex\} vshufpd \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 c6 f4 64 \{evex\} vshufps \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 c6 31 64 \{evex\} vshufps \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 2f f4 \{evex\} vcomisd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 2f 21 \{evex\} vcomisd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 e6 f4 \{evex\} vcvtdq2pd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7e 08 e6 21 \{evex\} vcvtdq2pd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 e6 21 \{evex\} vcvtdq2pd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7c 08 5a f4 \{evex\} vcvtps2pd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 5a 21 \{evex\} vcvtps2pd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 ff 08 12 f4 \{evex\} vmovddup %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 ff 08 12 21 \{evex\} vmovddup \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 20 f4 \{evex\} vpmovsxbw %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 20 21 \{evex\} vpmovsxbw \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 23 f4 \{evex\} vpmovsxwd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 23 21 \{evex\} vpmovsxwd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 25 f4 \{evex\} vpmovsxdq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 25 21 \{evex\} vpmovsxdq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 30 f4 \{evex\} vpmovzxbw %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 30 21 \{evex\} vpmovzxbw \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 33 f4 \{evex\} vpmovzxwd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 33 21 \{evex\} vpmovzxwd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 35 f4 \{evex\} vpmovzxdq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 35 21 \{evex\} vpmovzxdq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 2e f4 \{evex\} vucomisd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 fd 08 2e 21 \{evex\} vucomisd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 ff 08 10 21 \{evex\} vmovsd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 13 21 \{evex\} vmovlpd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7c 08 13 21 \{evex\} vmovlps %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 fd 08 17 21 \{evex\} vmovhpd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7c 08 17 21 \{evex\} vmovhps %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 ff 08 11 21 \{evex\} vmovsd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 fd 08 7e e1 \{evex\} vmovq %xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f1 fd 08 6e e1 \{evex\} vmovq %rcx,%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 7e e1 \{evex\} vmovq %xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f1 fd 08 6e e1 \{evex\} vmovq %rcx,%xmm4
+[ ]*[a-f0-9]+: 62 f1 fd 08 d6 21 \{evex\} vmovq %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 fe 08 7e 21 \{evex\} vmovq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7f 08 2d cc \{evex\} vcvtsd2si %xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f1 7f 08 2d 09 \{evex\} vcvtsd2si \(%rcx\),%ecx
+[ ]*[a-f0-9]+: 62 f1 7f 08 2c cc \{evex\} vcvttsd2si %xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f1 7f 08 2c 09 \{evex\} vcvttsd2si \(%rcx\),%ecx
+[ ]*[a-f0-9]+: 62 f1 ff 08 2d cc \{evex\} vcvtsd2si %xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f1 ff 08 2d 09 \{evex\} vcvtsd2si \(%rcx\),%rcx
+[ ]*[a-f0-9]+: 62 f1 ff 08 2c cc \{evex\} vcvttsd2si %xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f1 ff 08 2c 09 \{evex\} vcvttsd2si \(%rcx\),%rcx
+[ ]*[a-f0-9]+: 62 f1 df 08 2a e1 \{evex\} vcvtsi2sd %rcx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 df 08 2a 21 \{evex\} vcvtsi2sdq \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 de 08 2a e1 \{evex\} vcvtsi2ss %rcx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 de 08 2a 21 \{evex\} vcvtsi2ssq \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 dd 08 22 e1 64 \{evex\} vpinsrq \$0x64,%rcx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 dd 08 22 21 64 \{evex\} vpinsrq \$0x64,\(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 fd 08 16 e1 64 \{evex\} vpextrq \$0x64,%xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f3 fd 08 16 21 64 \{evex\} vpextrq \$0x64,%xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 dd 08 12 21 \{evex\} vmovlpd \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5c 08 12 21 \{evex\} vmovlps \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 dd 08 16 21 \{evex\} vmovhpd \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5c 08 16 21 \{evex\} vmovhps \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 cd 08 0b f4 04 vrndscalesd \$0x4,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 cd 08 0b 31 04 vrndscalesd \$0x4,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 58 f4 \{evex\} vaddsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 58 31 \{evex\} vaddsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5a f4 \{evex\} vcvtsd2ss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5a 31 \{evex\} vcvtsd2ss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5e f4 \{evex\} vdivsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5e 31 \{evex\} vdivsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5f f4 \{evex\} vmaxsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5f 31 \{evex\} vmaxsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5d f4 \{evex\} vminsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5d 31 \{evex\} vminsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 59 f4 \{evex\} vmulsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 59 31 \{evex\} vmulsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 51 f4 \{evex\} vsqrtsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 51 31 \{evex\} vsqrtsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5c f4 \{evex\} vsubsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 5c 31 \{evex\} vsubsd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 58 f4 \{evex\} vaddss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 58 31 \{evex\} vaddss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5a f4 \{evex\} vcvtss2sd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5a 31 \{evex\} vcvtss2sd \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5e f4 \{evex\} vdivss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5e 31 \{evex\} vdivss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5f f4 \{evex\} vmaxss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5f 31 \{evex\} vmaxss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5d f4 \{evex\} vminss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5d 31 \{evex\} vminss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 59 f4 \{evex\} vmulss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 59 31 \{evex\} vmulss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 4d f4 vrcp14ss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 4d 31 vrcp14ss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 4f f4 vrsqrt14ss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 4d 08 4f 31 vrsqrt14ss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 51 f4 \{evex\} vsqrtss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 51 31 \{evex\} vsqrtss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5c f4 \{evex\} vsubss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 5c 31 \{evex\} vsubss \(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 2f f4 \{evex\} vcomiss %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 2f 21 \{evex\} vcomiss \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 21 f4 \{evex\} vpmovsxbd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 21 21 \{evex\} vpmovsxbd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 24 f4 \{evex\} vpmovsxwq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 24 21 \{evex\} vpmovsxwq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 31 f4 \{evex\} vpmovzxbd %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 31 21 \{evex\} vpmovzxbd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 34 f4 \{evex\} vpmovzxwq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 34 21 \{evex\} vpmovzxwq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7c 08 2e f4 \{evex\} vucomiss %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 7c 08 2e 21 \{evex\} vucomiss \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 10 21 \{evex\} vmovss \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 11 21 \{evex\} vmovss %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7d 08 7e e1 \{evex\} vmovd %xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f1 7d 08 7e 21 \{evex\} vmovd %xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 7d 08 6e e1 \{evex\} vmovd %ecx,%xmm4
+[ ]*[a-f0-9]+: 62 f1 7d 08 6e 21 \{evex\} vmovd \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7e 08 2d cc \{evex\} vcvtss2si %xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f1 7e 08 2d 09 \{evex\} vcvtss2si \(%rcx\),%ecx
+[ ]*[a-f0-9]+: 62 f1 7e 08 2c cc \{evex\} vcvttss2si %xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f1 7e 08 2c 09 \{evex\} vcvttss2si \(%rcx\),%ecx
+[ ]*[a-f0-9]+: 62 f1 fe 08 2d cc \{evex\} vcvtss2si %xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f1 fe 08 2d 09 \{evex\} vcvtss2si \(%rcx\),%rcx
+[ ]*[a-f0-9]+: 62 f1 fe 08 2c cc \{evex\} vcvttss2si %xmm4,%rcx
+[ ]*[a-f0-9]+: 62 f1 fe 08 2c 09 \{evex\} vcvttss2si \(%rcx\),%rcx
+[ ]*[a-f0-9]+: 62 f3 fd 08 17 e1 64 \{evex\} vextractps \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f3 7d 08 17 21 64 \{evex\} vextractps \$0x64,%xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f3 7d 08 16 e1 64 \{evex\} vpextrd \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f3 7d 08 16 21 64 \{evex\} vpextrd \$0x64,%xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f3 7d 08 17 e1 64 \{evex\} vextractps \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f3 7d 08 17 21 64 \{evex\} vextractps \$0x64,%xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 5f 08 2a e1 \{evex\} vcvtsi2sd %ecx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5f 08 2a 21 \{evex\} vcvtsi2sdl \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5e 08 2a e1 \{evex\} vcvtsi2ss %ecx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5e 08 2a 21 \{evex\} vcvtsi2ssl \(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 4d 08 21 f4 64 \{evex\} vinsertps \$0x64,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 21 31 64 \{evex\} vinsertps \$0x64,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 0a f4 04 vrndscaless \$0x4,%xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f3 4d 08 0a 31 04 vrndscaless \$0x4,\(%rcx\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 22 f4 \{evex\} vpmovsxbq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 22 21 \{evex\} vpmovsxbq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f2 7d 08 32 f4 \{evex\} vpmovzxbq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f2 7d 08 32 21 \{evex\} vpmovzxbq \(%rcx\),%xmm4
+[ ]*[a-f0-9]+: 62 f1 7d 08 c5 cc 64 \{evex\} vpextrw \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f1 7d 08 c5 cc 64 \{evex\} vpextrw \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f3 7d 08 15 21 64 \{evex\} vpextrw \$0x64,%xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f1 5d 08 c4 e1 64 \{evex\} vpinsrw \$0x64,%ecx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 c4 e1 64 \{evex\} vpinsrw \$0x64,%ecx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 c4 21 64 \{evex\} vpinsrw \$0x64,\(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 7d 08 14 e1 64 \{evex\} vpextrb \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f3 7d 08 14 e1 64 \{evex\} vpextrb \$0x64,%xmm4,%ecx
+[ ]*[a-f0-9]+: 62 f3 7d 08 14 21 64 \{evex\} vpextrb \$0x64,%xmm4,\(%rcx\)
+[ ]*[a-f0-9]+: 62 f3 5d 08 20 e1 64 \{evex\} vpinsrb \$0x64,%ecx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 5d 08 20 e1 64 \{evex\} vpinsrb \$0x64,%ecx,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f3 5d 08 20 21 64 \{evex\} vpinsrb \$0x64,\(%rcx\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 fe 08 7e f4 \{evex\} vmovq %xmm4,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 12 f4 \{evex\} vmovhlps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4c 08 16 f4 \{evex\} vmovlhps %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 cf 08 10 f4 \{evex\} vmovsd %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 4e 08 10 f4 \{evex\} vmovss %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f1 5d 08 72 f4 64 \{evex\} vpslld \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 73 fc 64 \{evex\} vpslldq \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 dd 08 73 f4 64 \{evex\} vpsllq \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 71 f4 64 \{evex\} vpsllw \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 72 e4 64 \{evex\} vpsrad \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 71 e4 64 \{evex\} vpsraw \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 72 d4 64 \{evex\} vpsrld \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 73 dc 64 \{evex\} vpsrldq \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 dd 08 73 d4 64 \{evex\} vpsrlq \$0x64,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f1 5d 08 71 d4 64 \{evex\} vpsrlw \$0x64,%xmm4,%xmm4
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse2avx-evex.s
@@ -0,0 +1,522 @@
+ .text
+ .sse_check warning
+sse2avx:
+
+# Tests for op xmm/mem128, xmm
+ {evex} cvtdq2ps %xmm4,%xmm6
+ {evex} cvtdq2ps (%rcx),%xmm4
+ {evex} cvtpd2dq %xmm4,%xmm6
+ {evex} cvtpd2dq (%rcx),%xmm4
+ {evex} cvtpd2ps %xmm4,%xmm6
+ {evex} cvtpd2ps (%rcx),%xmm4
+ {evex} cvtps2dq %xmm4,%xmm6
+ {evex} cvtps2dq (%rcx),%xmm4
+ {evex} cvttpd2dq %xmm4,%xmm6
+ {evex} cvttpd2dq (%rcx),%xmm4
+ {evex} cvttps2dq %xmm4,%xmm6
+ {evex} cvttps2dq (%rcx),%xmm4
+ {evex} movapd %xmm4,%xmm6
+ {evex} movapd (%rcx),%xmm4
+ {evex} movaps %xmm4,%xmm6
+ {evex} movaps (%rcx),%xmm4
+ {evex} movdqa %xmm4,%xmm6
+ {evex} movdqa (%rcx),%xmm4
+ {evex} movdqu %xmm4,%xmm6
+ {evex} movdqu (%rcx),%xmm4
+ {evex} movshdup %xmm4,%xmm6
+ {evex} movshdup (%rcx),%xmm4
+ {evex} movsldup %xmm4,%xmm6
+ {evex} movsldup (%rcx),%xmm4
+ {evex} movupd %xmm4,%xmm6
+ {evex} movupd (%rcx),%xmm4
+ {evex} movups %xmm4,%xmm6
+ {evex} movups (%rcx),%xmm4
+ {evex} pabsb %xmm4,%xmm6
+ {evex} pabsb (%rcx),%xmm4
+ {evex} pabsw %xmm4,%xmm6
+ {evex} pabsw (%rcx),%xmm4
+ {evex} pabsd %xmm4,%xmm6
+ {evex} pabsd (%rcx),%xmm4
+ {evex} rcpps %xmm4,%xmm6
+ {evex} rcpps (%rcx),%xmm4
+ {evex} rsqrtps %xmm4,%xmm6
+ {evex} rsqrtps (%rcx),%xmm4
+ {evex} sqrtpd %xmm4,%xmm6
+ {evex} sqrtpd (%rcx),%xmm4
+ {evex} sqrtps %xmm4,%xmm6
+ {evex} sqrtps (%rcx),%xmm4
+
+# Tests for op xmm, xmm/mem128
+ {evex} movapd %xmm4,%xmm6
+ {evex} movapd %xmm4,(%rcx)
+ {evex} movaps %xmm4,%xmm6
+ {evex} movaps %xmm4,(%rcx)
+ {evex} movdqa %xmm4,%xmm6
+ {evex} movdqa %xmm4,(%rcx)
+ {evex} movdqu %xmm4,%xmm6
+ {evex} movdqu %xmm4,(%rcx)
+ {evex} movupd %xmm4,%xmm6
+ {evex} movupd %xmm4,(%rcx)
+ {evex} movups %xmm4,%xmm6
+ {evex} movups %xmm4,(%rcx)
+
+# Tests for op mem128, xmm
+ {evex} lddqu (%rcx),%xmm4
+ {evex} movntdqa (%rcx),%xmm4
+
+# Tests for op xmm, mem128
+ {evex} movntdq %xmm4,(%rcx)
+ {evex} movntpd %xmm4,(%rcx)
+ {evex} movntps %xmm4,(%rcx)
+
+# Tests for op xmm/mem128, xmm[, xmm]
+ {evex} addpd %xmm4,%xmm6
+ {evex} addpd (%rcx),%xmm6
+ {evex} addps %xmm4,%xmm6
+ {evex} addps (%rcx),%xmm6
+ {evex} aesenc %xmm4,%xmm6
+ {evex} aesenc (%rcx),%xmm6
+ {evex} aesenclast %xmm4,%xmm6
+ {evex} aesenclast (%rcx),%xmm6
+ {evex} aesdec %xmm4,%xmm6
+ {evex} aesdec (%rcx),%xmm6
+ {evex} aesdeclast %xmm4,%xmm6
+ {evex} aesdeclast (%rcx),%xmm6
+ {evex} andnpd %xmm4,%xmm6
+ {evex} andnpd (%rcx),%xmm6
+ {evex} andnps %xmm4,%xmm6
+ {evex} andnps (%rcx),%xmm6
+ {evex} andpd %xmm4,%xmm6
+ {evex} andpd (%rcx),%xmm6
+ {evex} andps %xmm4,%xmm6
+ {evex} andps (%rcx),%xmm6
+ {evex} divpd %xmm4,%xmm6
+ {evex} divpd (%rcx),%xmm6
+ {evex} divps %xmm4,%xmm6
+ {evex} divps (%rcx),%xmm6
+ {evex} gf2p8mulb %xmm4,%xmm6
+ {evex} gf2p8mulb (%rcx),%xmm6
+ {evex} maxpd %xmm4,%xmm6
+ {evex} maxpd (%rcx),%xmm6
+ {evex} maxps %xmm4,%xmm6
+ {evex} maxps (%rcx),%xmm6
+ {evex} minpd %xmm4,%xmm6
+ {evex} minpd (%rcx),%xmm6
+ {evex} minps %xmm4,%xmm6
+ {evex} minps (%rcx),%xmm6
+ {evex} mulpd %xmm4,%xmm6
+ {evex} mulpd (%rcx),%xmm6
+ {evex} mulps %xmm4,%xmm6
+ {evex} mulps (%rcx),%xmm6
+ {evex} orpd %xmm4,%xmm6
+ {evex} orpd (%rcx),%xmm6
+ {evex} orps %xmm4,%xmm6
+ {evex} orps (%rcx),%xmm6
+ {evex} packsswb %xmm4,%xmm6
+ {evex} packsswb (%rcx),%xmm6
+ {evex} packssdw %xmm4,%xmm6
+ {evex} packssdw (%rcx),%xmm6
+ {evex} packuswb %xmm4,%xmm6
+ {evex} packuswb (%rcx),%xmm6
+ {evex} packusdw %xmm4,%xmm6
+ {evex} packusdw (%rcx),%xmm6
+ {evex} paddb %xmm4,%xmm6
+ {evex} paddb (%rcx),%xmm6
+ {evex} paddw %xmm4,%xmm6
+ {evex} paddw (%rcx),%xmm6
+ {evex} paddd %xmm4,%xmm6
+ {evex} paddd (%rcx),%xmm6
+ {evex} paddq %xmm4,%xmm6
+ {evex} paddq (%rcx),%xmm6
+ {evex} paddsb %xmm4,%xmm6
+ {evex} paddsb (%rcx),%xmm6
+ {evex} paddsw %xmm4,%xmm6
+ {evex} paddsw (%rcx),%xmm6
+ {evex} paddusb %xmm4,%xmm6
+ {evex} paddusb (%rcx),%xmm6
+ {evex} paddusw %xmm4,%xmm6
+ {evex} paddusw (%rcx),%xmm6
+ {evex} pand %xmm4,%xmm6
+ {evex} pand (%rcx),%xmm6
+ {evex} pandn %xmm4,%xmm6
+ {evex} pandn (%rcx),%xmm6
+ {evex} pavgb %xmm4,%xmm6
+ {evex} pavgb (%rcx),%xmm6
+ {evex} pavgw %xmm4,%xmm6
+ {evex} pavgw (%rcx),%xmm6
+ {evex} pclmullqlqdq %xmm4,%xmm6
+ {evex} pclmullqlqdq (%rcx),%xmm6
+ {evex} pclmulhqlqdq %xmm4,%xmm6
+ {evex} pclmulhqlqdq (%rcx),%xmm6
+ {evex} pclmullqhqdq %xmm4,%xmm6
+ {evex} pclmullqhqdq (%rcx),%xmm6
+ {evex} pclmulhqhqdq %xmm4,%xmm6
+ {evex} pclmulhqhqdq (%rcx),%xmm6
+ {evex} pmaddwd %xmm4,%xmm6
+ {evex} pmaddwd (%rcx),%xmm6
+ {evex} pmaddubsw %xmm4,%xmm6
+ {evex} pmaddubsw (%rcx),%xmm6
+ {evex} pmaxsb %xmm4,%xmm6
+ {evex} pmaxsb (%rcx),%xmm6
+ {evex} pmaxsw %xmm4,%xmm6
+ {evex} pmaxsw (%rcx),%xmm6
+ {evex} pmaxsd %xmm4,%xmm6
+ {evex} pmaxsd (%rcx),%xmm6
+ {evex} pmaxub %xmm4,%xmm6
+ {evex} pmaxub (%rcx),%xmm6
+ {evex} pmaxuw %xmm4,%xmm6
+ {evex} pmaxuw (%rcx),%xmm6
+ {evex} pmaxud %xmm4,%xmm6
+ {evex} pmaxud (%rcx),%xmm6
+ {evex} pminsb %xmm4,%xmm6
+ {evex} pminsb (%rcx),%xmm6
+ {evex} pminsw %xmm4,%xmm6
+ {evex} pminsw (%rcx),%xmm6
+ {evex} pminsd %xmm4,%xmm6
+ {evex} pminsd (%rcx),%xmm6
+ {evex} pminub %xmm4,%xmm6
+ {evex} pminub (%rcx),%xmm6
+ {evex} pminuw %xmm4,%xmm6
+ {evex} pminuw (%rcx),%xmm6
+ {evex} pminud %xmm4,%xmm6
+ {evex} pminud (%rcx),%xmm6
+ {evex} pmulhuw %xmm4,%xmm6
+ {evex} pmulhuw (%rcx),%xmm6
+ {evex} pmulhrsw %xmm4,%xmm6
+ {evex} pmulhrsw (%rcx),%xmm6
+ {evex} pmulhw %xmm4,%xmm6
+ {evex} pmulhw (%rcx),%xmm6
+ {evex} pmullw %xmm4,%xmm6
+ {evex} pmullw (%rcx),%xmm6
+ {evex} pmulld %xmm4,%xmm6
+ {evex} pmulld (%rcx),%xmm6
+ {evex} pmuludq %xmm4,%xmm6
+ {evex} pmuludq (%rcx),%xmm6
+ {evex} pmuldq %xmm4,%xmm6
+ {evex} pmuldq (%rcx),%xmm6
+ {evex} por %xmm4,%xmm6
+ {evex} por (%rcx),%xmm6
+ {evex} psadbw %xmm4,%xmm6
+ {evex} psadbw (%rcx),%xmm6
+ {evex} pshufb %xmm4,%xmm6
+ {evex} pshufb (%rcx),%xmm6
+ {evex} psllw %xmm4,%xmm6
+ {evex} psllw (%rcx),%xmm6
+ {evex} pslld %xmm4,%xmm6
+ {evex} pslld (%rcx),%xmm6
+ {evex} psllq %xmm4,%xmm6
+ {evex} psllq (%rcx),%xmm6
+ {evex} psraw %xmm4,%xmm6
+ {evex} psraw (%rcx),%xmm6
+ {evex} psrad %xmm4,%xmm6
+ {evex} psrad (%rcx),%xmm6
+ {evex} psrlw %xmm4,%xmm6
+ {evex} psrlw (%rcx),%xmm6
+ {evex} psrld %xmm4,%xmm6
+ {evex} psrld (%rcx),%xmm6
+ {evex} psrlq %xmm4,%xmm6
+ {evex} psrlq (%rcx),%xmm6
+ {evex} psubb %xmm4,%xmm6
+ {evex} psubb (%rcx),%xmm6
+ {evex} psubw %xmm4,%xmm6
+ {evex} psubw (%rcx),%xmm6
+ {evex} psubd %xmm4,%xmm6
+ {evex} psubd (%rcx),%xmm6
+ {evex} psubq %xmm4,%xmm6
+ {evex} psubq (%rcx),%xmm6
+ {evex} psubsb %xmm4,%xmm6
+ {evex} psubsb (%rcx),%xmm6
+ {evex} psubsw %xmm4,%xmm6
+ {evex} psubsw (%rcx),%xmm6
+ {evex} psubusb %xmm4,%xmm6
+ {evex} psubusb (%rcx),%xmm6
+ {evex} psubusw %xmm4,%xmm6
+ {evex} psubusw (%rcx),%xmm6
+ {evex} punpckhbw %xmm4,%xmm6
+ {evex} punpckhbw (%rcx),%xmm6
+ {evex} punpckhwd %xmm4,%xmm6
+ {evex} punpckhwd (%rcx),%xmm6
+ {evex} punpckhdq %xmm4,%xmm6
+ {evex} punpckhdq (%rcx),%xmm6
+ {evex} punpckhqdq %xmm4,%xmm6
+ {evex} punpckhqdq (%rcx),%xmm6
+ {evex} punpcklbw %xmm4,%xmm6
+ {evex} punpcklbw (%rcx),%xmm6
+ {evex} punpcklwd %xmm4,%xmm6
+ {evex} punpcklwd (%rcx),%xmm6
+ {evex} punpckldq %xmm4,%xmm6
+ {evex} punpckldq (%rcx),%xmm6
+ {evex} punpcklqdq %xmm4,%xmm6
+ {evex} punpcklqdq (%rcx),%xmm6
+ {evex} pxor %xmm4,%xmm6
+ {evex} pxor (%rcx),%xmm6
+ {evex} subpd %xmm4,%xmm6
+ {evex} subpd (%rcx),%xmm6
+ {evex} subps %xmm4,%xmm6
+ {evex} subps (%rcx),%xmm6
+ {evex} unpckhpd %xmm4,%xmm6
+ {evex} unpckhpd (%rcx),%xmm6
+ {evex} unpckhps %xmm4,%xmm6
+ {evex} unpckhps (%rcx),%xmm6
+ {evex} unpcklpd %xmm4,%xmm6
+ {evex} unpcklpd (%rcx),%xmm6
+ {evex} unpcklps %xmm4,%xmm6
+ {evex} unpcklps (%rcx),%xmm6
+ {evex} xorpd %xmm4,%xmm6
+ {evex} xorpd (%rcx),%xmm6
+ {evex} xorps %xmm4,%xmm6
+ {evex} xorps (%rcx),%xmm6
+
+# Tests for op imm8, xmm/mem128, xmm
+ {evex} pshufd $100,%xmm4,%xmm6
+ {evex} pshufd $100,(%rcx),%xmm6
+ {evex} pshufhw $100,%xmm4,%xmm6
+ {evex} pshufhw $100,(%rcx),%xmm6
+ {evex} pshuflw $100,%xmm4,%xmm6
+ {evex} pshuflw $100,(%rcx),%xmm6
+ {evex} roundpd $4,%xmm4,%xmm6
+ {evex} roundpd $4,(%rcx),%xmm6
+ {evex} roundps $4,%xmm4,%xmm6
+ {evex} roundps $4,(%rcx),%xmm6
+
+# Tests for op imm8, xmm/mem128, xmm[, xmm]
+ {evex} gf2p8affineqb $100,%xmm4,%xmm6
+ {evex} gf2p8affineqb $100,(%rcx),%xmm6
+ {evex} gf2p8affineinvqb $100,%xmm4,%xmm6
+ {evex} gf2p8affineinvqb $100,(%rcx),%xmm6
+ {evex} palignr $100,%xmm4,%xmm6
+ {evex} palignr $100,(%rcx),%xmm6
+ {evex} pclmulqdq $100,%xmm4,%xmm6
+ {evex} pclmulqdq $100,(%rcx),%xmm6
+ {evex} shufpd $100,%xmm4,%xmm6
+ {evex} shufpd $100,(%rcx),%xmm6
+ {evex} shufps $100,%xmm4,%xmm6
+ {evex} shufps $100,(%rcx),%xmm6
+
+# Tests for op xmm/mem64, xmm
+ {evex} comisd %xmm4,%xmm6
+ {evex} comisd (%rcx),%xmm4
+ {evex} cvtdq2pd %xmm4,%xmm6
+ {evex} cvtdq2pd (%rcx),%xmm4
+ {evex} cvtpi2pd (%rcx),%xmm4
+ {evex} cvtps2pd %xmm4,%xmm6
+ {evex} cvtps2pd (%rcx),%xmm4
+ {evex} movddup %xmm4,%xmm6
+ {evex} movddup (%rcx),%xmm4
+ {evex} pmovsxbw %xmm4,%xmm6
+ {evex} pmovsxbw (%rcx),%xmm4
+ {evex} pmovsxwd %xmm4,%xmm6
+ {evex} pmovsxwd (%rcx),%xmm4
+ {evex} pmovsxdq %xmm4,%xmm6
+ {evex} pmovsxdq (%rcx),%xmm4
+ {evex} pmovzxbw %xmm4,%xmm6
+ {evex} pmovzxbw (%rcx),%xmm4
+ {evex} pmovzxwd %xmm4,%xmm6
+ {evex} pmovzxwd (%rcx),%xmm4
+ {evex} pmovzxdq %xmm4,%xmm6
+ {evex} pmovzxdq (%rcx),%xmm4
+ {evex} ucomisd %xmm4,%xmm6
+ {evex} ucomisd (%rcx),%xmm4
+
+# Tests for op mem64, xmm
+ {evex} movsd (%rcx),%xmm4
+
+# Tests for op xmm, mem64
+ {evex} movlpd %xmm4,(%rcx)
+ {evex} movlps %xmm4,(%rcx)
+ {evex} movhpd %xmm4,(%rcx)
+ {evex} movhps %xmm4,(%rcx)
+ {evex} movsd %xmm4,(%rcx)
+
+# Tests for op xmm, regq/mem64
+# Tests for op regq/mem64, xmm
+ {evex} movd %xmm4,%rcx
+ {evex} movd %rcx,%xmm4
+ {evex} movq %xmm4,%rcx
+ {evex} movq %rcx,%xmm4
+ {evex} movq %xmm4,(%rcx)
+ {evex} movq (%rcx),%xmm4
+
+# Tests for op xmm/mem64, regl
+ {evex} cvtsd2si %xmm4,%ecx
+ {evex} cvtsd2si (%rcx),%ecx
+ {evex} cvttsd2si %xmm4,%ecx
+ {evex} cvttsd2si (%rcx),%ecx
+
+# Tests for op xmm/mem64, regq
+ {evex} cvtsd2si %xmm4,%rcx
+ {evex} cvtsd2si (%rcx),%rcx
+ {evex} cvttsd2si %xmm4,%rcx
+ {evex} cvttsd2si (%rcx),%rcx
+
+# Tests for op regq/mem64, xmm[, xmm]
+ {evex} cvtsi2sdq %rcx,%xmm4
+ {evex} cvtsi2sdq (%rcx),%xmm4
+ {evex} cvtsi2ssq %rcx,%xmm4
+ {evex} cvtsi2ssq (%rcx),%xmm4
+
+# Tests for op imm8, regq/mem64, xmm[, xmm]
+ {evex} pinsrq $100,%rcx,%xmm4
+ {evex} pinsrq $100,(%rcx),%xmm4
+
+# Tests for op imm8, xmm, regq/mem64
+ {evex} pextrq $100,%xmm4,%rcx
+ {evex} pextrq $100,%xmm4,(%rcx)
+
+# Tests for op mem64, xmm[, xmm]
+ {evex} movlpd (%rcx),%xmm4
+ {evex} movlps (%rcx),%xmm4
+ {evex} movhpd (%rcx),%xmm4
+ {evex} movhps (%rcx),%xmm4
+
+# Tests for op imm8, xmm/mem64, xmm[, xmm]
+ {evex} roundsd $4,%xmm4,%xmm6
+ {evex} roundsd $4,(%rcx),%xmm6
+
+# Tests for op xmm/mem64, xmm[, xmm]
+ {evex} addsd %xmm4,%xmm6
+ {evex} addsd (%rcx),%xmm6
+ {evex} cvtsd2ss %xmm4,%xmm6
+ {evex} cvtsd2ss (%rcx),%xmm6
+ {evex} divsd %xmm4,%xmm6
+ {evex} divsd (%rcx),%xmm6
+ {evex} maxsd %xmm4,%xmm6
+ {evex} maxsd (%rcx),%xmm6
+ {evex} minsd %xmm4,%xmm6
+ {evex} minsd (%rcx),%xmm6
+ {evex} mulsd %xmm4,%xmm6
+ {evex} mulsd (%rcx),%xmm6
+ {evex} sqrtsd %xmm4,%xmm6
+ {evex} sqrtsd (%rcx),%xmm6
+ {evex} subsd %xmm4,%xmm6
+ {evex} subsd (%rcx),%xmm6
+
+# Tests for op xmm/mem32, xmm[, xmm]
+ {evex} addss %xmm4,%xmm6
+ {evex} addss (%rcx),%xmm6
+ {evex} cvtss2sd %xmm4,%xmm6
+ {evex} cvtss2sd (%rcx),%xmm6
+ {evex} divss %xmm4,%xmm6
+ {evex} divss (%rcx),%xmm6
+ {evex} maxss %xmm4,%xmm6
+ {evex} maxss (%rcx),%xmm6
+ {evex} minss %xmm4,%xmm6
+ {evex} minss (%rcx),%xmm6
+ {evex} mulss %xmm4,%xmm6
+ {evex} mulss (%rcx),%xmm6
+ {evex} rcpss %xmm4,%xmm6
+ {evex} rcpss (%rcx),%xmm6
+ {evex} rsqrtss %xmm4,%xmm6
+ {evex} rsqrtss (%rcx),%xmm6
+ {evex} sqrtss %xmm4,%xmm6
+ {evex} sqrtss (%rcx),%xmm6
+ {evex} subss %xmm4,%xmm6
+ {evex} subss (%rcx),%xmm6
+
+# Tests for op xmm/mem32, xmm
+ {evex} comiss %xmm4,%xmm6
+ {evex} comiss (%rcx),%xmm4
+ {evex} pmovsxbd %xmm4,%xmm6
+ {evex} pmovsxbd (%rcx),%xmm4
+ {evex} pmovsxwq %xmm4,%xmm6
+ {evex} pmovsxwq (%rcx),%xmm4
+ {evex} pmovzxbd %xmm4,%xmm6
+ {evex} pmovzxbd (%rcx),%xmm4
+ {evex} pmovzxwq %xmm4,%xmm6
+ {evex} pmovzxwq (%rcx),%xmm4
+ {evex} ucomiss %xmm4,%xmm6
+ {evex} ucomiss (%rcx),%xmm4
+
+# Tests for op mem32, xmm
+ {evex} movss (%rcx),%xmm4
+
+# Tests for op xmm, mem32
+ {evex} movss %xmm4,(%rcx)
+
+# Tests for op xmm, regl/mem32
+# Tests for op regl/mem32, xmm
+ {evex} movd %xmm4,%ecx
+ {evex} movd %xmm4,(%rcx)
+ {evex} movd %ecx,%xmm4
+ {evex} movd (%rcx),%xmm4
+
+# Tests for op xmm/mem32, regl
+ {evex} cvtss2si %xmm4,%ecx
+ {evex} cvtss2si (%rcx),%ecx
+ {evex} cvttss2si %xmm4,%ecx
+ {evex} cvttss2si (%rcx),%ecx
+
+# Tests for op xmm/mem32, regq
+ {evex} cvtss2si %xmm4,%rcx
+ {evex} cvtss2si (%rcx),%rcx
+ {evex} cvttss2si %xmm4,%rcx
+ {evex} cvttss2si (%rcx),%rcx
+
+# Tests for op imm8, xmm, regq/mem32
+ {evex} extractps $100,%xmm4,%rcx
+ {evex} extractps $100,%xmm4,(%rcx)
+
+# Tests for op imm8, xmm, regl/mem32
+ {evex} pextrd $100,%xmm4,%ecx
+ {evex} pextrd $100,%xmm4,(%rcx)
+ {evex} extractps $100,%xmm4,%ecx
+ {evex} extractps $100,%xmm4,(%rcx)
+
+# Tests for op regl/mem32, xmm[, xmm]
+ {evex} cvtsi2sd %ecx,%xmm4
+ {evex} cvtsi2sdl (%rcx),%xmm4
+ {evex} cvtsi2ss %ecx,%xmm4
+ {evex} cvtsi2ssl (%rcx),%xmm4
+
+# Tests for op imm8, xmm/mem32, xmm[, xmm]
+ {evex} insertps $100,%xmm4,%xmm6
+ {evex} insertps $100,(%rcx),%xmm6
+ {evex} roundss $4,%xmm4,%xmm6
+ {evex} roundss $4,(%rcx),%xmm6
+
+# Tests for op xmm/m16, xmm
+ {evex} pmovsxbq %xmm4,%xmm6
+ {evex} pmovsxbq (%rcx),%xmm4
+ {evex} pmovzxbq %xmm4,%xmm6
+ {evex} pmovzxbq (%rcx),%xmm4
+
+# Tests for op imm8, xmm, regl/mem16
+ {evex} pextrw $100,%xmm4,%ecx
+ {evex} pextrw $100,%xmm4,%rcx
+ {evex} pextrw $100,%xmm4,(%rcx)
+
+# Tests for op imm8, regl/mem16, xmm[, xmm]
+ {evex} pinsrw $100,%ecx,%xmm4
+ {evex} pinsrw $100,%rcx,%xmm4
+ {evex} pinsrw $100,(%rcx),%xmm4
+
+# Tests for op imm8, xmm, regl/mem8
+ {evex} pextrb $100,%xmm4,%ecx
+ {evex} pextrb $100,%xmm4,%rcx
+ {evex} pextrb $100,%xmm4,(%rcx)
+
+# Tests for op imm8, regl/mem8, xmm[, xmm]
+ {evex} pinsrb $100,%ecx,%xmm4
+ {evex} pinsrb $100,%rcx,%xmm4
+ {evex} pinsrb $100,(%rcx),%xmm4
+
+# Tests for op xmm, xmm
+ {evex} movq %xmm4,%xmm6
+
+# Tests for op xmm, xmm[, xmm]
+ {evex} movhlps %xmm4,%xmm6
+ {evex} movlhps %xmm4,%xmm6
+ {evex} movsd %xmm4,%xmm6
+ {evex} movss %xmm4,%xmm6
+
+# Tests for op imm8, xmm[, xmm]
+ {evex} pslld $100,%xmm4
+ {evex} pslldq $100,%xmm4
+ {evex} psllq $100,%xmm4
+ {evex} psllw $100,%xmm4
+ {evex} psrad $100,%xmm4
+ {evex} psraw $100,%xmm4
+ {evex} psrld $100,%xmm4
+ {evex} psrldq $100,%xmm4
+ {evex} psrlq $100,%xmm4
+ {evex} psrlw $100,%xmm4
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -391,6 +391,7 @@ run_dump_test "x86-64-apx-nf"
run_dump_test "x86-64-apx-nf-intel"
run_dump_test "x86-64-apx_f-evex"
run_dump_test "sse2avx-apx"
+run_dump_test "sse2avx-evex"
run_dump_test "x86-64-avx512f-rcigrz-intel"
run_dump_test "x86-64-avx512f-rcigrz"
run_dump_test "x86-64-clwb"
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1170,11 +1170,11 @@ maxss<SSE>, 0xf30f5f, <SSE:cpu>, Modrm|<
minps<SSE>, 0x0f5d, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
minss<SSE>, 0xf30f5d, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
movaps<SSE>, 0x0f28, <SSE:cpu>, D|Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-movhlps<sse>, 0x0f12, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
+movhlps<SSE>, 0x0f12, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM, RegXMM }
movhps, 0x16, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexVVVV|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movhps, 0x17, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movhps, 0xf16, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
-movlhps<sse>, 0x0f16, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
+movlhps<SSE>, 0x0f16, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM, RegXMM }
movlps, 0x12, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexVVVV|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movlps, 0x13, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movlps, 0xf12, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
@@ -1183,7 +1183,7 @@ movntps<SSE>, 0x0f2b, <SSE:cpu>, Modrm|<
movntq, 0xfe7, SSE|3dnowA, Modrm|NoSuf, { RegMMX, Qword|Unspecified|BaseIndex }
movntdq<SSE2D>, 0x660fe7, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
movss, 0xf310, AVX|AVX512F, D|Modrm|VexLIG|EVexLIG|Space0F|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Dword|Unspecified|BaseIndex, RegXMM }
-movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
+movss, 0xf310, AVX|AVX512F, D|Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
movss, 0xf30f10, SSE, D|Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
movups<SSE>, 0x0f10, <SSE:cpu>, D|Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
mulps<SSE>, 0x0f59, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1278,6 +1278,7 @@ movntpd<SSE2Q>, 0x660f2b, <SSE2Q:cpu>, M
movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movsd, 0xf210, AVX512F, D|Modrm|EVexLIG|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
+movsd, 0xf210, AVX512F, D|Modrm|EVexLIG|Space0F|VexVVVV|VexW1|NoSuf|SSE2AVX, { RegXMM, RegXMM }
movsd, 0xf20f10, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
movupd<SSE2Q>, 0x660f10, <SSE2Q:cpu>, D|Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
mulpd<SSE2Q>, 0x660f59, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1318,8 +1319,8 @@ pmuludq, 0xff4, SSE2, Modrm|NoSuf, { Qwo
pshufd<SSE2D>, 0x660f70, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
pshufhw<SSE2BW>, 0xf30f70, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
pshuflw<SSE2BW>, 0xf20f70, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pslldq<sse2>, 0x660f73/7, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM }
-psrldq<sse2>, 0x660f73/3, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM }
+pslldq<SSE2BW>, 0x660f73/7, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|NoSuf, { Imm8, RegXMM }
+psrldq<SSE2BW>, 0x660f73/3, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|NoSuf, { Imm8, RegXMM }
punpckhqdq<SSE2Q>, 0x660f6d, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpcklqdq<SSE2Q>, 0x660f6c, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
^ permalink raw reply [flat|nested] 12+ messages in thread