public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 0/3] x86: Optimize the encoder of the vvvv register
@ 2024-04-24  7:23 Cui, Lili
  2024-04-24  7:23 ` [PATCH 1/3] x86: Use vexvvvv to encode " Cui, Lili
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Cui, Lili @ 2024-04-24  7:23 UTC (permalink / raw)
  To: binutils; +Cc: hjl.tools, jbeulich

These patches want to optimize the encoder of the vvvv register.
Previously we used Vexvvvv, SWAP_SOURCES and extension_opcode
to help encode the vvvv register, these patches simplified the
logic to only use vexvvvv and added appropriate Vexvvvv values
for the related instructions.

Cui, Lili (3):
  x86: Use vexvvvv to encode the vvvv register
  x86: Drop SwapSources
  x86: Drop using extension_opcode to encode vvvv register

 gas/config/tc-i386.c                    |   56 +-
 gas/testsuite/gas/i386/x86-64-sse2avx.d |    7 +
 gas/testsuite/gas/i386/x86-64-sse2avx.s |    7 +
 opcodes/i386-opc.h                      |   21 +-
 opcodes/i386-opc.tbl                    | 1154 ++++++++++++-----------
 5 files changed, 628 insertions(+), 617 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/3] x86: Use vexvvvv to encode the vvvv register
  2024-04-24  7:23 [PATCH 0/3] x86: Optimize the encoder of the vvvv register Cui, Lili
@ 2024-04-24  7:23 ` Cui, Lili
  2024-04-24  7:52   ` Jan Beulich
  2024-04-24  7:23 ` [PATCH 2/3] x86: Drop SwapSources Cui, Lili
  2024-04-24  7:23 ` [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register Cui, Lili
  2 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-24  7:23 UTC (permalink / raw)
  To: binutils; +Cc: hjl.tools, jbeulich

gas/ChangeLog:

        * config/tc-i386.c (optimize_encoding): Replaced 1 with VexVVVV_SRC1.
        (build_modrm_byte): Used vexvvvv to encode the vvvv register.
        (s_insn): Replaced 1 with VexVVVV_SRC1.

opcodes/ChangeLog:

        * i386-opc.h (VexVVVV_DST): Adjusted the value.
        (VexVVVV_SRC1): New.
        * i386-opc.tbl: Replaced VexVVVV with VexVVVV_SRC1.
---
 gas/config/tc-i386.c |   33 +-
 opcodes/i386-opc.h   |    9 +-
 opcodes/i386-opc.tbl | 1134 +++++++++++++++++++++---------------------
 3 files changed, 588 insertions(+), 588 deletions(-)

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index b03746852d6..640a83c8b2d 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5045,7 +5045,7 @@ optimize_encoding (void)
        */
       i.tm.opcode_space = SPACE_0F;
       i.tm.base_opcode = 0x6c;
-      i.tm.opcode_modifier.vexvvvv = 1;
+      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
 
       ++i.operands;
       ++i.reg_operands;
@@ -10432,19 +10432,19 @@ build_modrm_byte (void)
 				     || i.encoding == encoding_evex));
     }
 
-  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
+  switch (i.tm.opcode_modifier.vexvvvv)
     {
-      v = dest;
-      dest-- ;
-    }
-  else
-    {
-      for (v = source + 1; v < dest; ++v)
-	if (v != reg_slot)
-	  break;
-      if (v >= dest)
-	v = ~0;
-    }
+    case VexVVVV_SRC1:
+      v =  dest - 1;
+      break;
+    case VexVVVV_DST:
+      v = dest--;
+      break;
+    default:
+      v = ~0;
+      break;
+     }
+
   if (i.tm.extension_opcode != None)
     {
       if (dest != source)
@@ -10462,10 +10462,7 @@ build_modrm_byte (void)
     }
 
   if (v < MAX_OPERANDS)
-    {
-      gas_assert (i.tm.opcode_modifier.vexvvvv);
-      i.vex.register_specifier = i.op[v].regs;
-    }
+    i.vex.register_specifier = i.op[v].regs;
 
   if (op < i.operands)
     {
@@ -13155,7 +13152,7 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	case 3:
 	  if (i.encoding != encoding_default)
 	    {
-	      i.tm.opcode_modifier.vexvvvv = 1;
+	      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
 	      break;
 	    }
 	  /* Fall through.  */
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index ce54c9d8d26..4ae87a3dbd5 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -639,11 +639,12 @@ enum
 #define VEXScalar	3
   Vex,
   /* How to encode VEX.vvvv:
-     0: VEX.vvvv must be 1111b.
-     1: VEX.vvvv encodes one of the src register operands.
-     2: VEX.vvvv encodes the dest register operand.
+     1: VEX.vvvv encodes the src1 register operand.
+     3: VEX.vvvv encodes the dest register operand.
    */
-#define VexVVVV_DST   2
+#define VexVVVV_SRC1   1
+#define VexVVVV_DST    3
+
   VexVVVV,
   /* How the VEX.W bit is used:
      0: Set by the REX.W bit.
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 7d248f64bbd..31c769857dc 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -141,7 +141,9 @@
 
 #define Disp8ShiftVL Disp8MemShift=DISP8_SHIFT_VL
 
-#define DstVVVV VexVVVV=VexVVVV_DST
+#define Src1VVVV VexVVVV=VexVVVV_SRC1
+#define DstVVVV  VexVVVV=VexVVVV_DST
+
 
 // The template supports VEX format for cpuid and EVEX format for cpuid & APX_F.
 // While therefore we really mean cpuid|(cpuid&APX_F) here, this can't be
@@ -1001,12 +1003,12 @@ pause, 0xf390, i186, NoSuf, {}
 // MMX/SSE2 instructions.
 
 <mmx:cpu:pfx:attr:reg:mem, +
-    $avx:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
+    $avx:AVX:66:Vex128|Src1VVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
     $sse:SSE2:66::RegXMM:Xmmword, +
     $mmx:MMX:::RegMMX:Qword>
 
 <sse2:cpu:attr:scal:vvvv, +
-    $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
+    $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:Src1VVVV, +
     $sse:SSE2:::>
 
 <bw:opc:vexw:elem:kcpu:kpfx:cpubmi, +
@@ -1089,7 +1091,7 @@ pxor<mmx>, 0x<mmx:pfx>0fef, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mm
 // SSE instructions.
 
 <sse:cpu:attr:scal:vvvv, +
-    $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
+    $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:Src1VVVV, +
     $sse:SSE:::>
 <frel:imm:comm, eq:0:C, lt:1:, le:2:, unord:3:C, neq:4:C, nlt:5:, nle:6:, ord:7:C>
 
@@ -1105,8 +1107,8 @@ comiss<sse>, 0x0f2f, <sse:cpu>, Modrm|<sse:scal>|NoSuf, { Dword|Unspecified|Base
 cvtpi2ps, 0xf2a, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegXMM }
 cvtps2pi, 0xf2d, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegMMX }
 cvtsi2ss<sse>, 0xf30f2a, <sse:cpu>&No64, Modrm|<sse:scal>|<sse:vvvv>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|Src1VVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
 cvtsi2ss, 0xf30f2a, SSE&x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
 cvtsi2ss, 0xf30f2a, SSE&x64, Modrm|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
 cvtss2si, 0xf32d, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
@@ -1124,11 +1126,11 @@ minps<sse>, 0x0f5d, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspe
 minss<sse>, 0xf30f5d, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movaps<sse>, 0x0f28, <sse:cpu>, D|Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 movhlps<sse>, 0x0f12, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
-movhps, 0x16, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movhps, 0x16, AVX, Modrm|Vex|Space0F|Src1VVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
 movhps, 0x17, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
 movhps, 0xf16, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
 movlhps<sse>, 0x0f16, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
-movlps, 0x12, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movlps, 0x12, AVX, Modrm|Vex|Space0F|Src1VVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
 movlps, 0x13, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
 movlps, 0xf12, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
 movmskps<sse>, 0x0f50, <sse:cpu>, Modrm|<sse:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
@@ -1136,7 +1138,7 @@ movntps<sse>, 0x0f2b, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM, Xmmword|Unspe
 movntq, 0xfe7, SSE|3dnowA, Modrm|NoSuf, { RegMMX, Qword|Unspecified|BaseIndex }
 movntdq<sse2>, 0x660fe7, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
 movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Dword|Unspecified|BaseIndex, RegXMM }
-movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
+movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|Src1VVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
 movss, 0xf30f10, SSE, D|Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movups<sse>, 0x0f10, <sse:cpu>, D|Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 mulps<sse>, 0x0f59, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1200,8 +1202,8 @@ cvtpi2pd, 0x660f2a, SSE2, Modrm|NoSuf, { RegMMX, RegXMM }
 cvtpi2pd, 0xf3e6, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
 cvtpi2pd, 0x660f2a, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
 cvtsi2sd<sse2>, 0xf20f2a, <sse2:cpu>&No64, Modrm|IgnoreSize|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|Src1VVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
 cvtsi2sd, 0xf20f2a, SSE2&x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
 cvtsi2sd, 0xf20f2a, SSE2&x64, Modrm|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
 divpd<sse2>, 0x660f5e, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1211,16 +1213,16 @@ maxsd<sse2>, 0xf20f5f, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|
 minpd<sse2>, 0x660f5d, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 minsd<sse2>, 0xf20f5d, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movapd<sse2>, 0x660f28, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-movhpd, 0x6616, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movhpd, 0x6616, AVX, Modrm|Vex|Space0F|Src1VVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
 movhpd, 0x6617, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
 movhpd, 0x660f16, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
-movlpd, 0x6612, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movlpd, 0x6612, AVX, Modrm|Vex|Space0F|Src1VVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
 movlpd, 0x6613, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
 movlpd, 0x660f12, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
 movmskpd<sse2>, 0x660f50, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
 movntpd<sse2>, 0x660f2b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
 movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
-movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
+movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|Src1VVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
 movsd, 0xf20f10, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movupd<sse2>, 0x660f10, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 mulpd<sse2>, 0x660f59, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1271,7 +1273,7 @@ punpcklqdq<sse2>, 0x660f6c, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { R
 
 // SSE3 instructions.
 
-<sse3:cpu:attr:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:SSE3::>
+<sse3:cpu:attr:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:Src1VVVV, $sse:SSE3::>
 
 addsubpd<sse3>, 0x660fd0, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 addsubps<sse3>, 0xf20fd0, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1349,7 +1351,7 @@ invpcid, 0xf3f2, INVPCID&APX_F, Modrm|NoSuf|EVexMap4|VexWIG, { Oword|Unspecified
 // SSSE3 instructions.
 
 <ssse3:cpu:pfx:attr:vvvv:reg:mem, +
-    $avx:AVX:66:Vex128|VexW0|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
+    $avx:AVX:66:Vex128|VexW0|SSE2AVX:Src1VVVV:RegXMM:Xmmword, +
     $sse:SSSE3:66:::RegXMM:Xmmword, +
     $mmx:SSSE3::::RegMMX:Qword>
 
@@ -1370,12 +1372,12 @@ pabsd<ssse3>, 0x<ssse3:pfx>0f381e, <ssse3:cpu>, Modrm|<ssse3:attr>|NoSuf, { <sss
 
 // SSE4.1 instructions.
 
-<sse41:cpu:attr:scal:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, $sse:SSE4_1:::>
+<sse41:cpu:attr:scal:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:Src1VVVV, $sse:SSE4_1:::>
 <sd:ppfx:spfx:opc:vexw:elem, s::f3:0:VexW0:Dword, d:66:f2:1:VexW1:Qword>
 
 blendp<sd><sse41>, 0x660f3a0c | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-blendvp<sd>, 0x664a | <sd:opc>, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|SSE2AVX, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
-blendvp<sd>, 0x664a | <sd:opc>, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm0|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
+blendvp<sd>, 0x664a | <sd:opc>, AVX, Modrm|Vex128|Space0F3A|Src1VVVV|VexW0|NoSuf|SSE2AVX, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
+blendvp<sd>, 0x664a | <sd:opc>, AVX, Modrm|Vex128|Space0F3A|Src1VVVV|VexW0|NoSuf|Implicit1stXmm0|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
 blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
 blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 dpp<sd><sse41>, 0x660f3a40 | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1387,8 +1389,8 @@ insertps<sse41>, 0x660f3a21, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf,
 movntdqa<sse41>, 0x660f382a, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
 mpsadbw<sse41>, 0x660f3a42, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
 packusdw<sse41>, 0x660f382b, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|SSE2AVX, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
-pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm0|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|Src1VVVV|VexW0|NoSuf|SSE2AVX, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
+pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|Src1VVVV|VexW0|NoSuf|Implicit1stXmm0|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
 pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
 pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 pblendw<sse41>, 0x660f3a0e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1402,7 +1404,7 @@ phminposuw<sse41>, 0x660f3841, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { RegXMM|U
 pinsrb<sse41>, 0x660f3a20, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
 pinsrb<sse41>, 0x660f3a20, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM }
 pinsrd<sse41>, 0x660f3a22, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|IgnoreSize, { Imm8, Reg32|Unspecified|BaseIndex, RegXMM }
-pinsrq, 0x6622, AVX&x64, Modrm|Vex|Space0F3A|VexVVVV|VexW1|NoSuf|SSE2AVX, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
+pinsrq, 0x6622, AVX&x64, Modrm|Vex|Space0F3A|Src1VVVV|VexW1|NoSuf|SSE2AVX, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
 pinsrq, 0x660f3a22, SSE4_1&x64, Modrm|Size64|NoSuf, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
 pmaxsb<sse41>, 0x660f383c, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 pmaxsd<sse41>, 0x660f383d, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1432,7 +1434,7 @@ rounds<sd><sse41>, 0x660f3a0a | <sd:opc>, <sse41:cpu>, Modrm|<sse41:scal>|<sse41
 
 // SSE4.2 instructions.
 
-<sse42:cpu:attr:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:SSE4_2::>
+<sse42:cpu:attr:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:Src1VVVV, $sse:SSE4_2::>
 
 pcmpgtq<sse42>, 0x660f3837, <sse42:cpu>, Modrm|<sse42:attr>|<sse42:vvvv>|NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM }
 pcmpestri<sse42>, 0x660f3a61, <sse42:cpu>&No64, Modrm|<sse42:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1464,7 +1466,7 @@ xsaveopt64, 0xfae/6, Xsaveopt&x64, Modrm|NoSuf|Size64|NoEgpr, { Unspecified|Base
 
 // AES instructions.
 
-<aes:cpu:attr:vvvv, $avx:AVX&:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:::>
+<aes:cpu:attr:vvvv, $avx:AVX&:Vex128|VexW0|SSE2AVX:Src1VVVV, $sse:::>
 
 aesdec<aes>, 0x660f38de, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 aesdeclast<aes>, 0x660f38df, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1475,7 +1477,7 @@ aeskeygenassist<aes>, 0x660f3adf, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { Imm8,
 
 // PCLMULQDQ
 
-<pclmul:cpu:attr, $avx:AVX&:Vex128|VexW0|SSE2AVX|VexVVVV, $sse::>
+<pclmul:cpu:attr, $avx:AVX&:Vex128|VexW0|SSE2AVX|Src1VVVV, $sse::>
 
 pclmulqdq<pclmul>, 0x660f3a44, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
 pclmullqlqdq<pclmul>, 0x660f3a44/0x00, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1485,7 +1487,7 @@ pclmulhqhqdq<pclmul>, 0x660f3a44/0x11, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr
 
 // GFNI
 
-<gfni:cpu:w0:w1, $avx:AVX&:Vex128|VexW0|SSE2AVX|VexVVVV:Vex128|VexW1|SSE2AVX|VexVVVV, $sse:::>
+<gfni:cpu:w0:w1, $avx:AVX&:Vex128|VexW0|SSE2AVX|Src1VVVV:Vex128|VexW1|SSE2AVX|Src1VVVV, $sse:::>
 
 gf2p8affineqb<gfni>, 0x660f3ace, <gfni:cpu>GFNI, Modrm|<gfni:w1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
 gf2p8affineinvqb<gfni>, 0x660f3acf, <gfni:cpu>GFNI, Modrm|<gfni:w1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1513,23 +1515,23 @@ gf2p8mulb<gfni>, 0x660f38cf, <gfni:cpu>GFNI, Modrm|<gfni:w0>|NoSuf, { RegXMM|Uns
     x:Vex128:ATTSyntax:RegXMM|Unspecified|BaseIndex, +
     y:Vex256:ATTSyntax:RegYMM|Unspecified|BaseIndex>
 
-vaddp<sd>, 0x<sd:ppfx>58, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vadds<sd>, 0x<sd:spfx>58, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vaddsubpd, 0x66d0, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vaddsubps, 0xf2d0, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vandnp<sd>, 0x<sd:ppfx>55, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vandp<sd>, 0x<sd:ppfx>54, AVX, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vblendp<sd>, 0x660c | <sd:opc>, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vblendvp<sd>, 0x664a | <sd:opc>, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vaddp<sd>, 0x<sd:ppfx>58, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vadds<sd>, 0x<sd:spfx>58, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vaddsubpd, 0x66d0, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vaddsubps, 0xf2d0, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vandnp<sd>, 0x<sd:ppfx>55, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vandp<sd>, 0x<sd:ppfx>54, AVX, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vblendp<sd>, 0x660c | <sd:opc>, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vblendvp<sd>, 0x664a | <sd:opc>, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vbroadcastf128, 0x661a, AVX, Modrm|Vex256|Space0F38|VexW0|NoSuf, { Xmmword|Unspecified|BaseIndex, RegYMM }
 // vbroadcastf32x4 in disguise (see vround{p,s}{s,d} comment)
 vbroadcastf128, 0x661a, APX_F&AVX512VL, Modrm|EVex256|Space0F38|VexW0|Disp8MemShift=4|NoSuf, { Xmmword|Unspecified|BaseIndex, RegYMM }
 vbroadcastsd, 0x6619, AVX, Modrm|Vex256|Space0F38|VexW0|NoSuf, { Qword|Unspecified|BaseIndex, RegYMM }
 vbroadcastss, 0x6618, AVX, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Dword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vcmp<frel>p<sd>, 0x<sd:ppfx>c2/0x<frel:imm>, AVX, Modrm|<frel:comm>|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vcmp<frel>s<sd>, 0x<sd:spfx>c2/0x<frel:imm>, AVX, Modrm|<frel:comm>|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf|ImmExt, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcmpp<sd>, 0x<sd:ppfx>c2, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vcmps<sd>, 0x<sd:spfx>c2, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vcmp<frel>p<sd>, 0x<sd:ppfx>c2/0x<frel:imm>, AVX, Modrm|<frel:comm>|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vcmp<frel>s<sd>, 0x<sd:spfx>c2/0x<frel:imm>, AVX, Modrm|<frel:comm>|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf|ImmExt, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcmpp<sd>, 0x<sd:ppfx>c2, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vcmps<sd>, 0x<sd:spfx>c2, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vcomis<sd>, 0x<sd:ppfx>2f, AVX, Modrm|VexLIG|Space0F|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM }
 vcvtdq2pd, 0xf3e6, AVX, Modrm|Vex128|Space0F|VexWIG|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
 vcvtdq2pd, 0xf3e6, AVX, Modrm|Vex256|Space0F|VexWIG|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
@@ -1540,39 +1542,39 @@ vcvtps2dq, 0x665b, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspe
 vcvtps2pd, 0x5a, AVX, Modrm|Vex128|Space0F|VexWIG|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
 vcvtps2pd, 0x5a, AVX, Modrm|Vex256|Space0F|VexWIG|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
 vcvts<sd>2si, 0x<sd:spfx>2d, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-vcvtsd2ss, 0xf25a, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vcvtsi2s<sd>, 0x<sd:spfx>2a, AVX, Modrm|VexLIG|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2s<sd>, 0x<sd:spfx>2a, AVX, Modrm|VexLIG|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtss2sd, 0xf35a, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vcvtsd2ss, 0xf25a, AVX, Modrm|Vex=3|Space0F|Src1VVVV|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vcvtsi2s<sd>, 0x<sd:spfx>2a, AVX, Modrm|VexLIG|Space0F|Src1VVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2s<sd>, 0x<sd:spfx>2a, AVX, Modrm|VexLIG|Space0F|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtss2sd, 0xf35a, AVX, Modrm|Vex=3|Space0F|Src1VVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vcvttpd2dq<Vxy>, 0x66e6, AVX, Modrm|<Vxy:vex>|Space0F|VexWIG|NoSuf|<Vxy:syntax>, { <Vxy:src>, RegXMM }
 vcvttps2dq, 0xf35b, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vcvtts<sd>2si, 0x<sd:spfx>2c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-vdivp<sd>, 0x<sd:ppfx>5e, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vdivs<sd>, 0x<sd:spfx>5e, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vdppd, 0x6641, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vdivp<sd>, 0x<sd:ppfx>5e, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vdivs<sd>, 0x<sd:spfx>5e, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vdppd, 0x6641, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vextractf128, 0x6619, AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
 // vextractf32x4 in disguise (see vround{p,s}{s,d} comment)
 vextractf128, 0x6619, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
 vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
 vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg64 }
-vhaddpd, 0x667c, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vhsubps, 0xf27d, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vinsertf128, 0x6618, AVX, Modrm|Vex256|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
+vhaddpd, 0x667c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vhsubps, 0xf27d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vinsertf128, 0x6618, AVX, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
 // vinsertf32x4 in disguise (see vround{p,s}{s,d} comment)
-vinsertf128, 0x6618, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexVVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
-vinsertps, 0x6621, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vinsertf128, 0x6618, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
+vinsertps, 0x6621, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vlddqu, 0xf2f0, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM }
 vldmxcsr, 0xae/2, AVX, Modrm|Vex128|Space0F|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex }
 vmaskmovdqu, 0x66f7, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { RegXMM, RegXMM }
-vmaskmovp<sd>, 0x662e | <sd:opc>, AVX, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
-vmaskmovp<sd>, 0x662c | <sd:opc>, AVX, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vmaxp<sd>, 0x<sd:ppfx>5f, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vmaxs<sd>, 0x<sd:spfx>5f, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vminp<sd>, 0x<sd:ppfx>5d, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vmins<sd>, 0x<sd:spfx>5d, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vmaskmovp<sd>, 0x662e | <sd:opc>, AVX, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
+vmaskmovp<sd>, 0x662c | <sd:opc>, AVX, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vmaxp<sd>, 0x<sd:ppfx>5f, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vmaxs<sd>, 0x<sd:spfx>5f, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vminp<sd>, 0x<sd:ppfx>5d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vmins<sd>, 0x<sd:spfx>5d, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vmovap<sd>, 0x<sd:ppfx>28, AVX, D|Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 // vmovd really shouldn't allow for 64bit operand (vmovq is the right
 // mnemonic for copying between Reg64/Mem64 and RegXMM, as is mandated
@@ -1585,11 +1587,11 @@ vmovddup, 0xf212, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { Qword|Unspecified|BaseI
 vmovddup, 0xf212, AVX, Modrm|Vex=2|Space0F|VexWIG|NoSuf, { Unspecified|BaseIndex|RegYMM, RegYMM }
 vmovdqa, 0x666f, AVX, D|Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vmovdqu, 0xf36f, AVX, D|Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vmovhlps, 0x12, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
-vmovhp<sd>, 0x<sd:ppfx>16, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vmovhlps, 0x12, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
+vmovhp<sd>, 0x<sd:ppfx>16, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vmovhp<sd>, 0x<sd:ppfx>17, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex }
-vmovlhps, 0x16, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
-vmovlp<sd>, 0x<sd:ppfx>12, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vmovlhps, 0x16, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
+vmovlp<sd>, 0x<sd:ppfx>12, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vmovlp<sd>, 0x<sd:ppfx>13, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex }
 vmovmskp<sd>, 0x<sd:ppfx>50, AVX, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_sSuf, { RegXMM|RegYMM, Reg32|Reg64 }
 vmovntdq, 0x66e7, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
@@ -1599,78 +1601,78 @@ vmovq, 0xf37e, AVX, Load|Modrm|Vex=1|Space0F|VexWIG|NoSuf, { Qword|Unspecified|B
 vmovq, 0x66d6, AVX, Modrm|Vex=1|Space0F|VexWIG|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
 vmovq, 0x666e, x64&(AVX|AVX512F), D|Modrm|Vex128|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf, { Reg64|Unspecified|BaseIndex, RegXMM }
 vmovs<sd>, 0x<sd:spfx>10, AVX, D|Modrm|VexLIG|Space0F|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex, RegXMM }
-vmovs<sd>, 0x<sd:spfx>10, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
+vmovs<sd>, 0x<sd:spfx>10, AVX, D|Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
 vmovshdup, 0xf316, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vmovsldup, 0xf312, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vmovup<sd>, 0x<sd:ppfx>10, AVX, D|Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vmpsadbw, 0x6642, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vmulp<sd>, 0x<sd:ppfx>59, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vmuls<sd>, 0x<sd:spfx>59, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vorp<sd>, 0x<sd:ppfx>56, AVX, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vmpsadbw, 0x6642, AVX|AVX2, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vmulp<sd>, 0x<sd:ppfx>59, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vmuls<sd>, 0x<sd:spfx>59, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vorp<sd>, 0x<sd:ppfx>56, AVX, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpabs<bw>, 0x661c | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F38|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vpabsd, 0x661e, AVX|AVX2, Modrm|Vex|Space0F38|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vpackssdw, 0x666b, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpacksswb, 0x6663, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpackusdw, 0x662b, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpackuswb, 0x6667, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpadds<bw>, 0x66ec | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpadd<bw>, 0x66fc | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpaddd, 0x66fe, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpaddq, 0x66d4, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpaddus<bw>, 0x66dc | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpalignr, 0x660f, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpand, 0x66db, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpandn, 0x66df, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpavg<bw>, 0x66e0 | (3 * <bw:opc>), AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpblendvb, 0x664c, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpblendw, 0x660e, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcmpeq<bw>, 0x6674 | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcmpeqd, 0x6676, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpackssdw, 0x666b, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpacksswb, 0x6663, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpackusdw, 0x662b, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpackuswb, 0x6667, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpadds<bw>, 0x66ec | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpadd<bw>, 0x66fc | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpaddd, 0x66fe, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpaddq, 0x66d4, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpaddus<bw>, 0x66dc | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpalignr, 0x660f, AVX|AVX2, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpand, 0x66db, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpandn, 0x66df, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpavg<bw>, 0x66e0 | (3 * <bw:opc>), AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpblendvb, 0x664c, AVX|AVX2, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpblendw, 0x660e, AVX|AVX2, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpeq<bw>, 0x6674 | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpeqd, 0x6676, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpcmpestri, 0x6661, AVX&No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
 vpcmpestri, 0x6661, AVX&x64, Modrm|Vex|Space0F3A|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
 vpcmpestrm, 0x6660, AVX&No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
 vpcmpestrm, 0x6660, AVX&x64, Modrm|Vex|Space0F3A|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
-vpcmpgt<bw>, 0x6664 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcmpgtd, 0x6666, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcmpgtq, 0x6637, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpgt<bw>, 0x6664 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpgtd, 0x6666, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpgtq, 0x6637, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpcmpistri, 0x6663, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
 vpcmpistrm, 0x6662, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
-vperm2f128, 0x6606, AVX, Modrm|Vex256|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpermilps, 0x660c, AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vperm2f128, 0x6606, AVX, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpermilps, 0x660c, AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vpermilps, 0x6604, AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F3A|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpermilpd, 0x660d, AVX, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpermilpd, 0x660d, AVX, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpermilpd, 0x6605, AVX, Modrm|Vex|Space0F3A|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vpextr<dq>, 0x6616, AVX&<dq:cpu64>, Modrm|Vex|Space0F3A|<dq:vexw64>|NoSuf, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
 vpextrw, 0x66c5, AVX, Load|Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_sSuf, { Imm8, RegXMM, Reg32|Reg64 }
 vpextr<bw>, 0x6614 | <bw:opc>, AVX, RegMem|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
 vpextr<bw>, 0x6614 | <bw:opc>, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
-vphaddd, 0x6602, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vphaddsw, 0x6603, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vphaddw, 0x6601, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vphaddd, 0x6602, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vphaddsw, 0x6603, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vphaddw, 0x6601, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vphminposuw, 0x6641, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM }
-vphsubd, 0x6606, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vphsubsw, 0x6607, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vphsubw, 0x6605, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpinsrb, 0x6620, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
-vpinsrb, 0x6620, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpinsr<dq>, 0x6622, AVX&<dq:cpu64>, Modrm|Vex|Space0F3A|VexVVVV|<dq:vexw64>|NoSuf, { Imm8, <dq:gpr>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpinsrw, 0x66c4, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|No_bSuf|No_wSuf|No_sSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
-vpinsrw, 0x66c4, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmaddubsw, 0x6604, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaddwd, 0x66f5, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaxsb, 0x663c, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaxsd, 0x663d, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaxsw, 0x66ee, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaxub, 0x66de, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaxud, 0x663f, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmaxuw, 0x663e, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpminsb, 0x6638, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpminsd, 0x6639, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpminsw, 0x66ea, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpminub, 0x66da, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpminud, 0x663b, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpminuw, 0x663a, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vphsubd, 0x6606, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vphsubsw, 0x6607, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vphsubw, 0x6605, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpinsrb, 0x6620, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
+vpinsrb, 0x6620, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpinsr<dq>, 0x6622, AVX&<dq:cpu64>, Modrm|Vex|Space0F3A|Src1VVVV|<dq:vexw64>|NoSuf, { Imm8, <dq:gpr>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpinsrw, 0x66c4, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|No_bSuf|No_wSuf|No_sSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
+vpinsrw, 0x66c4, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmaddubsw, 0x6604, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaddwd, 0x66f5, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaxsb, 0x663c, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaxsd, 0x663d, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaxsw, 0x66ee, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaxub, 0x66de, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaxud, 0x663f, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmaxuw, 0x663e, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpminsb, 0x6638, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpminsd, 0x6639, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpminsw, 0x66ea, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpminub, 0x66da, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpminud, 0x663b, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpminuw, 0x663a, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpmovmskb, 0x66d7, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_sSuf, { RegXMM|RegYMM, Reg32|Reg64 }
 vpmovsxbd, 0x6621, AVX|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
 vpmovsxbq, 0x6622, AVX|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
@@ -1684,70 +1686,70 @@ vpmovzxbw, 0x6630, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|Ba
 vpmovzxdq, 0x6635, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 vpmovzxwd, 0x6633, AVX|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
 vpmovzxwq, 0x6634, AVX|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
-vpmuldq, 0x6628, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmulhrsw, 0x660b, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmulhuw, 0x66e4, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmulhw, 0x66e5, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmulld, 0x6640, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmullw, 0x66d5, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmuludq, 0x66f4, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpor, 0x66eb, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsadbw, 0x66f6, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|C|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpshufb, 0x6600, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmuldq, 0x6628, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmulhrsw, 0x660b, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmulhuw, 0x66e4, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmulhw, 0x66e5, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmulld, 0x6640, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmullw, 0x66d5, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmuludq, 0x66f4, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpor, 0x66eb, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsadbw, 0x66f6, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|C|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpshufb, 0x6600, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpshufd, 0x6670, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vpshufhw, 0xf370, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vpshuflw, 0xf270, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vpsign<bw>, 0x6608 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsignd, 0x660a, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsll<dq>, 0x6672 | <dq:opc>/6, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsll<dq>, 0x66f2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpslldq, 0x6673/7, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsllw, 0x6671/6, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsllw, 0x66f1, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrad, 0x6672/4, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrad, 0x66e2, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsraw, 0x6671/4, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsraw, 0x66e1, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrl<dq>, 0x66d2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrldq, 0x6673/3, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrlw, 0x6671/2, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrlw, 0x66d1, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsub<bw>, 0x66f8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsub<dq>, 0x66fa | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsubs<bw>, 0x66e8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsubus<bw>, 0x66d8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsign<bw>, 0x6608 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsignd, 0x660a, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsll<dq>, 0x6672 | <dq:opc>/6, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsll<dq>, 0x66f2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpslldq, 0x6673/7, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsllw, 0x6671/6, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsllw, 0x66f1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrad, 0x6672/4, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrad, 0x66e2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsraw, 0x6671/4, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsraw, 0x66e1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrl<dq>, 0x66d2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrldq, 0x6673/3, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrlw, 0x6671/2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrlw, 0x66d1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsub<bw>, 0x66f8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsub<dq>, 0x66fa | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsubs<bw>, 0x66e8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsubus<bw>, 0x66d8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vptest, 0x6617, AVX, Modrm|Vex|Space0F38|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpckhbw, 0x6668, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpckhdq, 0x666a, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpckhqdq, 0x666d, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpckhwd, 0x6669, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpcklbw, 0x6660, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpckldq, 0x6662, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpcklqdq, 0x666c, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpunpcklwd, 0x6661, AVX|AVX2, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpxor, 0x66ef, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpckhbw, 0x6668, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpckhdq, 0x666a, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpckhqdq, 0x666d, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpckhwd, 0x6669, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpcklbw, 0x6660, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpckldq, 0x6662, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpcklqdq, 0x666c, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpunpcklwd, 0x6661, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpxor, 0x66ef, AVX|AVX2, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vrcpps, 0x53, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vrcpss, 0xf353, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vrcpss, 0xf353, AVX, Modrm|Vex=3|Space0F|Src1VVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vroundp<sd>, 0x6608 | <sd:opc>, AVX, Modrm|Vex|Space0F3A|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vrounds<sd>, 0x660a | <sd:opc>, AVX, Modrm|VexLIG|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vrounds<sd>, 0x660a | <sd:opc>, AVX, Modrm|VexLIG|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 // These are really clones of VRNDSCALE{P,S}{S,D}, with broadcast, masking, SAE,
 // 512-bit operand size, and register sources dropped.
 vroundp<sd>, 0x6608 | <sd:opc>, APX_F&AVX512VL, Modrm|Space0F3A|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vrounds<sd>, 0x660a | <sd:opc>, APX_F&AVX512F, Modrm|EVexLIG|Space0F3A|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrounds<sd>, 0x660a | <sd:opc>, APX_F&AVX512F, Modrm|EVexLIG|Space0F3A|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 vrsqrtps, 0x52, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vrsqrtss, 0xf352, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vshufp<sd>, 0x<sd:ppfx>c6, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vrsqrtss, 0xf352, AVX, Modrm|Vex=3|Space0F|Src1VVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vshufp<sd>, 0x<sd:ppfx>c6, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vsqrtp<sd>, 0x<sd:ppfx>51, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vsqrts<sd>, 0x<sd:spfx>51, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vsqrts<sd>, 0x<sd:spfx>51, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vstmxcsr, 0xae/3, AVX, Modrm|Vex128|Space0F|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex }
-vsubp<sd>, 0x<sd:ppfx>5c, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vsubs<sd>, 0x<sd:spfx>5c, AVX, Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vsubp<sd>, 0x<sd:ppfx>5c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vsubs<sd>, 0x<sd:spfx>5c, AVX, Modrm|VexLIG|Space0F|Src1VVVV|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vtestp<sd>, 0x660e | <sd:opc>, AVX, Modrm|Vex|Space0F38|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vucomis<sd>, 0x<sd:ppfx>2e, AVX, Modrm|VexLIG|Space0F|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM }
-vunpckhp<sd>, 0x<sd:ppfx>15, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vunpcklp<sd>, 0x<sd:ppfx>14, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vxorp<sd>, 0x<sd:ppfx>57, AVX, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vunpckhp<sd>, 0x<sd:ppfx>15, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vunpcklp<sd>, 0x<sd:ppfx>14, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vxorp<sd>, 0x<sd:ppfx>57, AVX, Modrm|C|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vzeroall, 0x77, AVX, Vex=2|Space0F|VexWIG|NoSuf, {}
 vzeroupper, 0x77, AVX, Vex|Space0F|VexWIG|NoSuf, {}
 
@@ -1774,63 +1776,63 @@ vbroadcasti128, 0x665A, AVX2, Modrm|Vex256|Space0F38|VexW0|NoSuf, { Xmmword|Unsp
 vbroadcasti128, 0x665a, APX_F&AVX512VL, Modrm|EVex256|Space0F38|VexW0|Disp8MemShift=4|NoSuf, { Xmmword|Unspecified|BaseIndex, RegYMM }
 vbroadcastsd, 0x6619, AVX2, Modrm|Vex=2|Space0F38|VexW=1|NoSuf, { RegXMM, RegYMM }
 vbroadcastss, 0x6618, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexW0|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpblendd, 0x6602, AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpblendd, 0x6602, AVX2, Modrm|Vex|Space0F3A|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpbroadcast<bw>, 0x6678 | <bw:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf, { <bw:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
 vpbroadcastd, 0x6658, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexW0|Disp8MemShift|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpbroadcastq, 0x6659, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf|Optimize, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vperm2i128, 0x6646, AVX2, Modrm|Vex=2|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vperm2i128, 0x6646, AVX2, Modrm|Vex=2|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 vpermpd, 0x6601, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
-vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 vpermq, 0x6600, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
 vextracti128, 0x6639, AVX2, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
 // vextracti32x4 in disguise (see vround{p,s}{s,d} comment)
 vextracti128, 0x6639, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
-vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
+vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
 // vinserti32x4 in disguise (see vround{p,s}{s,d} comment)
-vinserti128, 0x6638, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexVVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
-vpmaskmov<dq>, 0x668e, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
-vpmaskmov<dq>, 0x668c, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsllv<dq>, 0x6647, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsravd, 0x6646, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrlv<dq>, 0x6645, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vinserti128, 0x6638, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
+vpmaskmov<dq>, 0x668e, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<dq:vexw>|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
+vpmaskmov<dq>, 0x668c, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsllv<dq>, 0x6647, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsravd, 0x6646, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsrlv<dq>, 0x6645, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX gather instructions
-vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|VexVVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vgatherdps, 0x6692, AVX2, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vgatherdps, 0x6692, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
-vgatherqp<sd>, 0x6693, AVX2, Modrm|Vex|Space0F38|VexVVVV|<sd:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <sd:elem>|Unspecified|BaseIndex, RegXMM }
-vgatherqpd, 0x6693, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
-vgatherqps, 0x6693, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vpgatherdd, 0x6690, AVX2, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vpgatherdd, 0x6690, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
-vpgatherdq, 0x6690, AVX2, Modrm|Vex|Space0F38|VexVVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vpgatherq<dq>, 0x6691, AVX2, Modrm|Vex128|Space0F38|VexVVVV|<dq:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <dq:elem>|Unspecified|BaseIndex, RegXMM }
-vpgatherqd, 0x6691, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
+vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
+vgatherdps, 0x6692, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vgatherdps, 0x6692, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
+vgatherqp<sd>, 0x6693, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<sd:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <sd:elem>|Unspecified|BaseIndex, RegXMM }
+vgatherqpd, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
+vgatherqps, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vpgatherdd, 0x6690, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vpgatherdd, 0x6690, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
+vpgatherdq, 0x6690, AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
+vpgatherq<dq>, 0x6691, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|<dq:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <dq:elem>|Unspecified|BaseIndex, RegXMM }
+vpgatherqd, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
 
 // AES + AVX
 
-vaesdec, 0x66de, AVX&AES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vaesdeclast, 0x66df, AVX&AES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vaesenc, 0x66dc, AVX&AES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vaesenclast, 0x66dd, AVX&AES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vaesdec, 0x66de, AVX&AES, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vaesdeclast, 0x66df, AVX&AES, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vaesenc, 0x66dc, AVX&AES, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vaesenclast, 0x66dd, AVX&AES, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vaesimc, 0x66db, AVX&AES, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM }
 vaeskeygenassist, 0x66df, AVX&AES, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
 
 // PCLMULQDQ + AVX
 
-vpclmulqdq, 0x6644, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vpclmullqlqdq, 0x6644/0x00, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vpclmulhqlqdq, 0x6644/0x01, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vpclmullqhqdq, 0x6644/0x10, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vpclmulhqhqdq, 0x6644/0x11, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vpclmulqdq, 0x6644, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vpclmullqlqdq, 0x6644/0x00, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vpclmulhqlqdq, 0x6644/0x01, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vpclmullqhqdq, 0x6644/0x10, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vpclmulhqhqdq, 0x6644/0x11, AVX&PCLMULQDQ, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 
 // GFNI + AVX
 
-vgf2p8affineinvqb, 0x66cf, AVX&GFNI, Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vgf2p8affineqb, 0x66ce, AVX&GFNI, Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vgf2p8mulb, 0x66cf, GFNI&(AVX|AVX512F), Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vgf2p8affineinvqb, 0x66cf, AVX&GFNI, Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vgf2p8affineqb, 0x66ce, AVX&GFNI, Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vgf2p8mulb, 0x66cf, GFNI&(AVX|AVX512F), Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // FSGSBASE, RDRND and F16C
 
@@ -1853,16 +1855,16 @@ vcvtps2ph, 0x661d, F16C, Modrm|Vex=2|Space0F3A|VexW=1|NoSuf, { Imm8, RegYMM, Uns
     d:AVX512F:AVX512DQ:FMA|AVX512F:66:f2:66:Space0F:Space0F38:1:Vex|EVexDYN:VexLIG|EVexLIG:VexW1:Qword, +
     h:AVX512_FP16:AVX512_FP16:AVX512_FP16::f3::EVexMap5:EVexMap6:0::EVexLIG:VexW0:Word>
 
-vfmadd<fma>p<sdh>, 0x6688 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfmadd<fma>s<sdh>, 0x6689 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vfmaddsub<fma>p<sdh>, 0x6686 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfmsub<fma>p<sdh>, 0x668a | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfmsub<fma>s<sdh>, 0x668b | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vfmsubadd<fma>p<sdh>, 0x6687 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfnmadd<fma>p<sdh>, 0x668c | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfnmadd<fma>s<sdh>, 0x668d | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vfnmsub<fma>p<sdh>, 0x668e | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfnmsub<fma>s<sdh>, 0x668f | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfmadd<fma>p<sdh>, 0x6688 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfmadd<fma>s<sdh>, 0x6689 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfmaddsub<fma>p<sdh>, 0x6686 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfmsub<fma>p<sdh>, 0x668a | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfmsub<fma>s<sdh>, 0x668b | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfmsubadd<fma>p<sdh>, 0x6687 | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfnmadd<fma>p<sdh>, 0x668c | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfnmadd<fma>s<sdh>, 0x668d | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfnmsub<fma>p<sdh>, 0x668e | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vex>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfnmsub<fma>s<sdh>, 0x668f | 0x<fma:opc>, <sdh:fma>, Modrm|<sdh:vexlig>|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 // HLE prefixes
 
@@ -1877,27 +1879,27 @@ xtest, 0xf01d6, HLE|RTM, NoSuf, {}
 
 // BMI2 instructions.
 
-bzhi, 0xf5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-mulx, 0xf2f6, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
-pdep, 0xf2f5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
-pext, 0xf3f5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+bzhi, 0xf5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+mulx, 0xf2f6, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+pdep, 0xf2f5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+pext, 0xf3f5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 rorx, 0xf2f0, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F3A|No_bSuf|No_wSuf|No_sSuf, { Imm8|Imm8S, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-sarx, 0xf3f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-shlx, 0x66f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-shrx, 0xf2f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+sarx, 0xf3f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+shlx, 0x66f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+shrx, 0xf2f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // FMA4 instructions
 
-vfmaddp<sd>, 0x6668 | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vfmadds<sd>, 0x666a | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|VexVVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
-vfmaddsubp<sd>, 0x665c | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vfmsubaddp<sd>, 0x665e | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vfmsubp<sd>, 0x666c | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vfmsubs<sd>, 0x666e | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|VexVVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
-vfnmaddp<sd>, 0x6678 | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vfnmadds<sd>, 0x667a | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|VexVVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
-vfnmsubp<sd>, 0x667c | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vfnmsubs<sd>, 0x667e | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|VexVVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
+vfmaddp<sd>, 0x6668 | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vfmadds<sd>, 0x666a | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|Src1VVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
+vfmaddsubp<sd>, 0x665c | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vfmsubaddp<sd>, 0x665e | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vfmsubp<sd>, 0x666c | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vfmsubs<sd>, 0x666e | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|Src1VVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
+vfnmaddp<sd>, 0x6678 | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vfnmadds<sd>, 0x667a | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|Src1VVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
+vfnmsubp<sd>, 0x667c | <sd:opc>, FMA4, D|Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vfnmsubs<sd>, 0x667e | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|Src1VVVV|VexW1|NoSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM, RegXMM }
 
 // XOP instructions
 
@@ -1907,11 +1909,11 @@ vfnmsubs<sd>, 0x667e | <sd:opc>, FMA4, D|Modrm|VexLIG|Space0F3A|VexVVVV|VexW1|No
 
 vfrczp<sd>, 0x80 | <sd:opc>, XOP, Modrm|SpaceXOP09|VexW0|CheckOperandSize|NoSuf|Vex, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vfrczs<sd>, 0x82 | <sd:opc>, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { <sd:elem>|RegXMM|Unspecified|BaseIndex, RegXMM }
-vpcmov, 0xa2, XOP, D|Modrm|Vex|SpaceXOP08|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcom<sign><xop>, 0xcc | 0x<sign:opc> | <xop:opc>, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpcom<irel><sign><xop>, 0xcc | 0x<sign:opc> | <xop:opc>/<irel:imm>, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpermil2p<sd>, 0x6648 | <sd:opc>, XOP, Modrm|Vex|Space0F3A|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpermil2p<sd>, 0x6648 | <sd:opc>, XOP, Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmov, 0xa2, XOP, D|Modrm|Vex|SpaceXOP08|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcom<sign><xop>, 0xcc | 0x<sign:opc> | <xop:opc>, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpcom<irel><sign><xop>, 0xcc | 0x<sign:opc> | <xop:opc>/<irel:imm>, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpermil2p<sd>, 0x6648 | <sd:opc>, XOP, Modrm|Vex|Space0F3A|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpermil2p<sd>, 0x6648 | <sd:opc>, XOP, Modrm|Vex|Space0F3A|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vphaddb<dq>, 0xc2 | <dq:opc>, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Unspecified|BaseIndex, RegXMM }
 vphaddbw, 0xc1, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Unspecified|BaseIndex, RegXMM }
 vphadddq, 0xcb, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1923,23 +1925,23 @@ vphaddw<dq>, 0xc6 | <dq:opc>, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Un
 vphsubbw, 0xe1, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Unspecified|BaseIndex, RegXMM }
 vphsubdq, 0xe3, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Unspecified|BaseIndex, RegXMM }
 vphsubwd, 0xe2, XOP, Modrm|SpaceXOP09|VexW0|NoSuf|Vex, { RegXMM|Unspecified|BaseIndex, RegXMM }
-vpmacsdd, 0x9e, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacsdqh, 0x9f, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacsdql, 0x97, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacssdd, 0x8e, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacssdqh, 0x8f, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacssdql, 0x87, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacsswd, 0x86, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacssww, 0x85, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacswd, 0x96, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmacsww, 0x95, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmadcsswd, 0xa6, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpmadcswd, 0xb6, XOP, Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpperm, 0xa3, XOP, D|Modrm|Vex128|SpaceXOP08|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|VexVVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vpmacsdd, 0x9e, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacsdqh, 0x9f, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacsdql, 0x97, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacssdd, 0x8e, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacssdqh, 0x8f, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacssdql, 0x87, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacsswd, 0x86, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacssww, 0x85, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacswd, 0x96, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmacsww, 0x95, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmadcsswd, 0xa6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpmadcswd, 0xb6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpperm, 0xa3, XOP, D|Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
 vprot<xop>, 0xc0 | <xop:opc>, XOP, Modrm|Vex128|SpaceXOP08|VexW0|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|VexVVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
-vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|VexVVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
 
 <xop>
 <irel>
@@ -1949,31 +1951,31 @@ vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|VexVVVV|SwapSources
 
 llwpcb, 0x12/0, LWP, Modrm|SpaceXOP09|NoSuf|Vex, { Reg32|Reg64 }
 slwpcb, 0x12/1, LWP, Modrm|SpaceXOP09|NoSuf|Vex, { Reg32|Reg64 }
-lwpval, 0x12/1, LWP, Modrm|SpaceXOP0A|NoSuf|VexVVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
-lwpins, 0x12/0, LWP, Modrm|SpaceXOP0A|NoSuf|VexVVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
+lwpval, 0x12/1, LWP, Modrm|SpaceXOP0A|NoSuf|Src1VVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
+lwpins, 0x12/0, LWP, Modrm|SpaceXOP0A|NoSuf|Src1VVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // BMI instructions
 
-andn, 0xf2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
-bextr, 0xf7, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsi, 0xf3/3, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsmsk, 0xf3/2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsr, 0xf3/1, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+andn, 0xf2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+bextr, 0xf7, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsi, 0xf3/3, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsmsk, 0xf3/2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsr, 0xf3/1, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 tzcnt, 0xf30fbc, BMI, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 tzcnt, 0xf4, BMI&APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 
 // TBM instructions
 
 bextr, 0x10, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP0A|No_bSuf|No_wSuf|No_sSuf, { Imm32|Imm32S, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcfill, 0x01/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blci, 0x02/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcic, 0x01/5, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcmsk, 0x02/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcs, 0x01/3, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsfill, 0x01/2, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsic, 0x01/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-t1mskc, 0x01/7, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-tzmsk, 0x01/4, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcfill, 0x01/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blci, 0x02/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcic, 0x01/5, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcmsk, 0x02/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcs, 0x01/3, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsfill, 0x01/2, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsic, 0x01/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+t1mskc, 0x01/7, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+tzmsk, 0x01/4, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // AMD 3DNow! instructions.
 
@@ -2103,42 +2105,42 @@ sha256msg2, 0xf38cd, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 
 // SHA512 instructions.
 
-vsha512rnds2, 0xf2cb, SHA512, Modrm|Vex256|Space0F38|VexVVVV|VexW0|NoSuf, { RegXMM, RegYMM, RegYMM }
+vsha512rnds2, 0xf2cb, SHA512, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|NoSuf, { RegXMM, RegYMM, RegYMM }
 vsha512msg1, 0xf2cc, SHA512, Modrm|Vex256|Space0F38|VexW0|NoSuf, { RegXMM, RegYMM }
 vsha512msg2, 0xf2cd, SHA512, Modrm|Vex256|Space0F38|VexW0|NoSuf, { RegYMM, RegYMM }
 
 // SHA512 instructions end.
 
 // SM3 instructions.
-vsm3rnds2, 0x66de, SM3, Modrm|Space0F3A|Vex128|VexVVVV|VexW0|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vsm3msg1, 0xda, SM3, Modrm|Space0F38|Vex128|VexVVVV|VexW0|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|VexVVVV|VexW0|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vsm3rnds2, 0x66de, SM3, Modrm|Space0F3A|Vex128|Src1VVVV|VexW0|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vsm3msg1, 0xda, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 // SM3 instructions end.
 
 // SM4 instructions.
 
-vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
 
 // SM4 instructions end.
 
 // VAES
 
-vaesdec, 0x66de, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vaesdeclast, 0x66df, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vaesenc, 0x66dc, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vaesenclast, 0x66dd, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vaesdec, 0x66de, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vaesdeclast, 0x66df, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vaesenc, 0x66dc, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vaesenclast, 0x66dd, VAES&(AVX|AVX512F), Modrm|Vex|EVexDYN|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // VAES instructions end
 
 // VPCLMULQDQ instructions
 
-vpclmulqdq, 0x6644, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpclmullqlqdq, 0x6644/0x00, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpclmulhqlqdq, 0x6644/0x01, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpclmullqhqdq, 0x6644/0x10, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpclmulqdq, 0x6644, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpclmullqlqdq, 0x6644/0x00, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpclmulhqlqdq, 0x6644/0x01, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpclmullqhqdq, 0x6644/0x10, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDYN|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // VPCLMULQDQ instructions end
 
@@ -2155,11 +2157,11 @@ vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDY
     x:AVX512VL:EVex128|Disp8MemShift=4|ATTSyntax:::RegXMM|Unspecified|BaseIndex:RegXMM, +
     y:AVX512VL:EVex256|Disp8MemShift=5|ATTSyntax:::RegYMM|Unspecified|BaseIndex:RegXMM>
 
-kand<bw>, 0x<bw:kpfx>41, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
-kandn<bw>, 0x<bw:kpfx>42, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
-kor<bw>, 0x<bw:kpfx>45, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
-kxnor<bw>, 0x<bw:kpfx>46, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
-kxor<bw>, 0x<bw:kpfx>47, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kand<bw>, 0x<bw:kpfx>41, <bw:kcpu>, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kandn<bw>, 0x<bw:kpfx>42, <bw:kcpu>, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kor<bw>, 0x<bw:kpfx>45, <bw:kcpu>, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kxnor<bw>, 0x<bw:kpfx>46, <bw:kcpu>, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kxor<bw>, 0x<bw:kpfx>47, <bw:kcpu>, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
 
 kmov<bw>, 0x<bw:kpfx>90, APX_F(<bw:kcpu>), Modrm|Vex128|EVex128|Space0F|VexW0|NoSuf, { RegMask|<bw:elem>|Unspecified|BaseIndex, RegMask }
 kmov<bw>, 0x<bw:kpfx>91, APX_F(<bw:kcpu>), Modrm|Vex128|EVex128|Space0F|VexW0|NoSuf, { RegMask, <bw:elem>|Unspecified|BaseIndex }
@@ -2171,37 +2173,37 @@ kortest<bw>, 0x<bw:kpfx>98, <bw:kcpu>, Modrm|Vex128|Space0F|VexW0|NoSuf, { RegMa
 kshiftl<bw>, 0x6632, <bw:kcpu>, Modrm|Vex128|Space0F3A|<bw:vexw>|NoSuf, { Imm8, RegMask, RegMask }
 kshiftr<bw>, 0x6630, <bw:kcpu>, Modrm|Vex128|Space0F3A|<bw:vexw>|NoSuf, { Imm8, RegMask, RegMask }
 
-kunpckbw, 0x664B, AVX512F, Modrm|Vex=2|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kunpckbw, 0x664B, AVX512F, Modrm|Vex=2|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
 
-vaddp<sdh>, 0x<sdh:ppfx>58, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vdivp<sdh>, 0x<sdh:ppfx>5e, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vmulp<sdh>, 0x<sdh:ppfx>59, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vaddp<sdh>, 0x<sdh:ppfx>58, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vdivp<sdh>, 0x<sdh:ppfx>5e, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vmulp<sdh>, 0x<sdh:ppfx>59, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vsqrtp<sdh>, 0x<sdh:ppfx>51, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vsubp<sdh>, 0x<sdh:ppfx>5c, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vadds<sdh>, 0x<sdh:spfx>58, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vdivs<sdh>, 0x<sdh:spfx>5e, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vmuls<sdh>, 0x<sdh:spfx>59, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vsqrts<sdh>, 0x<sdh:spfx>51, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vsubs<sdh>, 0x<sdh:spfx>5C, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
-
-valign<dq>, 0x6603, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vblendmp<sd>, 0x6665, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpblendm<dq>, 0x6664, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpermi2<dq>, 0x6676, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpermi2p<sd>, 0x6677, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpermt2<dq>, 0x667E, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpermt2p<sd>, 0x667F, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmaxs<dq>, 0x663D, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmaxu<dq>, 0x663F, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmins<dq>, 0x6639, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpminu<dq>, 0x663B, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmuldq, 0x6628, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW=2|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmulld, 0x6640, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW=1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vprolv<dq>, 0x6615, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vprorv<dq>, 0x6614, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsravq, 0x6646, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpternlog<dq>, 0x6625, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vsubp<sdh>, 0x<sdh:ppfx>5c, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vadds<sdh>, 0x<sdh:spfx>58, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vdivs<sdh>, 0x<sdh:spfx>5e, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vmuls<sdh>, 0x<sdh:spfx>59, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vsqrts<sdh>, 0x<sdh:spfx>51, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vsubs<sdh>, 0x<sdh:spfx>5C, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+
+valign<dq>, 0x6603, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vblendmp<sd>, 0x6665, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpblendm<dq>, 0x6664, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermi2<dq>, 0x6676, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermi2p<sd>, 0x6677, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermt2<dq>, 0x667E, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermt2p<sd>, 0x667F, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmaxs<dq>, 0x663D, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmaxu<dq>, 0x663F, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmins<dq>, 0x6639, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpminu<dq>, 0x663B, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmuldq, 0x6628, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW=2|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmulld, 0x6640, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW=1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vprolv<dq>, 0x6615, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vprorv<dq>, 0x6614, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsravq, 0x6646, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpternlog<dq>, 0x6625, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 vbroadcastf32x4, 0x661A, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { XMMword|Unspecified|BaseIndex, RegYMM|RegZMM }
 vbroadcasti32x4, 0x665A, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { XMMword|Unspecified|BaseIndex, RegYMM|RegZMM }
@@ -2214,11 +2216,11 @@ vbroadcastsd, 0x6619, AVX512F, Modrm|Masking|Space0F38|VexW1|Disp8MemShift=3|NoS
 vpbroadcastq, 0x6659, AVX512F, Modrm|Masking|Space0F38|VexW1|Disp8MemShift|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpbroadcast<dq>, 0x667c, AVX512F, Modrm|Masking|Space0F38|<dq:vexw64>|NoSuf, { <dq:gpr>, RegXMM|RegYMM|RegZMM }
 
-vcmp<frel>p<sd>, 0x<sd:ppfx>C2/0x<frel:imm>, AVX512F, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt|SAE, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vcmpp<sd>, 0x<sd:ppfx>C2, AVX512F, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vcmp<frel>p<sd>, 0x<sd:ppfx>C2/0x<frel:imm>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt|SAE, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vcmpp<sd>, 0x<sd:ppfx>C2, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
-vcmp<frel>s<sd>, 0x<sd:spfx>C2/0x<frel:imm>, AVX512F, Modrm|EVexLIG|Masking|Space0F|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE|ImmExt, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegMask }
-vcmps<sd>, 0x<sd:spfx>C2, AVX512F, Modrm|EVexLIG|Masking|Space0F|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmp<frel>s<sd>, 0x<sd:spfx>C2/0x<frel:imm>, AVX512F, Modrm|EVexLIG|Masking|Space0F|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE|ImmExt, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmps<sd>, 0x<sd:spfx>C2, AVX512F, Modrm|EVexLIG|Masking|Space0F|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegMask }
 
 vcomis<sdh>, 0x<sdh:ppfx>2f, <sdh:cpu>, Modrm|EVexLIG|<sdh:spc1>|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM }
 vucomis<sdh>, 0x<sdh:ppfx>2e, <sdh:cpu>, Modrm|EVexLIG|<sdh:spc1>|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM }
@@ -2260,23 +2262,23 @@ vcvtps2ph, 0x661D, AVX512F, Modrm|EVex512|Masking|Space0F3A|VexW0|Disp8MemShift=
 vcvts<sd>2si, 0x<sd:spfx>2d, AVX512F, Modrm|EVexLIG|Space0F|Disp8MemShift|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE, { RegXMM|<sd:elem>|Unspecified|BaseIndex, Reg32|Reg64 }
 vcvts<sdh>2usi, 0x<sdh:spfx>79, <sdh:cpu>, Modrm|EVexLIG|<sdh:spc1>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, Reg32|Reg64 }
 
-vcvtsd2ss, 0xF25A, AVX512F, Modrm|EVexLIG|Masking|Space0F|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|StaticRounding|SAE, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsd2ss, 0xF25A, AVX512F, Modrm|EVexLIG|Masking|Space0F|Src1VVVV|VexW1|Disp8MemShift=3|NoSuf|StaticRounding|SAE, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sd, 0xF22A, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sd, 0xF27B, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcvtsi2ss, 0xF32A, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2ss, 0xF32A, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2ss, 0xF37B, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2ss, 0xF37B, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2ss, 0xF32A, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2ss, 0xF32A, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2ss, 0xF37B, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2ss, 0xF37B, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcvtss2sd, 0xF35A, AVX512F, Modrm|EVexLIG|Masking|Space0F|VexVVVV|VexW0|Disp8MemShift=2|NoSuf|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtss2sd, 0xF35A, AVX512F, Modrm|EVexLIG|Masking|Space0F|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vcvttpd2dq<Exy>, 0x66e6, AVX512F&<Exy:vl>, Modrm|<Exy:attr>|Masking|Space0F|VexW1|Broadcast|NoSuf|<Exy:sae>, { <Exy:src>|Qword, <Exy:dst> }
 vcvttpd2udq<Exy>, 0x78, AVX512F&<Exy:vl>, Modrm|<Exy:attr>|Masking|Space0F|VexW1|Broadcast|NoSuf|<Exy:sae>, { <Exy:src>|Qword, <Exy:dst> }
@@ -2301,17 +2303,17 @@ vextracti32x4, 0x6639, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|N
 vextractf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
 vextracti64x4, 0x663B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
 
-vfixupimmp<sd>, 0x6654, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfixupimms<sd>, 0x6655, AVX512F, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8|Imm8S, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfixupimmp<sd>, 0x6654, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfixupimms<sd>, 0x6655, AVX512F, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8|Imm8S, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vgetmantp<sdh>, 0x<sdh:pfx>26, <sdh:cpu>, Modrm|Masking|Space0F3A|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vgetmants<sdh>, 0x<sdh:pfx>27, <sdh:cpu>, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vgetmants<sdh>, 0x<sdh:pfx>27, <sdh:cpu>, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vrndscalep<sdh>, 0x<sdh:pfx>08 | <sdh:opc>, <sdh:cpu>, Modrm|Masking|Space0F3A|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vrndscales<sdh>, 0x<sdh:pfx>0a | <sdh:opc>, <sdh:cpu>, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrndscales<sdh>, 0x<sdh:pfx>0a | <sdh:opc>, <sdh:cpu>, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vscalefp<sdh>, 0x662c, <sdh:cpu>, Modrm|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vscalefs<sdh>, 0x662d, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vscalefp<sdh>, 0x662c, <sdh:cpu>, Modrm|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vscalefs<sdh>, 0x662d, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|StaticRounding|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vgatherdpd, 0x6692, AVX512F, Modrm|EVex=1|Masking|NoDefMask|Space0F38|VexW1|Disp8MemShift=3|VecSIB256|NoSuf, { Qword|Unspecified|BaseIndex, RegZMM }
 vgatherdps, 0x6692, AVX512F, Modrm|EVex512|Masking|NoDefMask|Space0F38|VexW0|Disp8MemShift=2|VecSIB512|NoSuf, { Dword|Unspecified|BaseIndex, RegZMM }
@@ -2325,21 +2327,21 @@ vpgatherqq, 0x6691, AVX512F, Modrm|EVex=1|Masking|NoDefMask|Space0F38|VexW1|Disp
 vmovntdqa, 0x662A, AVX512F, Modrm|Space0F38|VexW=1|Disp8ShiftVL|CheckOperandSize|NoSuf, { XMMword|YMMword|ZMMword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vgetexpp<sdh>, 0x6642, <sdh:cpu>, Modrm|Masking|<sdh:spc2>|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vgetexps<sdh>, 0x6643, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc2>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vgetexps<sdh>, 0x6643, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc2>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vinsertf32x4, 0x6618, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vinserti32x4, 0x6638, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vinsertf32x4, 0x6618, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vinserti32x4, 0x6638, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 
-vinsertf64x4, 0x661A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexVVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
-vinserti64x4, 0x663A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexVVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
+vinsertf64x4, 0x661A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
+vinserti64x4, 0x663A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
 
-vinsertps, 0x6621, AVX512F, Modrm|EVex128|Space0F3A|VexVVVV|VexW0|Disp8MemShift=2|NoSuf, { Imm8, RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vinsertps, 0x6621, AVX512F, Modrm|EVex128|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf, { Imm8, RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vmaxp<sdh>, 0x<sdh:ppfx>5f, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vmaxs<sdh>, 0x<sdh:spfx>5f, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vmaxp<sdh>, 0x<sdh:ppfx>5f, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vmaxs<sdh>, 0x<sdh:spfx>5f, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vminp<sdh>, 0x<sdh:ppfx>5d, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vmins<sdh>, 0x<sdh:spfx>5d, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vminp<sdh>, 0x<sdh:ppfx>5d, <sdh:cpu>, Modrm|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vmins<sdh>, 0x<sdh:spfx>5d, <sdh:cpu>, Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vmovap<sd>, 0x<sd:ppfx>28, AVX512F, D|Modrm|Masking|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vmovntp<sd>, 0x<sd:ppfx>2B, AVX512F, Modrm|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM, XMMword|YMMword|ZMMword|Unspecified|BaseIndex }
@@ -2353,56 +2355,56 @@ vmovntdq, 0x66E7, AVX512F, Modrm|Space0F|VexW=1|Disp8ShiftVL|CheckOperandSize|No
 vmovdqu32, 0xF36F, AVX512F, D|Modrm|Masking|Space0F|VexW=1|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vmovdqu64, 0xF36F, AVX512F, D|Modrm|Masking|Space0F|VexW=2|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vmovhlps, 0x12, AVX512F, Modrm|EVex=4|Space0F|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM, RegXMM }
-vmovlhps, 0x16, AVX512F, Modrm|EVex=4|Space0F|VexVVVV|VexW0|NoSuf, { RegXMM, RegXMM, RegXMM }
+vmovhlps, 0x12, AVX512F, Modrm|EVex=4|Space0F|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM, RegXMM }
+vmovlhps, 0x16, AVX512F, Modrm|EVex=4|Space0F|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM, RegXMM }
 
-vmovhp<sd>, 0x<sd:ppfx>16, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|<sd:vexw>|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vmovhp<sd>, 0x<sd:ppfx>16, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|<sd:vexw>|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vmovhp<sd>, 0x<sd:ppfx>17, AVX512F, Modrm|EVexLIG|Space0F|<sd:vexw>|Disp8MemShift=3|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex }
-vmovlp<sd>, 0x<sd:ppfx>12, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|<sd:vexw>|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vmovlp<sd>, 0x<sd:ppfx>12, AVX512F, Modrm|EVexLIG|Space0F|Src1VVVV|<sd:vexw>|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vmovlp<sd>, 0x<sd:ppfx>13, AVX512F, Modrm|EVexLIG|Space0F|<sd:vexw>|Disp8MemShift=3|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex }
 
 vmovq, 0xF37E, AVX512F, Load|Modrm|EVex=2|Space0F|VexW1|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 vmovq, 0x66D6, AVX512F, Modrm|EVex=2|Space0F|VexW1|Disp8MemShift=3|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
 
 vmovs<sdh>, 0x<sdh:spfx>10, <sdh:cpu>, D|Modrm|EVexLIG|Masking|<sdh:spc1>|<sdh:vexw>|Disp8MemShift|NoSuf, { <sdh:elem>|Unspecified|BaseIndex, RegXMM }
-vmovs<sdh>, 0x<sdh:spfx>10, <sdh:cpu>, D|Modrm|EVexLIG|Masking|<sdh:spc1>|VexVVVV|<sdh:vexw>|NoSuf, { RegXMM, RegXMM, RegXMM }
+vmovs<sdh>, 0x<sdh:spfx>10, <sdh:cpu>, D|Modrm|EVexLIG|Masking|<sdh:spc1>|Src1VVVV|<sdh:vexw>|NoSuf, { RegXMM, RegXMM, RegXMM }
 
 vmovshdup, 0xF316, AVX512F, Modrm|Masking|Space0F|VexW=1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vmovsldup, 0xF312, AVX512F, Modrm|Masking|Space0F|VexW=1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpabs<dq>, 0x661e | <dq:opc>, AVX512F, Modrm|Masking|Space0F38|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpaddd, 0x66FE, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpaddq, 0x66d4, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpand<dq>, 0x66db, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpandn<dq>, 0x66df, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmuludq, 0x66f4, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpor<dq>, 0x66eb, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsub<dq>, 0x66fa | <dq:opc>, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpckhdq, 0x666A, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpckhqdq, 0x666d, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpckldq, 0x6662, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpcklqdq, 0x666c, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpxor<dq>, 0x66ef, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpaddd, 0x66FE, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpaddq, 0x66d4, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpand<dq>, 0x66db, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpandn<dq>, 0x66df, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmuludq, 0x66f4, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpor<dq>, 0x66eb, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsub<dq>, 0x66fa | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpckhdq, 0x666A, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpckhqdq, 0x666d, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpckldq, 0x6662, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpcklqdq, 0x666c, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpxor<dq>, 0x66ef, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 <irel:imm, eq:0, lt:1, le:2, neq:4, nlt:5, nle:6>
 
-vpcmpeqd, 0x6676, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmpeqq, 0x6629, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmpgtd, 0x6666, AVX512F, Modrm|Masking|Space0F|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmpgtq, 0x6637, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmp<dq>, 0x661f, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmpu<dq>, 0x661e, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmp<irel><dq>, 0x661f/<irel:imm>, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmp<irel>u<dq>, 0x661e/<irel:imm>, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpeqd, 0x6676, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpeqq, 0x6629, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpgtd, 0x6666, AVX512F, Modrm|Masking|Space0F|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpgtq, 0x6637, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmp<dq>, 0x661f, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpu<dq>, 0x661e, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmp<irel><dq>, 0x661f/<irel:imm>, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmp<irel>u<dq>, 0x661e/<irel:imm>, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
-vptestm<dq>, 0x6627, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vptestnm<dq>, 0xf327, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vptestm<dq>, 0x6627, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vptestnm<dq>, 0xf327, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
 vpermilpd, 0x6605, AVX512F, Modrm|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpermilpd, 0x660d, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermilpd, 0x660d, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vpermpd, 0x6616, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vpermq, 0x6636, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vpermpd, 0x6616, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vpermq, 0x6636, AVX512F, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 
 vpmovdb, 0xF331, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { RegZMM, RegXMM|Unspecified|BaseIndex }
 vpmovsdb, 0xF321, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { RegZMM, RegXMM|Unspecified|BaseIndex }
@@ -2439,34 +2441,34 @@ vpmovzxwd, 0x6633, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift=
 vpmovsxwq, 0x6624, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegZMM }
 vpmovzxwq, 0x6634, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegZMM }
 
-vprol<dq>, 0x6672/1, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpror<dq>, 0x6672/0, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vprol<dq>, 0x6672/1, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpror<dq>, 0x6672/0, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpshufd, 0x6670, AVX512F, Modrm|Masking|Space0F|VexW=1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vpsll<dq>, 0x66f2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsll<dq>, 0x6672 | <dq:opc>/6, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsra<dq>, 0x66e2, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsra<dq>, 0x6672/4, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsrl<dq>, 0x66d2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX512F, Modrm|Masking|Space0F|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsll<dq>, 0x66f2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsll<dq>, 0x6672 | <dq:opc>/6, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsra<dq>, 0x66e2, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsra<dq>, 0x6672/4, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsrl<dq>, 0x66d2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vrcp14p<sd>, 0x664C, AVX512F, Modrm|Masking|Space0F38|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vrcp14s<sd>, 0x664D, AVX512F, Modrm|EVexLIG|Masking|Space0F38|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrcp14s<sd>, 0x664D, AVX512F, Modrm|EVexLIG|Masking|Space0F38|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vrsqrt14p<sd>, 0x664E, AVX512F, Modrm|Masking|Space0F38|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vrsqrt14s<sd>, 0x664F, AVX512F, Modrm|EVexLIG|Masking|Space0F38|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrsqrt14s<sd>, 0x664F, AVX512F, Modrm|EVexLIG|Masking|Space0F38|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vshuff32x4, 0x6623, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vshufi32x4, 0x6643, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vshuff32x4, 0x6623, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vshufi32x4, 0x6643, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 
-vshuff64x2, 0x6623, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vshufi64x2, 0x6643, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vshuff64x2, 0x6623, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vshufi64x2, 0x6643, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 
-vshufp<sd>, 0x<sd:ppfx>C6, AVX512F, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vshufp<sd>, 0x<sd:ppfx>C6, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vunpckhp<sd>, 0x<sd:ppfx>15, AVX512F, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vunpcklp<sd>, 0x<sd:ppfx>14, AVX512F, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vunpckhp<sd>, 0x<sd:ppfx>15, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vunpcklp<sd>, 0x<sd:ppfx>14, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512F instructions end.
 
@@ -2486,10 +2488,10 @@ vplzcnt<dq>, 0x6644, AVX512CD, Modrm|Masking|Space0F38|<dq:vexw>|Broadcast|Disp8
 vexp2p<sd>, 0x66C8, AVX512ER, Modrm|EVex512|Masking|Space0F38|<sd:vexw>|Broadcast|Disp8MemShift=6|NoSuf|SAE, { RegZMM|<sd:elem>|Unspecified|BaseIndex, RegZMM }
 
 vrcp28p<sd>, 0x66CA, AVX512ER, Modrm|EVex512|Masking|Space0F38|<sd:vexw>|Broadcast|Disp8MemShift=6|NoSuf|SAE, { RegZMM|<sd:elem>|Unspecified|BaseIndex, RegZMM }
-vrcp28s<sd>, 0x66CB, AVX512ER, Modrm|EVexLIG|Masking|Space0F38|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrcp28s<sd>, 0x66CB, AVX512ER, Modrm|EVexLIG|Masking|Space0F38|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vrsqrt28p<sd>, 0x66CC, AVX512ER, Modrm|EVex512|Masking|Space0F38|<sd:vexw>|Broadcast|Disp8MemShift=6|NoSuf|SAE, { RegZMM|<sd:elem>|Unspecified|BaseIndex, RegZMM }
-vrsqrt28s<sd>, 0x66CD, AVX512ER, Modrm|EVexLIG|Masking|Space0F38|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrsqrt28s<sd>, 0x66CD, AVX512ER, Modrm|EVexLIG|Masking|Space0F38|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 // AVX512ER instructions end.
 
@@ -2635,102 +2637,102 @@ vpmovzxdq, 0x6635, AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexW=1|Disp8MemShift
 
 // AVX512BW instructions.
 
-kadd<dq>, 0x<dq:kpfx>4a, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kand<dq>, 0x<dq:kpfx>41, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kandn<dq>, 0x<dq:kpfx>42, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf|Optimize, { RegMask, RegMask, RegMask }
+kadd<dq>, 0x<dq:kpfx>4a, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
+kand<dq>, 0x<dq:kpfx>41, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
+kandn<dq>, 0x<dq:kpfx>42, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf|Optimize, { RegMask, RegMask, RegMask }
 kmov<dq>, 0x<dq:kpfx>90, APX_F(AVX512BW), Modrm|Vex128|EVex128|Space0F|VexW1|NoSuf, { RegMask|<dq:elem>|Unspecified|BaseIndex, RegMask }
 kmov<dq>, 0x<dq:kpfx>91, APX_F(AVX512BW), Modrm|Vex128|EVex128|Space0F|VexW1|NoSuf, { RegMask, <dq:elem>|Unspecified|BaseIndex }
 kmov<dq>, 0xf292, APX_F(AVX512BW), D|Modrm|Vex128|EVex128|Space0F|<dq:vexw64>|NoSuf, { <dq:gpr>, RegMask }
 knot<dq>, 0x<dq:kpfx>44, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, RegMask }
-kor<dq>, 0x<dq:kpfx>45, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
+kor<dq>, 0x<dq:kpfx>45, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
 kortest<dq>, 0x<dq:kpfx>98, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, RegMask }
 ktest<dq>, 0x<dq:kpfx>99, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, RegMask }
-kxnor<dq>, 0x<dq:kpfx>46, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kxor<dq>, 0x<dq:kpfx>47, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf|Optimize, { RegMask, RegMask, RegMask }
-kunpckdq, 0x4B, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kunpckwd, 0x4B, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kxnor<dq>, 0x<dq:kpfx>46, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
+kxor<dq>, 0x<dq:kpfx>47, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf|Optimize, { RegMask, RegMask, RegMask }
+kunpckdq, 0x4B, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
+kunpckwd, 0x4B, AVX512BW, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
 kshiftl<dq>, 0x6633, AVX512BW, Modrm|Vex128|Space0F3A|<dq:vexw>|NoSuf, { Imm8, RegMask, RegMask }
 kshiftr<dq>, 0x6631, AVX512BW, Modrm|Vex128|Space0F3A|<dq:vexw>|NoSuf, { Imm8, RegMask, RegMask }
 
-vdbpsadbw, 0x6642, AVX512BW, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vdbpsadbw, 0x6642, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 vmovdqu8, 0xF26F, AVX512BW, D|Modrm|Masking|Space0F|VexW=1|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vmovdqu16, 0xF26F, AVX512BW, D|Modrm|Masking|Space0F|VexW=2|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpabs<bw>, 0x661c | <bw:opc>, AVX512BW, Modrm|Masking|Space0F38|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpmaxsb, 0x663C, AVX512BW, Modrm|Masking|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpminsb, 0x6638, AVX512BW, Modrm|Masking|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpshufb, 0x6600, AVX512BW, Modrm|Masking|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vpmaddubsw, 0x6604, AVX512BW, Modrm|Masking|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmaxuw, 0x663E, AVX512BW, Modrm|Masking|VexWIG|Space0F38|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpminuw, 0x663A, AVX512BW, Modrm|Masking|VexWIG|Space0F38|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmulhrsw, 0x660B, AVX512BW, Modrm|Masking|Space0F38|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vpackssdw, 0x666B, AVX512BW, Modrm|Masking|Space0F|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpacksswb, 0x6663, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpackuswb, 0x6667, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpackusdw, 0x662B, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vpadd<bw>, 0x66fc | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpadds<bw>, 0x66ec | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpaddus<bw>, 0x66dc | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpavg<bw>, 0x66e0 | (3 * <bw:opc>), AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmaxub, 0x66DE, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpminub, 0x66DA, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsub<bw>, 0x66f8 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsubs<bw>, 0x66e8 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsubus<bw>, 0x66d8 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpckhbw, 0x6668, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpcklbw, 0x6660, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vpmaxsw, 0x66EE, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpminsw, 0x66EA, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmulhuw, 0x66E4, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmulhw, 0x66E5, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmullw, 0x66D5, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsllw, 0x6671/6, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsllw, 0x66F1, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsraw, 0x6671/4, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsraw, 0x66E1, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrlw, 0x6671/2, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsrlw, 0x66D1, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpckhwd, 0x6669, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpunpcklwd, 0x6661, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vpalignr, 0x660F, AVX512BW, Modrm|Masking|Space0F3A|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-
-vpblendm<bw>, 0x6666, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmaxsb, 0x663C, AVX512BW, Modrm|Masking|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpminsb, 0x6638, AVX512BW, Modrm|Masking|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshufb, 0x6600, AVX512BW, Modrm|Masking|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vpmaddubsw, 0x6604, AVX512BW, Modrm|Masking|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmaxuw, 0x663E, AVX512BW, Modrm|Masking|VexWIG|Space0F38|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpminuw, 0x663A, AVX512BW, Modrm|Masking|VexWIG|Space0F38|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmulhrsw, 0x660B, AVX512BW, Modrm|Masking|Space0F38|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vpackssdw, 0x666B, AVX512BW, Modrm|Masking|Space0F|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpacksswb, 0x6663, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpackuswb, 0x6667, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpackusdw, 0x662B, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vpadd<bw>, 0x66fc | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpadds<bw>, 0x66ec | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpaddus<bw>, 0x66dc | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpavg<bw>, 0x66e0 | (3 * <bw:opc>), AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmaxub, 0x66DE, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpminub, 0x66DA, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsub<bw>, 0x66f8 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsubs<bw>, 0x66e8 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsubus<bw>, 0x66d8 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpckhbw, 0x6668, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpcklbw, 0x6660, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vpmaxsw, 0x66EE, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpminsw, 0x66EA, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmulhuw, 0x66E4, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmulhw, 0x66E5, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmullw, 0x66D5, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsllw, 0x6671/6, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsllw, 0x66F1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsraw, 0x6671/4, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsraw, 0x66E1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsrlw, 0x6671/2, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsrlw, 0x66D1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpckhwd, 0x6669, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpunpcklwd, 0x6661, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vpalignr, 0x660F, AVX512BW, Modrm|Masking|Space0F3A|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+
+vpblendm<bw>, 0x6666, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vpbroadcast<bw>, 0x6678 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F38|VexW0|Disp8MemShift|NoSuf, { RegXMM|<bw:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpbroadcast<bw>, 0x667a | <bw:opc>, AVX512BW, Modrm|Masking|Space0F38|VexW0|NoSuf, { Reg32, RegXMM|RegYMM|RegZMM }
 
-vpermi2<bw>, 0x6675, <bw:cpubmi>, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpermt2<bw>, 0x667d, <bw:cpubmi>, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vperm<bw>, 0x668d, <bw:cpubmi>, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsllvw, 0x6612, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsravw, 0x6611, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrlvw, 0x6610, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermi2<bw>, 0x6675, <bw:cpubmi>, Modrm|Masking|Space0F38|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermt2<bw>, 0x667d, <bw:cpubmi>, Modrm|Masking|Space0F38|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vperm<bw>, 0x668d, <bw:cpubmi>, Modrm|Masking|Space0F38|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsllvw, 0x6612, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsravw, 0x6611, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsrlvw, 0x6610, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vpcmpeq<bw>, 0x6674 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmpgt<bw>, 0x6664 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmp<bw>, 0x663f, AVX512BW, Modrm|Masking|Space0F3A|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmpu<bw>, 0x663e, AVX512BW, Modrm|Masking|Space0F3A|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmp<irel><bw>, 0x663f/<irel:imm>, AVX512BW, Modrm|Masking|Space0F3A|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpcmp<irel>u<bw>, 0x663e/<irel:imm>, AVX512BW, Modrm|Masking|Space0F3A|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpeq<bw>, 0x6674 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpgt<bw>, 0x6664 | <bw:opc>, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmp<bw>, 0x663f, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmpu<bw>, 0x663e, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmp<irel><bw>, 0x663f/<irel:imm>, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpcmp<irel>u<bw>, 0x663e/<irel:imm>, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
-vpslldq, 0x6673/7, AVX512BW, Modrm|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsrldq, 0x6673/3, AVX512BW, Modrm|Space0F|VexWIG|VexVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpslldq, 0x6673/7, AVX512BW, Modrm|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsrldq, 0x6673/3, AVX512BW, Modrm|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpextrw, 0x66C5, AVX512BW, Load|Modrm|EVex128|Space0F|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
 vpextr<bw>, 0x6614 | <bw:opc>, AVX512BW, RegMem|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
 vpextr<bw>, 0x6614 | <bw:opc>, AVX512BW, Modrm|EVex128|Space0F3A|VexWIG|Disp8MemShift|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
 
-vpinsrw, 0x66C4, AVX512BW, Modrm|EVex128|Space0F|VexWIG|VexVVVV|NoSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
-vpinsrw, 0x66C4, AVX512BW, Modrm|EVex128|Space0F|VexWIG|VexVVVV|Disp8MemShift=1|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpinsrb, 0x6620, AVX512BW, Modrm|EVex128|Space0F3A|VexWIG|VexVVVV|NoSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
-vpinsrb, 0x6620, AVX512BW, Modrm|EVex128|Space0F3A|VexWIG|VexVVVV|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpinsrw, 0x66C4, AVX512BW, Modrm|EVex128|Space0F|VexWIG|Src1VVVV|NoSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
+vpinsrw, 0x66C4, AVX512BW, Modrm|EVex128|Space0F|VexWIG|Src1VVVV|Disp8MemShift=1|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpinsrb, 0x6620, AVX512BW, Modrm|EVex128|Space0F3A|VexWIG|Src1VVVV|NoSuf, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
+vpinsrb, 0x6620, AVX512BW, Modrm|EVex128|Space0F3A|VexWIG|Src1VVVV|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vpmaddwd, 0x66F5, AVX512BW, Modrm|Masking|Space0F|VexVVVV|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmaddwd, 0x66F5, AVX512BW, Modrm|Masking|Space0F|Src1VVVV|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 vpmov<bw>2m, 0xf329, AVX512BW, Modrm|EVexDYN|Space0F38|<bw:vexw>|NoSuf, { RegXMM|RegYMM|RegZMM, RegMask }
 vpmovm2<bw>, 0xf328, AVX512BW, Modrm|EVexDYN|Space0F38|<bw:vexw>|NoSuf, { RegMask, RegXMM|RegYMM|RegZMM }
@@ -2754,13 +2756,13 @@ vpmovzxbw, 0x6630, AVX512BW, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift
 vpmovzxbw, 0x6630, AVX512BW&AVX512VL, Modrm|EVex=2|Masking|VexWIG|Space0F38|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
 vpmovzxbw, 0x6630, AVX512BW&AVX512VL, Modrm|EVex=3|Masking|VexWIG|Space0F38|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
 
-vpsadbw, 0x66F6, AVX512BW, Modrm|Space0F|VexVVVV|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsadbw, 0x66F6, AVX512BW, Modrm|Space0F|Src1VVVV|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 vpshufhw, 0xF370, AVX512BW, Modrm|Masking|Space0F|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpshuflw, 0xF270, AVX512BW, Modrm|Masking|Space0F|VexWIG|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vptestm<bw>, 0x6626, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vptestnm<bw>, 0xf326, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vptestm<bw>, 0x6626, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vptestnm<bw>, 0xf326, AVX512BW, Modrm|Masking|Space0F38|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
 // AVX512BW instructions end.
 
@@ -2773,13 +2775,13 @@ vptestnm<bw>, 0xf326, AVX512BW, Modrm|Masking|Space0F38|VexVVVV|<bw:vexw>|Disp8S
     x:AVX512VL:EVex128|Disp8MemShift=4::ATTSyntax:RegXMM|Unspecified|BaseIndex, +
     y:AVX512VL:EVex256|Disp8MemShift=5::ATTSyntax:RegYMM|Unspecified|BaseIndex>
 
-kadd<bw>, 0x<bw:kpfx>4A, AVX512DQ, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
+kadd<bw>, 0x<bw:kpfx>4A, AVX512DQ, Modrm|Vex256|Space0F|Src1VVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
 ktest<bw>, 0x<bw:kpfx>99, AVX512DQ, Modrm|Vex128|Space0F|VexW0|NoSuf, { RegMask, RegMask }
 
-vandnp<sd>, 0x<sd:ppfx>55, AVX512DQ, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vandp<sd>, 0x<sd:ppfx>54, AVX512DQ, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vorp<sd>, 0x<sd:ppfx>56, AVX512DQ, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vxorp<sd>, 0x<sd:ppfx>57, AVX512DQ, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vandnp<sd>, 0x<sd:ppfx>55, AVX512DQ, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vandp<sd>, 0x<sd:ppfx>54, AVX512DQ, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vorp<sd>, 0x<sd:ppfx>56, AVX512DQ, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vxorp<sd>, 0x<sd:ppfx>57, AVX512DQ, Modrm|Masking|Space0F|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 vbroadcastf32x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F38|VexW0|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
 vbroadcastf32x8, 0x661B, AVX512DQ, Modrm|EVex=1|Masking|Space0F38|VexW=1|Disp8MemShift=5|NoSuf, { YMMword|Unspecified|BaseIndex, RegZMM }
@@ -2818,16 +2820,16 @@ vcvtuqq2ps<Exy>, 0xf27a, AVX512DQ&<Exy:vl>, Modrm|<Exy:attr>|Masking|Space0F|Vex
 
 vextractf32x8, 0x661B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
 vextracti32x8, 0x663B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
-vinsertf32x8, 0x661A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|VexVVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
-vinserti32x8, 0x663A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|VexVVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
+vinsertf32x8, 0x661A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
+vinserti32x8, 0x663A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
 
 vpextr<dq>, 0x6616, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|<dq:vexw64>|Disp8MemShift|NoSuf, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
-vpinsr<dq>, 0x6622, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|VexVVVV|<dq:vexw64>|Disp8MemShift|NoSuf, { Imm8, <dq:gpr>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpinsr<dq>, 0x6622, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|Src1VVVV|<dq:vexw64>|Disp8MemShift|NoSuf, { Imm8, <dq:gpr>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vextractf64x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
 vextracti64x2, 0x6639, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
-vinsertf64x2, 0x6618, AVX512DQ, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vinserti64x2, 0x6638, AVX512DQ, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vinsertf64x2, 0x6618, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vinserti64x2, 0x6638, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
 
 vfpclassp<sd>, 0x6666, AVX512DQ, Modrm|Masking|Space0F3A|<sd:vexw>|Broadcast|Disp8ShiftVL|NoSuf|IntelSyntax, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegMask }
 vfpclassp<sd>, 0x6666, AVX512DQ, Modrm|Masking|Space0F3A|<sd:vexw>|Broadcast|Disp8ShiftVL|NoSuf|ATTSyntax, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|BaseIndex, RegMask }
@@ -2839,13 +2841,13 @@ vfpclasss<sdh>, 0x<sdh:pfx>67, <sdh:cpudq>, Modrm|EVexLIG|Masking|Space0F3A|<sdh
 vpmov<dq>2m, 0xf339, AVX512DQ, Modrm|EVexDYN|Space0F38|<dq:vexw>|NoSuf, { RegXMM|RegYMM|RegZMM, RegMask }
 vpmovm2<dq>, 0xf338, AVX512DQ, Modrm|EVexDYN|Space0F38|<dq:vexw>|NoSuf, { RegMask, RegXMM|RegYMM|RegZMM }
 
-vpmullq, 0x6640, AVX512DQ, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmullq, 0x6640, AVX512DQ, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vrangep<sd>, 0x6650, AVX512DQ, Modrm|Masking|Space0F3A|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vranges<sd>, 0x6651, AVX512DQ, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrangep<sd>, 0x6650, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vranges<sd>, 0x6651, AVX512DQ, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vreducep<sdh>, 0x<sdh:pfx>56, <sdh:cpudq>, Modrm|Masking|Space0F3A|<sdh:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vreduces<sdh>, 0x<sdh:pfx>57, <sdh:cpudq>, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
+vreduces<sdh>, 0x<sdh:pfx>57, <sdh:cpudq>, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|<sdh:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8, RegXMM|<sdh:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 // AVX512DQ instructions end.
 
@@ -2857,37 +2859,37 @@ clwb, 0x660fae/6, CLWB, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
 
 // AVX512IFMA instructions
 
-vpmadd52huq, 0x66B5, AVX512IFMA, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpmadd52luq, 0x66B4, AVX512IFMA, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmadd52huq, 0x66B5, AVX512IFMA, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmadd52luq, 0x66B4, AVX512IFMA, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512IFMA instructions end
 
 // AVX-IFMA instructions.
 
-vpmadd52huq, 0x66B5, AVX_IFMA, Modrm|Vex|Space0F38|VexVVVV|VexW1|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpmadd52luq, 0x66B4, AVX_IFMA, Modrm|Vex|Space0F38|VexVVVV|VexW1|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmadd52huq, 0x66B5, AVX_IFMA, Modrm|Vex|Space0F38|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpmadd52luq, 0x66B4, AVX_IFMA, Modrm|Vex|Space0F38|Src1VVVV|VexW1|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
 
 // AVX-IFMA instructions end.
 
 // AVX512VBMI instructions
 
-vpmultishiftqb, 0x6683, AVX512VBMI, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpmultishiftqb, 0x6683, AVX512VBMI, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512VBMI instructions end
 
 // AVX512_4FMAPS instructions
 
-v4fmaddps, 0xf29a, AVX512_4FMAPS, Modrm|EVex=1|Masking|Space0F38|VexVVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
-v4fnmaddps, 0xf2aa, AVX512_4FMAPS, Modrm|EVex=1|Masking|Space0F38|VexVVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
-v4fmaddss, 0xf29b, AVX512_4FMAPS, Modrm|EVex=4|Masking|Space0F38|VexVVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
-v4fnmaddss, 0xf2ab, AVX512_4FMAPS, Modrm|EVex=4|Masking|Space0F38|VexVVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
+v4fmaddps, 0xf29a, AVX512_4FMAPS, Modrm|EVex=1|Masking|Space0F38|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+v4fnmaddps, 0xf2aa, AVX512_4FMAPS, Modrm|EVex=1|Masking|Space0F38|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+v4fmaddss, 0xf29b, AVX512_4FMAPS, Modrm|EVex=4|Masking|Space0F38|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
+v4fnmaddss, 0xf2ab, AVX512_4FMAPS, Modrm|EVex=4|Masking|Space0F38|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 // AVX512_4FMAPS instructions end
 
 // AVX512_4VNNIW instructions
 
-vp4dpwssd, 0xf252, AVX512_4VNNIW, Modrm|EVex=1|Masking|Space0F38|VexVVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
-vp4dpwssds, 0xf253, AVX512_4VNNIW, Modrm|EVex=1|Masking|Space0F38|VexVVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+vp4dpwssd, 0xf252, AVX512_4VNNIW, Modrm|EVex=1|Masking|Space0F38|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+vp4dpwssds, 0xf253, AVX512_4VNNIW, Modrm|EVex=1|Masking|Space0F38|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf|ImplicitQuadGroup, { XMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 
 // AVX512_4VNNIW instructions end
 
@@ -2905,59 +2907,59 @@ vpcompressw, 0x6663, AVX512_VBMI2, Modrm|Masking|Space0F38|VexW=2|Disp8MemShift=
 vpexpandb, 0x6662, AVX512_VBMI2, Modrm|Masking|Space0F38|VexW=1|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpexpandw, 0x6662, AVX512_VBMI2, Modrm|Masking|Space0F38|VexW=2|Disp8MemShift=1|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vpshldv<dq>, 0x6671, AVX512_VBMI2, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpshldvw, 0x6670, AVX512_VBMI2, Modrm|Masking|Space0F38|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshldv<dq>, 0x6671, AVX512_VBMI2, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshldvw, 0x6670, AVX512_VBMI2, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vpshrdv<dq>, 0x6673, AVX512_VBMI2, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpshrdvw, 0x6672, AVX512_VBMI2, Modrm|Masking|Space0F38|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshrdv<dq>, 0x6673, AVX512_VBMI2, Modrm|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshrdvw, 0x6672, AVX512_VBMI2, Modrm|Masking|Space0F38|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vpshld<dq>, 0x6671, AVX512_VBMI2, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpshldw, 0x6670, AVX512_VBMI2, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshld<dq>, 0x6671, AVX512_VBMI2, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshldw, 0x6670, AVX512_VBMI2, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vpshrd<dq>, 0x6673, AVX512_VBMI2, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpshrdw, 0x6672, AVX512_VBMI2, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshrd<dq>, 0x6673, AVX512_VBMI2, Modrm|Masking|Space0F3A|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpshrdw, 0x6672, AVX512_VBMI2, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512_VBMI2 instructions end
 
 // AVX512_VNNI instructions
 
-vpdpbusd, 0x6650, AVX512_VNNI, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpdpwssd, 0x6652, AVX512_VNNI, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpdpbusd, 0x6650, AVX512_VNNI, Modrm|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpdpwssd, 0x6652, AVX512_VNNI, Modrm|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
-vpdpbusds, 0x6651, AVX512_VNNI, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpdpwssds, 0x6653, AVX512_VNNI, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpdpbusds, 0x6651, AVX512_VNNI, Modrm|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpdpwssds, 0x6653, AVX512_VNNI, Modrm|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512_VNNI instructions end
 
 // AVX_VNNI instructions
 
-vpdpbusd, 0x6650, AVX_VNNI, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwssd, 0x6652, AVX_VNNI, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbusd, 0x6650, AVX_VNNI, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwssd, 0x6652, AVX_VNNI, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 
-vpdpbusds, 0x6651, AVX_VNNI, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwssds, 0x6653, AVX_VNNI, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbusds, 0x6651, AVX_VNNI, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwssds, 0x6653, AVX_VNNI, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 
 // AVX_VNNI instructions end
 
 // AVX-VNNI-INT8 instructions.
 
-vpdpbuud, 0x50, AVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpbuuds, 0x51, AVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpbssd, 0xf250, AVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpbssds, 0xf251, AVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpbsud, 0xf350, AVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpbsuds, 0xf351, AVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbuud, 0x50, AVX_VNNI_INT8, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbuuds, 0x51, AVX_VNNI_INT8, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbssd, 0xf250, AVX_VNNI_INT8, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbssds, 0xf251, AVX_VNNI_INT8, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbsud, 0xf350, AVX_VNNI_INT8, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbsuds, 0xf351, AVX_VNNI_INT8, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
 
 // AVX-VNNI-INT8 instructions end.
 
 // AVX-VNNI-INT16 instructions.
 
-vpdpwuud, 0xd2, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwuuds, 0xd3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwusd, 0x66d2, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwusds, 0x66d3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwsud, 0xf3d2, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpdpwsuds, 0xf3d3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwuud, 0xd2, AVX_VNNI_INT16, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwuuds, 0xd3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwusd, 0x66d2, AVX_VNNI_INT16, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwusds, 0x66d3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwsud, 0xf3d2, AVX_VNNI_INT16, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpwsuds, 0xf3d3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
 
 // AVX-VNNI-INT16 instructions end.
 
@@ -2965,14 +2967,14 @@ vpdpwsuds, 0xf3d3, AVX_VNNI_INT16, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperan
 
 vpopcnt<bw>, 0x6654, AVX512_BITALG, Modrm|Masking|Space0F38|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vpshufbitqmb, 0x668f, AVX512_BITALG, Modrm|Masking|Space0F38|VexVVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vpshufbitqmb, 0x668f, AVX512_BITALG, Modrm|Masking|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
 // AVX512_BITALG instructions end
 
 // AVX512 + GFNI instructions
 
-vgf2p8affineinvqb, 0x66cf, GFNI&AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vgf2p8affineqb, 0x66ce, GFNI&AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vgf2p8affineinvqb, 0x66cf, GFNI&AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vgf2p8affineqb, 0x66ce, GFNI&AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512 + GFNI instructions end
 
@@ -3101,11 +3103,11 @@ movdir64b, 0x66f8, MOVDIR64B&APX_F, Modrm|AddrPrefixOpReg|NoSuf|EVexMap4, { Unsp
 
 // AVX512_BF16 instructions.
 
-vcvtne2ps2bf16, 0xf272, AVX512_BF16, Modrm|Space0F38|VexVVVV|Masking|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vcvtne2ps2bf16, 0xf272, AVX512_BF16, Modrm|Space0F38|Src1VVVV|Masking|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 vcvtneps2bf16<Exy>, 0xf372, AVX512_BF16&<Exy:vl>, Modrm|Space0F38|<Exy:attr>|Masking|VexW0|Broadcast|NoSuf, { <Exy:src>|Dword, <Exy:dst> }
 
-vdpbf16ps, 0xf352, AVX512_BF16, Modrm|Space0F38|VexVVVV|Masking|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vdpbf16ps, 0xf352, AVX512_BF16, Modrm|Space0F38|Src1VVVV|Masking|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX512_BF16 instructions end.
 
@@ -3132,7 +3134,7 @@ enqcmds, 0xf3f8, APX_F(ENQCMD), Modrm|AddrPrefixOpReg|NoSuf|EVexMap4, { Unspecif
 
 // VP2INTERSECT instructions.
 
-vp2intersect<dq>, 0xf268, AVX512_VP2INTERSECT, Modrm|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vp2intersect<dq>, 0xf268, AVX512_VP2INTERSECT, Modrm|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
 // VP2INTERSECT instructions end.
 
@@ -3190,15 +3192,15 @@ xresldtrk, 0xf20f01e9, TSXLDTRK, NoSuf, {}
 ldtilecfg, 0x49/0, APX_F(AMX_TILE), Modrm|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 sttilecfg, 0x6649/0, APX_F(AMX_TILE), Modrm|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 
-tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 
-tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 
 tileloadd, 0xf24b, APX_F(AMX_TILE), Sibmem|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
 tileloaddt1, 0x664b, APX_F(AMX_TILE), Sibmem|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
@@ -3253,23 +3255,23 @@ hreset, 0xf30f3af0c0, HRESET, NoSuf, { Imm8 }
 
 // FP16 (HFNI) instructions.
 
-vfcmaddcph, 0xf256, AVX512_FP16, Modrm|VexVVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfcmaddcsh, 0xf257, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfcmaddcph, 0xf256, AVX512_FP16, Modrm|Src1VVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfcmaddcsh, 0xf257, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vfmaddcph, 0xf356, AVX512_FP16, Modrm|VexVVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfmaddcsh, 0xf357, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfmaddcph, 0xf356, AVX512_FP16, Modrm|Src1VVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfmaddcsh, 0xf357, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vfcmulcph, 0xf2d6, AVX512_FP16, Modrm|VexVVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfcmulcsh, 0xf2d7, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfcmulcph, 0xf2d6, AVX512_FP16, Modrm|Src1VVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfcmulcsh, 0xf2d7, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vfmulcph, 0xf3d6, AVX512_FP16, Modrm|VexVVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vfmulcsh, 0xf3d7, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vfmulcph, 0xf3d6, AVX512_FP16, Modrm|Src1VVVV|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vfmulcsh, 0xf3d7, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=2|DistinctDest|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcmp<frel>ph, 0xc2/0x<frel:imm>, AVX512_FP16, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt|SAE, { RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vcmpph, 0xc2, AVX512_FP16, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vcmp<frel>ph, 0xc2/0x<frel:imm>, AVX512_FP16, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt|SAE, { RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
+vcmpph, 0xc2, AVX512_FP16, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8, RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
-vcmp<frel>sh, 0xf3c2/0x<frel:imm>, AVX512_FP16, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|NoSuf|ImmExt|SAE, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
-vcmpsh, 0xf3c2, AVX512_FP16, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|NoSuf|SAE, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmp<frel>sh, 0xf3c2/0x<frel:imm>, AVX512_FP16, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=1|NoSuf|ImmExt|SAE, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmpsh, 0xf3c2, AVX512_FP16, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=1|NoSuf|SAE, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
 
 vcvtdq2ph<Exy>, 0x5b, AVX512_FP16&<Exy:vl>, Modrm|<Exy:attr>|Masking|EVexMap5|VexW0|Broadcast|NoSuf|<Exy:sr>, { <Exy:src>|Dword, <Exy:dst> }
 vcvtudq2ph<Exy>, 0xf27a, AVX512_FP16&<Exy:vl>, Modrm|<Exy:attr>|Masking|EVexMap5|VexW0|Broadcast|NoSuf|<Exy:sr>, { <Exy:src>|Dword, <Exy:dst> }
@@ -3307,17 +3309,17 @@ vcvtph2pd, 0x5a, AVX512_FP16, Modrm|EVex512|Masking|EVexMap5|VexW0|Broadcast|Dis
 vcvtph2w, 0x667d, AVX512_FP16, Modrm|Masking|EVexMap5|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vcvtph2uw, 0x7d, AVX512_FP16, Modrm|Masking|EVexMap5|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|StaticRounding|SAE, { RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vcvtsd2sh, 0xf25a, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap5|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|StaticRounding|SAE, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtss2sh, 0x1d, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap5|VexVVVV|VexW0|Disp8MemShift=2|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsd2sh, 0xf25a, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap5|Src1VVVV|VexW1|Disp8MemShift=3|NoSuf|StaticRounding|SAE, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtss2sh, 0x1d, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap5|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf|StaticRounding|SAE, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcvtsi2sh, 0xf32a, AVX512_FP16, Modrm|EVexLIG|EVexMap5|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sh, 0xf32a, AVX512_FP16, Modrm|EVexLIG|EVexMap5|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sh, 0xf32a, AVX512_FP16, Modrm|EVexLIG|EVexMap5|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sh, 0xf32a, AVX512_FP16, Modrm|EVexLIG|EVexMap5|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcvtusi2sh, 0xf37b, AVX512_FP16, Modrm|EVexLIG|EVexMap5|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sh, 0xf37b, AVX512_FP16, Modrm|EVexLIG|EVexMap5|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sh, 0xf37b, AVX512_FP16, Modrm|EVexLIG|EVexMap5|Src1VVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sh, 0xf37b, AVX512_FP16, Modrm|EVexLIG|EVexMap5|Src1VVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|StaticRounding|SAE|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
 
-vcvtsh2sd, 0xf35a, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap5|VexVVVV|VexW0|Disp8MemShift=1|NoSuf|SAE, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsh2ss, 0x13, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=1|NoSuf|SAE, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsh2sd, 0xf35a, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap5|Src1VVVV|VexW0|Disp8MemShift=1|NoSuf|SAE, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsh2ss, 0x13, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=1|NoSuf|SAE, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vcvtsh2si, 0xf32d, AVX512_FP16, Modrm|EVexLIG|EVexMap5|Disp8MemShift=1|NoSuf|StaticRounding|SAE, { RegXMM|Word|Unspecified|BaseIndex, Reg32|Reg64 }
 
@@ -3353,11 +3355,11 @@ vmovw, 0x667e, AVX512_FP16, D|RegMem|EVex128|VexWIG|EVexMap5|NoSuf, { RegXMM, Re
 
 vrcpph, 0x664c, AVX512_FP16, Modrm|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vrcpsh, 0x664d, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrcpsh, 0x664d, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 vrsqrtph, 0x664e, AVX512_FP16, Modrm|Masking|EVexMap6|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
-vrsqrtsh, 0x664f, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
+vrsqrtsh, 0x664f, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|Src1VVVV|VexW0|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegXMM }
 
 // FP16 (HFNI) instructions end.
 
@@ -3370,7 +3372,7 @@ prefetchit1, 0xf18/6, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
 
 // CMPCCXADD instructions.
 
-cmp<cc>xadd, 0x66e<cc:opc>, APX_F(CMPCCXADD), Modrm|Vex|EVex128|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+cmp<cc>xadd, 0x66e<cc:opc>, APX_F(CMPCCXADD), Modrm|Vex|EVex128|Space0F38|Src1VVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
 // CMPCCXADD instructions end.
 
@@ -3425,9 +3427,9 @@ uwrmsr, 0xf3f8/0, APX_F(USER_MSR), Modrm|Vex128|VexMap7|EVex128|VexW0|NoSuf, { I
 
 // APX Push2/Pop2 instructions.
 
-push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|VexVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
-push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|VexVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
-pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|VexVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
-pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|VexVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
 
 // APX Push2/Pop2 instructions end.
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/3] x86: Drop SwapSources
  2024-04-24  7:23 [PATCH 0/3] x86: Optimize the encoder of the vvvv register Cui, Lili
  2024-04-24  7:23 ` [PATCH 1/3] x86: Use vexvvvv to encode " Cui, Lili
@ 2024-04-24  7:23 ` Cui, Lili
  2024-04-24  8:07   ` Jan Beulich
  2024-04-24  7:23 ` [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register Cui, Lili
  2 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-24  7:23 UTC (permalink / raw)
  To: binutils; +Cc: hjl.tools, jbeulich

gas/ChangeLog:

        * config/tc-i386.c (build_modrm_byte): Dropped the use of
	SWAP_SOURCES to encode the vvvv register.

opcodes/ChangeLog:

        * i386-opc.h (SWAP_SOURCES): Dropped.
        (NO_DEFAULT_MASK): Adjusted the value.
        (ADDR_PREFIX_OP_REG): Ditto.
        (DISTINCT_DEST): Ditto.
        (IMPLICIT_STACK_OP): Ditto.
        (VexVVVV_SRC2): New.
        * i386-opc.tbl: Dropped SwapSources and replaced its Src2VVVV
	with Src1VVVV.
---
 gas/config/tc-i386.c | 16 ++++++------
 opcodes/i386-opc.h   | 12 ++++-----
 opcodes/i386-opc.tbl | 60 ++++++++++++++++++++++----------------------
 3 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 640a83c8b2d..de1c08404cd 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -10434,6 +10434,14 @@ build_modrm_byte (void)
 
   switch (i.tm.opcode_modifier.vexvvvv)
     {
+    case VexVVVV_SRC2:
+      if (source != op)
+	{
+	  v = source++;
+	  break;
+	}
+      /* For XOP: vpshl* and vpsha*.  */
+      /* Fall through.  */
     case VexVVVV_SRC1:
       v =  dest - 1;
       break;
@@ -10452,14 +10460,6 @@ build_modrm_byte (void)
       dest = ~0;
     }
   gas_assert (source < dest);
-  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
-      && source != op)
-    {
-      unsigned int tmp = source;
-
-      source = v;
-      v = tmp;
-    }
 
   if (v < MAX_OPERANDS)
     i.vex.register_specifier = i.op[v].regs;
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 4ae87a3dbd5..fa482ca3d37 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -570,17 +570,15 @@ enum
      It implicitly denotes the register group of {x,y,z}mmN - {x,y,z}mm(N + 3).
    */
 #define IMPLICIT_QUAD_GROUP 5
-  /* Two source operands are swapped.  */
-#define SWAP_SOURCES 6
   /* Default mask isn't allowed.  */
-#define NO_DEFAULT_MASK 7
+#define NO_DEFAULT_MASK 6
   /* Address prefix changes register operand */
-#define ADDR_PREFIX_OP_REG 8
+#define ADDR_PREFIX_OP_REG 7
   /* Instrucion requires that destination must be distinct from source
      registers.  */
-#define DISTINCT_DEST 9
+#define DISTINCT_DEST 8
   /* Instruction updates stack pointer implicitly.  */
-#define IMPLICIT_STACK_OP 10
+#define IMPLICIT_STACK_OP 9
   OperandConstraint,
   /* instruction ignores operand size prefix and in Intel mode ignores
      mnemonic size suffix check.  */
@@ -640,9 +638,11 @@ enum
   Vex,
   /* How to encode VEX.vvvv:
      1: VEX.vvvv encodes the src1 register operand.
+     2: VEX.vvvv encodes the src2 register operand.
      3: VEX.vvvv encodes the dest register operand.
    */
 #define VexVVVV_SRC1   1
+#define VexVVVV_SRC2   2
 #define VexVVVV_DST    3
 
   VexVVVV,
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 31c769857dc..143ac4e5597 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -83,7 +83,6 @@
 #define ImplicitQuadGroup OperandConstraint=IMPLICIT_QUAD_GROUP
 #define NoDefMask         OperandConstraint=NO_DEFAULT_MASK
 #define RegKludge         OperandConstraint=REG_KLUDGE
-#define SwapSources       OperandConstraint=SWAP_SOURCES
 #define Ugh               OperandConstraint=UGH
 #define ImplicitStackOp   OperandConstraint=IMPLICIT_STACK_OP
 
@@ -142,6 +141,7 @@
 #define Disp8ShiftVL Disp8MemShift=DISP8_SHIFT_VL
 
 #define Src1VVVV VexVVVV=VexVVVV_SRC1
+#define Src2VVVV VexVVVV=VexVVVV_SRC2
 #define DstVVVV  VexVVVV=VexVVVV_DST
 
 
@@ -1798,18 +1798,18 @@ vpsravd, 0x6646, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|VexW
 vpsrlv<dq>, 0x6645, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 
 // AVX gather instructions
-vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vgatherdps, 0x6692, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vgatherdps, 0x6692, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
-vgatherqp<sd>, 0x6693, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<sd:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <sd:elem>|Unspecified|BaseIndex, RegXMM }
-vgatherqpd, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
-vgatherqps, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vpgatherdd, 0x6690, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vpgatherdd, 0x6690, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
-vpgatherdq, 0x6690, AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
-vpgatherq<dq>, 0x6691, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|<dq:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <dq:elem>|Unspecified|BaseIndex, RegXMM }
-vpgatherqd, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
-vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
+vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|Src2VVVV|VexW1|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
+vgatherdps, 0x6692, AVX2, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vgatherdps, 0x6692, AVX2, Modrm|Vex256|Space0F38|Src2VVVV|VexW0|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
+vgatherqp<sd>, 0x6693, AVX2, Modrm|Vex|Space0F38|Src2VVVV|<sd:vexw>|NoSuf|VecSIB128, { RegXMM, <sd:elem>|Unspecified|BaseIndex, RegXMM }
+vgatherqpd, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src2VVVV|VexW1|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
+vgatherqps, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src2VVVV|VexW0|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vpgatherdd, 0x6690, AVX2, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vpgatherdd, 0x6690, AVX2, Modrm|Vex256|Space0F38|Src2VVVV|VexW0|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
+vpgatherdq, 0x6690, AVX2, Modrm|Vex|Space0F38|Src2VVVV|VexW1|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
+vpgatherq<dq>, 0x6691, AVX2, Modrm|Vex128|Space0F38|Src2VVVV|<dq:vexw>|NoSuf|VecSIB128, { RegXMM, <dq:elem>|Unspecified|BaseIndex, RegXMM }
+vpgatherqd, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src2VVVV|VexW0|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
+vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src2VVVV|VexW1|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
 
 // AES + AVX
 
@@ -1879,14 +1879,14 @@ xtest, 0xf01d6, HLE|RTM, NoSuf, {}
 
 // BMI2 instructions.
 
-bzhi, 0xf5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+bzhi, 0xf5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src2VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 mulx, 0xf2f6, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 pdep, 0xf2f5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 pext, 0xf3f5, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 rorx, 0xf2f0, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F3A|No_bSuf|No_wSuf|No_sSuf, { Imm8|Imm8S, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-sarx, 0xf3f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-shlx, 0x66f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-shrx, 0xf2f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+sarx, 0xf3f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src2VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+shlx, 0x66f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src2VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+shrx, 0xf2f7, APX_F(BMI2), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src2VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // FMA4 instructions
 
@@ -1938,10 +1938,10 @@ vpmacsww, 0x95, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, Reg
 vpmadcsswd, 0xa6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpmadcswd, 0xb6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpperm, 0xa3, XOP, D|Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
 vprot<xop>, 0xc0 | <xop:opc>, XOP, Modrm|Vex128|SpaceXOP08|VexW0|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
-vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|SwapSources|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
 
 <xop>
 <irel>
@@ -1957,7 +1957,7 @@ lwpins, 0x12/0, LWP, Modrm|SpaceXOP0A|NoSuf|Src1VVVV|Vex, { Imm32|Imm32S, Reg32|
 // BMI instructions
 
 andn, 0xf2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
-bextr, 0xf7, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+bextr, 0xf7, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src2VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 blsi, 0xf3/3, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 blsmsk, 0xf3/2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 blsr, 0xf3/1, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
@@ -3192,15 +3192,15 @@ xresldtrk, 0xf20f01e9, TSXLDTRK, NoSuf, {}
 ldtilecfg, 0x49/0, APX_F(AMX_TILE), Modrm|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 sttilecfg, 0x6649/0, APX_F(AMX_TILE), Modrm|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 
-tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 
-tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 
 tileloadd, 0xf24b, APX_F(AMX_TILE), Sibmem|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
 tileloaddt1, 0x664b, APX_F(AMX_TILE), Sibmem|Vex128|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
@@ -3372,7 +3372,7 @@ prefetchit1, 0xf18/6, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
 
 // CMPCCXADD instructions.
 
-cmp<cc>xadd, 0x66e<cc:opc>, APX_F(CMPCCXADD), Modrm|Vex|EVex128|Space0F38|Src1VVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+cmp<cc>xadd, 0x66e<cc:opc>, APX_F(CMPCCXADD), Modrm|Vex|EVex128|Space0F38|Src2VVVV|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
 // CMPCCXADD instructions end.
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register
  2024-04-24  7:23 [PATCH 0/3] x86: Optimize the encoder of the vvvv register Cui, Lili
  2024-04-24  7:23 ` [PATCH 1/3] x86: Use vexvvvv to encode " Cui, Lili
  2024-04-24  7:23 ` [PATCH 2/3] x86: Drop SwapSources Cui, Lili
@ 2024-04-24  7:23 ` Cui, Lili
  2024-04-24  8:19   ` Jan Beulich
  2 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-24  7:23 UTC (permalink / raw)
  To: binutils; +Cc: hjl.tools, jbeulich

gas/ChangeLog:

        * config/tc-i386.c (build_modrm_byte): Dropped the use of
        extension_opcode to encode the vvvv register.
        * testsuite/gas/i386/x86-64-sse2avx.d: Added new testcases.
        * testsuite/gas/i386/x86-64-sse2avx.s: Diito.

opcodes/ChangeLog:

        * i386-opc.tbl: Added DstVVVV to some extension_opcode instructions.
---
 gas/config/tc-i386.c                    |  9 +--
 gas/testsuite/gas/i386/x86-64-sse2avx.d |  7 ++
 gas/testsuite/gas/i386/x86-64-sse2avx.s |  7 ++
 opcodes/i386-opc.tbl                    | 92 ++++++++++++-------------
 4 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index de1c08404cd..64a0c27364d 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -10453,12 +10453,9 @@ build_modrm_byte (void)
       break;
      }
 
-  if (i.tm.extension_opcode != None)
-    {
-      if (dest != source)
-	v = dest;
-      dest = ~0;
-    }
+  if (dest == source)
+    dest = ~0;
+
   gas_assert (source < dest);
 
   if (v < MAX_OPERANDS)
diff --git a/gas/testsuite/gas/i386/x86-64-sse2avx.d b/gas/testsuite/gas/i386/x86-64-sse2avx.d
index dbbba4fc8d0..df377f13795 100644
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.d
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.d
@@ -728,6 +728,13 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	c5 72 11 c1          	vmovss %xmm8,%xmm1,%xmm1
 [ 	]*[a-f0-9]+:	c4 c1 32 11 c1       	vmovss %xmm0,%xmm9,%xmm9
 [ 	]*[a-f0-9]+:	c4 c1 39 71 f0 00    	vpsllw \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 72 f0 00    	vpslld \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 73 f0 00    	vpsllq \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 71 e0 00    	vpsraw \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 72 e0 00    	vpsrad \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 71 d0 00    	vpsrlw \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 72 d0 00    	vpsrld \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 73 d0 00    	vpsrlq \$(0x)?0,%xmm8,%xmm8
 [ 	]*[a-f0-9]+:	c5 79 c5 c8 00       	vpextrw \$(0x)?0,%xmm0,%r9d
 [ 	]*[a-f0-9]+:	c4 c1 79 c5 c8 00    	vpextrw \$(0x)?0,%xmm8,%ecx
 [ 	]*[a-f0-9]+:	c4 63 79 14 c1 00    	vpextrb \$(0x)?0,%xmm8,%ecx
diff --git a/gas/testsuite/gas/i386/x86-64-sse2avx.s b/gas/testsuite/gas/i386/x86-64-sse2avx.s
index 8b37a60dacb..2966f5f2919 100644
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.s
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.s
@@ -822,6 +822,13 @@ _start:
 	{store} rexz movss %xmm0, %xmm1
 
 	rexz psllw $0, %xmm0
+	rexz pslld $0, %xmm0
+	rexz psllq $0, %xmm0
+	rexz psraw $0, %xmm0
+	rexz psrad $0, %xmm0
+	rexz psrlw $0, %xmm0
+	rexz psrld $0, %xmm0
+	rexz psrlq $0, %xmm0
 
 	rexx pextrw $0, %xmm0, %ecx
 	rexz pextrw $0, %xmm0, %ecx
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 143ac4e5597..953dc2846dc 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1002,10 +1002,10 @@ pause, 0xf390, i186, NoSuf, {}
 
 // MMX/SSE2 instructions.
 
-<mmx:cpu:pfx:attr:reg:mem, +
-    $avx:AVX:66:Vex128|Src1VVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
-    $sse:SSE2:66::RegXMM:Xmmword, +
-    $mmx:MMX:::RegMMX:Qword>
+<mmx:cpu:pfx:attr:vvvv:reg:mem, +
+    $avx:AVX:66:Vex128|Src1VVVV|VexW0|SSE2AVX:Vex128|DstVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
+    $sse:SSE2:66:::RegXMM:Xmmword, +
+    $mmx:MMX::::RegMMX:Qword>
 
 <sse2:cpu:attr:scal:vvvv, +
     $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:Src1VVVV, +
@@ -1060,17 +1060,17 @@ pmulhw<mmx>, 0x<mmx:pfx>0fe5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<
 pmullw<mmx>, 0x<mmx:pfx>0fd5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
 por<mmx>, 0x<mmx:pfx>0feb, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
 psllw<mmx>, 0x<mmx:pfx>0ff1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psllw<mmx>, 0x<mmx:pfx>0f71/6, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
+psllw<mmx>, 0x<mmx:pfx>0f71/6, <mmx:cpu>, Modrm|<mmx:vvvv>|NoSuf, { Imm8, <mmx:reg> }
 psll<dq><mmx>, 0x<mmx:pfx>0ff2 | <dq:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psll<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/6, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
+psll<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/6, <mmx:cpu>, Modrm|<mmx:vvvv>|NoSuf, { Imm8, <mmx:reg> }
 psraw<mmx>, 0x<mmx:pfx>0fe1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psraw<mmx>, 0x<mmx:pfx>0f71/4, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
+psraw<mmx>, 0x<mmx:pfx>0f71/4, <mmx:cpu>, Modrm|<mmx:vvvv>|NoSuf, { Imm8, <mmx:reg> }
 psrad<mmx>, 0x<mmx:pfx>0fe2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrad<mmx>, 0x<mmx:pfx>0f72/4, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
+psrad<mmx>, 0x<mmx:pfx>0f72/4, <mmx:cpu>, Modrm|<mmx:vvvv>|NoSuf, { Imm8, <mmx:reg> }
 psrlw<mmx>, 0x<mmx:pfx>0fd1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrlw<mmx>, 0x<mmx:pfx>0f71/2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
+psrlw<mmx>, 0x<mmx:pfx>0f71/2, <mmx:cpu>, Modrm|<mmx:vvvv>|NoSuf, { Imm8, <mmx:reg> }
 psrl<dq><mmx>, 0x<mmx:pfx>0fd2 | <dq:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrl<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
+psrl<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/2, <mmx:cpu>, Modrm|<mmx:vvvv>|NoSuf, { Imm8, <mmx:reg> }
 psub<bw><mmx>, 0x<mmx:pfx>0ff8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
 psubd<mmx>, 0x<mmx:pfx>0ffa, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
 psubq<sse2>, 0x660ffb, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1701,19 +1701,19 @@ vpshufhw, 0xf370, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { I
 vpshuflw, 0xf270, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vpsign<bw>, 0x6608 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsignd, 0x660a, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsll<dq>, 0x6672 | <dq:opc>/6, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsll<dq>, 0x6672 | <dq:opc>/6, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsll<dq>, 0x66f2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpslldq, 0x6673/7, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsllw, 0x6671/6, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpslldq, 0x6673/7, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsllw, 0x6671/6, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsllw, 0x66f1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrad, 0x6672/4, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrad, 0x6672/4, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsrad, 0x66e2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsraw, 0x6671/4, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsraw, 0x6671/4, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsraw, 0x66e1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsrl<dq>, 0x66d2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrldq, 0x6673/3, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrlw, 0x6671/2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrldq, 0x6673/3, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsrlw, 0x6671/2, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsrlw, 0x66d1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsub<bw>, 0x66f8 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 vpsub<dq>, 0x66fa | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
@@ -1951,31 +1951,31 @@ vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSu
 
 llwpcb, 0x12/0, LWP, Modrm|SpaceXOP09|NoSuf|Vex, { Reg32|Reg64 }
 slwpcb, 0x12/1, LWP, Modrm|SpaceXOP09|NoSuf|Vex, { Reg32|Reg64 }
-lwpval, 0x12/1, LWP, Modrm|SpaceXOP0A|NoSuf|Src1VVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
-lwpins, 0x12/0, LWP, Modrm|SpaceXOP0A|NoSuf|Src1VVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
+lwpval, 0x12/1, LWP, Modrm|SpaceXOP0A|NoSuf|DstVVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
+lwpins, 0x12/0, LWP, Modrm|SpaceXOP0A|NoSuf|DstVVVV|Vex, { Imm32|Imm32S, Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // BMI instructions
 
 andn, 0xf2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 bextr, 0xf7, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src2VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsi, 0xf3/3, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsmsk, 0xf3/2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsr, 0xf3/1, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|Src1VVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsi, 0xf3/3, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|DstVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsmsk, 0xf3/2, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|DstVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsr, 0xf3/1, APX_F(BMI), Modrm|CheckOperandSize|Vex128|EVex128|Space0F38|DstVVVV|No_bSuf|No_wSuf|No_sSuf|NF, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 tzcnt, 0xf30fbc, BMI, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 tzcnt, 0xf4, BMI&APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 
 // TBM instructions
 
 bextr, 0x10, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP0A|No_bSuf|No_wSuf|No_sSuf, { Imm32|Imm32S, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcfill, 0x01/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blci, 0x02/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcic, 0x01/5, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcmsk, 0x02/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blcs, 0x01/3, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsfill, 0x01/2, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-blsic, 0x01/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-t1mskc, 0x01/7, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-tzmsk, 0x01/4, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|Src1VVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcfill, 0x01/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blci, 0x02/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcic, 0x01/5, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcmsk, 0x02/1, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blcs, 0x01/3, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsfill, 0x01/2, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsic, 0x01/6, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+t1mskc, 0x01/7, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+tzmsk, 0x01/4, TBM, Modrm|CheckOperandSize|Vex128|SpaceXOP09|DstVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // AMD 3DNow! instructions.
 
@@ -2441,17 +2441,17 @@ vpmovzxwd, 0x6633, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift=
 vpmovsxwq, 0x6624, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegZMM }
 vpmovzxwq, 0x6634, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegZMM }
 
-vprol<dq>, 0x6672/1, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpror<dq>, 0x6672/0, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vprol<dq>, 0x6672/1, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpror<dq>, 0x6672/0, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpshufd, 0x6670, AVX512F, Modrm|Masking|Space0F|VexW=1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpsll<dq>, 0x66f2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsll<dq>, 0x6672 | <dq:opc>/6, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsll<dq>, 0x6672 | <dq:opc>/6, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpsra<dq>, 0x66e2, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsra<dq>, 0x6672/4, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsra<dq>, 0x6672/4, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpsrl<dq>, 0x66d2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsrl<dq>, 0x6672 | <dq:opc>/2, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vrcp14p<sd>, 0x664C, AVX512F, Modrm|Masking|Space0F38|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vrcp14s<sd>, 0x664D, AVX512F, Modrm|EVexLIG|Masking|Space0F38|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf, { RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
@@ -2691,11 +2691,11 @@ vpminsw, 0x66EA, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|Ch
 vpmulhuw, 0x66E4, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vpmulhw, 0x66E5, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vpmullw, 0x66D5, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsllw, 0x6671/6, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsllw, 0x6671/6, AVX512BW, Modrm|Masking|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpsllw, 0x66F1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsraw, 0x6671/4, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsraw, 0x6671/4, AVX512BW, Modrm|Masking|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpsraw, 0x66E1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrlw, 0x6671/2, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsrlw, 0x6671/2, AVX512BW, Modrm|Masking|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 vpsrlw, 0x66D1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vpunpckhwd, 0x6669, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 vpunpcklwd, 0x6661, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
@@ -2720,8 +2720,8 @@ vpcmpu<bw>, 0x663e, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8Sh
 vpcmp<irel><bw>, 0x663f/<irel:imm>, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 vpcmp<irel>u<bw>, 0x663e/<irel:imm>, AVX512BW, Modrm|Masking|Space0F3A|Src1VVVV|<bw:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 
-vpslldq, 0x6673/7, AVX512BW, Modrm|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpsrldq, 0x6673/3, AVX512BW, Modrm|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpslldq, 0x6673/7, AVX512BW, Modrm|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsrldq, 0x6673/3, AVX512BW, Modrm|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
 
 vpextrw, 0x66C5, AVX512BW, Load|Modrm|EVex128|Space0F|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
 vpextr<bw>, 0x6614 | <bw:opc>, AVX512BW, RegMem|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
@@ -3427,9 +3427,9 @@ uwrmsr, 0xf3f8/0, APX_F(USER_MSR), Modrm|Vex128|VexMap7|EVex128|VexW0|NoSuf, { I
 
 // APX Push2/Pop2 instructions.
 
-push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
-push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
-pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
-pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
 
 // APX Push2/Pop2 instructions end.
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/3] x86: Use vexvvvv to encode the vvvv register
  2024-04-24  7:23 ` [PATCH 1/3] x86: Use vexvvvv to encode " Cui, Lili
@ 2024-04-24  7:52   ` Jan Beulich
  2024-04-25 13:14     ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-24  7:52 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 24.04.2024 09:23, Cui, Lili wrote:
> gas/ChangeLog:
> 
>         * config/tc-i386.c (optimize_encoding): Replaced 1 with VexVVVV_SRC1.
>         (build_modrm_byte): Used vexvvvv to encode the vvvv register.
>         (s_insn): Replaced 1 with VexVVVV_SRC1.
> 
> opcodes/ChangeLog:
> 
>         * i386-opc.h (VexVVVV_DST): Adjusted the value.
>         (VexVVVV_SRC1): New.
>         * i386-opc.tbl: Replaced VexVVVV with VexVVVV_SRC1.

Not quite: You replace by Src1VVVV now.

> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -5045,7 +5045,7 @@ optimize_encoding (void)
>         */
>        i.tm.opcode_space = SPACE_0F;
>        i.tm.base_opcode = 0x6c;
> -      i.tm.opcode_modifier.vexvvvv = 1;
> +      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
>  
>        ++i.operands;
>        ++i.reg_operands;
> @@ -10432,19 +10432,19 @@ build_modrm_byte (void)
>  				     || i.encoding == encoding_evex));
>      }
>  
> -  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
> +  switch (i.tm.opcode_modifier.vexvvvv)
>      {
> -      v = dest;
> -      dest-- ;
> -    }
> -  else
> -    {
> -      for (v = source + 1; v < dest; ++v)
> -	if (v != reg_slot)
> -	  break;
> -      if (v >= dest)
> -	v = ~0;

Replacing this by ...

> -    }
> +    case VexVVVV_SRC1:
> +      v =  dest - 1;
> +      break;

... just this could do with a word of explanation in the (sadly once
again empty) description. While I'm sure this tests out okay for you, it's
not immediately clear why the loop can be replaced this easily. Whereas by
the end of the series (with SwapSources dropped) this simplification is
pretty obvious.

As to the deecription once again being empty: This is even more of an
issue considering that the title suggests the patch isn't doing anything.
After all we already use vexvvvv for the stated purpose.

Finally with the reg_slot use gone here (assuming, as per above, that
this is actually correct at this point), that variable wants to move into
the more narrow scope it's now solely used in, and it wants to have its
initializer dropped.

> @@ -10462,10 +10462,7 @@ build_modrm_byte (void)
>      }
>  
>    if (v < MAX_OPERANDS)
> -    {
> -      gas_assert (i.tm.opcode_modifier.vexvvvv);
> -      i.vex.register_specifier = i.op[v].regs;
> -    }
> +    i.vex.register_specifier = i.op[v].regs;

I have to admit I'm not entirely convinced of it being a good idea to drop
the assertion. At least as long as the SWAP_SOURCES logic is still there.

> @@ -1774,63 +1776,63 @@ vbroadcasti128, 0x665A, AVX2, Modrm|Vex256|Space0F38|VexW0|NoSuf, { Xmmword|Unsp
>  vbroadcasti128, 0x665a, APX_F&AVX512VL, Modrm|EVex256|Space0F38|VexW0|Disp8MemShift=4|NoSuf, { Xmmword|Unspecified|BaseIndex, RegYMM }
>  vbroadcastsd, 0x6619, AVX2, Modrm|Vex=2|Space0F38|VexW=1|NoSuf, { RegXMM, RegYMM }
>  vbroadcastss, 0x6618, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexW0|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
> -vpblendd, 0x6602, AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
> +vpblendd, 0x6602, AVX2, Modrm|Vex|Space0F3A|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
>  vpbroadcast<bw>, 0x6678 | <bw:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf, { <bw:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
>  vpbroadcastd, 0x6658, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexW0|Disp8MemShift|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
>  vpbroadcastq, 0x6659, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf|Optimize, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
> -vperm2i128, 0x6646, AVX2, Modrm|Vex=2|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
> -vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
> +vperm2i128, 0x6646, AVX2, Modrm|Vex=2|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
> +vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
>  vpermpd, 0x6601, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
> -vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
> +vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
>  vpermq, 0x6600, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
>  vextracti128, 0x6639, AVX2, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
>  // vextracti32x4 in disguise (see vround{p,s}{s,d} comment)
>  vextracti128, 0x6639, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
> -vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
> +vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
>  // vinserti32x4 in disguise (see vround{p,s}{s,d} comment)
> -vinserti128, 0x6638, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexVVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
> -vpmaskmov<dq>, 0x668e, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
> -vpmaskmov<dq>, 0x668c, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
> -vpsllv<dq>, 0x6647, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> -vpsravd, 0x6646, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> -vpsrlv<dq>, 0x6645, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> +vinserti128, 0x6638, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
> +vpmaskmov<dq>, 0x668e, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<dq:vexw>|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
> +vpmaskmov<dq>, 0x668c, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
> +vpsllv<dq>, 0x6647, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> +vpsravd, 0x6646, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> +vpsrlv<dq>, 0x6645, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|Src1VVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
>  
>  // AVX gather instructions
> -vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|VexVVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
> -vgatherdps, 0x6692, AVX2, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> -vgatherdps, 0x6692, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
> -vgatherqp<sd>, 0x6693, AVX2, Modrm|Vex|Space0F38|VexVVVV|<sd:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <sd:elem>|Unspecified|BaseIndex, RegXMM }
> -vgatherqpd, 0x6693, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
> -vgatherqps, 0x6693, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> -vpgatherdd, 0x6690, AVX2, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> -vpgatherdd, 0x6690, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
> -vpgatherdq, 0x6690, AVX2, Modrm|Vex|Space0F38|VexVVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
> -vpgatherq<dq>, 0x6691, AVX2, Modrm|Vex128|Space0F38|VexVVVV|<dq:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <dq:elem>|Unspecified|BaseIndex, RegXMM }
> -vpgatherqd, 0x6691, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> -vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
> +vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
> +vgatherdps, 0x6692, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> +vgatherdps, 0x6692, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
> +vgatherqp<sd>, 0x6693, AVX2, Modrm|Vex|Space0F38|Src1VVVV|<sd:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <sd:elem>|Unspecified|BaseIndex, RegXMM }
> +vgatherqpd, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
> +vgatherqps, 0x6693, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> +vpgatherdd, 0x6690, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB128, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> +vpgatherdd, 0x6690, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegYMM, Dword|Unspecified|BaseIndex, RegYMM }
> +vpgatherdq, 0x6690, AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
> +vpgatherq<dq>, 0x6691, AVX2, Modrm|Vex128|Space0F38|Src1VVVV|<dq:vexw>|SwapSources|NoSuf|VecSIB128, { RegXMM, <dq:elem>|Unspecified|BaseIndex, RegXMM }
> +vpgatherqd, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW0|SwapSources|NoSuf|VecSIB256, { RegXMM, Dword|Unspecified|BaseIndex, RegXMM }
> +vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB256, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }

Anything using SwapSources wants leaving alone in this patch. There's no point
touching any of these twice (here and when subsequently you replace SwapSources).
That'll also help to limit patch size a little.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-24  7:23 ` [PATCH 2/3] x86: Drop SwapSources Cui, Lili
@ 2024-04-24  8:07   ` Jan Beulich
  2024-04-26  8:14     ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-24  8:07 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 24.04.2024 09:23, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
>  
>    switch (i.tm.opcode_modifier.vexvvvv)
>      {
> +    case VexVVVV_SRC2:
> +      if (source != op)
> +	{
> +	  v = source++;
> +	  break;
> +	}
> +      /* For XOP: vpshl* and vpsha*.  */
> +      /* Fall through.  */
>      case VexVVVV_SRC1:

This falling-through is odd and hence needs a better comment (then also
covering vprot*, which afaict is similarly affected). The reason for this
is the XOP.W-controlled operand swapping, if I'm not mistaken? In which
case perhaps instead of the fall-through here the logic swapping the
operands should replace VexVVVV_SRC2 by VexVVVV_SRC1?

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register
  2024-04-24  7:23 ` [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register Cui, Lili
@ 2024-04-24  8:19   ` Jan Beulich
  2024-04-24  8:27     ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-24  8:19 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 24.04.2024 09:23, Cui, Lili wrote:
> @@ -3427,9 +3427,9 @@ uwrmsr, 0xf3f8/0, APX_F(USER_MSR), Modrm|Vex128|VexMap7|EVex128|VexW0|NoSuf, { I
>  
>  // APX Push2/Pop2 instructions.
>  
> -push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> -push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> -pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> -pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>  
>  // APX Push2/Pop2 instructions end.

The latest now is the point where a comment wants adding here, owing to
the fact that PUSH2 has no destination operands, while POP2 has no
source ones. Following the common pattern "mnem %<src>,%<dst>" DstVVVV is
indeed appropriate here, but the anomaly for PUSH2 imo wants clarifying.
Okay with such a comment added.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register
  2024-04-24  8:19   ` Jan Beulich
@ 2024-04-24  8:27     ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2024-04-24  8:27 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 24.04.2024 10:19, Jan Beulich wrote:
> On 24.04.2024 09:23, Cui, Lili wrote:
>> @@ -3427,9 +3427,9 @@ uwrmsr, 0xf3f8/0, APX_F(USER_MSR), Modrm|Vex128|VexMap7|EVex128|VexW0|NoSuf, { I
>>  
>>  // APX Push2/Pop2 instructions.
>>  
>> -push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> -push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> -pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> -pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|Src1VVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> +push2, 0xff/6, APX_F, Modrm|VexW0|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> +push2p, 0xff/6, APX_F, Modrm|VexW1|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> +pop2, 0x8f/0, APX_F, Modrm|VexW0|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>> +pop2p, 0x8f/0, APX_F, Modrm|VexW1|EVexMap4|DstVVVV|ImplicitStackOp|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
>>  
>>  // APX Push2/Pop2 instructions end.
> 
> The latest now is the point where a comment wants adding here, owing to
> the fact that PUSH2 has no destination operands, while POP2 has no
> source ones. Following the common pattern "mnem %<src>,%<dst>" DstVVVV is
> indeed appropriate here, but the anomaly for PUSH2 imo wants clarifying.
> Okay with such a comment added.

And then everything touched here also wants leaving alone in patch 1.
There's no reason to touch stuff twice when once suffices. Plus it
eliminates the need to add respective comment for POP2 there, which
you'd then replace by said PUSH2-related comment here.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 1/3] x86: Use vexvvvv to encode the vvvv register
  2024-04-24  7:52   ` Jan Beulich
@ 2024-04-25 13:14     ` Cui, Lili
  2024-04-25 13:22       ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-25 13:14 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 24.04.2024 09:23, Cui, Lili wrote:
> > gas/ChangeLog:
> >
> >         * config/tc-i386.c (optimize_encoding): Replaced 1 with VexVVVV_SRC1.
> >         (build_modrm_byte): Used vexvvvv to encode the vvvv register.
> >         (s_insn): Replaced 1 with VexVVVV_SRC1.
> >
> > opcodes/ChangeLog:
> >
> >         * i386-opc.h (VexVVVV_DST): Adjusted the value.
> >         (VexVVVV_SRC1): New.
> >         * i386-opc.tbl: Replaced VexVVVV with VexVVVV_SRC1.
> 
> Not quite: You replace by Src1VVVV now.
> 

OK.

> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -5045,7 +5045,7 @@ optimize_encoding (void)
> >         */
> >        i.tm.opcode_space = SPACE_0F;
> >        i.tm.base_opcode = 0x6c;
> > -      i.tm.opcode_modifier.vexvvvv = 1;
> > +      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
> >
> >        ++i.operands;
> >        ++i.reg_operands;
> > @@ -10432,19 +10432,19 @@ build_modrm_byte (void)
> >  				     || i.encoding == encoding_evex));
> >      }
> >
> > -  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
> > +  switch (i.tm.opcode_modifier.vexvvvv)
> >      {
> > -      v = dest;
> > -      dest-- ;
> > -    }
> > -  else
> > -    {
> > -      for (v = source + 1; v < dest; ++v)
> > -	if (v != reg_slot)
> > -	  break;
> > -      if (v >= dest)
> > -	v = ~0;
> 
> Replacing this by ...
> 
> > -    }
> > +    case VexVVVV_SRC1:
> > +      v =  dest - 1;
> > +      break;
> 
> ... just this could do with a word of explanation in the (sadly once again empty)
> description. While I'm sure this tests out okay for you, it's not immediately clear
> why the loop can be replaced this easily. Whereas by the end of the series (with
> SwapSources dropped) this simplification is pretty obvious.
> 

SDM said "VEX.vvvv encodes the first source register operand". It should be "dest -1". The old logic did not check vexvvvv first, which made the logic here very complicated. I'll add some comments here.

> As to the deecription once again being empty: This is even more of an issue
> considering that the title suggests the patch isn't doing anything.
> After all we already use vexvvvv for the stated purpose.
> 

The title is "Use vexvvvv to encode the vvvv register", which means that use vexvvvv for the stated purpose.

> Finally with the reg_slot use gone here (assuming, as per above, that this is
> actually correct at this point), that variable wants to move into the more
> narrow scope it's now solely used in, and it wants to have its initializer dropped.

Ok.

> > @@ -10462,10 +10462,7 @@ build_modrm_byte (void)
> >      }
> >
> >    if (v < MAX_OPERANDS)
> > -    {
> > -      gas_assert (i.tm.opcode_modifier.vexvvvv);
> > -      i.vex.register_specifier = i.op[v].regs;
> > -    }
> > +    i.vex.register_specifier = i.op[v].regs;
> 
> I have to admit I'm not entirely convinced of it being a good idea to drop the
> assertion. At least as long as the SWAP_SOURCES logic is still there.
> 

Ok, I will move it to the patch 2/3.

> >  vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB2
> 56, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
> 
> Anything using SwapSources wants leaving alone in this patch. There's no point
> touching any of these twice (here and when subsequently you replace
> SwapSources).
> That'll also help to limit patch size a little.
> 

If we keep the old value VexVVVV here, we still need to deal with it in tc-i386.c and i386-opc.h, which is a bit strange.

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/3] x86: Use vexvvvv to encode the vvvv register
  2024-04-25 13:14     ` Cui, Lili
@ 2024-04-25 13:22       ` Jan Beulich
  2024-04-26  5:33         ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-25 13:22 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 25.04.2024 15:14, Cui, Lili wrote:
>> On 24.04.2024 09:23, Cui, Lili wrote:
>>> --- a/gas/config/tc-i386.c
>>> +++ b/gas/config/tc-i386.c
>>> @@ -5045,7 +5045,7 @@ optimize_encoding (void)
>>>         */
>>>        i.tm.opcode_space = SPACE_0F;
>>>        i.tm.base_opcode = 0x6c;
>>> -      i.tm.opcode_modifier.vexvvvv = 1;
>>> +      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
>>>
>>>        ++i.operands;
>>>        ++i.reg_operands;
>>> @@ -10432,19 +10432,19 @@ build_modrm_byte (void)
>>>  				     || i.encoding == encoding_evex));
>>>      }
>>>
>>> -  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
>>> +  switch (i.tm.opcode_modifier.vexvvvv)
>>>      {
>>> -      v = dest;
>>> -      dest-- ;
>>> -    }
>>> -  else
>>> -    {
>>> -      for (v = source + 1; v < dest; ++v)
>>> -	if (v != reg_slot)
>>> -	  break;
>>> -      if (v >= dest)
>>> -	v = ~0;
>>
>> Replacing this by ...
>>
>>> -    }
>>> +    case VexVVVV_SRC1:
>>> +      v =  dest - 1;
>>> +      break;
>>
>> ... just this could do with a word of explanation in the (sadly once again empty)
>> description. While I'm sure this tests out okay for you, it's not immediately clear
>> why the loop can be replaced this easily. Whereas by the end of the series (with
>> SwapSources dropped) this simplification is pretty obvious.
>>
> 
> SDM said "VEX.vvvv encodes the first source register operand". It should be "dest -1". The old logic did not check vexvvvv first, which made the logic here very complicated. I'll add some comments here.
> 
>> As to the deecription once again being empty: This is even more of an issue
>> considering that the title suggests the patch isn't doing anything.
>> After all we already use vexvvvv for the stated purpose.
> 
> The title is "Use vexvvvv to encode the vvvv register", which means that use vexvvvv for the stated purpose.

Well, yes and no: We did so before already, so the title suggests "no
change".

>>>  vpgatherqq, 0x6691, AVX2, Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB2
>> 56, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
>>
>> Anything using SwapSources wants leaving alone in this patch. There's no point
>> touching any of these twice (here and when subsequently you replace
>> SwapSources).
>> That'll also help to limit patch size a little.
>>
> 
> If we keep the old value VexVVVV here, we still need to deal with it in tc-i386.c and i386-opc.h, which is a bit strange.

VexVVVV without a value is the same as Src1VVVV, hence I don't think tc-i386.c
would be affected by leaving those in place.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 1/3] x86: Use vexvvvv to encode the vvvv register
  2024-04-25 13:22       ` Jan Beulich
@ 2024-04-26  5:33         ` Cui, Lili
  2024-04-26  6:52           ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-26  5:33 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> >>> --- a/gas/config/tc-i386.c
> >>> +++ b/gas/config/tc-i386.c
> >>> @@ -5045,7 +5045,7 @@ optimize_encoding (void)
> >>>         */
> >>>        i.tm.opcode_space = SPACE_0F;
> >>>        i.tm.base_opcode = 0x6c;
> >>> -      i.tm.opcode_modifier.vexvvvv = 1;
> >>> +      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
> >>>
> >>>        ++i.operands;
> >>>        ++i.reg_operands;
> >>> @@ -10432,19 +10432,19 @@ build_modrm_byte (void)
> >>>  				     || i.encoding == encoding_evex));
> >>>      }
> >>>
> >>> -  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
> >>> +  switch (i.tm.opcode_modifier.vexvvvv)
> >>>      {
> >>> -      v = dest;
> >>> -      dest-- ;
> >>> -    }
> >>> -  else
> >>> -    {
> >>> -      for (v = source + 1; v < dest; ++v)
> >>> -	if (v != reg_slot)
> >>> -	  break;
> >>> -      if (v >= dest)
> >>> -	v = ~0;
> >>
> >> Replacing this by ...
> >>
> >>> -    }
> >>> +    case VexVVVV_SRC1:
> >>> +      v =  dest - 1;
> >>> +      break;
> >>
> >> ... just this could do with a word of explanation in the (sadly once
> >> again empty) description. While I'm sure this tests out okay for you,
> >> it's not immediately clear why the loop can be replaced this easily.
> >> Whereas by the end of the series (with SwapSources dropped) this
> simplification is pretty obvious.
> >>
> >
> > SDM said "VEX.vvvv encodes the first source register operand". It should be
> "dest -1". The old logic did not check vexvvvv first, which made the logic here
> very complicated. I'll add some comments here.
> >
> >> As to the deecription once again being empty: This is even more of an
> >> issue considering that the title suggests the patch isn't doing anything.
> >> After all we already use vexvvvv for the stated purpose.
> >
> > The title is "Use vexvvvv to encode the vvvv register", which means that use
> vexvvvv for the stated purpose.
> 
> Well, yes and no: We did so before already, so the title suggests "no change".
> 

Do you want to split this patch 1/3 into two? Or move it to patch 2/3?

> >>>  vpgatherqq, 0x6691, AVX2,
> >>>
> Modrm|Vex256|Space0F38|Src1VVVV|VexW1|SwapSources|NoSuf|VecSIB2
> >> 56, { RegYMM, Qword|Unspecified|BaseIndex, RegYMM }
> >>
> >> Anything using SwapSources wants leaving alone in this patch. There's
> >> no point touching any of these twice (here and when subsequently you
> >> replace SwapSources).
> >> That'll also help to limit patch size a little.
> >>
> >
> > If we keep the old value VexVVVV here, we still need to deal with it in tc-
> i386.c and i386-opc.h, which is a bit strange.
> 
> VexVVVV without a value is the same as Src1VVVV, hence I don't think tc-
> i386.c would be affected by leaving those in place.
> 

OK.

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/3] x86: Use vexvvvv to encode the vvvv register
  2024-04-26  5:33         ` Cui, Lili
@ 2024-04-26  6:52           ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2024-04-26  6:52 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 26.04.2024 07:33, Cui, Lili wrote:
>>>>> --- a/gas/config/tc-i386.c
>>>>> +++ b/gas/config/tc-i386.c
>>>>> @@ -5045,7 +5045,7 @@ optimize_encoding (void)
>>>>>         */
>>>>>        i.tm.opcode_space = SPACE_0F;
>>>>>        i.tm.base_opcode = 0x6c;
>>>>> -      i.tm.opcode_modifier.vexvvvv = 1;
>>>>> +      i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
>>>>>
>>>>>        ++i.operands;
>>>>>        ++i.reg_operands;
>>>>> @@ -10432,19 +10432,19 @@ build_modrm_byte (void)
>>>>>  				     || i.encoding == encoding_evex));
>>>>>      }
>>>>>
>>>>> -  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
>>>>> +  switch (i.tm.opcode_modifier.vexvvvv)
>>>>>      {
>>>>> -      v = dest;
>>>>> -      dest-- ;
>>>>> -    }
>>>>> -  else
>>>>> -    {
>>>>> -      for (v = source + 1; v < dest; ++v)
>>>>> -	if (v != reg_slot)
>>>>> -	  break;
>>>>> -      if (v >= dest)
>>>>> -	v = ~0;
>>>>
>>>> Replacing this by ...
>>>>
>>>>> -    }
>>>>> +    case VexVVVV_SRC1:
>>>>> +      v =  dest - 1;
>>>>> +      break;
>>>>
>>>> ... just this could do with a word of explanation in the (sadly once
>>>> again empty) description. While I'm sure this tests out okay for you,
>>>> it's not immediately clear why the loop can be replaced this easily.
>>>> Whereas by the end of the series (with SwapSources dropped) this
>> simplification is pretty obvious.
>>>>
>>>
>>> SDM said "VEX.vvvv encodes the first source register operand". It should be
>> "dest -1". The old logic did not check vexvvvv first, which made the logic here
>> very complicated. I'll add some comments here.
>>>
>>>> As to the deecription once again being empty: This is even more of an
>>>> issue considering that the title suggests the patch isn't doing anything.
>>>> After all we already use vexvvvv for the stated purpose.
>>>
>>> The title is "Use vexvvvv to encode the vvvv register", which means that use
>> vexvvvv for the stated purpose.
>>
>> Well, yes and no: We did so before already, so the title suggests "no change".
> 
> Do you want to split this patch 1/3 into two? Or move it to patch 2/3?

No. I'm asking for the title to be adjusted to state what is changing,
rather than what has already been the case forever since the introduction
of the struct field. This may be as simple as inserting "more directly".

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/3] x86: Drop SwapSources
  2024-04-24  8:07   ` Jan Beulich
@ 2024-04-26  8:14     ` Cui, Lili
  2024-04-26 10:37       ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-26  8:14 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 24.04.2024 09:23, Cui, Lili wrote:
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
> >
> >    switch (i.tm.opcode_modifier.vexvvvv)
> >      {
> > +    case VexVVVV_SRC2:
> > +      if (source != op)
> > +	{
> > +	  v = source++;
> > +	  break;
> > +	}
> > +      /* For XOP: vpshl* and vpsha*.  */
> > +      /* Fall through.  */
> >      case VexVVVV_SRC1:
> 
> This falling-through is odd and hence needs a better comment (then also
> covering vprot*, which afaict is similarly affected). The reason for this is the
> XOP.W-controlled operand swapping, if I'm not mistaken? In which case
> perhaps instead of the fall-through here the logic swapping the operands
> should replace VexVVVV_SRC2 by VexVVVV_SRC1?
> 

Yes, vprot* should be included, and it is related to XOP.W-controlled operand swapping, the comments says " /* Only the first two register operands need reversing, alongside flipping VEX.W.  */ ",  But there is actually a memory operand, not two register operands.

I think VexVVVV_SRC2 makes more sense here, it matches the actual situation, we want to use vvvv to encode the first operand.

Opcode table:
vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }

testcase:
vprotb (%rax),%xmm12,%xmm15
vprotb %xmm15,(%r12),%xmm0

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-26  8:14     ` Cui, Lili
@ 2024-04-26 10:37       ` Jan Beulich
  2024-04-28  4:47         ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-26 10:37 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 26.04.2024 10:14, Cui, Lili wrote:
>> On 24.04.2024 09:23, Cui, Lili wrote:
>>> --- a/gas/config/tc-i386.c
>>> +++ b/gas/config/tc-i386.c
>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
>>>
>>>    switch (i.tm.opcode_modifier.vexvvvv)
>>>      {
>>> +    case VexVVVV_SRC2:
>>> +      if (source != op)
>>> +	{
>>> +	  v = source++;
>>> +	  break;
>>> +	}
>>> +      /* For XOP: vpshl* and vpsha*.  */
>>> +      /* Fall through.  */
>>>      case VexVVVV_SRC1:
>>
>> This falling-through is odd and hence needs a better comment (then also
>> covering vprot*, which afaict is similarly affected). The reason for this is the
>> XOP.W-controlled operand swapping, if I'm not mistaken? In which case
>> perhaps instead of the fall-through here the logic swapping the operands
>> should replace VexVVVV_SRC2 by VexVVVV_SRC1?
>>
> 
> Yes, vprot* should be included, and it is related to XOP.W-controlled operand swapping, the comments says " /* Only the first two register operands need reversing, alongside flipping VEX.W.  */ ",  But there is actually a memory operand, not two register operands.
> 
> I think VexVVVV_SRC2 makes more sense here, it matches the actual situation, we want to use vvvv to encode the first operand.
> 
> Opcode table:
> vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
> 
> testcase:
> vprotb (%rax),%xmm12,%xmm15
> vprotb %xmm15,(%r12),%xmm0

VexVVVV_Src2 is appropriate for the latter, yes, but not for the former. That
uses VexVVVV_Src1 layout. Hence my suggestion to replace the attribute when
swapping operands.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/3] x86: Drop SwapSources
  2024-04-26 10:37       ` Jan Beulich
@ 2024-04-28  4:47         ` Cui, Lili
  2024-04-29  6:40           ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-28  4:47 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 26.04.2024 10:14, Cui, Lili wrote:
> >> On 24.04.2024 09:23, Cui, Lili wrote:
> >>> --- a/gas/config/tc-i386.c
> >>> +++ b/gas/config/tc-i386.c
> >>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
> >>>
> >>>    switch (i.tm.opcode_modifier.vexvvvv)
> >>>      {
> >>> +    case VexVVVV_SRC2:
> >>> +      if (source != op)
> >>> +	{
> >>> +	  v = source++;
> >>> +	  break;
> >>> +	}
> >>> +      /* For XOP: vpshl* and vpsha*.  */
> >>> +      /* Fall through.  */
> >>>      case VexVVVV_SRC1:
> >>
> >> This falling-through is odd and hence needs a better comment (then
> >> also covering vprot*, which afaict is similarly affected). The reason
> >> for this is the XOP.W-controlled operand swapping, if I'm not
> >> mistaken? In which case perhaps instead of the fall-through here the
> >> logic swapping the operands should replace VexVVVV_SRC2 by
> VexVVVV_SRC1?
> >>
> >
> > Yes, vprot* should be included, and it is related to XOP.W-controlled
> operand swapping, the comments says " /* Only the first two register
> operands need reversing, alongside flipping VEX.W.  */ ",  But there is
> actually a memory operand, not two register operands.
> >
> > I think VexVVVV_SRC2 makes more sense here, it matches the actual
> situation, we want to use vvvv to encode the first operand.
> >
> > Opcode table:
> > vprot<xop>, 0x90 | <xop:opc>, XOP,
> > D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf, { RegXMM,
> > RegXMM|Unspecified|BaseIndex, RegXMM }
> >
> > testcase:
> > vprotb (%rax),%xmm12,%xmm15
> > vprotb %xmm15,(%r12),%xmm0
> 
> VexVVVV_Src2 is appropriate for the latter, yes, but not for the former. That
> uses VexVVVV_Src1 layout. Hence my suggestion to replace the attribute
> when swapping operands.
> 

If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we still need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under VexVVVV_SRC1 , and match_template also needs to be adjusted (I made a simple modification and it still failed, I think continuing like this may go against the original intention).

  switch (i.tm.opcode_modifier.vexvvvv)
    {
    /* VEX.vvvv encodes the first source register operand.  */
    case VexVVVV_SRC1:
      if (!is_cpu (&i.tm, CpuXOP) || source == op)
        {
          v =  dest - 1;
          break;
        }
    /* For XOP: vpshl*, vpsha* and vprot*.  */
    /* Fall through.  */
    /* VEX.vvvv encodes the last source register operand.  */
    case VexVVVV_SRC2:
      v = source++;
      break;
    /* VEX.vvvv encodes the destination register operand.  */
    case VexVVVV_DST:
      v = dest--;
      break;
    default:
      v = ~0;
      break;
     }

Do you think we should add a separate patch 4 for XOP that removes the special handling in match_template and completes its template? so we don't have to add special handling for src1vvvv or src2vvvv. This might go against your desire to reduce template size, but it would help simplify the logic. I'd like to know your thoughts.

  vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }

Thanks,
Lili.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-28  4:47         ` Cui, Lili
@ 2024-04-29  6:40           ` Jan Beulich
  2024-04-29 12:23             ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-29  6:40 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 28.04.2024 06:47, Cui, Lili wrote:
>> On 26.04.2024 10:14, Cui, Lili wrote:
>>>> On 24.04.2024 09:23, Cui, Lili wrote:
>>>>> --- a/gas/config/tc-i386.c
>>>>> +++ b/gas/config/tc-i386.c
>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
>>>>>
>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
>>>>>      {
>>>>> +    case VexVVVV_SRC2:
>>>>> +      if (source != op)
>>>>> +	{
>>>>> +	  v = source++;
>>>>> +	  break;
>>>>> +	}
>>>>> +      /* For XOP: vpshl* and vpsha*.  */
>>>>> +      /* Fall through.  */
>>>>>      case VexVVVV_SRC1:
>>>>
>>>> This falling-through is odd and hence needs a better comment (then
>>>> also covering vprot*, which afaict is similarly affected). The reason
>>>> for this is the XOP.W-controlled operand swapping, if I'm not
>>>> mistaken? In which case perhaps instead of the fall-through here the
>>>> logic swapping the operands should replace VexVVVV_SRC2 by
>> VexVVVV_SRC1?
>>>>
>>>
>>> Yes, vprot* should be included, and it is related to XOP.W-controlled
>> operand swapping, the comments says " /* Only the first two register
>> operands need reversing, alongside flipping VEX.W.  */ ",  But there is
>> actually a memory operand, not two register operands.
>>>
>>> I think VexVVVV_SRC2 makes more sense here, it matches the actual
>> situation, we want to use vvvv to encode the first operand.
>>>
>>> Opcode table:
>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf, { RegXMM,
>>> RegXMM|Unspecified|BaseIndex, RegXMM }
>>>
>>> testcase:
>>> vprotb (%rax),%xmm12,%xmm15
>>> vprotb %xmm15,(%r12),%xmm0
>>
>> VexVVVV_Src2 is appropriate for the latter, yes, but not for the former. That
>> uses VexVVVV_Src1 layout. Hence my suggestion to replace the attribute
>> when swapping operands.
>>
> 
> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we still need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under VexVVVV_SRC1 , and match_template also needs to be adjusted (I made a simple modification and it still failed, I think continuing like this may go against the original intention).
> 
>   switch (i.tm.opcode_modifier.vexvvvv)
>     {
>     /* VEX.vvvv encodes the first source register operand.  */
>     case VexVVVV_SRC1:
>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
>         {
>           v =  dest - 1;
>           break;
>         }
>     /* For XOP: vpshl*, vpsha* and vprot*.  */
>     /* Fall through.  */
>     /* VEX.vvvv encodes the last source register operand.  */
>     case VexVVVV_SRC2:
>       v = source++;
>       break;
>     /* VEX.vvvv encodes the destination register operand.  */
>     case VexVVVV_DST:
>       v = dest--;
>       break;
>     default:
>       v = ~0;
>       break;
>      }
> 
> Do you think we should add a separate patch 4 for XOP that removes the special handling in match_template and completes its template? so we don't have to add special handling for src1vvvv or src2vvvv. This might go against your desire to reduce template size, but it would help simplify the logic. I'd like to know your thoughts.

Indeed. You'd effectively revert earlier folding that I did. And the adjustment
I suggested earlier ought to be small/simple enough.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/3] x86: Drop SwapSources
  2024-04-29  6:40           ` Jan Beulich
@ 2024-04-29 12:23             ` Cui, Lili
  2024-04-29 13:08               ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-29 12:23 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 28.04.2024 06:47, Cui, Lili wrote:
> >> On 26.04.2024 10:14, Cui, Lili wrote:
> >>>> On 24.04.2024 09:23, Cui, Lili wrote:
> >>>>> --- a/gas/config/tc-i386.c
> >>>>> +++ b/gas/config/tc-i386.c
> >>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
> >>>>>
> >>>>>    switch (i.tm.opcode_modifier.vexvvvv)
> >>>>>      {
> >>>>> +    case VexVVVV_SRC2:
> >>>>> +      if (source != op)
> >>>>> +	{
> >>>>> +	  v = source++;
> >>>>> +	  break;
> >>>>> +	}
> >>>>> +      /* For XOP: vpshl* and vpsha*.  */
> >>>>> +      /* Fall through.  */
> >>>>>      case VexVVVV_SRC1:
> >>>>
> >>>> This falling-through is odd and hence needs a better comment (then
> >>>> also covering vprot*, which afaict is similarly affected). The
> >>>> reason for this is the XOP.W-controlled operand swapping, if I'm
> >>>> not mistaken? In which case perhaps instead of the fall-through
> >>>> here the logic swapping the operands should replace VexVVVV_SRC2 by
> >> VexVVVV_SRC1?
> >>>>
> >>>
> >>> Yes, vprot* should be included, and it is related to
> >>> XOP.W-controlled
> >> operand swapping, the comments says " /* Only the first two register
> >> operands need reversing, alongside flipping VEX.W.  */ ",  But there
> >> is actually a memory operand, not two register operands.
> >>>
> >>> I think VexVVVV_SRC2 makes more sense here, it matches the actual
> >> situation, we want to use vvvv to encode the first operand.
> >>>
> >>> Opcode table:
> >>> vprot<xop>, 0x90 | <xop:opc>, XOP,
> >>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
> { RegXMM,
> >>> RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>
> >>> testcase:
> >>> vprotb (%rax),%xmm12,%xmm15
> >>> vprotb %xmm15,(%r12),%xmm0
> >>
> >> VexVVVV_Src2 is appropriate for the latter, yes, but not for the
> >> former. That uses VexVVVV_Src1 layout. Hence my suggestion to replace
> >> the attribute when swapping operands.
> >>
> >
> > If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping
> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we still
> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
> VexVVVV_SRC1 , and match_template also needs to be adjusted (I made a
> simple modification and it still failed, I think continuing like this may go
> against the original intention).
> >
> >   switch (i.tm.opcode_modifier.vexvvvv)
> >     {
> >     /* VEX.vvvv encodes the first source register operand.  */
> >     case VexVVVV_SRC1:
> >       if (!is_cpu (&i.tm, CpuXOP) || source == op)
> >         {
> >           v =  dest - 1;
> >           break;
> >         }
> >     /* For XOP: vpshl*, vpsha* and vprot*.  */
> >     /* Fall through.  */
> >     /* VEX.vvvv encodes the last source register operand.  */
> >     case VexVVVV_SRC2:
> >       v = source++;
> >       break;
> >     /* VEX.vvvv encodes the destination register operand.  */
> >     case VexVVVV_DST:
> >       v = dest--;
> >       break;
> >     default:
> >       v = ~0;
> >       break;
> >      }
> >
> > Do you think we should add a separate patch 4 for XOP that removes the
> special handling in match_template and completes its template? so we don't
> have to add special handling for src1vvvv or src2vvvv. This might go against
> your desire to reduce template size, but it would help simplify the logic. I'd
> like to know your thoughts.
> 
> Indeed. You'd effectively revert earlier folding that I did. And the adjustment I
> suggested earlier ought to be small/simple enough.
> 

So, I continued working on the previous suggestion. With the following modification and it worked.

@@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
                          || is_cpu (t, CpuAPX_F));
              if (!operand_type_match (overlap0, i.types[0])
                  || !operand_type_match (overlap1, i.types[j])
-                 || (t->operands == 3
+                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
                      && !operand_type_match (overlap2, i.types[1]))
                  || (check_register
                      && !operand_type_register_match (i.types[0],

But I found that there are 4 test files that failed, I didn't find the doc on how to encode vprotb but I guess that is because I changed the default template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all the related test cases needed to be modified. Do you have any comments here? 

regexp "^[      ]*[a-f0-9]+:    8f e9 40 90 d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-29 12:23             ` Cui, Lili
@ 2024-04-29 13:08               ` Jan Beulich
  2024-04-29 13:41                 ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-29 13:08 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 29.04.2024 14:23, Cui, Lili wrote:
>> On 28.04.2024 06:47, Cui, Lili wrote:
>>>> On 26.04.2024 10:14, Cui, Lili wrote:
>>>>>> On 24.04.2024 09:23, Cui, Lili wrote:
>>>>>>> --- a/gas/config/tc-i386.c
>>>>>>> +++ b/gas/config/tc-i386.c
>>>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
>>>>>>>
>>>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
>>>>>>>      {
>>>>>>> +    case VexVVVV_SRC2:
>>>>>>> +      if (source != op)
>>>>>>> +	{
>>>>>>> +	  v = source++;
>>>>>>> +	  break;
>>>>>>> +	}
>>>>>>> +      /* For XOP: vpshl* and vpsha*.  */
>>>>>>> +      /* Fall through.  */
>>>>>>>      case VexVVVV_SRC1:
>>>>>>
>>>>>> This falling-through is odd and hence needs a better comment (then
>>>>>> also covering vprot*, which afaict is similarly affected). The
>>>>>> reason for this is the XOP.W-controlled operand swapping, if I'm
>>>>>> not mistaken? In which case perhaps instead of the fall-through
>>>>>> here the logic swapping the operands should replace VexVVVV_SRC2 by
>>>> VexVVVV_SRC1?
>>>>>>
>>>>>
>>>>> Yes, vprot* should be included, and it is related to
>>>>> XOP.W-controlled
>>>> operand swapping, the comments says " /* Only the first two register
>>>> operands need reversing, alongside flipping VEX.W.  */ ",  But there
>>>> is actually a memory operand, not two register operands.
>>>>>
>>>>> I think VexVVVV_SRC2 makes more sense here, it matches the actual
>>>> situation, we want to use vvvv to encode the first operand.
>>>>>
>>>>> Opcode table:
>>>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
>>>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
>> { RegXMM,
>>>>> RegXMM|Unspecified|BaseIndex, RegXMM }
>>>>>
>>>>> testcase:
>>>>> vprotb (%rax),%xmm12,%xmm15
>>>>> vprotb %xmm15,(%r12),%xmm0
>>>>
>>>> VexVVVV_Src2 is appropriate for the latter, yes, but not for the
>>>> former. That uses VexVVVV_Src1 layout. Hence my suggestion to replace
>>>> the attribute when swapping operands.
>>>>
>>>
>>> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping
>> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we still
>> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
>> VexVVVV_SRC1 , and match_template also needs to be adjusted (I made a
>> simple modification and it still failed, I think continuing like this may go
>> against the original intention).
>>>
>>>   switch (i.tm.opcode_modifier.vexvvvv)
>>>     {
>>>     /* VEX.vvvv encodes the first source register operand.  */
>>>     case VexVVVV_SRC1:
>>>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
>>>         {
>>>           v =  dest - 1;
>>>           break;
>>>         }
>>>     /* For XOP: vpshl*, vpsha* and vprot*.  */
>>>     /* Fall through.  */
>>>     /* VEX.vvvv encodes the last source register operand.  */
>>>     case VexVVVV_SRC2:
>>>       v = source++;
>>>       break;
>>>     /* VEX.vvvv encodes the destination register operand.  */
>>>     case VexVVVV_DST:
>>>       v = dest--;
>>>       break;
>>>     default:
>>>       v = ~0;
>>>       break;
>>>      }
>>>
>>> Do you think we should add a separate patch 4 for XOP that removes the
>> special handling in match_template and completes its template? so we don't
>> have to add special handling for src1vvvv or src2vvvv. This might go against
>> your desire to reduce template size, but it would help simplify the logic. I'd
>> like to know your thoughts.
>>
>> Indeed. You'd effectively revert earlier folding that I did. And the adjustment I
>> suggested earlier ought to be small/simple enough.
>>
> 
> So, I continued working on the previous suggestion. With the following modification and it worked.
> 
> @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
>                           || is_cpu (t, CpuAPX_F));
>               if (!operand_type_match (overlap0, i.types[0])
>                   || !operand_type_match (overlap1, i.types[j])
> -                 || (t->operands == 3
> +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
>                       && !operand_type_match (overlap2, i.types[1]))
>                   || (check_register
>                       && !operand_type_register_match (i.types[0],

Just to mention it - this certainly isn't what I suggested. In fact I seem to
vaguely recall that something similar was once proposed during the original
APX work as well, where I then objected, too.

> But I found that there are 4 test files that failed, I didn't find the doc on how to encode vprotb but I guess that is because I changed the default template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all the related test cases needed to be modified. Do you have any comments here? 
> 
> regexp "^[      ]*[a-f0-9]+:    8f e9 40 90 d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
> line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"

Well, while changing the templates is in principle possible, and the
resulting code would still be correct, changing encodings it usually not a
good idea. Thus when it can be avoided, it should be avoided, imo. Hence
why I didn't suggest this, but to amend the code doing the operand swapping
(for the case where operand order is controlled by XOP.W).

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/3] x86: Drop SwapSources
  2024-04-29 13:08               ` Jan Beulich
@ 2024-04-29 13:41                 ` Cui, Lili
  2024-04-29 13:49                   ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-29 13:41 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 29.04.2024 14:23, Cui, Lili wrote:
> >> On 28.04.2024 06:47, Cui, Lili wrote:
> >>>> On 26.04.2024 10:14, Cui, Lili wrote:
> >>>>>> On 24.04.2024 09:23, Cui, Lili wrote:
> >>>>>>> --- a/gas/config/tc-i386.c
> >>>>>>> +++ b/gas/config/tc-i386.c
> >>>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
> >>>>>>>
> >>>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
> >>>>>>>      {
> >>>>>>> +    case VexVVVV_SRC2:
> >>>>>>> +      if (source != op)
> >>>>>>> +	{
> >>>>>>> +	  v = source++;
> >>>>>>> +	  break;
> >>>>>>> +	}
> >>>>>>> +      /* For XOP: vpshl* and vpsha*.  */
> >>>>>>> +      /* Fall through.  */
> >>>>>>>      case VexVVVV_SRC1:
> >>>>>>
> >>>>>> This falling-through is odd and hence needs a better comment
> >>>>>> (then also covering vprot*, which afaict is similarly affected).
> >>>>>> The reason for this is the XOP.W-controlled operand swapping, if
> >>>>>> I'm not mistaken? In which case perhaps instead of the
> >>>>>> fall-through here the logic swapping the operands should replace
> >>>>>> VexVVVV_SRC2 by
> >>>> VexVVVV_SRC1?
> >>>>>>
> >>>>>
> >>>>> Yes, vprot* should be included, and it is related to
> >>>>> XOP.W-controlled
> >>>> operand swapping, the comments says " /* Only the first two
> >>>> register operands need reversing, alongside flipping VEX.W.  */ ",
> >>>> But there is actually a memory operand, not two register operands.
> >>>>>
> >>>>> I think VexVVVV_SRC2 makes more sense here, it matches the actual
> >>>> situation, we want to use vvvv to encode the first operand.
> >>>>>
> >>>>> Opcode table:
> >>>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
> >>>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
> >> { RegXMM,
> >>>>> RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>>>
> >>>>> testcase:
> >>>>> vprotb (%rax),%xmm12,%xmm15
> >>>>> vprotb %xmm15,(%r12),%xmm0
> >>>>
> >>>> VexVVVV_Src2 is appropriate for the latter, yes, but not for the
> >>>> former. That uses VexVVVV_Src1 layout. Hence my suggestion to
> >>>> replace the attribute when swapping operands.
> >>>>
> >>>
> >>> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping
> >> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we still
> >> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
> >> VexVVVV_SRC1 , and match_template also needs to be adjusted (I made a
> >> simple modification and it still failed, I think continuing like this
> >> may go against the original intention).
> >>>
> >>>   switch (i.tm.opcode_modifier.vexvvvv)
> >>>     {
> >>>     /* VEX.vvvv encodes the first source register operand.  */
> >>>     case VexVVVV_SRC1:
> >>>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
> >>>         {
> >>>           v =  dest - 1;
> >>>           break;
> >>>         }
> >>>     /* For XOP: vpshl*, vpsha* and vprot*.  */
> >>>     /* Fall through.  */
> >>>     /* VEX.vvvv encodes the last source register operand.  */
> >>>     case VexVVVV_SRC2:
> >>>       v = source++;
> >>>       break;
> >>>     /* VEX.vvvv encodes the destination register operand.  */
> >>>     case VexVVVV_DST:
> >>>       v = dest--;
> >>>       break;
> >>>     default:
> >>>       v = ~0;
> >>>       break;
> >>>      }
> >>>
> >>> Do you think we should add a separate patch 4 for XOP that removes
> >>> the
> >> special handling in match_template and completes its template? so we
> >> don't have to add special handling for src1vvvv or src2vvvv. This
> >> might go against your desire to reduce template size, but it would
> >> help simplify the logic. I'd like to know your thoughts.
> >>
> >> Indeed. You'd effectively revert earlier folding that I did. And the
> >> adjustment I suggested earlier ought to be small/simple enough.
> >>
> >
> > So, I continued working on the previous suggestion. With the following
> modification and it worked.
> >
> > @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
> >                           || is_cpu (t, CpuAPX_F));
> >               if (!operand_type_match (overlap0, i.types[0])
> >                   || !operand_type_match (overlap1, i.types[j])
> > -                 || (t->operands == 3
> > +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
> >                       && !operand_type_match (overlap2, i.types[1]))
> >                   || (check_register
> >                       && !operand_type_register_match (i.types[0],
> 
> Just to mention it - this certainly isn't what I suggested. In fact I seem to
> vaguely recall that something similar was once proposed during the original
> APX work as well, where I then objected, too.
> 
> > But I found that there are 4 test files that failed, I didn't find the doc on
> how to encode vprotb but I guess that is because I changed the default
> template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all the related
> test cases needed to be modified. Do you have any comments here?
> >
> > regexp "^[      ]*[a-f0-9]+:    8f e9 40 90
> d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
> > line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"
> 
> Well, while changing the templates is in principle possible, and the resulting
> code would still be correct, changing encodings it usually not a good idea.
> Thus when it can be avoided, it should be avoided, imo. Hence why I didn't
> suggest this, but to amend the code doing the operand swapping (for the
> case where operand order is controlled by XOP.W).
> 

I mistakenly thought this was what you wanted, and I also think this modification is unacceptable. Could you elaborate further on your original suggestion?

Thanks,
Lili.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-29 13:41                 ` Cui, Lili
@ 2024-04-29 13:49                   ` Jan Beulich
  2024-04-30  2:56                     ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-29 13:49 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 29.04.2024 15:41, Cui, Lili wrote:
>> On 29.04.2024 14:23, Cui, Lili wrote:
>>>> On 28.04.2024 06:47, Cui, Lili wrote:
>>>>>> On 26.04.2024 10:14, Cui, Lili wrote:
>>>>>>>> On 24.04.2024 09:23, Cui, Lili wrote:
>>>>>>>>> --- a/gas/config/tc-i386.c
>>>>>>>>> +++ b/gas/config/tc-i386.c
>>>>>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
>>>>>>>>>
>>>>>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
>>>>>>>>>      {
>>>>>>>>> +    case VexVVVV_SRC2:
>>>>>>>>> +      if (source != op)
>>>>>>>>> +	{
>>>>>>>>> +	  v = source++;
>>>>>>>>> +	  break;
>>>>>>>>> +	}
>>>>>>>>> +      /* For XOP: vpshl* and vpsha*.  */
>>>>>>>>> +      /* Fall through.  */
>>>>>>>>>      case VexVVVV_SRC1:
>>>>>>>>
>>>>>>>> This falling-through is odd and hence needs a better comment
>>>>>>>> (then also covering vprot*, which afaict is similarly affected).
>>>>>>>> The reason for this is the XOP.W-controlled operand swapping, if
>>>>>>>> I'm not mistaken? In which case perhaps instead of the
>>>>>>>> fall-through here the logic swapping the operands should replace
>>>>>>>> VexVVVV_SRC2 by
>>>>>> VexVVVV_SRC1?
>>>>>>>>
>>>>>>>
>>>>>>> Yes, vprot* should be included, and it is related to
>>>>>>> XOP.W-controlled
>>>>>> operand swapping, the comments says " /* Only the first two
>>>>>> register operands need reversing, alongside flipping VEX.W.  */ ",
>>>>>> But there is actually a memory operand, not two register operands.
>>>>>>>
>>>>>>> I think VexVVVV_SRC2 makes more sense here, it matches the actual
>>>>>> situation, we want to use vvvv to encode the first operand.
>>>>>>>
>>>>>>> Opcode table:
>>>>>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
>>>>>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
>>>> { RegXMM,
>>>>>>> RegXMM|Unspecified|BaseIndex, RegXMM }
>>>>>>>
>>>>>>> testcase:
>>>>>>> vprotb (%rax),%xmm12,%xmm15
>>>>>>> vprotb %xmm15,(%r12),%xmm0
>>>>>>
>>>>>> VexVVVV_Src2 is appropriate for the latter, yes, but not for the
>>>>>> former. That uses VexVVVV_Src1 layout. Hence my suggestion to
>>>>>> replace the attribute when swapping operands.
>>>>>>
>>>>>
>>>>> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping
>>>> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we still
>>>> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
>>>> VexVVVV_SRC1 , and match_template also needs to be adjusted (I made a
>>>> simple modification and it still failed, I think continuing like this
>>>> may go against the original intention).
>>>>>
>>>>>   switch (i.tm.opcode_modifier.vexvvvv)
>>>>>     {
>>>>>     /* VEX.vvvv encodes the first source register operand.  */
>>>>>     case VexVVVV_SRC1:
>>>>>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
>>>>>         {
>>>>>           v =  dest - 1;
>>>>>           break;
>>>>>         }
>>>>>     /* For XOP: vpshl*, vpsha* and vprot*.  */
>>>>>     /* Fall through.  */
>>>>>     /* VEX.vvvv encodes the last source register operand.  */
>>>>>     case VexVVVV_SRC2:
>>>>>       v = source++;
>>>>>       break;
>>>>>     /* VEX.vvvv encodes the destination register operand.  */
>>>>>     case VexVVVV_DST:
>>>>>       v = dest--;
>>>>>       break;
>>>>>     default:
>>>>>       v = ~0;
>>>>>       break;
>>>>>      }
>>>>>
>>>>> Do you think we should add a separate patch 4 for XOP that removes
>>>>> the
>>>> special handling in match_template and completes its template? so we
>>>> don't have to add special handling for src1vvvv or src2vvvv. This
>>>> might go against your desire to reduce template size, but it would
>>>> help simplify the logic. I'd like to know your thoughts.
>>>>
>>>> Indeed. You'd effectively revert earlier folding that I did. And the
>>>> adjustment I suggested earlier ought to be small/simple enough.
>>>>
>>>
>>> So, I continued working on the previous suggestion. With the following
>> modification and it worked.
>>>
>>> @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
>>>                           || is_cpu (t, CpuAPX_F));
>>>               if (!operand_type_match (overlap0, i.types[0])
>>>                   || !operand_type_match (overlap1, i.types[j])
>>> -                 || (t->operands == 3
>>> +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
>>>                       && !operand_type_match (overlap2, i.types[1]))
>>>                   || (check_register
>>>                       && !operand_type_register_match (i.types[0],
>>
>> Just to mention it - this certainly isn't what I suggested. In fact I seem to
>> vaguely recall that something similar was once proposed during the original
>> APX work as well, where I then objected, too.
>>
>>> But I found that there are 4 test files that failed, I didn't find the doc on
>> how to encode vprotb but I guess that is because I changed the default
>> template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all the related
>> test cases needed to be modified. Do you have any comments here?
>>>
>>> regexp "^[      ]*[a-f0-9]+:    8f e9 40 90
>> d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
>>> line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"
>>
>> Well, while changing the templates is in principle possible, and the resulting
>> code would still be correct, changing encodings it usually not a good idea.
>> Thus when it can be avoided, it should be avoided, imo. Hence why I didn't
>> suggest this, but to amend the code doing the operand swapping (for the
>> case where operand order is controlled by XOP.W).
>>
> 
> I mistakenly thought this was what you wanted, and I also think this modification is unacceptable. Could you elaborate further on your original suggestion?

At the bottom of match_template() we have

    case Opcode_VexW:
      /* Only the first two register operands need reversing, alongside
	 flipping VEX.W.  */
      i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;

This is where I think you further want to adjust i.tm.opcode_modifier.vexvvvv.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/3] x86: Drop SwapSources
  2024-04-29 13:49                   ` Jan Beulich
@ 2024-04-30  2:56                     ` Cui, Lili
  2024-04-30  6:18                       ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-30  2:56 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 29.04.2024 15:41, Cui, Lili wrote:
> >> On 29.04.2024 14:23, Cui, Lili wrote:
> >>>> On 28.04.2024 06:47, Cui, Lili wrote:
> >>>>>> On 26.04.2024 10:14, Cui, Lili wrote:
> >>>>>>>> On 24.04.2024 09:23, Cui, Lili wrote:
> >>>>>>>>> --- a/gas/config/tc-i386.c
> >>>>>>>>> +++ b/gas/config/tc-i386.c
> >>>>>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
> >>>>>>>>>
> >>>>>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
> >>>>>>>>>      {
> >>>>>>>>> +    case VexVVVV_SRC2:
> >>>>>>>>> +      if (source != op)
> >>>>>>>>> +	{
> >>>>>>>>> +	  v = source++;
> >>>>>>>>> +	  break;
> >>>>>>>>> +	}
> >>>>>>>>> +      /* For XOP: vpshl* and vpsha*.  */
> >>>>>>>>> +      /* Fall through.  */
> >>>>>>>>>      case VexVVVV_SRC1:
> >>>>>>>>
> >>>>>>>> This falling-through is odd and hence needs a better comment
> >>>>>>>> (then also covering vprot*, which afaict is similarly affected).
> >>>>>>>> The reason for this is the XOP.W-controlled operand swapping,
> >>>>>>>> if I'm not mistaken? In which case perhaps instead of the
> >>>>>>>> fall-through here the logic swapping the operands should
> >>>>>>>> replace
> >>>>>>>> VexVVVV_SRC2 by
> >>>>>> VexVVVV_SRC1?
> >>>>>>>>
> >>>>>>>
> >>>>>>> Yes, vprot* should be included, and it is related to
> >>>>>>> XOP.W-controlled
> >>>>>> operand swapping, the comments says " /* Only the first two
> >>>>>> register operands need reversing, alongside flipping VEX.W.  */
> >>>>>> ", But there is actually a memory operand, not two register operands.
> >>>>>>>
> >>>>>>> I think VexVVVV_SRC2 makes more sense here, it matches the
> >>>>>>> actual
> >>>>>> situation, we want to use vvvv to encode the first operand.
> >>>>>>>
> >>>>>>> Opcode table:
> >>>>>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
> >>>>>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
> >>>> { RegXMM,
> >>>>>>> RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>>>>>
> >>>>>>> testcase:
> >>>>>>> vprotb (%rax),%xmm12,%xmm15
> >>>>>>> vprotb %xmm15,(%r12),%xmm0
> >>>>>>
> >>>>>> VexVVVV_Src2 is appropriate for the latter, yes, but not for the
> >>>>>> former. That uses VexVVVV_Src1 layout. Hence my suggestion to
> >>>>>> replace the attribute when swapping operands.
> >>>>>>
> >>>>>
> >>>>> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping
> >>>> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we
> still
> >>>> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
> >>>> VexVVVV_SRC1 , and match_template also needs to be adjusted (I
> made
> >>>> a simple modification and it still failed, I think continuing like
> >>>> this may go against the original intention).
> >>>>>
> >>>>>   switch (i.tm.opcode_modifier.vexvvvv)
> >>>>>     {
> >>>>>     /* VEX.vvvv encodes the first source register operand.  */
> >>>>>     case VexVVVV_SRC1:
> >>>>>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
> >>>>>         {
> >>>>>           v =  dest - 1;
> >>>>>           break;
> >>>>>         }
> >>>>>     /* For XOP: vpshl*, vpsha* and vprot*.  */
> >>>>>     /* Fall through.  */
> >>>>>     /* VEX.vvvv encodes the last source register operand.  */
> >>>>>     case VexVVVV_SRC2:
> >>>>>       v = source++;
> >>>>>       break;
> >>>>>     /* VEX.vvvv encodes the destination register operand.  */
> >>>>>     case VexVVVV_DST:
> >>>>>       v = dest--;
> >>>>>       break;
> >>>>>     default:
> >>>>>       v = ~0;
> >>>>>       break;
> >>>>>      }
> >>>>>
> >>>>> Do you think we should add a separate patch 4 for XOP that removes
> >>>>> the
> >>>> special handling in match_template and completes its template? so
> >>>> we don't have to add special handling for src1vvvv or src2vvvv.
> >>>> This might go against your desire to reduce template size, but it
> >>>> would help simplify the logic. I'd like to know your thoughts.
> >>>>
> >>>> Indeed. You'd effectively revert earlier folding that I did. And
> >>>> the adjustment I suggested earlier ought to be small/simple enough.
> >>>>
> >>>
> >>> So, I continued working on the previous suggestion. With the
> >>> following
> >> modification and it worked.
> >>>
> >>> @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
> >>>                           || is_cpu (t, CpuAPX_F));
> >>>               if (!operand_type_match (overlap0, i.types[0])
> >>>                   || !operand_type_match (overlap1, i.types[j])
> >>> -                 || (t->operands == 3
> >>> +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
> >>>                       && !operand_type_match (overlap2, i.types[1]))
> >>>                   || (check_register
> >>>                       && !operand_type_register_match (i.types[0],
> >>
> >> Just to mention it - this certainly isn't what I suggested. In fact I
> >> seem to vaguely recall that something similar was once proposed
> >> during the original APX work as well, where I then objected, too.
> >>
> >>> But I found that there are 4 test files that failed, I didn't find
> >>> the doc on
> >> how to encode vprotb but I guess that is because I changed the
> >> default template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all
> >> the related test cases needed to be modified. Do you have any comments
> here?
> >>>
> >>> regexp "^[      ]*[a-f0-9]+:    8f e9 40 90
> >> d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
> >>> line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"
> >>
> >> Well, while changing the templates is in principle possible, and the
> >> resulting code would still be correct, changing encodings it usually not a
> good idea.
> >> Thus when it can be avoided, it should be avoided, imo. Hence why I
> >> didn't suggest this, but to amend the code doing the operand swapping
> >> (for the case where operand order is controlled by XOP.W).
> >>
> >
> > I mistakenly thought this was what you wanted, and I also think this
> modification is unacceptable. Could you elaborate further on your original
> suggestion?
> 
> At the bottom of match_template() we have
> 
>     case Opcode_VexW:
>       /* Only the first two register operands need reversing, alongside
> 	 flipping VEX.W.  */
>       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
> 
> This is where I think you further want to adjust
> i.tm.opcode_modifier.vexvvvv.
> 

"vprotb %xmm7,%xmm0,%xmm3" doesn't go through this place, it finds the matching template directly (Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }). I inserted a judgment to solve this problem, it's a bit ugly. I'd like to abandon this optimization. 

To avoid confusion, I post all the changes.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
                          || is_cpu (t, CpuAPX_F));
              if (!operand_type_match (overlap0, i.types[0])
                  || !operand_type_match (overlap1, i.types[j])
-                 || (t->operands == 3
+                 || (t->operands == 3 && !is_cpu (t, CpuXOP)     --> This place also needs to change.
                      && !operand_type_match (overlap2, i.types[1]))
                  || (check_register
                      && !operand_type_register_match (i.types[0],
@@ -9035,6 +9035,10 @@ match_template (char mnem_suffix)
                      specific_error = progress (i.error);
                      continue;
                    }
+                 if (is_cpu (t, CpuXOP) && operand_types[0].bitfield.baseindex   --> It's ugly, and still has some test cases failing for other XOP instructions.
+                     && i.types[0].bitfield.class == RegSIMD
+                     && t->opcode_modifier.vexw == VEXW1)
+                   found_reverse_match = Opcode_VexW;
                  break;
                }
            } 
@@ -10434,19 +10434,21 @@ build_modrm_byte (void)
   switch (i.tm.opcode_modifier.vexvvvv)
     {
-    /* VEX.vvvv encodes the first source register operand.  */
-    case VexVVVV_SRC2:
-      if (source != op)
+      /* VEX.vvvv encodes the first source register operand.  */
+    case VexVVVV_SRC1:
+      if (!is_cpu (&i.tm, CpuXOP) || op == 0)     --> changed src1vvvv to src2vvvv of " vprotb %xmm7,%xmm0,%xmm3".
        {
-         v = source++;
+         v =  dest - 1;
          break;
        }
-      /* For XOP: vpshl* and vpsha*.  */
+      /* For XOP: vpshl*, vpsha* and vprot*.  */
       /* Fall through.  */
-    case VexVVVV_SRC1:
-      v =  dest - 1;
+      /* VEX.vvvv encodes the last source register operand.  */
+    case VexVVVV_SRC2:
+      v = source++;
       break;
-    /* VEX.vvvv encodes the destination register operand.  */
+
+      /* VEX.vvvv encodes the destination register operand.  */
     case VexVVVV_DST:
       v = dest--;
       break;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 5d6f1c74449..5eef1cabd8c 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1938,10 +1938,10 @@ vpmacsww, 0x95, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, Reg
 vpmadcsswd, 0xa6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpmadcswd, 0xb6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpperm, 0xa3, XOP, D|Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
-vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
 vprot<xop>, 0xc0 | <xop:opc>, XOP, Modrm|Vex128|SpaceXOP08|VexW0|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
-vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
+vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
+vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }

Thanks,
Lili.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-30  2:56                     ` Cui, Lili
@ 2024-04-30  6:18                       ` Jan Beulich
  2024-04-30  7:34                         ` Cui, Lili
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-04-30  6:18 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 30.04.2024 04:56, Cui, Lili wrote:
>> On 29.04.2024 15:41, Cui, Lili wrote:
>>>> On 29.04.2024 14:23, Cui, Lili wrote:
>>>>>> On 28.04.2024 06:47, Cui, Lili wrote:
>>>>>>>> On 26.04.2024 10:14, Cui, Lili wrote:
>>>>>>>>>> On 24.04.2024 09:23, Cui, Lili wrote:
>>>>>>>>>>> --- a/gas/config/tc-i386.c
>>>>>>>>>>> +++ b/gas/config/tc-i386.c
>>>>>>>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
>>>>>>>>>>>
>>>>>>>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
>>>>>>>>>>>      {
>>>>>>>>>>> +    case VexVVVV_SRC2:
>>>>>>>>>>> +      if (source != op)
>>>>>>>>>>> +	{
>>>>>>>>>>> +	  v = source++;
>>>>>>>>>>> +	  break;
>>>>>>>>>>> +	}
>>>>>>>>>>> +      /* For XOP: vpshl* and vpsha*.  */
>>>>>>>>>>> +      /* Fall through.  */
>>>>>>>>>>>      case VexVVVV_SRC1:
>>>>>>>>>>
>>>>>>>>>> This falling-through is odd and hence needs a better comment
>>>>>>>>>> (then also covering vprot*, which afaict is similarly affected).
>>>>>>>>>> The reason for this is the XOP.W-controlled operand swapping,
>>>>>>>>>> if I'm not mistaken? In which case perhaps instead of the
>>>>>>>>>> fall-through here the logic swapping the operands should
>>>>>>>>>> replace
>>>>>>>>>> VexVVVV_SRC2 by
>>>>>>>> VexVVVV_SRC1?
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Yes, vprot* should be included, and it is related to
>>>>>>>>> XOP.W-controlled
>>>>>>>> operand swapping, the comments says " /* Only the first two
>>>>>>>> register operands need reversing, alongside flipping VEX.W.  */
>>>>>>>> ", But there is actually a memory operand, not two register operands.
>>>>>>>>>
>>>>>>>>> I think VexVVVV_SRC2 makes more sense here, it matches the
>>>>>>>>> actual
>>>>>>>> situation, we want to use vvvv to encode the first operand.
>>>>>>>>>
>>>>>>>>> Opcode table:
>>>>>>>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
>>>>>>>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
>>>>>> { RegXMM,
>>>>>>>>> RegXMM|Unspecified|BaseIndex, RegXMM }
>>>>>>>>>
>>>>>>>>> testcase:
>>>>>>>>> vprotb (%rax),%xmm12,%xmm15
>>>>>>>>> vprotb %xmm15,(%r12),%xmm0
>>>>>>>>
>>>>>>>> VexVVVV_Src2 is appropriate for the latter, yes, but not for the
>>>>>>>> former. That uses VexVVVV_Src1 layout. Hence my suggestion to
>>>>>>>> replace the attribute when swapping operands.
>>>>>>>>
>>>>>>>
>>>>>>> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and swapping
>>>>>> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we
>> still
>>>>>> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
>>>>>> VexVVVV_SRC1 , and match_template also needs to be adjusted (I
>> made
>>>>>> a simple modification and it still failed, I think continuing like
>>>>>> this may go against the original intention).
>>>>>>>
>>>>>>>   switch (i.tm.opcode_modifier.vexvvvv)
>>>>>>>     {
>>>>>>>     /* VEX.vvvv encodes the first source register operand.  */
>>>>>>>     case VexVVVV_SRC1:
>>>>>>>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
>>>>>>>         {
>>>>>>>           v =  dest - 1;
>>>>>>>           break;
>>>>>>>         }
>>>>>>>     /* For XOP: vpshl*, vpsha* and vprot*.  */
>>>>>>>     /* Fall through.  */
>>>>>>>     /* VEX.vvvv encodes the last source register operand.  */
>>>>>>>     case VexVVVV_SRC2:
>>>>>>>       v = source++;
>>>>>>>       break;
>>>>>>>     /* VEX.vvvv encodes the destination register operand.  */
>>>>>>>     case VexVVVV_DST:
>>>>>>>       v = dest--;
>>>>>>>       break;
>>>>>>>     default:
>>>>>>>       v = ~0;
>>>>>>>       break;
>>>>>>>      }
>>>>>>>
>>>>>>> Do you think we should add a separate patch 4 for XOP that removes
>>>>>>> the
>>>>>> special handling in match_template and completes its template? so
>>>>>> we don't have to add special handling for src1vvvv or src2vvvv.
>>>>>> This might go against your desire to reduce template size, but it
>>>>>> would help simplify the logic. I'd like to know your thoughts.
>>>>>>
>>>>>> Indeed. You'd effectively revert earlier folding that I did. And
>>>>>> the adjustment I suggested earlier ought to be small/simple enough.
>>>>>>
>>>>>
>>>>> So, I continued working on the previous suggestion. With the
>>>>> following
>>>> modification and it worked.
>>>>>
>>>>> @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
>>>>>                           || is_cpu (t, CpuAPX_F));
>>>>>               if (!operand_type_match (overlap0, i.types[0])
>>>>>                   || !operand_type_match (overlap1, i.types[j])
>>>>> -                 || (t->operands == 3
>>>>> +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
>>>>>                       && !operand_type_match (overlap2, i.types[1]))
>>>>>                   || (check_register
>>>>>                       && !operand_type_register_match (i.types[0],
>>>>
>>>> Just to mention it - this certainly isn't what I suggested. In fact I
>>>> seem to vaguely recall that something similar was once proposed
>>>> during the original APX work as well, where I then objected, too.
>>>>
>>>>> But I found that there are 4 test files that failed, I didn't find
>>>>> the doc on
>>>> how to encode vprotb but I guess that is because I changed the
>>>> default template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all
>>>> the related test cases needed to be modified. Do you have any comments
>> here?
>>>>>
>>>>> regexp "^[      ]*[a-f0-9]+:    8f e9 40 90
>>>> d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
>>>>> line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"
>>>>
>>>> Well, while changing the templates is in principle possible, and the
>>>> resulting code would still be correct, changing encodings it usually not a
>> good idea.
>>>> Thus when it can be avoided, it should be avoided, imo. Hence why I
>>>> didn't suggest this, but to amend the code doing the operand swapping
>>>> (for the case where operand order is controlled by XOP.W).
>>>>
>>>
>>> I mistakenly thought this was what you wanted, and I also think this
>> modification is unacceptable. Could you elaborate further on your original
>> suggestion?
>>
>> At the bottom of match_template() we have
>>
>>     case Opcode_VexW:
>>       /* Only the first two register operands need reversing, alongside
>> 	 flipping VEX.W.  */
>>       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
>>
>> This is where I think you further want to adjust
>> i.tm.opcode_modifier.vexvvvv.
>>
> 
> "vprotb %xmm7,%xmm0,%xmm3" doesn't go through this place, it finds the matching template directly (Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }).

Of course, for not having a memory operand (and no pseudo prefix). But that's
not the problem here - the problem is that for build_modrm_byte() you want to
make adjustments when operands need swapping (compared to what the templates
say).

> I inserted a judgment to solve this problem, it's a bit ugly. I'd like to abandon this optimization. 

Didn't you have a working form earlier on? Could we use that, leaving the XOP
stuff untouched for now, for me to see about looking into later?

> To avoid confusion, I post all the changes.
> 
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
>                           || is_cpu (t, CpuAPX_F));
>               if (!operand_type_match (overlap0, i.types[0])
>                   || !operand_type_match (overlap1, i.types[j])
> -                 || (t->operands == 3
> +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)     --> This place also needs to change.
>                       && !operand_type_match (overlap2, i.types[1]))
>                   || (check_register
>                       && !operand_type_register_match (i.types[0],
> @@ -9035,6 +9035,10 @@ match_template (char mnem_suffix)
>                       specific_error = progress (i.error);
>                       continue;
>                     }
> +                 if (is_cpu (t, CpuXOP) && operand_types[0].bitfield.baseindex   --> It's ugly, and still has some test cases failing for other XOP instructions.
> +                     && i.types[0].bitfield.class == RegSIMD
> +                     && t->opcode_modifier.vexw == VEXW1)
> +                   found_reverse_match = Opcode_VexW;
>                   break;
>                 }
>             } 

Yeah, as indicated before: These probably shouldn't be changed like this.

> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -1938,10 +1938,10 @@ vpmacsww, 0x95, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, Reg
>  vpmadcsswd, 0xa6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
>  vpmadcswd, 0xb6, XOP, Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
>  vpperm, 0xa3, XOP, D|Modrm|Vex128|SpaceXOP08|Src1VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
> -vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
> +vprot<xop>, 0x90 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
>  vprot<xop>, 0xc0 | <xop:opc>, XOP, Modrm|Vex128|SpaceXOP08|VexW0|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
> -vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
> -vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src2VVVV|VexW0|NoSuf, { RegXMM, RegXMM|Unspecified|BaseIndex, RegXMM }
> +vpsha<xop>, 0x98 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }
> +vpshl<xop>, 0x94 | <xop:opc>, XOP, D|Modrm|Vex128|SpaceXOP09|Src1VVVV|VexW1|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }

Aiui (and as per earlier discussion) this leads to encoding changes, which we
want to avoid whenever possible.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/3] x86: Drop SwapSources
  2024-04-30  6:18                       ` Jan Beulich
@ 2024-04-30  7:34                         ` Cui, Lili
  2024-04-30  9:22                           ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Cui, Lili @ 2024-04-30  7:34 UTC (permalink / raw)
  To: Beulich, Jan; +Cc: hjl.tools, binutils

> On 30.04.2024 04:56, Cui, Lili wrote:
> >> On 29.04.2024 15:41, Cui, Lili wrote:
> >>>> On 29.04.2024 14:23, Cui, Lili wrote:
> >>>>>> On 28.04.2024 06:47, Cui, Lili wrote:
> >>>>>>>> On 26.04.2024 10:14, Cui, Lili wrote:
> >>>>>>>>>> On 24.04.2024 09:23, Cui, Lili wrote:
> >>>>>>>>>>> --- a/gas/config/tc-i386.c
> >>>>>>>>>>> +++ b/gas/config/tc-i386.c
> >>>>>>>>>>> @@ -10434,6 +10434,14 @@ build_modrm_byte (void)
> >>>>>>>>>>>
> >>>>>>>>>>>    switch (i.tm.opcode_modifier.vexvvvv)
> >>>>>>>>>>>      {
> >>>>>>>>>>> +    case VexVVVV_SRC2:
> >>>>>>>>>>> +      if (source != op)
> >>>>>>>>>>> +	{
> >>>>>>>>>>> +	  v = source++;
> >>>>>>>>>>> +	  break;
> >>>>>>>>>>> +	}
> >>>>>>>>>>> +      /* For XOP: vpshl* and vpsha*.  */
> >>>>>>>>>>> +      /* Fall through.  */
> >>>>>>>>>>>      case VexVVVV_SRC1:
> >>>>>>>>>>
> >>>>>>>>>> This falling-through is odd and hence needs a better comment
> >>>>>>>>>> (then also covering vprot*, which afaict is similarly affected).
> >>>>>>>>>> The reason for this is the XOP.W-controlled operand swapping,
> >>>>>>>>>> if I'm not mistaken? In which case perhaps instead of the
> >>>>>>>>>> fall-through here the logic swapping the operands should
> >>>>>>>>>> replace
> >>>>>>>>>> VexVVVV_SRC2 by
> >>>>>>>> VexVVVV_SRC1?
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Yes, vprot* should be included, and it is related to
> >>>>>>>>> XOP.W-controlled
> >>>>>>>> operand swapping, the comments says " /* Only the first two
> >>>>>>>> register operands need reversing, alongside flipping VEX.W.  */
> >>>>>>>> ", But there is actually a memory operand, not two register
> operands.
> >>>>>>>>>
> >>>>>>>>> I think VexVVVV_SRC2 makes more sense here, it matches the
> >>>>>>>>> actual
> >>>>>>>> situation, we want to use vvvv to encode the first operand.
> >>>>>>>>>
> >>>>>>>>> Opcode table:
> >>>>>>>>> vprot<xop>, 0x90 | <xop:opc>, XOP,
> >>>>>>>>> D|Modrm|Vex128|SpaceXOP09|VexVVVV_Src2|VexW0|NoSuf,
> >>>>>> { RegXMM,
> >>>>>>>>> RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>>>>>>>
> >>>>>>>>> testcase:
> >>>>>>>>> vprotb (%rax),%xmm12,%xmm15
> >>>>>>>>> vprotb %xmm15,(%r12),%xmm0
> >>>>>>>>
> >>>>>>>> VexVVVV_Src2 is appropriate for the latter, yes, but not for
> >>>>>>>> the former. That uses VexVVVV_Src1 layout. Hence my suggestion
> >>>>>>>> to replace the attribute when swapping operands.
> >>>>>>>>
> >>>>>>>
> >>>>>>> If replace the Src2VVVV| VexW0 with Src1VVVV| VexW1 and
> swapping
> >>>>>> operands. We can put VexVVVV_SRC1 before VexVVVV_SRC2, but we
> >> still
> >>>>>> need to add "(!is_cpu (&i.tm, CpuXOP) || source == op" under
> >>>>>> VexVVVV_SRC1 , and match_template also needs to be adjusted (I
> >> made
> >>>>>> a simple modification and it still failed, I think continuing
> >>>>>> like this may go against the original intention).
> >>>>>>>
> >>>>>>>   switch (i.tm.opcode_modifier.vexvvvv)
> >>>>>>>     {
> >>>>>>>     /* VEX.vvvv encodes the first source register operand.  */
> >>>>>>>     case VexVVVV_SRC1:
> >>>>>>>       if (!is_cpu (&i.tm, CpuXOP) || source == op)
> >>>>>>>         {
> >>>>>>>           v =  dest - 1;
> >>>>>>>           break;
> >>>>>>>         }
> >>>>>>>     /* For XOP: vpshl*, vpsha* and vprot*.  */
> >>>>>>>     /* Fall through.  */
> >>>>>>>     /* VEX.vvvv encodes the last source register operand.  */
> >>>>>>>     case VexVVVV_SRC2:
> >>>>>>>       v = source++;
> >>>>>>>       break;
> >>>>>>>     /* VEX.vvvv encodes the destination register operand.  */
> >>>>>>>     case VexVVVV_DST:
> >>>>>>>       v = dest--;
> >>>>>>>       break;
> >>>>>>>     default:
> >>>>>>>       v = ~0;
> >>>>>>>       break;
> >>>>>>>      }
> >>>>>>>
> >>>>>>> Do you think we should add a separate patch 4 for XOP that
> >>>>>>> removes the
> >>>>>> special handling in match_template and completes its template? so
> >>>>>> we don't have to add special handling for src1vvvv or src2vvvv.
> >>>>>> This might go against your desire to reduce template size, but it
> >>>>>> would help simplify the logic. I'd like to know your thoughts.
> >>>>>>
> >>>>>> Indeed. You'd effectively revert earlier folding that I did. And
> >>>>>> the adjustment I suggested earlier ought to be small/simple enough.
> >>>>>>
> >>>>>
> >>>>> So, I continued working on the previous suggestion. With the
> >>>>> following
> >>>> modification and it worked.
> >>>>>
> >>>>> @@ -8932,7 +8932,7 @@ match_template (char mnem_suffix)
> >>>>>                           || is_cpu (t, CpuAPX_F));
> >>>>>               if (!operand_type_match (overlap0, i.types[0])
> >>>>>                   || !operand_type_match (overlap1, i.types[j])
> >>>>> -                 || (t->operands == 3
> >>>>> +                 || (t->operands == 3 && !is_cpu (t, CpuXOP)
> >>>>>                       && !operand_type_match (overlap2, i.types[1]))
> >>>>>                   || (check_register
> >>>>>                       && !operand_type_register_match (i.types[0],
> >>>>
> >>>> Just to mention it - this certainly isn't what I suggested. In fact
> >>>> I seem to vaguely recall that something similar was once proposed
> >>>> during the original APX work as well, where I then objected, too.
> >>>>
> >>>>> But I found that there are 4 test files that failed, I didn't find
> >>>>> the doc on
> >>>> how to encode vprotb but I guess that is because I changed the
> >>>> default template from Src2VVVV| VexW0 to Src1VVVV| VexW1, then all
> >>>> the related test cases needed to be modified. Do you have any
> >>>> comments
> >> here?
> >>>>>
> >>>>> regexp "^[      ]*[a-f0-9]+:    8f e9 40 90
> >>>> d8[         ]+vprotb %xmm7,%xmm0,%xmm3$"
> >>>>> line   "    11ed:       8f e9 f8 90 df          vprotb %xmm7,%xmm0,%xmm3"
> >>>>
> >>>> Well, while changing the templates is in principle possible, and
> >>>> the resulting code would still be correct, changing encodings it
> >>>> usually not a
> >> good idea.
> >>>> Thus when it can be avoided, it should be avoided, imo. Hence why I
> >>>> didn't suggest this, but to amend the code doing the operand
> >>>> swapping (for the case where operand order is controlled by XOP.W).
> >>>>
> >>>
> >>> I mistakenly thought this was what you wanted, and I also think this
> >> modification is unacceptable. Could you elaborate further on your
> >> original suggestion?
> >>
> >> At the bottom of match_template() we have
> >>
> >>     case Opcode_VexW:
> >>       /* Only the first two register operands need reversing, alongside
> >> 	 flipping VEX.W.  */
> >>       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
> >>
> >> This is where I think you further want to adjust
> >> i.tm.opcode_modifier.vexvvvv.
> >>
> >
> > "vprotb %xmm7,%xmm0,%xmm3" doesn't go through this place, it finds
> the matching template directly (Src1VVVV|VexW1|NoSuf,
> { RegXMM|Unspecified|BaseIndex, RegXMM, RegXMM }).
> 
> Of course, for not having a memory operand (and no pseudo prefix). But
> that's not the problem here - the problem is that for build_modrm_byte()
> you want to make adjustments when operands need swapping (compared to
> what the templates say).
> 
> > I inserted a judgment to solve this problem, it's a bit ugly. I'd like to
> abandon this optimization.
> 
> Didn't you have a working form earlier on? Could we use that, leaving the
> XOP stuff untouched for now, for me to see about looking into later?
> 

Yes, I mean I will continue with these patches, but without the XOP optimization. Thanks for helping look at this part later.

The next 5 days are our Labor Day, and email responses will be slower.

Regards,
Lili.







^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] x86: Drop SwapSources
  2024-04-30  7:34                         ` Cui, Lili
@ 2024-04-30  9:22                           ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2024-04-30  9:22 UTC (permalink / raw)
  To: Cui, Lili; +Cc: hjl.tools, binutils

On 30.04.2024 09:34, Cui, Lili wrote:
> The next 5 days are our Labor Day, and email responses will be slower.

NP. As a heads up, I'll be on FTO following that, until the 13th.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2024-04-30  9:22 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-24  7:23 [PATCH 0/3] x86: Optimize the encoder of the vvvv register Cui, Lili
2024-04-24  7:23 ` [PATCH 1/3] x86: Use vexvvvv to encode " Cui, Lili
2024-04-24  7:52   ` Jan Beulich
2024-04-25 13:14     ` Cui, Lili
2024-04-25 13:22       ` Jan Beulich
2024-04-26  5:33         ` Cui, Lili
2024-04-26  6:52           ` Jan Beulich
2024-04-24  7:23 ` [PATCH 2/3] x86: Drop SwapSources Cui, Lili
2024-04-24  8:07   ` Jan Beulich
2024-04-26  8:14     ` Cui, Lili
2024-04-26 10:37       ` Jan Beulich
2024-04-28  4:47         ` Cui, Lili
2024-04-29  6:40           ` Jan Beulich
2024-04-29 12:23             ` Cui, Lili
2024-04-29 13:08               ` Jan Beulich
2024-04-29 13:41                 ` Cui, Lili
2024-04-29 13:49                   ` Jan Beulich
2024-04-30  2:56                     ` Cui, Lili
2024-04-30  6:18                       ` Jan Beulich
2024-04-30  7:34                         ` Cui, Lili
2024-04-30  9:22                           ` Jan Beulich
2024-04-24  7:23 ` [PATCH 3/3] x86: Drop using extension_opcode to encode vvvv register Cui, Lili
2024-04-24  8:19   ` Jan Beulich
2024-04-24  8:27     ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).