From: "Hu, Lin1" <lin1.hu@intel.com>
To: binutils@sourceware.org
Cc: JBeulich@suse.com, hongjiu.lu@intel.com
Subject: [PATCH 5/8] [v2] Support APX NDD optimized encoding.
Date: Mon, 23 Oct 2023 11:30:08 +0800 [thread overview]
Message-ID: <20231023033008.3256485-1-lin1.hu@intel.com> (raw)
In-Reply-To: <a2e3a361-28c3-010a-54fa-4b5edd2bf3b6@suse.com>
This new version of the patch has been adjusted mainly in response to the review comments.
This patch aims to optimize:
add %r16, %r15, %r15 -> add %r16, %r15
gas/ChangeLog:
* config/tc-i386.c (optimize_NDD_to_nonNDD): New function.
(match_template): If an APX NDD insn can be optimized, rematch the
template.
* testsuite/gas/i386/x86-64.exp: Add test.
* testsuite/gas/i386/x86-64-apx-ndd-optimize.d: New test.
* testsuite/gas/i386/x86-64-apx-ndd-optimize.s: Ditto.
---
gas/config/tc-i386.c | 45 +
.../gas/i386/x86-64-apx-ndd-optimize.d | 124 +
.../gas/i386/x86-64-apx-ndd-optimize.s | 118 +
gas/testsuite/gas/i386/x86-64.exp | 1 +
opcodes/i386-opc.tbl | 22 +-
8 files changed, 11414 insertions(+), 8754 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
create mode 100644 gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 5a40fdcce40..5e6bb5435e3 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7186,6 +7186,43 @@ check_EgprOperands (const insn_template *t)
return 0;
}
+/* Optimize APX NDD insns to non-NDD insns.
+
+   T is the template being matched against the parsed insn held in `i'.
+   The rewrite is attempted only when T has VexVVVV set, is in
+   SPACE_EVEXMAP4, the insn has at least two register operands, and its
+   last operand is a register.
+
+   Operands are addressed from the end: DEST is the last operand, SRC1 the
+   one before it and SRC2 the one before that (index 0 is used when the
+   insn has too few operands).  Two cases are recognized:
+
+   - SRC1 is the same register as DEST: the destination is dropped, e.g.
+     add %r16, %r15, %r15 becomes add %r16, %r15.  READONLY_VAR is set to
+     SRC2.
+   - Otherwise, SRC2 is the same register as DEST and T is marked
+     commutative: READONLY_VAR is set to SRC1, and swap_2_operands is
+     called on SRC1 and SRC2 before the destination is dropped, e.g.
+     add %r31, (%r8), %r31 becomes add (%r8), %r31.
+
+   READONLY_VAR stays ~0 when neither case applies.
+
+   On success i.operands, i.reg_operands and i.tm.operands are each
+   decremented and 1 is returned; match_template then restarts template
+   matching.  Otherwise `i' is left untouched and 0 is returned.  */
+
+static bool
+optimize_NDD_to_nonNDD (const insn_template *t)
+{
+ if (t->opcode_modifier.vexvvvv
+ && t->opcode_space == SPACE_EVEXMAP4
+ && i.reg_operands >= 2
+ && i.types[i.operands - 1].bitfield.class == Reg)
+ {
+ unsigned int readonly_var = ~0;
+ unsigned int dest = i.operands - 1;
+ unsigned int src1 = (i.operands > 2) ? i.operands - 2 : 0;
+ unsigned int src2 = (i.operands > 3) ? i.operands - 3 : 0;
+
+ if (i.types[src1].bitfield.class == Reg
+ && i.op[src1].regs == i.op[dest].regs)
+ readonly_var = src2;
+ /* The destination may match SRC2 instead of SRC1.  Reducing that form
+ requires exchanging the two sources, so it is only done for templates
+ marked commutative.  adcx, adox and imul don't have D bit. */
+ else if (i.types[src2].bitfield.class == Reg
+ && i.op[src2].regs == i.op[dest].regs
+ && t->opcode_modifier.commutative)
+ readonly_var = src1;
+ if (readonly_var != (unsigned int) ~0)
+ {
+ --i.operands;
+ --i.reg_operands;
+ --i.tm.operands;
+
+ if (readonly_var != src2)
+ swap_2_operands (readonly_var, src2);
+ return 1;
+ }
+ }
+ return 0;
+}
+
/* Helper function for the progress() macro in match_template(). */
static INLINE enum i386_error progress (enum i386_error new,
enum i386_error last,
@@ -7706,6 +7743,14 @@ match_template (char mnem_suffix)
i.memshift = memshift;
}
+ /* If an NDD insn can be optimized to a non-NDD insn, e.g.
+ add %r16, %r8, %r8 -> add %r16, %r8, then rematch the template. */
+ if (optimize == 1 && optimize_NDD_to_nonNDD (t))
+ {
+ t = current_templates->start - 1;
+ continue;
+ }
+
/* We've found a match; break out of loop. */
break;
}
diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
new file mode 100644
index 00000000000..f23b2b127b6
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -0,0 +1,124 @@
+#as: -O1
+#objdump: -drw
+#name: x86-64 APX NDD optimized encoding
+#source: x86-64-apx-ndd-optimize.s
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*d5 19 ff c7 inc %r31
+\s*[a-f0-9]+:\s*d5 11 fe c7 inc %r31b
+\s*[a-f0-9]+:\s*d5 4d 01 f8 add %r31,%r8
+\s*[a-f0-9]+:\s*d5 45 00 f8 add %r31b,%r8b
+\s*[a-f0-9]+:\s*d5 4d 01 f8 add %r31,%r8
+\s*[a-f0-9]+:\s*d5 1d 03 c7 add %r31,%r8
+\s*[a-f0-9]+:\s*d5 4d 03 38 add \(%r8\),%r31
+\s*[a-f0-9]+:\s*d5 1d 03 07 add \(%r31\),%r8
+\s*[a-f0-9]+:\s*49 81 c7 33 44 34 12 add \$0x12344433,%r15
+\s*[a-f0-9]+:\s*49 81 c0 11 22 33 f4 add \$0xfffffffff4332211,%r8
+\s*[a-f0-9]+:\s*d5 18 ff c9 dec %r17
+\s*[a-f0-9]+:\s*d5 10 fe c9 dec %r17b
+\s*[a-f0-9]+:\s*d5 18 f7 d1 not %r17
+\s*[a-f0-9]+:\s*d5 10 f6 d1 not %r17b
+\s*[a-f0-9]+:\s*d5 18 f7 d9 neg %r17
+\s*[a-f0-9]+:\s*d5 10 f6 d9 neg %r17b
+\s*[a-f0-9]+:\s*d5 1c 29 f9 sub %r15,%r17
+\s*[a-f0-9]+:\s*d5 14 28 f9 sub %r15b,%r17b
+\s*[a-f0-9]+:\s*62 54 84 18 29 38 sub %r15,\(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 2b 04 07 sub \(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 ee 34 12 00 00 sub \$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 19 f9 sbb %r15,%r17
+\s*[a-f0-9]+:\s*d5 14 18 f9 sbb %r15b,%r17b
+\s*[a-f0-9]+:\s*62 54 84 18 19 38 sbb %r15,\(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 1b 04 07 sbb \(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 de 34 12 00 00 sbb \$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 11 f9 adc %r15,%r17
+\s*[a-f0-9]+:\s*d5 14 10 f9 adc %r15b,%r17b
+\s*[a-f0-9]+:\s*4d 13 38 adc \(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 13 04 07 adc \(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 d6 34 12 00 00 adc \$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 09 f9 or %r15,%r17
+\s*[a-f0-9]+:\s*d5 14 08 f9 or %r15b,%r17b
+\s*[a-f0-9]+:\s*4d 0b 38 or \(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 0b 04 07 or \(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 ce 34 12 00 00 or \$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 31 f9 xor %r15,%r17
+\s*[a-f0-9]+:\s*d5 14 30 f9 xor %r15b,%r17b
+\s*[a-f0-9]+:\s*4d 33 38 xor \(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 33 04 07 xor \(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 f6 34 12 00 00 xor \$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 21 f9 and %r15,%r17
+\s*[a-f0-9]+:\s*d5 14 20 f9 and %r15b,%r17b
+\s*[a-f0-9]+:\s*4d 23 38 and \(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 23 04 07 and \(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 11 81 e6 34 12 00 00 and \$0x1234,%r30d
+\s*[a-f0-9]+:\s*d5 19 d1 cf ror %r31
+\s*[a-f0-9]+:\s*d5 11 d0 cf ror %r31b
+\s*[a-f0-9]+:\s*49 c1 cc 02 ror \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 cc 02 ror \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 c7 rol %r31
+\s*[a-f0-9]+:\s*d5 11 d0 c7 rol %r31b
+\s*[a-f0-9]+:\s*49 c1 c4 02 rol \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 c4 02 rol \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 df rcr %r31
+\s*[a-f0-9]+:\s*d5 11 d0 df rcr %r31b
+\s*[a-f0-9]+:\s*49 c1 dc 02 rcr \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 dc 02 rcr \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 d7 rcl %r31
+\s*[a-f0-9]+:\s*d5 11 d0 d7 rcl %r31b
+\s*[a-f0-9]+:\s*49 c1 d4 02 rcl \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 d4 02 rcl \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 e7 shl %r31
+\s*[a-f0-9]+:\s*d5 11 d0 e7 shl %r31b
+\s*[a-f0-9]+:\s*49 c1 e4 02 shl \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 e4 02 shl \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 ff sar %r31
+\s*[a-f0-9]+:\s*d5 11 d0 ff sar %r31b
+\s*[a-f0-9]+:\s*49 c1 fc 02 sar \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 fc 02 sar \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 e7 shl %r31
+\s*[a-f0-9]+:\s*d5 11 d0 e7 shl %r31b
+\s*[a-f0-9]+:\s*49 c1 e4 02 shl \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 e4 02 shl \$0x2,%r12b
+\s*[a-f0-9]+:\s*d5 19 d1 ef shr %r31
+\s*[a-f0-9]+:\s*d5 11 d0 ef shr %r31b
+\s*[a-f0-9]+:\s*49 c1 ec 02 shr \$0x2,%r12
+\s*[a-f0-9]+:\s*41 c0 ec 02 shr \$0x2,%r12b
+\s*[a-f0-9]+:\s*62 74 9c 18 24 20 01 shld \$0x1,%r12,\(%rax\),%r12
+\s*[a-f0-9]+:\s*4d 0f a4 c4 02 shld \$0x2,%r8,%r12
+\s*[a-f0-9]+:\s*62 74 b4 18 a5 08 shld %cl,%r9,\(%rax\),%r9
+\s*[a-f0-9]+:\s*d5 9c a5 e0 shld %cl,%r12,%r16
+\s*[a-f0-9]+:\s*62 7c 94 18 a5 2c 83 shld %cl,%r13,\(%r19,%rax,4\),%r13
+\s*[a-f0-9]+:\s*62 74 9c 18 2c 20 01 shrd \$0x1,%r12,\(%rax\),%r12
+\s*[a-f0-9]+:\s*4d 0f ac ec 01 shrd \$0x1,%r13,%r12
+\s*[a-f0-9]+:\s*62 74 b4 18 ad 08 shrd %cl,%r9,\(%rax\),%r9
+\s*[a-f0-9]+:\s*d5 9c ad e0 shrd %cl,%r12,%r16
+\s*[a-f0-9]+:\s*62 7c 94 18 ad 2c 83 shrd %cl,%r13,\(%r19,%rax,4\),%r13
+\s*[a-f0-9]+:\s*66 4d 0f 38 f6 c7 adcx %r15,%r8
+\s*[a-f0-9]+:\s*62 14 f9 08 66 04 3f adcx \(%r15,%r31,1\),%r8
+\s*[a-f0-9]+:\s*66 4d 0f 38 f6 c1 adcx %r9,%r8
+\s*[a-f0-9]+:\s*f3 4d 0f 38 f6 c7 adox %r15,%r8
+\s*[a-f0-9]+:\s*62 14 fa 08 66 04 3f adox \(%r15,%r31,1\),%r8
+\s*[a-f0-9]+:\s*f3 4d 0f 38 f6 c1 adox %r9,%r8
+\s*[a-f0-9]+:\s*67 0f 40 90 90 90 90 90 cmovo -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 41 90 90 90 90 90 cmovno -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 42 90 90 90 90 90 cmovb -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 43 90 90 90 90 90 cmovae -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 44 90 90 90 90 90 cmove -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 45 90 90 90 90 90 cmovne -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 46 90 90 90 90 90 cmovbe -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 47 90 90 90 90 90 cmova -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 48 90 90 90 90 90 cmovs -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 49 90 90 90 90 90 cmovns -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4a 90 90 90 90 90 cmovp -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4b 90 90 90 90 90 cmovnp -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4c 90 90 90 90 90 cmovl -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 cmovge -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 cmovle -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 cmovg -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f af 90 09 09 09 00 imul 0x90909\(%eax\),%edx
+\s*[a-f0-9]+:\s*d5 aa af 94 f8 09 09 00 00 imul 0x909\(%rax,%r31,8\),%rdx
+\s*[a-f0-9]+:\s*48 0f af d0 imul %rax,%rdx
diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
new file mode 100644
index 00000000000..0f5c15a2f9c
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -0,0 +1,118 @@
+# Check 64bit APX NDD instructions with optimized encoding
+
+ .allow_index_reg
+ .text
+_start:
+inc %r31,%r31
+incb %r31b,%r31b
+add %r31,%r8,%r8
+addb %r31b,%r8b,%r8b
+{store} add %r31,%r8,%r8
+{load} add %r31,%r8,%r8
+add %r31,(%r8),%r31
+add (%r31),%r8,%r8
+add $0x12344433,%r15,%r15
+add $0xfffffffff4332211,%r8,%r8
+dec %r17,%r17
+decb %r17b,%r17b
+not %r17,%r17
+notb %r17b,%r17b
+neg %r17,%r17
+negb %r17b,%r17b
+sub %r15,%r17,%r17
+subb %r15b,%r17b,%r17b
+sub %r15,(%r8),%r15
+sub (%r15,%rax,1),%r16,%r16
+sub $0x1234,%r30,%r30
+sbb %r15,%r17,%r17
+sbbb %r15b,%r17b,%r17b
+sbb %r15,(%r8),%r15
+sbb (%r15,%rax,1),%r16,%r16
+sbb $0x1234,%r30,%r30
+adc %r15,%r17,%r17
+adcb %r15b,%r17b,%r17b
+adc %r15,(%r8),%r15
+adc (%r15,%rax,1),%r16,%r16
+adc $0x1234,%r30,%r30
+or %r15,%r17,%r17
+orb %r15b,%r17b,%r17b
+or %r15,(%r8),%r15
+or (%r15,%rax,1),%r16,%r16
+or $0x1234,%r30,%r30
+xor %r15,%r17,%r17
+xorb %r15b,%r17b,%r17b
+xor %r15,(%r8),%r15
+xor (%r15,%rax,1),%r16,%r16
+xor $0x1234,%r30,%r30
+and %r15,%r17,%r17
+andb %r15b,%r17b,%r17b
+and %r15,(%r8),%r15
+and (%r15,%rax,1),%r16,%r16
+and $0x1234,%r30,%r30
+ror %r31,%r31
+rorb %r31b,%r31b
+ror $0x2,%r12,%r12
+rorb $0x2,%r12b,%r12b
+rol %r31,%r31
+rolb %r31b,%r31b
+rol $0x2,%r12,%r12
+rolb $0x2,%r12b,%r12b
+rcr %r31,%r31
+rcrb %r31b,%r31b
+rcr $0x2,%r12,%r12
+rcrb $0x2,%r12b,%r12b
+rcl %r31,%r31
+rclb %r31b,%r31b
+rcl $0x2,%r12,%r12
+rclb $0x2,%r12b,%r12b
+shl %r31,%r31
+shlb %r31b,%r31b
+shl $0x2,%r12,%r12
+shlb $0x2,%r12b,%r12b
+sar %r31,%r31
+sarb %r31b,%r31b
+sar $0x2,%r12,%r12
+sarb $0x2,%r12b,%r12b
+shl %r31,%r31
+shlb %r31b,%r31b
+shl $0x2,%r12,%r12
+shlb $0x2,%r12b,%r12b
+shr %r31,%r31
+shrb %r31b,%r31b
+shr $0x2,%r12,%r12
+shrb $0x2,%r12b,%r12b
+shld $0x1,%r12,(%rax),%r12
+shld $0x2,%r8,%r12,%r12
+shld %cl,%r9,(%rax),%r9
+shld %cl,%r12,%r16,%r16
+shld %cl,%r13,(%r19,%rax,4),%r13
+shrd $0x1,%r12,(%rax),%r12
+shrd $0x1,%r13,%r12,%r12
+shrd %cl,%r9,(%rax),%r9
+shrd %cl,%r12,%r16,%r16
+shrd %cl,%r13,(%r19,%rax,4),%r13
+adcx %r15,%r8,%r8
+adcx (%r15,%r31,1),%r8,%r8
+adcx %r8,%r9,%r8
+adox %r15,%r8,%r8
+adox (%r15,%r31,1),%r8,%r8
+adox %r8,%r9,%r8
+cmovo 0x90909090(%eax),%edx,%edx
+cmovno 0x90909090(%eax),%edx,%edx
+cmovb 0x90909090(%eax),%edx,%edx
+cmovae 0x90909090(%eax),%edx,%edx
+cmove 0x90909090(%eax),%edx,%edx
+cmovne 0x90909090(%eax),%edx,%edx
+cmovbe 0x90909090(%eax),%edx,%edx
+cmova 0x90909090(%eax),%edx,%edx
+cmovs 0x90909090(%eax),%edx,%edx
+cmovns 0x90909090(%eax),%edx,%edx
+cmovp 0x90909090(%eax),%edx,%edx
+cmovnp 0x90909090(%eax),%edx,%edx
+cmovl 0x90909090(%eax),%edx,%edx
+cmovge 0x90909090(%eax),%edx,%edx
+cmovle 0x90909090(%eax),%edx,%edx
+cmovg 0x90909090(%eax),%edx,%edx
+imul 0x90909(%eax),%edx,%edx
+imul 0x909(%rax,%r31,8),%rdx,%rdx
+imul %rdx,%rax,%rdx
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index 07cb716d2a5..38fbed8a388 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -549,6 +549,7 @@ run_dump_test "x86-64-optimize-6"
run_list_test "x86-64-optimize-7a" "-I${srcdir}/$subdir -march=+noavx -al"
run_dump_test "x86-64-optimize-7b"
run_list_test "x86-64-optimize-8" "-I${srcdir}/$subdir -march=+noavx2 -al"
+run_dump_test "x86-64-apx-ndd-optimize"
run_dump_test "x86-64-align-branch-1a"
run_dump_test "x86-64-align-branch-1b"
run_dump_test "x86-64-align-branch-1c"
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index f68940b9b4a..3c255e79a91 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -145,6 +145,8 @@
// The EVEX purpose of StaticRounding appears only together with SAE. Re-use
// the bit to mark commutative VEX encodings where swapping the source
// operands may allow to switch from 3-byte to 2-byte VEX encoding.
+// Also re-use the bit to mark NDD insns where swapping the source operands
+// may allow switching from the 3-operand form to the 2-operand form.
#define C StaticRounding
#define FP 387|287|8087
@@ -166,6 +168,10 @@
### MARKER ###
+// Please don't add an NDD insn which may be optimized to a REX2 insn before
+// the mov. Doing so may cause a good UB checker to object to the
+// "template->start - 1" computation at the end of match_template.
+
// Move instructions.
mov, 0xa0, No64, D|W|CheckOperandSize|No_sSuf|No_qSuf, { Disp16|Disp32|Unspecified|Byte|Word|Dword, Acc|Byte|Word|Dword }
mov, 0xa0, x64, D|W|CheckOperandSize|No_sSuf, { Disp64|Unspecified|Byte|Word|Dword|Qword, Acc|Byte|Word|Dword|Qword }
@@ -295,7 +301,7 @@ add, 0x0, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg3
add, 0x83/0, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
add, 0x4, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
add, 0x80/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-add, 0x0, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+add, 0x0, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
add, 0x83/0, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
add, 0x80/0, APX_F, W|Modrm|CheckOperandSize|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
@@ -339,7 +345,7 @@ and, 0x20, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|
and, 0x83/4, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock|Optimize, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
and, 0x24, 0, W|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
and, 0x80/4, 0, W|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-and, 0x20, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+and, 0x20, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
and, 0x83/4, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
and, 0x80/4, APX_F, W|Modrm|CheckOperandSize|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
@@ -347,7 +353,7 @@ or, 0x8, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Re
or, 0x83/1, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-or, 0x8, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+or, 0x8, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
or, 0x83/1, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
@@ -355,7 +361,7 @@ xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|
xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
xor, 0x34, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
xor, 0x80/6, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-xor, 0x30, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
xor, 0x80/6, APX_F, W|Modrm|CheckOperandSize|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
@@ -369,7 +375,7 @@ adc, 0x80/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|R
adc, 0x10, APX_F, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
adc, 0x83/2, APX_F, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
adc, 0x80/2, APX_F, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-adc, 0x10, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+adc, 0x10, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
adc, 0x83/2, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
adc, 0x80/2, APX_F, W|Modrm|CheckOperandSize|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
@@ -412,7 +418,7 @@ cqto, 0x99, x64, Size64|NoSuf, {}
mul, 0xf6/4, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
imul, 0xf6/5, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64 }
-imul, 0xaf, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64, Reg16|Reg32|Reg64 }
+imul, 0xaf, APX_F, C|Modrm|CheckOperandSize|No_bSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64, Reg16|Reg32|Reg64 }
imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
// imul with 2 operands mimics imul with 3 by putting the register in
@@ -2126,10 +2132,10 @@ xstore, 0xfa7c0, PadLock, NoSuf|RepPrefixOk, {}
// Multy-precision Add Carry, rdseed instructions.
adcx, 0x660f38f6, ADX, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
adcx, 0x6666, ADX|APX_F, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-adcx, 0x6666, ADX|APX_F, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+adcx, 0x6666, ADX|APX_F, C|Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
adox, 0xf30f38f6, ADX, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
adox, 0xf366, ADX|APX_F, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
-adox, 0xf366, ADX|APX_F, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+adox, 0xf366, ADX|APX_F, C|Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|VexVVVVDest|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
rdseed, 0xfc7/7, RdSeed, Modrm|NoSuf, { Reg16|Reg32|Reg64 }
// SMAP instructions.
--
2.31.1
next prev parent reply other threads:[~2023-10-23 3:30 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-19 15:25 [PATCH 0/8] [RFC] Support Intel APX EGPR Cui, Lili
2023-09-19 15:25 ` [PATCH 1/8] Support APX GPR32 with rex2 prefix Cui, Lili
2023-09-21 15:27 ` Jan Beulich
2023-09-27 15:57 ` Cui, Lili
2023-09-21 15:51 ` Jan Beulich
2023-09-27 15:59 ` Cui, Lili
2023-09-28 8:02 ` Jan Beulich
2023-10-07 3:27 ` Cui, Lili
2023-09-19 15:25 ` [PATCH 2/8] Support APX GPR32 with extend evex prefix Cui, Lili
2023-09-22 10:12 ` Jan Beulich
2023-10-17 15:48 ` Cui, Lili
2023-10-18 6:40 ` Jan Beulich
2023-10-18 10:44 ` Cui, Lili
2023-10-18 10:50 ` Jan Beulich
2023-09-22 10:50 ` Jan Beulich
2023-10-17 15:50 ` Cui, Lili
2023-10-17 16:11 ` Jan Beulich
2023-10-18 2:02 ` Cui, Lili
2023-10-18 6:10 ` Jan Beulich
2023-09-25 6:03 ` Jan Beulich
2023-10-17 15:52 ` Cui, Lili
2023-10-17 16:12 ` Jan Beulich
2023-10-18 6:31 ` Cui, Lili
2023-10-18 6:47 ` Jan Beulich
2023-10-18 7:52 ` Cui, Lili
2023-10-18 8:21 ` Jan Beulich
2023-10-18 11:30 ` Cui, Lili
2023-10-19 11:58 ` Cui, Lili
2023-10-19 15:24 ` Jan Beulich
2023-10-19 16:38 ` Cui, Lili
2023-10-20 6:25 ` Jan Beulich
2023-10-22 14:33 ` Cui, Lili
2023-09-19 15:25 ` [PATCH 3/8] Add tests for " Cui, Lili
2023-09-27 13:11 ` Jan Beulich
2023-10-17 15:53 ` FW: " Cui, Lili
2023-10-17 16:19 ` Jan Beulich
2023-10-18 2:32 ` Cui, Lili
2023-10-18 6:05 ` Jan Beulich
2023-10-18 7:16 ` Cui, Lili
2023-10-18 8:05 ` Jan Beulich
2023-10-18 11:26 ` Cui, Lili
2023-10-18 12:06 ` Jan Beulich
2023-10-25 16:03 ` Cui, Lili
2023-09-27 13:19 ` Jan Beulich
2023-09-19 15:25 ` [PATCH 4/8] Support APX NDD Cui, Lili
2023-09-27 14:44 ` Jan Beulich
2023-10-22 14:05 ` Cui, Lili
2023-10-23 7:12 ` Jan Beulich
2023-10-25 8:10 ` Cui, Lili
2023-10-25 8:47 ` Jan Beulich
2023-10-25 15:49 ` Cui, Lili
2023-10-25 15:59 ` Jan Beulich
2023-09-28 7:57 ` Jan Beulich
2023-10-22 14:57 ` Cui, Lili
2023-10-24 11:39 ` Cui, Lili
2023-10-24 11:58 ` Jan Beulich
2023-10-25 15:29 ` Cui, Lili
2023-09-19 15:25 ` [PATCH 5/8] Support APX NDD optimized encoding Cui, Lili
2023-09-28 9:29 ` Jan Beulich
2023-10-23 2:57 ` Hu, Lin1
2023-10-23 7:23 ` Jan Beulich
2023-10-23 7:50 ` Hu, Lin1
2023-10-23 8:15 ` Jan Beulich
2023-10-24 1:40 ` Hu, Lin1
2023-10-24 6:03 ` Jan Beulich
2023-10-24 6:08 ` Hu, Lin1
2023-10-23 3:07 ` [PATCH-V2] " Hu, Lin1
2023-10-23 3:30 ` Hu, Lin1 [this message]
2023-10-23 7:26 ` [PATCH 5/8] [v2] " Jan Beulich
2023-09-19 15:25 ` [PATCH 6/8] Support APX Push2/Pop2 Cui, Lili
2023-09-28 11:37 ` Jan Beulich
2023-10-30 15:21 ` Cui, Lili
2023-10-30 15:31 ` Jan Beulich
2023-11-20 13:05 ` Cui, Lili
2023-09-19 15:25 ` [PATCH 7/8] Support APX NF Cui, Lili
2023-09-25 6:07 ` Jan Beulich
2023-09-28 12:42 ` Jan Beulich
2023-11-02 10:15 ` Cui, Lili
2023-11-02 10:23 ` Jan Beulich
2023-11-02 10:46 ` Cui, Lili
2023-12-12 2:59 ` H.J. Lu
2023-09-19 15:25 ` [PATCH 8/8] Support APX JMPABS Cui, Lili
2023-09-28 13:11 ` Jan Beulich
2023-11-02 2:32 ` Hu, Lin1
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231023033008.3256485-1-lin1.hu@intel.com \
--to=lin1.hu@intel.com \
--cc=JBeulich@suse.com \
--cc=binutils@sourceware.org \
--cc=hongjiu.lu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).