From: Jan Beulich <jbeulich@suse.com>
To: "Cui, Lili" <lili.cui@intel.com>
Cc: hongjiu.lu@intel.com, konglin1 <lingling.kong@intel.com>,
binutils@sourceware.org
Subject: Re: [PATCH v3 6/9] Support APX NDD
Date: Fri, 8 Dec 2023 15:12:52 +0100 [thread overview]
Message-ID: <fb14d665-6aae-4b62-88cf-221b9788dcd3@suse.com> (raw)
In-Reply-To: <20231124070213.3886483-6-lili.cui@intel.com>
On 24.11.2023 08:02, Cui, Lili wrote:
> @@ -8870,25 +8890,33 @@ build_modrm_byte (void)
> || i.vec_encoding == vex_encoding_evex));
> }
>
> - for (v = source + 1; v < dest; ++v)
> - if (v != reg_slot)
> - break;
> - if (v >= dest)
> - v = ~0;
> - if (i.tm.extension_opcode != None)
> + if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
> {
> - if (dest != source)
> - v = dest;
> - dest = ~0;
> + v = dest;
> + dest-- ;
Nit: Stray blank.
> }
> - gas_assert (source < dest);
Starting from this line, do you really need to move that into the "else"
branch? It looks to me as it it could stay here. (Maybe I'm wrong with
the assertion itself, but ...
> - if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
> - && source != op)
... this entire if() pretty surely can stay as is, as there are no
templates with both DstVVVV and SwapSources afaict. (Thing is - as
before - that it isn't easy to see that what is happening here is
really just re-indentation. Iirc in an earlier version there actually
were hidden changes.) If you want this moved as an optimization,
please do so in a separate patch.
> --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.d
> +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.d
> @@ -27,4 +27,6 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+:[ ]+c8 ff ff ff[ ]+enter \$0xffff,\$0xff
> [ ]*[a-f0-9]+:[ ]+67 62 f2 7c 18 f5[ ]+addr32 \(bad\)
> [ ]*[a-f0-9]+:[ ]+0b ff[ ]+or %edi,%edi
> +[ ]*[a-f0-9]+:[ ]+62 f4 fc 08 ff[ ]+\(bad\)
> +[ ]*[a-f0-9]+:[ ]+d8[ ]+.byte 0xd8
> #pass
> --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> @@ -26,3 +26,5 @@ _start:
> #EVEX from VEX bzhi %ebx,%eax,%ecx EVEX.P[20](EVEX.b) == 1 (illegal value).
> .insn EVEX.L0.NP.0f38.W0 0xf5, %eax ,(%ebx){1to8}, %ecx
> .byte 0xff
> + #{evex} inc %rax %rbx EVEX.vvvv' != 1111 && EVEX.ND = 0.
> + .insn EVEX.L0.NP.M4.W1 0xff, %rax, %rbx
I don't think this does what you want. In the .d file the 4 bits are
all set. I think you mean something like
.insn EVEX.L0.NP.M4.W1 0xff/0, %rcx, %rbx
(i.e. ModR/M.reg specified as opcode extension _and_ the first operand
not the accumulator). The reason disassembly fails for what you've used
looks to be ModR/M.reg == 0b011 (resulting from the use of %rbx).
(Also, nit: What's EVEX.vvvv' ? I.e. what's the ' there about?)
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd.s
> @@ -0,0 +1,155 @@
> +# Check 64bit APX NDD instructions with evex prefix encoding
> +
> + .allow_index_reg
> + .text
> +_start:
> + adc $0x1234,%ax,%r30w
> + adc %r15b,%r17b,%r18b
> + adc %r15d,(%r8),%r18d
> + adc (%r15,%rax,1),%r16b,%r8b
> + adc (%r15,%rax,1),%r16w,%r8w
> + adcl $0x11,(%r19,%rax,4),%r20d
> + adcx %r15d,%r8d,%r18d
> + adcx (%r15,%r31,1),%r8
> + adcx (%r15,%r31,1),%r8d,%r18d
> + add $0x1234,%ax,%r30w
> + add $0x12344433,%r15,%r16
> + add $0x34,%r13b,%r17b
> + add $0xfffffffff4332211,%rax,%r8
> + add %r31,%r8,%r16
> + add %r31,(%r8),%r16
> + add %r31,(%r8,%r16,8),%r16
> + add %r31b,%r8b,%r16b
> + add %r31d,%r8d,%r16d
> + add %r31w,%r8w,%r16w
> + add (%r31),%r8,%r16
> + add 0x9090(%r31,%r16,1),%r8,%r16
> + addb %r31b,%r8b,%r16b
> + addl %r31d,%r8d,%r16d
> + addl $0x11,(%r19,%rax,4),%r20d
> + addq %r31,%r8,%r16
> + addq $0x12344433,(%r15,%rcx,4),%r16
> + addw %r31w,%r8w,%r16w
> + adox %r15d,%r8d,%r18d
Nit: Inconsistent blank padding.
> + {load} add %r31,%r8,%r16
> + {store} add %r31,%r8,%r16
> + adox (%r15,%r31,1),%r8
> + adox (%r15,%r31,1),%r8d,%r18d
> + and $0x1234,%ax,%r30w
> + and %r15b,%r17b,%r18b
> + and %r15d,(%r8),%r18d
> + and (%r15,%rax,1),%r16b,%r8b
> + and (%r15,%rax,1),%r16w,%r8w
> + andl $0x11,(%r19,%rax,4),%r20d
> + cmova 0x90909090(%eax),%edx,%r8d
> + cmovae 0x90909090(%eax),%edx,%r8d
> + cmovb 0x90909090(%eax),%edx,%r8d
> + cmovbe 0x90909090(%eax),%edx,%r8d
> + cmove 0x90909090(%eax),%edx,%r8d
> + cmovg 0x90909090(%eax),%edx,%r8d
> + cmovge 0x90909090(%eax),%edx,%r8d
> + cmovl 0x90909090(%eax),%edx,%r8d
> + cmovle 0x90909090(%eax),%edx,%r8d
> + cmovne 0x90909090(%eax),%edx,%r8d
> + cmovno 0x90909090(%eax),%edx,%r8d
> + cmovnp 0x90909090(%eax),%edx,%r8d
> + cmovns 0x90909090(%eax),%edx,%r8d
> + cmovo 0x90909090(%eax),%edx,%r8d
> + cmovp 0x90909090(%eax),%edx,%r8d
> + cmovs 0x90909090(%eax),%edx,%r8d
> + dec %rax,%r17
> + decb (%r31,%r12,1),%r8b
> + imul 0x909(%rax,%r31,8),%rdx,%r25
> + imul 0x90909(%eax),%edx,%r8d
> + inc %r31,%r16
> + inc %r31,%r8
> + inc %rax,%rbx
> + neg %rax,%r17
> + negb (%r31,%r12,1),%r8b
> + not %rax,%r17
> + notb (%r31,%r12,1),%r8b
> + or $0x1234,%ax,%r30w
> + or %r15b,%r17b,%r18b
> + or %r15d,(%r8),%r18d
> + or (%r15,%rax,1),%r16b,%r8b
> + or (%r15,%rax,1),%r16w,%r8w
> + orl $0x11,(%r19,%rax,4),%r20d
> + rcl $0x2,%r12b,%r31b
> + rcl %cl,%r16b,%r8b
> + rclb $0x1, (%rax),%r31b
> + rcll $0x2,(%rax),%r31d
> + rclw $0x1, (%rax),%r31w
Nit: Would be nice if there consistently were or were not blanks after
the commas.
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -139,9 +139,13 @@
> #define Vsz256 Vsz=VSZ256
> #define Vsz512 Vsz=VSZ512
>
> +#define DstVVVV VexVVVV=VexVVVV_DST
> +
> // The EVEX purpose of StaticRounding appears only together with SAE. Re-use
> // the bit to mark commutative VEX encodings where swapping the source
> // operands may allow to switch from 3-byte to 2-byte VEX encoding.
> +// And re-use the bit to mark some NDD insns that swapping the source operands
> +// may allow to switch from EVEX encoding to REX2 encoding.
> #define C StaticRounding
>
> #define FP 387|287|8087
> @@ -288,26 +292,40 @@ std, 0xfd, 0, NoSuf, {}
> sti, 0xfb, 0, NoSuf, {}
>
> // Arithmetic.
> +add, 0x0, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
There is _still_ Byte|Word|Dword|Qword in here (and below), when I think I
pointed out more than once before that in new templates such redundancy
wants omitting.
Since this isn't the first instance of earlier review comments not taken
care of, may I please ask that you make reasonably sure that new versions
aren't sent out like this?
> add, 0x0, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +add, 0x83/0, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> add, 0x83/0, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> add, 0x4, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +add, 0x80/0, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
> add, 0x80/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
> +inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
> inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> +sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|Optimize|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
Here and elsewhere, what's Optimize for? It not being there on other templates,
it can't be for the EVEX->REX2 optimization? If there are further optimization
plans, that's (again) something to mention in the description. Yet better would
be if such attributes were added only when respective optimizations are actually
introduced. Unlike e.g. NF, which would mean another bulk update if not added
right away, new optimizations typically affect only a few templates at a time.
> sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> sub, 0x2c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +sub, 0x80/5, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> sub, 0x80/5, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
There are still only 3 new templates here (and also above for add, plus for
other similar insns), when ...
> dec, 0x48, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
> +dec, 0xfe/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> dec, 0xfe/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> +sbb, 0x18, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> sbb, 0x18, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x18, APX_F, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x83/3, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> sbb, 0x83/3, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x83/3, APX_F, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> sbb, 0x1c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +sbb, 0x80/3, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> sbb, 0x80/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x80/3, APX_F, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
... there are 6 new templates here. This is again an aspect I had pointed
out before. You cannot defer the addition of the other 3 until the NF patch,
as you want to make sure that with just this patch in place something both
{evex} sbb %eax, %eax
and
{evex} sub %eax, %eax
actually assemble, and to EVEX encodings. I can't see how that would work
in the latter case without those further templates.
The alternative is to also defer adding the 2-operand SBB templates (and
any others you add here which don't use DstVVVV).
> cmp, 0x38, 0, D|W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> cmp, 0x83/7, 0, Modrm|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> @@ -318,31 +336,50 @@ test, 0x84, 0, D|W|C|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, R
> test, 0xa8, 0, W|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> test, 0xf6/0, 0, W|Modrm|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> +and, 0x20, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> and, 0x20, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +and, 0x83/4, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> and, 0x83/4, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock|Optimize, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> and, 0x24, 0, W|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +and, 0x80/4, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> and, 0x80/4, 0, W|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> +or, 0x8, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> or, 0x8, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +or, 0x83/1, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> or, 0x83/1, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> +xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> xor, 0x34, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +xor, 0x80/6, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> xor, 0x80/6, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> // clr with 1 operand is really xor with 2 operands.
> clr, 0x30, 0, W|Modrm|No_sSuf|RegKludge|Optimize, { Reg8|Reg16|Reg32|Reg64 }
Btw., for consistency this may also want accompanying with an EVEX counterpart.
> +adc, 0x10, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> adc, 0x10, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x10, APX_F, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x83/2, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> adc, 0x83/2, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x83/2, APX_F, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> adc, 0x14, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +adc, 0x80/2, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> adc, 0x80/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x80/2, APX_F, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> +neg, 0xf6/3, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +
> +not, 0xf6/2, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +not, 0xf6/2, APX_F, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>
> aaa, 0x37, No64, NoSuf, {}
> aas, 0x3f, No64, NoSuf, {}
> @@ -375,6 +412,7 @@ cqto, 0x99, x64, Size64|NoSuf, {}
> // These multiplies can only be selected with single operand forms.
> mul, 0xf6/4, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> imul, 0xf6/5, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +imul, 0xaf, APX_F, C|Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64, Reg16|Reg32|Reg64 }
Missing NF?
> imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64 }
> imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> @@ -389,52 +427,98 @@ div, 0xf6/6, 0, W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|
> idiv, 0xf6/7, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> idiv, 0xf6/7, 0, W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Acc|Byte|Word|Dword|Qword }
>
> +rol, 0xd0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> rol, 0xd0/0, 0, W|Modrm|No_sSuf, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +rol, 0xc0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm8S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> rol, 0xc0/0, i186, W|Modrm|No_sSuf, { Imm8|Imm8S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +rol, 0xd2/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
> rol, 0xd2/0, 0, W|Modrm|No_sSuf, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +rol, 0xd0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
Didn't we agree to avoid adding this (and its sibling) template, for the omitted
shift count being ambiguous? Consider
rol %cl, %al
Is this a rotate by %cl, or a 1-bit NDD rotate?
Jan
next prev parent reply other threads:[~2023-12-08 14:13 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-24 7:02 [PATCH 1/9] Make const_1_mode print $1 in AT&T syntax Cui, Lili
2023-11-24 7:02 ` [PATCH v3 2/9] Support APX GPR32 with rex2 prefix Cui, Lili
2023-12-04 16:30 ` Jan Beulich
2023-12-05 13:31 ` Cui, Lili
2023-12-06 7:52 ` Jan Beulich
2023-12-06 12:43 ` Cui, Lili
2023-12-07 9:01 ` Jan Beulich
2023-12-08 3:10 ` Cui, Lili
2023-11-24 7:02 ` [PATCH v3 3/9] Created an empty EVEX_MAP4_ sub-table for EVEX instructions Cui, Lili
2023-11-24 7:02 ` [PATCH v3 4/9] Support APX GPR32 with extend evex prefix Cui, Lili
2023-12-07 12:38 ` Jan Beulich
2023-12-08 15:21 ` Cui, Lili
2023-12-11 8:34 ` Jan Beulich
2023-12-12 10:44 ` Cui, Lili
2023-12-12 11:16 ` Jan Beulich
2023-12-12 12:32 ` Cui, Lili
2023-12-12 12:39 ` Jan Beulich
2023-12-12 13:15 ` Cui, Lili
2023-12-12 14:13 ` Jan Beulich
2023-12-13 7:36 ` Cui, Lili
2023-12-13 7:48 ` Jan Beulich
2023-12-12 12:58 ` Cui, Lili
2023-12-12 14:04 ` Jan Beulich
2023-12-13 8:35 ` Cui, Lili
2023-12-13 9:13 ` Jan Beulich
2023-12-07 13:34 ` Jan Beulich
2023-12-11 6:16 ` Cui, Lili
2023-12-11 8:43 ` Jan Beulich
2023-12-11 11:50 ` Jan Beulich
2023-11-24 7:02 ` [PATCH v3 5/9] Add tests for " Cui, Lili
2023-12-07 14:05 ` Jan Beulich
2023-12-11 6:16 ` Cui, Lili
2023-12-11 8:55 ` Jan Beulich
2023-11-24 7:02 ` [PATCH v3 6/9] Support APX NDD Cui, Lili
2023-12-08 14:12 ` Jan Beulich [this message]
2023-12-11 13:36 ` Cui, Lili
2023-12-11 16:50 ` Jan Beulich
2023-12-13 10:42 ` Cui, Lili
2024-03-22 10:02 ` Jan Beulich
2024-03-22 10:31 ` Jan Beulich
2024-03-26 2:04 ` Cui, Lili
2024-03-26 7:06 ` Jan Beulich
2024-03-26 7:18 ` Cui, Lili
2024-03-22 10:59 ` Jan Beulich
2024-03-26 8:22 ` Cui, Lili
2024-03-26 9:30 ` Jan Beulich
2024-03-27 2:41 ` Cui, Lili
2023-12-08 14:27 ` Jan Beulich
2023-12-12 5:53 ` Cui, Lili
2023-12-12 8:28 ` Jan Beulich
2023-11-24 7:02 ` [PATCH v3 7/9] Support APX Push2/Pop2 Cui, Lili
2023-12-11 11:17 ` Jan Beulich
2023-12-15 8:38 ` Cui, Lili
2023-12-15 8:44 ` Jan Beulich
2023-11-24 7:02 ` [PATCH v3 8/9] Support APX NDD optimized encoding Cui, Lili
2023-12-11 12:27 ` Jan Beulich
2023-12-12 3:18 ` Hu, Lin1
2023-12-12 8:41 ` Jan Beulich
2023-12-13 5:31 ` Hu, Lin1
2023-12-12 8:45 ` Jan Beulich
2023-12-13 6:06 ` Hu, Lin1
2023-12-13 8:19 ` Jan Beulich
2023-12-13 8:34 ` Hu, Lin1
2023-11-24 7:02 ` [PATCH v3 9/9] Support APX JMPABS for disassembler Cui, Lili
2023-11-24 7:09 ` [PATCH 1/9] Make const_1_mode print $1 in AT&T syntax Jan Beulich
2023-11-24 11:22 ` Cui, Lili
2023-11-24 12:14 ` Jan Beulich
2023-12-12 2:57 ` Lu, Hongjiu
2023-12-12 8:16 ` Cui, Lili
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fb14d665-6aae-4b62-88cf-221b9788dcd3@suse.com \
--to=jbeulich@suse.com \
--cc=binutils@sourceware.org \
--cc=hongjiu.lu@intel.com \
--cc=lili.cui@intel.com \
--cc=lingling.kong@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).