Re: [PATCH v3 6/9] Support APX NDD

public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed

From: Jan Beulich <jbeulich@suse.com>
To: "Cui, Lili" <lili.cui@intel.com>
Cc: hongjiu.lu@intel.com, konglin1 <lingling.kong@intel.com>,
	binutils@sourceware.org
Subject: Re: [PATCH v3 6/9] Support APX NDD
Date: Fri, 8 Dec 2023 15:12:52 +0100	[thread overview]
Message-ID: <fb14d665-6aae-4b62-88cf-221b9788dcd3@suse.com> (raw)
In-Reply-To: <20231124070213.3886483-6-lili.cui@intel.com>

On 24.11.2023 08:02, Cui, Lili wrote:
> @@ -8870,25 +8890,33 @@ build_modrm_byte (void)
>  				     || i.vec_encoding == vex_encoding_evex));
>      }
>  
> -  for (v = source + 1; v < dest; ++v)
> -    if (v != reg_slot)
> -      break;
> -  if (v >= dest)
> -    v = ~0;
> -  if (i.tm.extension_opcode != None)
> +  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
>      {
> -      if (dest != source)
> -	v = dest;
> -      dest = ~0;
> +      v = dest;
> +      dest-- ;

Nit: Stray blank.

>      }
> -  gas_assert (source < dest);

Starting from this line, do you really need to move that into the "else"
branch? It looks to me as it it could stay here. (Maybe I'm wrong with
the assertion itself, but ...

> -  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
> -      && source != op)

... this entire if() pretty surely can stay as is, as there are no
templates with both DstVVVV and SwapSources afaict. (Thing is - as
before - that it isn't easy to see that what is happening here is
really just re-indentation. Iirc in an earlier version there actually
were hidden changes.) If you want this moved as an optimization,
please do so in a separate patch.

> --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.d
> +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.d
> @@ -27,4 +27,6 @@ Disassembly of section .text:
>  [ 	]*[a-f0-9]+:[ 	]+c8 ff ff ff[ 	]+enter  \$0xffff,\$0xff
>  [ 	]*[a-f0-9]+:[ 	]+67 62 f2 7c 18 f5[ 	]+addr32 \(bad\)
>  [ 	]*[a-f0-9]+:[ 	]+0b ff[ 	]+or     %edi,%edi
> +[ 	]*[a-f0-9]+:[ 	]+62 f4 fc 08 ff[ 	]+\(bad\)
> +[ 	]*[a-f0-9]+:[ 	]+d8[ 	]+.byte 0xd8
>  #pass
> --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> @@ -26,3 +26,5 @@ _start:
>  	#EVEX from VEX bzhi %ebx,%eax,%ecx EVEX.P[20](EVEX.b) == 1 (illegal value).
>  	.insn EVEX.L0.NP.0f38.W0 0xf5, %eax ,(%ebx){1to8}, %ecx
>  	.byte 0xff
> +	#{evex} inc %rax %rbx EVEX.vvvv' != 1111 && EVEX.ND = 0.
> +	.insn EVEX.L0.NP.M4.W1 0xff, %rax, %rbx

I don't think this does what you want. In the .d file the 4 bits are
all set. I think you mean something like

	.insn EVEX.L0.NP.M4.W1 0xff/0, %rcx, %rbx

(i.e. ModR/M.reg specified as opcode extension _and_ the first operand
not the accumulator). The reason disassembly fails for what you've used
looks to be ModR/M.reg == 0b011 (resulting from the use of %rbx).

(Also, nit: What's EVEX.vvvv' ? I.e. what's the ' there about?)

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd.s
> @@ -0,0 +1,155 @@
> +# Check 64bit APX NDD instructions with evex prefix encoding
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	adc    $0x1234,%ax,%r30w
> +	adc    %r15b,%r17b,%r18b
> +	adc    %r15d,(%r8),%r18d
> +	adc    (%r15,%rax,1),%r16b,%r8b
> +	adc    (%r15,%rax,1),%r16w,%r8w
> +	adcl   $0x11,(%r19,%rax,4),%r20d
> +	adcx   %r15d,%r8d,%r18d
> +	adcx   (%r15,%r31,1),%r8
> +	adcx   (%r15,%r31,1),%r8d,%r18d
> +	add    $0x1234,%ax,%r30w
> +	add    $0x12344433,%r15,%r16
> +	add    $0x34,%r13b,%r17b
> +	add    $0xfffffffff4332211,%rax,%r8
> +	add    %r31,%r8,%r16
> +	add    %r31,(%r8),%r16
> +	add    %r31,(%r8,%r16,8),%r16
> +	add    %r31b,%r8b,%r16b
> +	add    %r31d,%r8d,%r16d
> +	add    %r31w,%r8w,%r16w
> +	add    (%r31),%r8,%r16
> +	add    0x9090(%r31,%r16,1),%r8,%r16
> +	addb    %r31b,%r8b,%r16b
> +	addl    %r31d,%r8d,%r16d
> +	addl   $0x11,(%r19,%rax,4),%r20d
> +	addq    %r31,%r8,%r16
> +	addq   $0x12344433,(%r15,%rcx,4),%r16
> +	addw    %r31w,%r8w,%r16w
> +	adox   %r15d,%r8d,%r18d

Nit: Inconsistent blank padding.

> +	{load}  add    %r31,%r8,%r16
> +	{store} add    %r31,%r8,%r16
> +	adox   (%r15,%r31,1),%r8
> +	adox   (%r15,%r31,1),%r8d,%r18d
> +	and    $0x1234,%ax,%r30w
> +	and    %r15b,%r17b,%r18b
> +	and    %r15d,(%r8),%r18d
> +	and    (%r15,%rax,1),%r16b,%r8b
> +	and    (%r15,%rax,1),%r16w,%r8w
> +	andl   $0x11,(%r19,%rax,4),%r20d
> +	cmova  0x90909090(%eax),%edx,%r8d
> +	cmovae 0x90909090(%eax),%edx,%r8d
> +	cmovb  0x90909090(%eax),%edx,%r8d
> +	cmovbe 0x90909090(%eax),%edx,%r8d
> +	cmove  0x90909090(%eax),%edx,%r8d
> +	cmovg  0x90909090(%eax),%edx,%r8d
> +	cmovge 0x90909090(%eax),%edx,%r8d
> +	cmovl  0x90909090(%eax),%edx,%r8d
> +	cmovle 0x90909090(%eax),%edx,%r8d
> +	cmovne 0x90909090(%eax),%edx,%r8d
> +	cmovno 0x90909090(%eax),%edx,%r8d
> +	cmovnp 0x90909090(%eax),%edx,%r8d
> +	cmovns 0x90909090(%eax),%edx,%r8d
> +	cmovo  0x90909090(%eax),%edx,%r8d
> +	cmovp  0x90909090(%eax),%edx,%r8d
> +	cmovs  0x90909090(%eax),%edx,%r8d
> +	dec    %rax,%r17
> +	decb   (%r31,%r12,1),%r8b
> +	imul   0x909(%rax,%r31,8),%rdx,%r25
> +	imul   0x90909(%eax),%edx,%r8d
> +	inc    %r31,%r16
> +	inc    %r31,%r8
> +	inc    %rax,%rbx
> +	neg    %rax,%r17
> +	negb   (%r31,%r12,1),%r8b
> +	not    %rax,%r17
> +	notb   (%r31,%r12,1),%r8b
> +	or     $0x1234,%ax,%r30w
> +	or     %r15b,%r17b,%r18b
> +	or     %r15d,(%r8),%r18d
> +	or     (%r15,%rax,1),%r16b,%r8b
> +	or     (%r15,%rax,1),%r16w,%r8w
> +	orl    $0x11,(%r19,%rax,4),%r20d
> +	rcl    $0x2,%r12b,%r31b
> +	rcl    %cl,%r16b,%r8b
> +	rclb   $0x1, (%rax),%r31b
> +	rcll   $0x2,(%rax),%r31d
> +	rclw   $0x1, (%rax),%r31w

Nit: Would be nice if there consistently were or were not blanks after
the commas.

> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -139,9 +139,13 @@
>  #define Vsz256 Vsz=VSZ256
>  #define Vsz512 Vsz=VSZ512
>  
> +#define DstVVVV VexVVVV=VexVVVV_DST
> +
>  // The EVEX purpose of StaticRounding appears only together with SAE. Re-use
>  // the bit to mark commutative VEX encodings where swapping the source
>  // operands may allow to switch from 3-byte to 2-byte VEX encoding.
> +// And re-use the bit to mark some NDD insns that swapping the source operands
> +// may allow to switch from EVEX encoding to REX2 encoding.
>  #define C StaticRounding
>  
>  #define FP 387|287|8087
> @@ -288,26 +292,40 @@ std, 0xfd, 0, NoSuf, {}
>  sti, 0xfb, 0, NoSuf, {}
>  
>  // Arithmetic.
> +add, 0x0, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }

There is _still_ Byte|Word|Dword|Qword in here (and below), when I think I
pointed out more than once before that in new templates such redundancy
wants omitting.

Since this isn't the first instance of earlier review comments not taken
care of, may I please ask that you make reasonably sure that new versions
aren't sent out like this?

>  add, 0x0, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +add, 0x83/0, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  add, 0x83/0, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  add, 0x4, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +add, 0x80/0, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
>  add, 0x80/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
> +inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
>  inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
> +sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|Optimize|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }

Here and elsewhere, what's Optimize for? It not being there on other templates,
it can't be for the EVEX->REX2 optimization? If there are further optimization
plans, that's (again) something to mention in the description. Yet better would
be if such attributes were added only when respective optimizations are actually
introduced. Unlike e.g. NF, which would mean another bulk update if not added
right away, new optimizations typically affect only a few templates at a time.

>  sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  sub, 0x2c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +sub, 0x80/5, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  sub, 0x80/5, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

There are still only 3 new templates here (and also above for add, plus for
other similar insns), when ...

>  dec, 0x48, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
> +dec, 0xfe/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  dec, 0xfe/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
> +sbb, 0x18, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  sbb, 0x18, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x18, APX_F, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x83/3, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  sbb, 0x83/3, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x83/3, APX_F, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  sbb, 0x1c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +sbb, 0x80/3, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  sbb, 0x80/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x80/3, APX_F, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

... there are 6 new templates here. This is again an aspect I had pointed
out before. You cannot defer the addition of the other 3 until the NF patch,
as you want to make sure that with just this patch in place something both

    {evex} sbb %eax, %eax

and

    {evex} sub %eax, %eax

actually assemble, and to EVEX encodings. I can't see how that would work
in the latter case without those further templates.

The alternative is to also defer adding the 2-operand SBB templates (and
any others you add here which don't use DstVVVV).

>  cmp, 0x38, 0, D|W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  cmp, 0x83/7, 0, Modrm|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> @@ -318,31 +336,50 @@ test, 0x84, 0, D|W|C|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, R
>  test, 0xa8, 0, W|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
>  test, 0xf6/0, 0, W|Modrm|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
> +and, 0x20, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  and, 0x20, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +and, 0x83/4, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  and, 0x83/4, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock|Optimize, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  and, 0x24, 0, W|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +and, 0x80/4, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  and, 0x80/4, 0, W|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
> +or, 0x8, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  or, 0x8, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +or, 0x83/1, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  or, 0x83/1, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
> +xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  xor, 0x34, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +xor, 0x80/6, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  xor, 0x80/6, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  // clr with 1 operand is really xor with 2 operands.
>  clr, 0x30, 0, W|Modrm|No_sSuf|RegKludge|Optimize, { Reg8|Reg16|Reg32|Reg64 }

Btw., for consistency this may also want accompanying with an EVEX counterpart.

> +adc, 0x10, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  adc, 0x10, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x10, APX_F, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x83/2, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  adc, 0x83/2, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x83/2, APX_F, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  adc, 0x14, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
> +adc, 0x80/2, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  adc, 0x80/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x80/2, APX_F, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
> +neg, 0xf6/3, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +
> +not, 0xf6/2, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +not, 0xf6/2, APX_F, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  aaa, 0x37, No64, NoSuf, {}
>  aas, 0x3f, No64, NoSuf, {}
> @@ -375,6 +412,7 @@ cqto, 0x99, x64, Size64|NoSuf, {}
>  // These multiplies can only be selected with single operand forms.
>  mul, 0xf6/4, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  imul, 0xf6/5, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +imul, 0xaf, APX_F, C|Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64, Reg16|Reg32|Reg64 }

Missing NF?

>  imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex, Reg16|Reg32|Reg64 }
>  imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> @@ -389,52 +427,98 @@ div, 0xf6/6, 0, W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|
>  idiv, 0xf6/7, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  idiv, 0xf6/7, 0, W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Acc|Byte|Word|Dword|Qword }
>  
> +rol, 0xd0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  rol, 0xd0/0, 0, W|Modrm|No_sSuf, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +rol, 0xc0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Imm8|Imm8S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  rol, 0xc0/0, i186, W|Modrm|No_sSuf, { Imm8|Imm8S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +rol, 0xd2/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
>  rol, 0xd2/0, 0, W|Modrm|No_sSuf, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +rol, 0xd0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }

Didn't we agree to avoid adding this (and its sibling) template, for the omitted
shift count being ambiguous? Consider

    rol %cl, %al

Is this a rotate by %cl, or a 1-bit NDD rotate?

Jan

next prev parent reply	other threads:[~2023-12-08 14:13 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-24  7:02 [PATCH 1/9] Make const_1_mode print $1 in AT&T syntax Cui, Lili
2023-11-24  7:02 ` [PATCH v3 2/9] Support APX GPR32 with rex2 prefix Cui, Lili
2023-12-04 16:30   ` Jan Beulich
2023-12-05 13:31     ` Cui, Lili
2023-12-06  7:52       ` Jan Beulich
2023-12-06 12:43         ` Cui, Lili
2023-12-07  9:01           ` Jan Beulich
2023-12-08  3:10             ` Cui, Lili
2023-11-24  7:02 ` [PATCH v3 3/9] Created an empty EVEX_MAP4_ sub-table for EVEX instructions Cui, Lili
2023-11-24  7:02 ` [PATCH v3 4/9] Support APX GPR32 with extend evex prefix Cui, Lili
2023-12-07 12:38   ` Jan Beulich
2023-12-08 15:21     ` Cui, Lili
2023-12-11  8:34       ` Jan Beulich
2023-12-12 10:44         ` Cui, Lili
2023-12-12 11:16           ` Jan Beulich
2023-12-12 12:32             ` Cui, Lili
2023-12-12 12:39               ` Jan Beulich
2023-12-12 13:15                 ` Cui, Lili
2023-12-12 14:13                   ` Jan Beulich
2023-12-13  7:36                     ` Cui, Lili
2023-12-13  7:48                       ` Jan Beulich
2023-12-12 12:58         ` Cui, Lili
2023-12-12 14:04           ` Jan Beulich
2023-12-13  8:35             ` Cui, Lili
2023-12-13  9:13               ` Jan Beulich
2023-12-07 13:34   ` Jan Beulich
2023-12-11  6:16     ` Cui, Lili
2023-12-11  8:43       ` Jan Beulich
2023-12-11 11:50   ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 5/9] Add tests for " Cui, Lili
2023-12-07 14:05   ` Jan Beulich
2023-12-11  6:16     ` Cui, Lili
2023-12-11  8:55       ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 6/9] Support APX NDD Cui, Lili
2023-12-08 14:12   ` Jan Beulich [this message]
2023-12-11 13:36     ` Cui, Lili
2023-12-11 16:50       ` Jan Beulich
2023-12-13 10:42         ` Cui, Lili
2024-03-22 10:02     ` Jan Beulich
2024-03-22 10:31       ` Jan Beulich
2024-03-26  2:04         ` Cui, Lili
2024-03-26  7:06           ` Jan Beulich
2024-03-26  7:18             ` Cui, Lili
2024-03-22 10:59       ` Jan Beulich
2024-03-26  8:22         ` Cui, Lili
2024-03-26  9:30           ` Jan Beulich
2024-03-27  2:41             ` Cui, Lili
2023-12-08 14:27   ` Jan Beulich
2023-12-12  5:53     ` Cui, Lili
2023-12-12  8:28       ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 7/9] Support APX Push2/Pop2 Cui, Lili
2023-12-11 11:17   ` Jan Beulich
2023-12-15  8:38     ` Cui, Lili
2023-12-15  8:44       ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 8/9] Support APX NDD optimized encoding Cui, Lili
2023-12-11 12:27   ` Jan Beulich
2023-12-12  3:18     ` Hu, Lin1
2023-12-12  8:41       ` Jan Beulich
2023-12-13  5:31         ` Hu, Lin1
2023-12-12  8:45       ` Jan Beulich
2023-12-13  6:06         ` Hu, Lin1
2023-12-13  8:19           ` Jan Beulich
2023-12-13  8:34             ` Hu, Lin1
2023-11-24  7:02 ` [PATCH v3 9/9] Support APX JMPABS for disassembler Cui, Lili
2023-11-24  7:09 ` [PATCH 1/9] Make const_1_mode print $1 in AT&T syntax Jan Beulich
2023-11-24 11:22   ` Cui, Lili
2023-11-24 12:14     ` Jan Beulich
2023-12-12  2:57 ` Lu, Hongjiu
2023-12-12  8:16 ` Cui, Lili

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fb14d665-6aae-4b62-88cf-221b9788dcd3@suse.com \
    --to=jbeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=hongjiu.lu@intel.com \
    --cc=lili.cui@intel.com \
    --cc=lingling.kong@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).