RE: [PATCH v3 6/9] Support APX NDD

public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed

From: "Cui, Lili" <lili.cui@intel.com>
To: "Beulich, Jan" <JBeulich@suse.com>
Cc: "Lu, Hongjiu" <hongjiu.lu@intel.com>,
	"Kong, Lingling" <lingling.kong@intel.com>,
	"binutils@sourceware.org" <binutils@sourceware.org>
Subject: RE: [PATCH v3 6/9] Support APX NDD
Date: Mon, 11 Dec 2023 13:36:08 +0000	[thread overview]
Message-ID: <SJ0PR11MB560048569C0181D3832F88DF9E8FA@SJ0PR11MB5600.namprd11.prod.outlook.com> (raw)
In-Reply-To: <fb14d665-6aae-4b62-88cf-221b9788dcd3@suse.com>

> On 24.11.2023 08:02, Cui, Lili wrote:
> > @@ -8870,25 +8890,33 @@ build_modrm_byte (void)
> >  				     || i.vec_encoding == vex_encoding_evex));
> >      }
> >
> > -  for (v = source + 1; v < dest; ++v)
> > -    if (v != reg_slot)
> > -      break;
> > -  if (v >= dest)
> > -    v = ~0;
> > -  if (i.tm.extension_opcode != None)
> > +  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
> >      {
> > -      if (dest != source)
> > -	v = dest;
> > -      dest = ~0;
> > +      v = dest;
> > +      dest-- ;
> 
> Nit: Stray blank.
> 

Done.

> >      }
> > -  gas_assert (source < dest);
> 
> Starting from this line, do you really need to move that into the "else"
> branch? It looks to me as it it could stay here. (Maybe I'm wrong with the
> assertion itself, but ...
> 
> > -  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
> > -      && source != op)
> 
> ... this entire if() pretty surely can stay as is, as there are no templates with
> both DstVVVV and SwapSources afaict. (Thing is - as before - that it isn't easy
> to see that what is happening here is really just re-indentation. Iirc in an
> earlier version there actually were hidden changes.) If you want this moved as
> an optimization, please do so in a separate patch.
> 

Moved "i.tm.extension_opcode != None" and SWAP_SOURCES.

  if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
    {
      v = dest;
      dest-- ;
    }
  else
    {
      for (v = source + 1; v < dest; ++v)
        if (v != reg_slot)
          break;
      if (v >= dest)
        v = ~0;
    }
  if (i.tm.extension_opcode != None)
    {
      if (dest != source)
        v = dest;
      dest = ~0;
    }
  gas_assert (source < dest);
  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
      && source != op)
    {
      unsigned int tmp = source;

      source = v;
      v = tmp;
    }

> > --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.d
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.d
> > @@ -27,4 +27,6 @@ Disassembly of section .text:
> >  [ 	]*[a-f0-9]+:[ 	]+c8 ff ff ff[ 	]+enter  \$0xffff,\$0xff
> >  [ 	]*[a-f0-9]+:[ 	]+67 62 f2 7c 18 f5[ 	]+addr32 \(bad\)
> >  [ 	]*[a-f0-9]+:[ 	]+0b ff[ 	]+or     %edi,%edi
> > +[ 	]*[a-f0-9]+:[ 	]+62 f4 fc 08 ff[ 	]+\(bad\)
> > +[ 	]*[a-f0-9]+:[ 	]+d8[ 	]+.byte 0xd8
> >  #pass
> > --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> > @@ -26,3 +26,5 @@ _start:
> >  	#EVEX from VEX bzhi %ebx,%eax,%ecx EVEX.P[20](EVEX.b) == 1 (illegal
> value).
> >  	.insn EVEX.L0.NP.0f38.W0 0xf5, %eax ,(%ebx){1to8}, %ecx
> >  	.byte 0xff
> > +	#{evex} inc %rax %rbx EVEX.vvvv' != 1111 && EVEX.ND = 0.
> > +	.insn EVEX.L0.NP.M4.W1 0xff, %rax, %rbx
> 
> I don't think this does what you want. In the .d file the 4 bits are all set. I think
> you mean something like
> 
> 	.insn EVEX.L0.NP.M4.W1 0xff/0, %rcx, %rbx
> 
> (i.e. ModR/M.reg specified as opcode extension _and_ the first operand not
> the accumulator). The reason disassembly fails for what you've used looks to
> be ModR/M.reg == 0b011 (resulting from the use of %rbx).
> 

Change it to SIB so don’t need to add 0xff.

.insn EVEX.L0.NP.M4.W1 0xff/0, (%rax,%rcx), %rbx

0000000000000000 <_start>:
   0:   62 f4 e4                (bad)
   3:   08 ff                      or     %bh,%bh
   5:   04 08                   add    $0x8,%al

> (Also, nit: What's EVEX.vvvv' ? I.e. what's the ' there about?)
> 

Oh, it should be EVEX.vvvv.

> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd.s
> > @@ -0,0 +1,155 @@
> > +# Check 64bit APX NDD instructions with evex prefix encoding
> > +
> > +	.allow_index_reg
> > +	.text
> > +_start:
> > +	adc    $0x1234,%ax,%r30w
> > +	adc    %r15b,%r17b,%r18b
> > +	adc    %r15d,(%r8),%r18d
> > +	adc    (%r15,%rax,1),%r16b,%r8b
> > +	adc    (%r15,%rax,1),%r16w,%r8w
> > +	adcl   $0x11,(%r19,%rax,4),%r20d
> > +	adcx   %r15d,%r8d,%r18d
> > +	adcx   (%r15,%r31,1),%r8
> > +	adcx   (%r15,%r31,1),%r8d,%r18d
> > +	add    $0x1234,%ax,%r30w
> > +	add    $0x12344433,%r15,%r16
> > +	add    $0x34,%r13b,%r17b
> > +	add    $0xfffffffff4332211,%rax,%r8
> > +	add    %r31,%r8,%r16
> > +	add    %r31,(%r8),%r16
> > +	add    %r31,(%r8,%r16,8),%r16
> > +	add    %r31b,%r8b,%r16b
> > +	add    %r31d,%r8d,%r16d
> > +	add    %r31w,%r8w,%r16w
> > +	add    (%r31),%r8,%r16
> > +	add    0x9090(%r31,%r16,1),%r8,%r16
> > +	addb    %r31b,%r8b,%r16b
> > +	addl    %r31d,%r8d,%r16d
> > +	addl   $0x11,(%r19,%rax,4),%r20d
> > +	addq    %r31,%r8,%r16
> > +	addq   $0x12344433,(%r15,%rcx,4),%r16
> > +	addw    %r31w,%r8w,%r16w
> > +	adox   %r15d,%r8d,%r18d
> 
> Nit: Inconsistent blank padding.
> 

Done.

> > +	{load}  add    %r31,%r8,%r16
> > +	{store} add    %r31,%r8,%r16
> > +	adox   (%r15,%r31,1),%r8
> > +	adox   (%r15,%r31,1),%r8d,%r18d
> > +	and    $0x1234,%ax,%r30w
> > +	and    %r15b,%r17b,%r18b
> > +	and    %r15d,(%r8),%r18d
> > +	and    (%r15,%rax,1),%r16b,%r8b
> > +	and    (%r15,%rax,1),%r16w,%r8w
> > +	andl   $0x11,(%r19,%rax,4),%r20d
> > +	cmova  0x90909090(%eax),%edx,%r8d
> > +	cmovae 0x90909090(%eax),%edx,%r8d
> > +	cmovb  0x90909090(%eax),%edx,%r8d
> > +	cmovbe 0x90909090(%eax),%edx,%r8d
> > +	cmove  0x90909090(%eax),%edx,%r8d
> > +	cmovg  0x90909090(%eax),%edx,%r8d
> > +	cmovge 0x90909090(%eax),%edx,%r8d
> > +	cmovl  0x90909090(%eax),%edx,%r8d
> > +	cmovle 0x90909090(%eax),%edx,%r8d
> > +	cmovne 0x90909090(%eax),%edx,%r8d
> > +	cmovno 0x90909090(%eax),%edx,%r8d
> > +	cmovnp 0x90909090(%eax),%edx,%r8d
> > +	cmovns 0x90909090(%eax),%edx,%r8d
> > +	cmovo  0x90909090(%eax),%edx,%r8d
> > +	cmovp  0x90909090(%eax),%edx,%r8d
> > +	cmovs  0x90909090(%eax),%edx,%r8d
> > +	dec    %rax,%r17
> > +	decb   (%r31,%r12,1),%r8b
> > +	imul   0x909(%rax,%r31,8),%rdx,%r25
> > +	imul   0x90909(%eax),%edx,%r8d
> > +	inc    %r31,%r16
> > +	inc    %r31,%r8
> > +	inc    %rax,%rbx
> > +	neg    %rax,%r17
> > +	negb   (%r31,%r12,1),%r8b
> > +	not    %rax,%r17
> > +	notb   (%r31,%r12,1),%r8b
> > +	or     $0x1234,%ax,%r30w
> > +	or     %r15b,%r17b,%r18b
> > +	or     %r15d,(%r8),%r18d
> > +	or     (%r15,%rax,1),%r16b,%r8b
> > +	or     (%r15,%rax,1),%r16w,%r8w
> > +	orl    $0x11,(%r19,%rax,4),%r20d
> > +	rcl    $0x2,%r12b,%r31b
> > +	rcl    %cl,%r16b,%r8b
> > +	rclb   $0x1, (%rax),%r31b
> > +	rcll   $0x2,(%rax),%r31d
> > +	rclw   $0x1, (%rax),%r31w
> 
> Nit: Would be nice if there consistently were or were not blanks after the
> commas.
> 

Done.

> > --- a/opcodes/i386-opc.tbl
> > +++ b/opcodes/i386-opc.tbl
> > @@ -139,9 +139,13 @@
> >  #define Vsz256 Vsz=VSZ256
> >  #define Vsz512 Vsz=VSZ512
> >
> > +#define DstVVVV VexVVVV=VexVVVV_DST
> > +
> >  // The EVEX purpose of StaticRounding appears only together with SAE.
> > Re-use  // the bit to mark commutative VEX encodings where swapping
> > the source  // operands may allow to switch from 3-byte to 2-byte VEX
> encoding.
> > +// And re-use the bit to mark some NDD insns that swapping the source
> > +operands // may allow to switch from EVEX encoding to REX2 encoding.
> >  #define C StaticRounding
> >
> >  #define FP 387|287|8087
> > @@ -288,26 +292,40 @@ std, 0xfd, 0, NoSuf, {}  sti, 0xfb, 0, NoSuf, {}
> >
> >  // Arithmetic.
> > +add, 0x0, APX_F,
> >
> +D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|N
> F, {
> > +Reg8|Reg16|Reg32|Reg64,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> 
> There is _still_ Byte|Word|Dword|Qword in here (and below), when I think I
> pointed out more than once before that in new templates such redundancy
> wants omitting.
> 
> Since this isn't the first instance of earlier review comments not taken care of,
> may I please ask that you make reasonably sure that new versions aren't sent
> out like this?
> 

This part could indeed be omitted, but I really don't remember you mentioning it on the APX patches. There are still a lot of redundant Byte|Word|Dword|Qword in the opcode table, APX just added some flags on top of the old ones. Do you mind if I create a patch first to remove the redundant parts of master?

> >  add, 0x0, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, {
> > Reg8|Reg16|Reg32|Reg64,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +add, 0x83/0, APX_F,
> >
> +Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|
> NF, {
> > +Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex,
> > +Reg16|Reg32|Reg64 }
> >  add, 0x83/0, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S,
> > Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }  add,
> 0x4,
> > 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S,
> Acc|Byte|Word|Dword|Qword }
> > +add, 0x80/0, APX_F,
> > +W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, {
> > +Imm8|Imm16|Imm32|Imm32S,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64}
> >  add, 0x80/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> > Imm8|Imm16|Imm32|Imm32S,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> >
> >  inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
> > +inc, 0xfe/0, APX_F,
> > +W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF,
> >
> +{Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64}
> >  inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> >
> > +sub, 0x28, APX_F,
> >
> +D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|Opti
> mize|
> > +NF, { Reg8|Reg16|Reg32|Reg64,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64, }
> 
> Here and elsewhere, what's Optimize for? It not being there on other
> templates, it can't be for the EVEX->REX2 optimization? If there are further
> optimization plans, that's (again) something to mention in the description. Yet
> better would be if such attributes were added only when respective
> optimizations are actually introduced. Unlike e.g. NF, which would mean
> another bulk update if not added right away, new optimizations typically affect
> only a few templates at a time.
> 

Optimize is not new.

sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4|Optimize|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

> >  sub, 0x28, 0,
> > D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, {
> > Reg8|Reg16|Reg32|Reg64,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +sub, 0x83/5, APX_F,
> > +Modrm|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, { Imm8S,
> > +Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex,
> > +Reg16|Reg32|Reg64 }
> >  sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S,
> > Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }  sub,
> 0x2c,
> > 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S,
> Acc|Byte|Word|Dword|Qword }
> > +sub, 0x80/5, APX_F,
> > +W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, {
> > +Imm8|Imm16|Imm32|Imm32S,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> >  sub, 0x80/5, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> > Imm8|Imm16|Imm32|Imm32S,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> 
> There are still only 3 new templates here (and also above for add, plus for
> other similar insns), when ...
> 
> >  dec, 0x48, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
> > +dec, 0xfe/1, APX_F,
> > +W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, {
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> >  dec, 0xfe/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> >
> > +sbb, 0x18, APX_F,
> > +D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVex128|EVexMap4, {
> > +Reg8|Reg16|Reg32|Reg64,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> >  sbb, 0x18, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, {
> > Reg8|Reg16|Reg32|Reg64,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +sbb, 0x18, APX_F,
> > +D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, {
> > +Reg8|Reg16|Reg32|Reg64,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x }
> > +sbb, 0x83/3, APX_F,
> >
> +Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4,
> {
> > +Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex,
> > +Reg16|Reg32|Reg64 }
> >  sbb, 0x83/3, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S,
> > Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> > +sbb, 0x83/3, APX_F, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf,
> { Imm8S,
> > +Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> >  sbb, 0x1c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S,
> > Acc|Byte|Word|Dword|Qword }
> > +sbb, 0x80/3, APX_F,
> > +W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4, {
> > +Imm8|Imm16|Imm32|Imm32S,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> >  sbb, 0x80/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> > Imm8|Imm16|Imm32|Imm32S,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +sbb, 0x80/3, APX_F, W|Modrm|EVex128|EVexMap4|No_sSuf, {
> > +Imm8|Imm16|Imm32|Imm32S,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x }
> 
> ... there are 6 new templates here. This is again an aspect I had pointed out
> before. You cannot defer the addition of the other 3 until the NF patch, as you
> want to make sure that with just this patch in place something both
> 
>     {evex} sbb %eax, %eax
> 
> and
> 
>     {evex} sub %eax, %eax
> 
> actually assemble, and to EVEX encodings. I can't see how that would work in
> the latter case without those further templates.
> 
> The alternative is to also defer adding the 2-operand SBB templates (and any
> others you add here which don't use DstVVVV).
> 

I'm having a headache with this, some instructions like sbb don't support NF, originally they were in the 4/9 patch, but their disassemblers are in the NDD patch, and you agreed to put them in the NDD patch. Now I really don't know where to move. Moving encoding, decoding, and especially test cases for instructions between patches is cumbersome and I really don't think it makes much sense.

> >  xor, 0x30, 0,
> > D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, {
> > Reg8|Reg16|Reg32|Reg64,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +xor, 0x83/6, APX_F,
> >
> +Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap4|
> NF, {
> > +Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex,
> > +Reg16|Reg32|Reg64 }
> >  xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S,
> > Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }  xor,
> 0x34,
> > 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S,
> Acc|Byte|Word|Dword|Qword }
> > +xor, 0x80/6, APX_F,
> > +W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVex128|EVexMap4|NF, {
> > +Imm8|Imm16|Imm32|Imm32S,
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> >  xor, 0x80/6, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> > Imm8|Imm16|Imm32|Imm32S,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> >
> >  // clr with 1 operand is really xor with 2 operands.
> >  clr, 0x30, 0, W|Modrm|No_sSuf|RegKludge|Optimize, {
> > Reg8|Reg16|Reg32|Reg64 }
> 
> Btw., for consistency this may also want accompanying with an EVEX
> counterpart.
> 

Do you mean to add an entry like this? It should belong to the previous patch.

// clr with 1 operand is really xor with 2 operands.
clr, 0x30, 0, W|Modrm|No_sSuf|RegKludge|Optimize, { Reg8|Reg16|Reg32|Reg64 }
clr, 0x30, APX_F, W|Modrm|No_sSuf|RegKludge|EVex128|EVexMap4|Optimize, { Reg8|Reg16|Reg32|Reg64 }

> >  mul, 0xf6/4, 0, W|Modrm|No_sSuf, {
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > imul, 0xf6/5, 0, W|Modrm|No_sSuf, {
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +imul, 0xaf, APX_F,
> >
> +C|Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVex128|EVexMap
> 4, {
> > +Reg16|Reg32|Reg64|Unspecified|Word|Dword|Qword|BaseIndex,
> > +Reg16|Reg32|Reg64, Reg16|Reg32|Reg64 }
> 
> Missing NF?
> 

Oh, when I rebase the NF patch, I found this missing and fixed it.

> >  rol, 0xd2/0, 0, W|Modrm|No_sSuf, { ShiftCount,
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +rol, 0xd0/0, APX_F,
> > +W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, {
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x,
> > +Reg8|Reg16|Reg32|Reg64 }
> 
> Didn't we agree to avoid adding this (and its sibling) template, for the omitted
> shift count being ambiguous? Consider
> 
>     rol %cl, %al
> 
> Is this a rotate by %cl, or a 1-bit NDD rotate?
> 

These entries should be deleted.

rol, 0xd0/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVex128|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }

Thanks,
Lili.

next prev parent reply	other threads:[~2023-12-11 13:36 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-24  7:02 [PATCH 1/9] Make const_1_mode print $1 in AT&T syntax Cui, Lili
2023-11-24  7:02 ` [PATCH v3 2/9] Support APX GPR32 with rex2 prefix Cui, Lili
2023-12-04 16:30   ` Jan Beulich
2023-12-05 13:31     ` Cui, Lili
2023-12-06  7:52       ` Jan Beulich
2023-12-06 12:43         ` Cui, Lili
2023-12-07  9:01           ` Jan Beulich
2023-12-08  3:10             ` Cui, Lili
2023-11-24  7:02 ` [PATCH v3 3/9] Created an empty EVEX_MAP4_ sub-table for EVEX instructions Cui, Lili
2023-11-24  7:02 ` [PATCH v3 4/9] Support APX GPR32 with extend evex prefix Cui, Lili
2023-12-07 12:38   ` Jan Beulich
2023-12-08 15:21     ` Cui, Lili
2023-12-11  8:34       ` Jan Beulich
2023-12-12 10:44         ` Cui, Lili
2023-12-12 11:16           ` Jan Beulich
2023-12-12 12:32             ` Cui, Lili
2023-12-12 12:39               ` Jan Beulich
2023-12-12 13:15                 ` Cui, Lili
2023-12-12 14:13                   ` Jan Beulich
2023-12-13  7:36                     ` Cui, Lili
2023-12-13  7:48                       ` Jan Beulich
2023-12-12 12:58         ` Cui, Lili
2023-12-12 14:04           ` Jan Beulich
2023-12-13  8:35             ` Cui, Lili
2023-12-13  9:13               ` Jan Beulich
2023-12-07 13:34   ` Jan Beulich
2023-12-11  6:16     ` Cui, Lili
2023-12-11  8:43       ` Jan Beulich
2023-12-11 11:50   ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 5/9] Add tests for " Cui, Lili
2023-12-07 14:05   ` Jan Beulich
2023-12-11  6:16     ` Cui, Lili
2023-12-11  8:55       ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 6/9] Support APX NDD Cui, Lili
2023-12-08 14:12   ` Jan Beulich
2023-12-11 13:36     ` Cui, Lili [this message]
2023-12-11 16:50       ` Jan Beulich
2023-12-13 10:42         ` Cui, Lili
2024-03-22 10:02     ` Jan Beulich
2024-03-22 10:31       ` Jan Beulich
2024-03-26  2:04         ` Cui, Lili
2024-03-26  7:06           ` Jan Beulich
2024-03-26  7:18             ` Cui, Lili
2024-03-22 10:59       ` Jan Beulich
2024-03-26  8:22         ` Cui, Lili
2024-03-26  9:30           ` Jan Beulich
2024-03-27  2:41             ` Cui, Lili
2023-12-08 14:27   ` Jan Beulich
2023-12-12  5:53     ` Cui, Lili
2023-12-12  8:28       ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 7/9] Support APX Push2/Pop2 Cui, Lili
2023-12-11 11:17   ` Jan Beulich
2023-12-15  8:38     ` Cui, Lili
2023-12-15  8:44       ` Jan Beulich
2023-11-24  7:02 ` [PATCH v3 8/9] Support APX NDD optimized encoding Cui, Lili
2023-12-11 12:27   ` Jan Beulich
2023-12-12  3:18     ` Hu, Lin1
2023-12-12  8:41       ` Jan Beulich
2023-12-13  5:31         ` Hu, Lin1
2023-12-12  8:45       ` Jan Beulich
2023-12-13  6:06         ` Hu, Lin1
2023-12-13  8:19           ` Jan Beulich
2023-12-13  8:34             ` Hu, Lin1
2023-11-24  7:02 ` [PATCH v3 9/9] Support APX JMPABS for disassembler Cui, Lili
2023-11-24  7:09 ` [PATCH 1/9] Make const_1_mode print $1 in AT&T syntax Jan Beulich
2023-11-24 11:22   ` Cui, Lili
2023-11-24 12:14     ` Jan Beulich
2023-12-12  2:57 ` Lu, Hongjiu
2023-12-12  8:16 ` Cui, Lili

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=SJ0PR11MB560048569C0181D3832F88DF9E8FA@SJ0PR11MB5600.namprd11.prod.outlook.com \
    --to=lili.cui@intel.com \
    --cc=JBeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=hongjiu.lu@intel.com \
    --cc=lingling.kong@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).