public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: "Cui, Lili" <lili.cui@intel.com>
Cc: hjl.tools@gmail.com, binutils@sourceware.org
Subject: Re: [PATCH] Support APX NF
Date: Thu, 21 Mar 2024 15:26:25 +0100	[thread overview]
Message-ID: <0fd9e98e-907a-4569-ad21-4cf9e4a81673@suse.com> (raw)
In-Reply-To: <20240319064109.1530126-1-lili.cui@intel.com>

On 19.03.2024 07:41, Cui, Lili wrote:
> --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> @@ -23,7 +23,7 @@ _start:
>  	.insn EVEX.L1.66.M12.W0 0x60, %di, %ax
>  
>  	#EVEX_MAP4 movbe %r18w,%ax set EVEX.z == 0b1.
> -	.insn EVEX.L0.66.M12.W0 0x60, %di, %ax {%k7}{z}
> +	.insn EVEX.L0.66.M12.W0 0x60, %di, %ax {%k3}{z}
>  
>  	#EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.aaa[1:0] (P[17:16])
>  	#== 0b01
> @@ -33,17 +33,23 @@ _start:
>  	.insn EVEX.L1.NP.0f38.W1 0xf5, %rax, (%rax,%rbx), %rcx
>  
>  	#EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.P[23](EVEX.z) == 0b1
> -	.insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx), %rcx {%k7}{z}
> +	.insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx), %rcx {%k3}{z}
>  
>  	#EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.P[20](EVEX.b) == 0b1
>  	.insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx){1to8}, %rcx
>  
>  	#{evex} inc %rax %rbx EVEX.vvvv != 1111 && EVEX.ND = 0.
>  	.byte 0x62, 0xf4, 0xe4, 0x08, 0xff, 0x04, 0x08
> +
>  	# pop2 %rax, %r8 set EVEX.ND=0.
>  	.byte 0x62, 0xf4, 0x3c, 0x08, 0x8f, 0xc0
>  	.byte 0xff, 0xff, 0xff
> +
>  	# pop2 %rax, %r8 set EVEX.vvvv = 1111.
>  	.insn EVEX.L0.M4.W0 0x8f,  %rax, {rn-sae},%r8
> -	# pop2 %r8, %r8.
> -	.byte 0x62, 0xd4, 0x3c, 0x18, 0x8f, 0xc0

This was already replaced, and not ...

> +	# pop2 %r11, %r11.
> +	.byte 0x62, 0xd4, 0x24, 0x18, 0x8f, 0xc3

... by .byte. You want to rebase.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-nf.s
> @@ -0,0 +1,1374 @@
> +# Check 64bit APX_F instructions
> +
> +        .text
> +_start:
> +	{nf}	add	$123, %bl
> +	{nf}	add	$123, %bl, %dl
> +	{nf}	add	$123, %dx
> +	{nf}	add	$123, %dx, %ax
> +	{nf}	add	$123, %ecx
> +	{nf}	add	$123, %ecx, %edx
> +	{nf}	add	$123, %r9
> +	{nf}	add	$123, %r9, %r31
> +	{nf}	addb	$123, 291(%r8, %rax, 4)
> +	{nf}	add	$123, 291(%r8, %rax, 4), %bl
> +	{nf}	addw	$123, 291(%r8, %rax, 4)
> +	{nf}	add	$123, 291(%r8, %rax, 4), %dx
> +	{nf}	addl	$123, 291(%r8, %rax, 4)
> +	{nf}	add	$123, 291(%r8, %rax, 4), %ecx
> +	{nf}	addq	$123, 291(%r8, %rax, 4)
> +	{nf}	add	$123, 291(%r8, %rax, 4), %r9
> +	{nf}	add	%bl, %dl
> +	{nf}	add	%bl, %dl, %r8b
> +	{nf}	add	%bl, 291(%r8, %rax, 4)
> +	{nf}	add	%bl, 291(%r8, %rax, 4), %dl
> +	{nf}	add	%dx, %ax
> +	{nf}	add	%dx, %ax, %r9w
> +	{nf}	add	%dx, 291(%r8, %rax, 4)
> +	{nf}	add	%dx, 291(%r8, %rax, 4), %ax
> +	{nf}	add	%ecx, %edx
> +	{nf}	add	%ecx, %edx, %r10d
> +	{nf}	add	%ecx, 291(%r8, %rax, 4)
> +	{nf}	add	%ecx, 291(%r8, %rax, 4), %edx
> +	{nf}	add	%r9, %r31
> +	{nf}	add	%r9, %r31, %r11
> +	{nf}	add	%r9, 291(%r8, %rax, 4)
> +	{nf}	add	%r9, 291(%r8, %rax, 4), %r31
> +	{nf}	add	291(%r8, %rax, 4), %bl
> +	{nf}	add	291(%r8, %rax, 4), %bl, %dl
> +	{nf}	add	291(%r8, %rax, 4), %dx
> +	{nf}	add	291(%r8, %rax, 4), %dx, %ax
> +	{nf}	add	291(%r8, %rax, 4), %ecx
> +	{nf}	add	291(%r8, %rax, 4), %ecx, %edx
> +	{nf}	add	291(%r8, %rax, 4), %r9
> +	{nf}	add	291(%r8, %rax, 4), %r9, %r31
> +	{nf}	and	$123, %bl
> +	{nf}	and	$123, %bl, %dl
> +	{nf}	and	$123, %dx
> +	{nf}	and	$123, %dx, %ax
> +	{nf}	and	$123, %ecx
> +	{nf}	and	$123, %ecx, %edx
> +	{nf}	and	$123, %r9
> +	{nf}	and	$123, %r9, %r31
> +	{nf}	andb	$123, 291(%r8, %rax, 4)
> +	{nf}	and	$123, 291(%r8, %rax, 4), %bl
> +	{nf}	andw	$123, 291(%r8, %rax, 4)
> +	{nf}	and	$123, 291(%r8, %rax, 4), %dx
> +	{nf}	andl	$123, 291(%r8, %rax, 4)
> +	{nf}	and	$123, 291(%r8, %rax, 4), %ecx
> +	{nf}	andq	$123, 291(%r8, %rax, 4)
> +	{nf}	and	$123, 291(%r8, %rax, 4), %r9
> +	{nf}	and	%bl, %dl
> +	{nf}	and	%bl, %dl, %r8b
> +	{nf}	and	%bl, 291(%r8, %rax, 4)
> +	{nf}	and	%bl, 291(%r8, %rax, 4), %dl
> +	{nf}	and	%dx, %ax
> +	{nf}	and	%dx, %ax, %r9w
> +	{nf}	and	%dx, 291(%r8, %rax, 4)
> +	{nf}	and	%dx, 291(%r8, %rax, 4), %ax
> +	{nf}	and	%ecx, %edx
> +	{nf}	and	%ecx, %edx, %r10d
> +	{nf}	and	%ecx, 291(%r8, %rax, 4)
> +	{nf}	and	%ecx, 291(%r8, %rax, 4), %edx
> +	{nf}	and	%r9, %r31
> +	{nf}	and	%r9, %r31, %r11
> +	{nf}	and	%r9, 291(%r8, %rax, 4)
> +	{nf}	and	%r9, 291(%r8, %rax, 4), %r31
> +	{nf}	and	291(%r8, %rax, 4), %bl
> +	{nf}	and	291(%r8, %rax, 4), %bl, %dl
> +	{nf}	and	291(%r8, %rax, 4), %dx
> +	{nf}	and	291(%r8, %rax, 4), %dx, %ax
> +	{nf}	and	291(%r8, %rax, 4), %ecx
> +	{nf}	and	291(%r8, %rax, 4), %ecx, %edx
> +	{nf}	and	291(%r8, %rax, 4), %r9
> +	{nf}	and	291(%r8, %rax, 4), %r9, %r31
> +	{nf}	andn	%ecx, %edx, %r10d
> +	{nf}	andn	%r9, %r31, %r11
> +	{nf}	andn	291(%r8, %rax, 4), %ecx, %edx
> +	{nf}	andn	291(%r8, %rax, 4), %r9, %r31
> +	{nf}	bextr	%ecx, %edx, %r10d
> +	{nf}	bextr	%ecx, 291(%r8, %rax, 4), %edx
> +	{nf}	bextr	%r9, %r31, %r11
> +	{nf}	bextr	%r9, 291(%r8, %rax, 4), %r31
> +	{nf}	blsi	%ecx, %edx
> +	{nf}	blsi	%r9, %r31
> +	{nf}	blsi	291(%r8, %rax, 4), %ecx
> +	{nf}	blsi	291(%r8, %rax, 4), %r9
> +	{nf}	blsmsk	%ecx, %edx
> +	{nf}	blsmsk	%r9, %r31
> +	{nf}	blsmsk	291(%r8, %rax, 4), %ecx
> +	{nf}	blsmsk	291(%r8, %rax, 4), %r9
> +	{nf}	blsr	%ecx, %edx
> +	{nf}	blsr	%r9, %r31
> +	{nf}	blsr	291(%r8, %rax, 4), %ecx
> +	{nf}	blsr	291(%r8, %rax, 4), %r9
> +	{nf}	bzhi	%ecx, %edx, %r10d
> +	{nf}	bzhi	%ecx, 291(%r8, %rax, 4), %edx
> +	{nf}	bzhi	%r9, %r31, %r11
> +	{nf}	bzhi	%r9, 291(%r8, %rax, 4), %r31
> +	{nf}	dec	%bl
> +	{nf}	dec	%bl, %dl
> +	{nf}	dec	%dx
> +	{nf}	dec	%dx, %ax
> +	{nf}	dec	%ecx
> +	{nf}	dec	%ecx, %edx
> +	{nf}	dec	%r9
> +	{nf}	dec	%r9, %r31
> +	{nf}	decb	291(%r8, %rax, 4)
> +	{nf}	dec	291(%r8, %rax, 4), %bl
> +	{nf}	decw	291(%r8, %rax, 4)
> +	{nf}	dec	291(%r8, %rax, 4), %dx
> +	{nf}	decl	291(%r8, %rax, 4)
> +	{nf}	dec	291(%r8, %rax, 4), %ecx
> +	{nf}	decq	291(%r8, %rax, 4)
> +	{nf}	dec	291(%r8, %rax, 4), %r9
> +	{nf}	div	%bl
> +	{nf}	div	%dx
> +	{nf}	div	%ecx
> +	{nf}	div	%r9
> +	{nf}	divb	291(%r8, %rax, 4)
> +	{nf}	divw	291(%r8, %rax, 4)
> +	{nf}	divl	291(%r8, %rax, 4)
> +	{nf}	divq	291(%r8, %rax, 4)
> +	{nf}	idiv	%bl
> +	{nf}	idiv	%bl, %al
> +	{nf}	idiv	%dx
> +	{nf}	idiv	%dx, %ax
> +	{nf}	idiv	%ecx
> +	{nf}	idiv	%ecx, %eax
> +	{nf}	idiv	%r9
> +	{nf}	idiv	%r9, %rax
> +	{nf}	idivb	291(%r8, %rax, 4)
> +	{nf}	idivb	291(%r8, %rax, 4), %al
> +	{nf}	idivw	291(%r8, %rax, 4)
> +	{nf}	idivw	291(%r8, %rax, 4), %ax
> +	{nf}	idivl	291(%r8, %rax, 4)
> +	{nf}	idivl	291(%r8, %rax, 4), %eax
> +	{nf}	idivq	291(%r8, %rax, 4)
> +	{nf}	idivq	291(%r8, %rax, 4), %rax
> +	{nf}	imul	%bl
> +	{nf}	imul	%dx
> +	{nf}	imul	%dx, %ax
> +	{nf}	imul	%dx, %ax, %r9w
> +	{nf}	imul	%ecx
> +	{nf}	imul	%ecx, %edx
> +	{nf}	imul	%ecx, %edx, %r10d
> +	{nf}	imul	%r9
> +	{nf}	imul	%r9, %r31
> +	{nf}	imul	%r9, %r31, %r11
> +	{nf}	imulb	291(%r8, %rax, 4)
> +	{nf}	imulw	291(%r8, %rax, 4)
> +	{nf}	imul	291(%r8, %rax, 4), %dx
> +	{nf}	imul	291(%r8, %rax, 4), %dx, %ax
> +	{nf}	imull	291(%r8, %rax, 4)
> +	{nf}	imul	291(%r8, %rax, 4), %ecx
> +	{nf}	imul	291(%r8, %rax, 4), %ecx, %edx
> +	{nf}	imulq	291(%r8, %rax, 4)
> +	{nf}	imul	291(%r8, %rax, 4), %r9
> +	{nf}	imul	291(%r8, %rax, 4), %r9, %r31
> +	{nf}	imul	$0x7b, %dx, %ax
> +	{nf}	imul	$0x7b, %ecx, %edx
> +	{nf}	imul	$0x7b, %r9, %r15
> +	{nf}	imul	$0x7b, 291(%r8, %rax, 4), %dx
> +	{nf}	imul	$0x7b, 291(%r8, %rax, 4), %ecx
> +	{nf}	imul	$0x7b, 291(%r8, %rax, 4), %r9
> +	{nf}	imul	$0xff90, %dx, %ax
> +	{nf}	imul	$0xff90, %ecx, %edx
> +	{nf}	imul	$0xff90, %r9, %r15
> +	{nf}	imul	$0xff90, 291(%r8, %rax, 4), %dx
> +	{nf}	imul	$0xff90, 291(%r8, %rax, 4), %ecx
> +	{nf}	imul	$0xff90, 291(%r8, %rax, 4), %r9

Just to mention it here as well: The two-operand forms (immediate and
register) also want testing (and, as you had indicated, also enabling
in the opcode table).

> --- a/opcodes/i386-dis-evex.h
> +++ b/opcodes/i386-dis-evex.h
> @@ -875,19 +875,19 @@ static const struct dis386 evex_table[][256] = {
>    /* EVEX_MAP4_ */
>    {
>      /* 00 */
> -    { "addB",             { VexGb, Eb, Gb }, NO_PREFIX },
> -    { "addS",             { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> -    { "addB",             { VexGb, Gb, EbS }, NO_PREFIX },
> -    { "addS",             { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> +    { "%NFaddB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> +    { "%NFaddS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> +    { "%NFaddB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> +    { "%NFaddS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },

While I appreciate you switching from space to tab padding, as is
used elsewhere, I'm under the impression that the operand column
doesn't align properly anymore throughout ...

>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 08 */
> -    { "orB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> -    { "orS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> -    { "orB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> -    { "orS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> +    { "%NForB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> +    { "%NForS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> +    { "%NForB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> +    { "%NForS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
> @@ -911,28 +911,28 @@ static const struct dis386 evex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 20 */
> -    { "andB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> -    { "andS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> -    { "andB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> -    { "andS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> -    { "shldS",		{ VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
> +    { "%NFandB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> +    { "%NFandS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> +    { "%NFandB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> +    { "%NFandS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> +    { "%NFshldS",		{ VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 28 */
> -    { "subB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> -    { "subS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> -    { "subB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> -    { "subS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> -    { "shrdS",		{ VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
> +    { "%NFsubB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> +    { "%NFsubS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> +    { "%NFsubB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> +    { "%NFsubS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> +    { "%NFshrdS",		{ VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 30 */
> -    { "xorB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> -    { "xorS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> -    { "xorB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> -    { "xorS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> +    { "%NFxorB",		{ VexGb, Eb, Gb }, NO_PREFIX },
> +    { "%NFxorS",		{ VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> +    { "%NFxorB",		{ VexGb, Gb, EbS }, NO_PREFIX },
> +    { "%NFxorS",		{ VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },

... here.

> @@ -9652,6 +9666,9 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
>  		    oappend (&ins, "/(bad)");
>  		}
>  	    }
> +	  /* vex.nf is cleared after being consumed.  */
> +	  if (ins.vex.nf)

Just like you have it here, ...

> +	    oappend (&ins, "{bad-nf}");
>  
>  	  /* Check whether rounding control was enabled for an insn not
>  	     supporting it, when evex.b is not treated as evex.nd.  */
> @@ -10564,6 +10581,15 @@ putop (instr_info *ins, const char *in_template, int sizeflag)
>  	    }
>  	  else if (l == 1 && last[0] == 'C')
>  	    break;
> +	  else if (l == 1 && last[0] == 'N')
> +	    {
> +	      if (ins->vex.nf == true)

... no comparison against true (or false) please.

>  // clr with 1 operand is really xor with 2 operands.
>  clr, 0x30, 0, W|Modrm|No_sSuf|RegKludge|Optimize, { Reg8|Reg16|Reg32|Reg64 }
> +clr, 0x30, APX_F, W|Modrm|No_sSuf|RegKludge|EVexMap4, { Reg8|Reg16|Reg32|Reg64 }

Shouldn't this also have NF?

Jan

  reply	other threads:[~2024-03-21 14:26 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-19  6:41 Cui, Lili
2024-03-21 14:26 ` Jan Beulich [this message]
2024-03-26  6:28   ` [PATCH V3] " Cui, Lili
  -- strict thread matches above, loose matches on Subject: below --
2024-02-27  9:01 [PATCH] " Cui, Lili
2024-02-28 16:11 ` H.J. Lu
2024-02-29  1:12   ` Cui, Lili
2024-02-29  6:53   ` Jan Beulich
2024-02-29  8:39     ` Cui, Lili
2024-02-29  9:06       ` Jan Beulich
2024-02-29 10:22         ` Cui, Lili
2024-02-29 12:23           ` H.J. Lu
2024-02-29 12:26             ` Cui, Lili
2024-02-29 11:21 ` Jan Beulich
2024-02-29 12:00   ` Cui, Lili
2024-02-29 12:04     ` Jan Beulich
2024-02-29 12:41       ` Cui, Lili
2024-02-29 13:17         ` Jan Beulich
2024-02-29 13:47           ` Cui, Lili
2024-02-29 14:12             ` Jan Beulich
2024-03-01  3:23               ` Cui, Lili
2024-03-01  6:56                 ` Jan Beulich
2024-03-01  8:01                   ` Cui, Lili
2024-03-01 11:36   ` Cui, Lili
2024-03-01 11:49     ` Jan Beulich
2024-03-01  7:04 ` Jan Beulich
2024-03-01 11:50   ` Cui, Lili

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0fd9e98e-907a-4569-ad21-4cf9e4a81673@suse.com \
    --to=jbeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=hjl.tools@gmail.com \
    --cc=lili.cui@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).