From: Jan Beulich <jbeulich@suse.com>
To: "Cui, Lili" <lili.cui@intel.com>
Cc: hjl.tools@gmail.com, binutils@sourceware.org
Subject: Re: [PATCH] Support APX NF
Date: Thu, 21 Mar 2024 15:26:25 +0100 [thread overview]
Message-ID: <0fd9e98e-907a-4569-ad21-4cf9e4a81673@suse.com> (raw)
In-Reply-To: <20240319064109.1530126-1-lili.cui@intel.com>
On 19.03.2024 07:41, Cui, Lili wrote:
> --- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> +++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-bad.s
> @@ -23,7 +23,7 @@ _start:
> .insn EVEX.L1.66.M12.W0 0x60, %di, %ax
>
> #EVEX_MAP4 movbe %r18w,%ax set EVEX.z == 0b1.
> - .insn EVEX.L0.66.M12.W0 0x60, %di, %ax {%k7}{z}
> + .insn EVEX.L0.66.M12.W0 0x60, %di, %ax {%k3}{z}
>
> #EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.aaa[1:0] (P[17:16])
> #== 0b01
> @@ -33,17 +33,23 @@ _start:
> .insn EVEX.L1.NP.0f38.W1 0xf5, %rax, (%rax,%rbx), %rcx
>
> #EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.P[23](EVEX.z) == 0b1
> - .insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx), %rcx {%k7}{z}
> + .insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx), %rcx {%k3}{z}
>
> #EVEX from VEX bzhi %rax,(%rax,%rbx),%rcx EVEX.P[20](EVEX.b) == 0b1
> .insn EVEX.L0.NP.0f38.W1 0xf5, %rax, (%rax,%rbx){1to8}, %rcx
>
> #{evex} inc %rax %rbx EVEX.vvvv != 1111 && EVEX.ND = 0.
> .byte 0x62, 0xf4, 0xe4, 0x08, 0xff, 0x04, 0x08
> +
> # pop2 %rax, %r8 set EVEX.ND=0.
> .byte 0x62, 0xf4, 0x3c, 0x08, 0x8f, 0xc0
> .byte 0xff, 0xff, 0xff
> +
> # pop2 %rax, %r8 set EVEX.vvvv = 1111.
> .insn EVEX.L0.M4.W0 0x8f, %rax, {rn-sae},%r8
> - # pop2 %r8, %r8.
> - .byte 0x62, 0xd4, 0x3c, 0x18, 0x8f, 0xc0
This was already replaced, and not ...
> + # pop2 %r11, %r11.
> + .byte 0x62, 0xd4, 0x24, 0x18, 0x8f, 0xc3
... by a .byte sequence. You want to rebase.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-nf.s
> @@ -0,0 +1,1374 @@
> +# Check 64bit APX_F instructions
> +
> + .text
> +_start:
> + {nf} add $123, %bl
> + {nf} add $123, %bl, %dl
> + {nf} add $123, %dx
> + {nf} add $123, %dx, %ax
> + {nf} add $123, %ecx
> + {nf} add $123, %ecx, %edx
> + {nf} add $123, %r9
> + {nf} add $123, %r9, %r31
> + {nf} addb $123, 291(%r8, %rax, 4)
> + {nf} add $123, 291(%r8, %rax, 4), %bl
> + {nf} addw $123, 291(%r8, %rax, 4)
> + {nf} add $123, 291(%r8, %rax, 4), %dx
> + {nf} addl $123, 291(%r8, %rax, 4)
> + {nf} add $123, 291(%r8, %rax, 4), %ecx
> + {nf} addq $123, 291(%r8, %rax, 4)
> + {nf} add $123, 291(%r8, %rax, 4), %r9
> + {nf} add %bl, %dl
> + {nf} add %bl, %dl, %r8b
> + {nf} add %bl, 291(%r8, %rax, 4)
> + {nf} add %bl, 291(%r8, %rax, 4), %dl
> + {nf} add %dx, %ax
> + {nf} add %dx, %ax, %r9w
> + {nf} add %dx, 291(%r8, %rax, 4)
> + {nf} add %dx, 291(%r8, %rax, 4), %ax
> + {nf} add %ecx, %edx
> + {nf} add %ecx, %edx, %r10d
> + {nf} add %ecx, 291(%r8, %rax, 4)
> + {nf} add %ecx, 291(%r8, %rax, 4), %edx
> + {nf} add %r9, %r31
> + {nf} add %r9, %r31, %r11
> + {nf} add %r9, 291(%r8, %rax, 4)
> + {nf} add %r9, 291(%r8, %rax, 4), %r31
> + {nf} add 291(%r8, %rax, 4), %bl
> + {nf} add 291(%r8, %rax, 4), %bl, %dl
> + {nf} add 291(%r8, %rax, 4), %dx
> + {nf} add 291(%r8, %rax, 4), %dx, %ax
> + {nf} add 291(%r8, %rax, 4), %ecx
> + {nf} add 291(%r8, %rax, 4), %ecx, %edx
> + {nf} add 291(%r8, %rax, 4), %r9
> + {nf} add 291(%r8, %rax, 4), %r9, %r31
> + {nf} and $123, %bl
> + {nf} and $123, %bl, %dl
> + {nf} and $123, %dx
> + {nf} and $123, %dx, %ax
> + {nf} and $123, %ecx
> + {nf} and $123, %ecx, %edx
> + {nf} and $123, %r9
> + {nf} and $123, %r9, %r31
> + {nf} andb $123, 291(%r8, %rax, 4)
> + {nf} and $123, 291(%r8, %rax, 4), %bl
> + {nf} andw $123, 291(%r8, %rax, 4)
> + {nf} and $123, 291(%r8, %rax, 4), %dx
> + {nf} andl $123, 291(%r8, %rax, 4)
> + {nf} and $123, 291(%r8, %rax, 4), %ecx
> + {nf} andq $123, 291(%r8, %rax, 4)
> + {nf} and $123, 291(%r8, %rax, 4), %r9
> + {nf} and %bl, %dl
> + {nf} and %bl, %dl, %r8b
> + {nf} and %bl, 291(%r8, %rax, 4)
> + {nf} and %bl, 291(%r8, %rax, 4), %dl
> + {nf} and %dx, %ax
> + {nf} and %dx, %ax, %r9w
> + {nf} and %dx, 291(%r8, %rax, 4)
> + {nf} and %dx, 291(%r8, %rax, 4), %ax
> + {nf} and %ecx, %edx
> + {nf} and %ecx, %edx, %r10d
> + {nf} and %ecx, 291(%r8, %rax, 4)
> + {nf} and %ecx, 291(%r8, %rax, 4), %edx
> + {nf} and %r9, %r31
> + {nf} and %r9, %r31, %r11
> + {nf} and %r9, 291(%r8, %rax, 4)
> + {nf} and %r9, 291(%r8, %rax, 4), %r31
> + {nf} and 291(%r8, %rax, 4), %bl
> + {nf} and 291(%r8, %rax, 4), %bl, %dl
> + {nf} and 291(%r8, %rax, 4), %dx
> + {nf} and 291(%r8, %rax, 4), %dx, %ax
> + {nf} and 291(%r8, %rax, 4), %ecx
> + {nf} and 291(%r8, %rax, 4), %ecx, %edx
> + {nf} and 291(%r8, %rax, 4), %r9
> + {nf} and 291(%r8, %rax, 4), %r9, %r31
> + {nf} andn %ecx, %edx, %r10d
> + {nf} andn %r9, %r31, %r11
> + {nf} andn 291(%r8, %rax, 4), %ecx, %edx
> + {nf} andn 291(%r8, %rax, 4), %r9, %r31
> + {nf} bextr %ecx, %edx, %r10d
> + {nf} bextr %ecx, 291(%r8, %rax, 4), %edx
> + {nf} bextr %r9, %r31, %r11
> + {nf} bextr %r9, 291(%r8, %rax, 4), %r31
> + {nf} blsi %ecx, %edx
> + {nf} blsi %r9, %r31
> + {nf} blsi 291(%r8, %rax, 4), %ecx
> + {nf} blsi 291(%r8, %rax, 4), %r9
> + {nf} blsmsk %ecx, %edx
> + {nf} blsmsk %r9, %r31
> + {nf} blsmsk 291(%r8, %rax, 4), %ecx
> + {nf} blsmsk 291(%r8, %rax, 4), %r9
> + {nf} blsr %ecx, %edx
> + {nf} blsr %r9, %r31
> + {nf} blsr 291(%r8, %rax, 4), %ecx
> + {nf} blsr 291(%r8, %rax, 4), %r9
> + {nf} bzhi %ecx, %edx, %r10d
> + {nf} bzhi %ecx, 291(%r8, %rax, 4), %edx
> + {nf} bzhi %r9, %r31, %r11
> + {nf} bzhi %r9, 291(%r8, %rax, 4), %r31
> + {nf} dec %bl
> + {nf} dec %bl, %dl
> + {nf} dec %dx
> + {nf} dec %dx, %ax
> + {nf} dec %ecx
> + {nf} dec %ecx, %edx
> + {nf} dec %r9
> + {nf} dec %r9, %r31
> + {nf} decb 291(%r8, %rax, 4)
> + {nf} dec 291(%r8, %rax, 4), %bl
> + {nf} decw 291(%r8, %rax, 4)
> + {nf} dec 291(%r8, %rax, 4), %dx
> + {nf} decl 291(%r8, %rax, 4)
> + {nf} dec 291(%r8, %rax, 4), %ecx
> + {nf} decq 291(%r8, %rax, 4)
> + {nf} dec 291(%r8, %rax, 4), %r9
> + {nf} div %bl
> + {nf} div %dx
> + {nf} div %ecx
> + {nf} div %r9
> + {nf} divb 291(%r8, %rax, 4)
> + {nf} divw 291(%r8, %rax, 4)
> + {nf} divl 291(%r8, %rax, 4)
> + {nf} divq 291(%r8, %rax, 4)
> + {nf} idiv %bl
> + {nf} idiv %bl, %al
> + {nf} idiv %dx
> + {nf} idiv %dx, %ax
> + {nf} idiv %ecx
> + {nf} idiv %ecx, %eax
> + {nf} idiv %r9
> + {nf} idiv %r9, %rax
> + {nf} idivb 291(%r8, %rax, 4)
> + {nf} idivb 291(%r8, %rax, 4), %al
> + {nf} idivw 291(%r8, %rax, 4)
> + {nf} idivw 291(%r8, %rax, 4), %ax
> + {nf} idivl 291(%r8, %rax, 4)
> + {nf} idivl 291(%r8, %rax, 4), %eax
> + {nf} idivq 291(%r8, %rax, 4)
> + {nf} idivq 291(%r8, %rax, 4), %rax
> + {nf} imul %bl
> + {nf} imul %dx
> + {nf} imul %dx, %ax
> + {nf} imul %dx, %ax, %r9w
> + {nf} imul %ecx
> + {nf} imul %ecx, %edx
> + {nf} imul %ecx, %edx, %r10d
> + {nf} imul %r9
> + {nf} imul %r9, %r31
> + {nf} imul %r9, %r31, %r11
> + {nf} imulb 291(%r8, %rax, 4)
> + {nf} imulw 291(%r8, %rax, 4)
> + {nf} imul 291(%r8, %rax, 4), %dx
> + {nf} imul 291(%r8, %rax, 4), %dx, %ax
> + {nf} imull 291(%r8, %rax, 4)
> + {nf} imul 291(%r8, %rax, 4), %ecx
> + {nf} imul 291(%r8, %rax, 4), %ecx, %edx
> + {nf} imulq 291(%r8, %rax, 4)
> + {nf} imul 291(%r8, %rax, 4), %r9
> + {nf} imul 291(%r8, %rax, 4), %r9, %r31
> + {nf} imul $0x7b, %dx, %ax
> + {nf} imul $0x7b, %ecx, %edx
> + {nf} imul $0x7b, %r9, %r15
> + {nf} imul $0x7b, 291(%r8, %rax, 4), %dx
> + {nf} imul $0x7b, 291(%r8, %rax, 4), %ecx
> + {nf} imul $0x7b, 291(%r8, %rax, 4), %r9
> + {nf} imul $0xff90, %dx, %ax
> + {nf} imul $0xff90, %ecx, %edx
> + {nf} imul $0xff90, %r9, %r15
> + {nf} imul $0xff90, 291(%r8, %rax, 4), %dx
> + {nf} imul $0xff90, 291(%r8, %rax, 4), %ecx
> + {nf} imul $0xff90, 291(%r8, %rax, 4), %r9
Just to mention it here as well: The two-operand forms (immediate and
register) also want testing (and, as you had indicated, also enabling
in the opcode table).
> --- a/opcodes/i386-dis-evex.h
> +++ b/opcodes/i386-dis-evex.h
> @@ -875,19 +875,19 @@ static const struct dis386 evex_table[][256] = {
> /* EVEX_MAP4_ */
> {
> /* 00 */
> - { "addB", { VexGb, Eb, Gb }, NO_PREFIX },
> - { "addS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> - { "addB", { VexGb, Gb, EbS }, NO_PREFIX },
> - { "addS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> + { "%NFaddB", { VexGb, Eb, Gb }, NO_PREFIX },
> + { "%NFaddS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> + { "%NFaddB", { VexGb, Gb, EbS }, NO_PREFIX },
> + { "%NFaddS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
While I appreciate you switching from space to tab padding, as is
used elsewhere, I'm under the impression that the operand column
doesn't align properly anymore throughout ...
> { Bad_Opcode },
> { Bad_Opcode },
> { Bad_Opcode },
> { Bad_Opcode },
> /* 08 */
> - { "orB", { VexGb, Eb, Gb }, NO_PREFIX },
> - { "orS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> - { "orB", { VexGb, Gb, EbS }, NO_PREFIX },
> - { "orS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> + { "%NForB", { VexGb, Eb, Gb }, NO_PREFIX },
> + { "%NForS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> + { "%NForB", { VexGb, Gb, EbS }, NO_PREFIX },
> + { "%NForS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> { Bad_Opcode },
> { Bad_Opcode },
> { Bad_Opcode },
> @@ -911,28 +911,28 @@ static const struct dis386 evex_table[][256] = {
> { Bad_Opcode },
> { Bad_Opcode },
> /* 20 */
> - { "andB", { VexGb, Eb, Gb }, NO_PREFIX },
> - { "andS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> - { "andB", { VexGb, Gb, EbS }, NO_PREFIX },
> - { "andS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> - { "shldS", { VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
> + { "%NFandB", { VexGb, Eb, Gb }, NO_PREFIX },
> + { "%NFandS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> + { "%NFandB", { VexGb, Gb, EbS }, NO_PREFIX },
> + { "%NFandS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> + { "%NFshldS", { VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
> { Bad_Opcode },
> { Bad_Opcode },
> { Bad_Opcode },
> /* 28 */
> - { "subB", { VexGb, Eb, Gb }, NO_PREFIX },
> - { "subS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> - { "subB", { VexGb, Gb, EbS }, NO_PREFIX },
> - { "subS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> - { "shrdS", { VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
> + { "%NFsubB", { VexGb, Eb, Gb }, NO_PREFIX },
> + { "%NFsubS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> + { "%NFsubB", { VexGb, Gb, EbS }, NO_PREFIX },
> + { "%NFsubS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> + { "%NFshrdS", { VexGv, Ev, Gv, Ib }, PREFIX_NP_OR_DATA },
> { Bad_Opcode },
> { Bad_Opcode },
> { Bad_Opcode },
> /* 30 */
> - { "xorB", { VexGb, Eb, Gb }, NO_PREFIX },
> - { "xorS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> - { "xorB", { VexGb, Gb, EbS }, NO_PREFIX },
> - { "xorS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
> + { "%NFxorB", { VexGb, Eb, Gb }, NO_PREFIX },
> + { "%NFxorS", { VexGv, Ev, Gv }, PREFIX_NP_OR_DATA },
> + { "%NFxorB", { VexGb, Gb, EbS }, NO_PREFIX },
> + { "%NFxorS", { VexGv, Gv, EvS }, PREFIX_NP_OR_DATA },
... here.
> @@ -9652,6 +9666,9 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
> oappend (&ins, "/(bad)");
> }
> }
> + /* vex.nf is cleared after being consumed. */
> + if (ins.vex.nf)
Just like you have it here, ...
> + oappend (&ins, "{bad-nf}");
>
> /* Check whether rounding control was enabled for an insn not
> supporting it, when evex.b is not treated as evex.nd. */
> @@ -10564,6 +10581,15 @@ putop (instr_info *ins, const char *in_template, int sizeflag)
> }
> else if (l == 1 && last[0] == 'C')
> break;
> + else if (l == 1 && last[0] == 'N')
> + {
> + if (ins->vex.nf == true)
... no comparison against true (or false) please.
> // clr with 1 operand is really xor with 2 operands.
> clr, 0x30, 0, W|Modrm|No_sSuf|RegKludge|Optimize, { Reg8|Reg16|Reg32|Reg64 }
> +clr, 0x30, APX_F, W|Modrm|No_sSuf|RegKludge|EVexMap4, { Reg8|Reg16|Reg32|Reg64 }
Shouldn't this also have NF?
Jan
next prev parent reply other threads:[~2024-03-21 14:26 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-19 6:41 Cui, Lili
2024-03-21 14:26 ` Jan Beulich [this message]
2024-03-26 6:28 ` [PATCH V3] " Cui, Lili
-- strict thread matches above, loose matches on Subject: below --
2024-02-27 9:01 [PATCH] " Cui, Lili
2024-02-28 16:11 ` H.J. Lu
2024-02-29 1:12 ` Cui, Lili
2024-02-29 6:53 ` Jan Beulich
2024-02-29 8:39 ` Cui, Lili
2024-02-29 9:06 ` Jan Beulich
2024-02-29 10:22 ` Cui, Lili
2024-02-29 12:23 ` H.J. Lu
2024-02-29 12:26 ` Cui, Lili
2024-02-29 11:21 ` Jan Beulich
2024-02-29 12:00 ` Cui, Lili
2024-02-29 12:04 ` Jan Beulich
2024-02-29 12:41 ` Cui, Lili
2024-02-29 13:17 ` Jan Beulich
2024-02-29 13:47 ` Cui, Lili
2024-02-29 14:12 ` Jan Beulich
2024-03-01 3:23 ` Cui, Lili
2024-03-01 6:56 ` Jan Beulich
2024-03-01 8:01 ` Cui, Lili
2024-03-01 11:36 ` Cui, Lili
2024-03-01 11:49 ` Jan Beulich
2024-03-01 7:04 ` Jan Beulich
2024-03-01 11:50 ` Cui, Lili
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0fd9e98e-907a-4569-ad21-4cf9e4a81673@suse.com \
--to=jbeulich@suse.com \
--cc=binutils@sourceware.org \
--cc=hjl.tools@gmail.com \
--cc=lili.cui@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).