public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] x86: fold certain VCVT{,U}SI2S{S,D} templates
@ 2020-02-14 13:41 Jan Beulich
  2020-02-14 13:50 ` H.J. Lu
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Beulich @ 2020-02-14 13:41 UTC (permalink / raw)
  To: binutils; +Cc: H.J. Lu

There don't really need to be separate Cpu64 and CpuNo64 templates for
these. One small issue with this is that slightly strange code

	.intel_syntax noprefix
	.code16
	.arch i286
	.arch .avx
	vcvtsi2sd xmm0, xmm0, dword ptr [bx]
	vcvtsi2sd xmm0, xmm0, qword ptr [bx]

	vcvtsi2sd xmm0, xmm0, ebx
	vcvtsi2sd xmm0, xmm0, rbx

now will match in behavior with the AVX512 counterparts in that not
only the 2nd vcvtsi2sd won't assemble, but also the first. The last
two, otoh, will continue to assemble fine (due to the lack of any
memory operand size specifier). As a result, another way to make
things behave more consistently would be to avoid the folding and
add IgnoreSize to the CpuNo64 AVX512 variants. A 3rd way to do so
would be to add Cpu386 to any such insn template.

While doing this also make the usual cosmetic adjustments for the
insns touched anyway. Additionally drop the redundant Cpu64 from
the SAE forms of VCVT{,U}SI2SD - they won't assemble outside of
64-bit mode due to there not being anything to match the Reg64
operand.

opcodes/
2020-02-XX  Jan Beulich <jbeulich@suse.com>

	* i386-opc.tbl (vcvtsi2sd, vcvtsi2ss, vcvtusi2sd, vcvtusi2ss):
	Fold CpuNo64 and Cpu64 templates. Use VexLIG/EVexLIG and VexW0/
	VexW1 instead of open-coding them.
	* i386-tbl.h: Re-generate.

--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2025,10 +2025,8 @@ vcvtps2pd, 2, 0x5a, None, 1, CpuAVX, Mod
 vcvtps2pd, 2, 0x5a, None, 1, CpuAVX, Modrm|Vex=2|VexOpcode=0|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex, RegYMM }
 vcvtsd2si, 2, 0xf22d, None, 1, CpuAVX, Modrm|Vex=3|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|ToDword, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
 vcvtsd2ss, 3, 0xf25a, None, 1, CpuAVX, Modrm|Vex=3|VexOpcode=0|VexVVVV=1|VexWIG|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vcvtsi2sd, 3, 0xf22a, None, 1, CpuAVX|CpuNo64, Modrm|Vex=3|VexOpcode=0|VexVVVV=1|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sd, 3, 0xf22a, None, 1, CpuAVX|Cpu64, Modrm|Vex=3|VexOpcode=0|VexVVVV=1|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Dword|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2ss, 3, 0xf32a, None, 1, CpuAVX|CpuNo64, Modrm|Vex=3|VexOpcode=0|VexVVVV=1|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2ss, 3, 0xf32a, None, 1, CpuAVX|Cpu64, Modrm|Vex=3|VexOpcode=0|VexVVVV=1|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Dword|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sd, 3, 0xf22a, None, 1, CpuAVX, Modrm|VexLIG|VexOpcode=0|VexVVVV|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2ss, 3, 0xf32a, None, 1, CpuAVX, Modrm|VexLIG|VexOpcode=0|VexVVVV|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
 vcvtss2sd, 3, 0xf35a, None, 1, CpuAVX, Modrm|Vex=3|VexOpcode=0|VexVVVV=1|VexWIG|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vcvtss2si, 2, 0xf32d, None, 1, CpuAVX, Modrm|Vex=3|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|ToQword, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
 vcvttpd2dq, 2, 0x66e6, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|BaseIndex, RegXMM }
@@ -3471,23 +3469,19 @@ vcvtsd2usi, 3, 0xF279, None, 1, CpuAVX51
 vcvtsd2ss, 3, 0xF25A, None, 1, CpuAVX512F, Modrm|EVex=4|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Disp8MemShift=3|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vcvtsd2ss, 4, 0xF25A, None, 1, CpuAVX512F, Modrm|EVex=4|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|StaticRounding|SAE, { Imm8, RegXMM, RegXMM, RegXMM }
 
-vcvtsi2sd, 3, 0xF22A, None, 1, CpuAVX512F|CpuNo64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sd, 3, 0xF22A, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2sd, 4, 0xF22A, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|VexW=2|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg64, Imm8, RegXMM, RegXMM }
-vcvtsi2sd, 4, 0xF22A, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|VexW=2|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg64, RegXMM, RegXMM }
-vcvtusi2sd, 3, 0xF27B, None, 1, CpuAVX512F|CpuNo64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sd, 3, 0xF27B, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2sd, 4, 0xF27B, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|VexW=2|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg64, Imm8, RegXMM, RegXMM }
-vcvtusi2sd, 4, 0xF27B, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|VexW=2|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg64, RegXMM, RegXMM }
-
-vcvtsi2ss, 3, 0xF32A, None, 1, CpuAVX512F|CpuNo64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2ss, 3, 0xF32A, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtsi2ss, 4, 0xF32A, None, 1, CpuAVX512F, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg32|Reg64, Imm8, RegXMM, RegXMM }
-vcvtsi2ss, 4, 0xF32A, None, 1, CpuAVX512F, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
-vcvtusi2ss, 3, 0xF37B, None, 1, CpuAVX512F|CpuNo64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2ss, 3, 0xF37B, None, 1, CpuAVX512F|Cpu64, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
-vcvtusi2ss, 4, 0xF37B, None, 1, CpuAVX512F, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg32|Reg64, Imm8, RegXMM, RegXMM }
-vcvtusi2ss, 4, 0xF37B, None, 1, CpuAVX512F, Modrm|EVex=4|VexOpcode=0|VexVVVV=1|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
+vcvtsi2sd, 3, 0xF22A, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2sd, 4, 0xF22A, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg64, Imm8, RegXMM, RegXMM }
+vcvtsi2sd, 4, 0xF22A, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg64, RegXMM, RegXMM }
+vcvtusi2sd, 3, 0xF27B, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2sd, 4, 0xF27B, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg64, Imm8, RegXMM, RegXMM }
+vcvtusi2sd, 4, 0xF27B, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg64, RegXMM, RegXMM }
+
+vcvtsi2ss, 3, 0xF32A, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtsi2ss, 4, 0xF32A, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg32|Reg64, Imm8, RegXMM, RegXMM }
+vcvtsi2ss, 4, 0xF32A, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
+vcvtusi2ss, 3, 0xF37B, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM, RegXMM }
+vcvtusi2ss, 4, 0xF37B, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE, { Reg32|Reg64, Imm8, RegXMM, RegXMM }
+vcvtusi2ss, 4, 0xF37B, None, 1, CpuAVX512F, Modrm|EVexLIG|VexOpcode=0|VexVVVV|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|StaticRounding|SAE|IntelSyntax, { Imm8, Reg32|Reg64, RegXMM, RegXMM }
 
 vcvtss2sd, 3, 0xF35A, None, 1, CpuAVX512F, Modrm|EVex=4|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Disp8MemShift=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vcvtss2sd, 4, 0xF35A, None, 1, CpuAVX512F, Modrm|EVex=4|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, RegXMM, RegXMM, RegXMM }

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] x86: fold certain VCVT{,U}SI2S{S,D} templates
  2020-02-14 13:41 [PATCH] x86: fold certain VCVT{,U}SI2S{S,D} templates Jan Beulich
@ 2020-02-14 13:50 ` H.J. Lu
  0 siblings, 0 replies; 2+ messages in thread
From: H.J. Lu @ 2020-02-14 13:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: binutils

On Fri, Feb 14, 2020 at 5:41 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> There don't really need to be separate Cpu64 and CpuNo64 templates for
> these. One small issue with this is that slightly strange code
>
>         .intel_syntax noprefix
>         .code16
>         .arch i286
>         .arch .avx
>         vcvtsi2sd xmm0, xmm0, dword ptr [bx]
>         vcvtsi2sd xmm0, xmm0, qword ptr [bx]
>
>         vcvtsi2sd xmm0, xmm0, ebx
>         vcvtsi2sd xmm0, xmm0, rbx
>
> now will match in behavior with the AVX512 counterparts in that not
> only the 2nd vcvtsi2sd won't assemble, but also the first. The last
> two, otoh, will continue to assemble fine (due to the lack of any
> memory operand size specifier). As a result, another way to make
> things behave more consistently would be to avoid the folding and
> add IgnoreSize to the CpuNo64 AVX512 variants. A 3rd way to do so
> would be to add Cpu386 to any such insn template.
>
> While doing this also make the usual cosmetic adjustments for the
> insns touched anyway. Additionally drop the redundant Cpu64 from
> the SAE forms of VCVT{,U}SI2SD - they won't assemble outside of
> 64-bit mode due to there not being anything to match the Reg64
> operand.
>
> opcodes/
> 2020-02-XX  Jan Beulich <jbeulich@suse.com>
>
>         * i386-opc.tbl (vcvtsi2sd, vcvtsi2ss, vcvtusi2sd, vcvtusi2ss):
>         Fold CpuNo64 and Cpu64 templates. Use VexLIG/EVexLIG and VexW0/
>         VexW1 instead of open-coding them.
>         * i386-tbl.h: Re-generate.
>

OK.

Thanks.


-- 
H.J.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-02-14 13:50 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-14 13:41 [PATCH] x86: fold certain VCVT{,U}SI2S{S,D} templates Jan Beulich
2020-02-14 13:50 ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).