* [binutils-gdb] x86: optimize 128-bit VPBROADCASTQ to VPUNPCKLQDQ
@ 2023-07-04 15:07 Jan Beulich
0 siblings, 0 replies; only message in thread
From: Jan Beulich @ 2023-07-04 15:07 UTC (permalink / raw)
To: bfd-cvs
https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=eb4031cb20aa710834be891f8638e04dbba81edc
commit eb4031cb20aa710834be891f8638e04dbba81edc
Author: Jan Beulich <jbeulich@suse.com>
Date: Tue Jul 4 17:07:26 2023 +0200
x86: optimize 128-bit VPBROADCASTQ to VPUNPCKLQDQ
The alternative is 1 byte shorter when the source is %xmm0-7, as a
2-byte VEX prefix can then be used.
Diff:
---
gas/config/tc-i386.c | 27 +++++++++++++++++++++++++++
gas/testsuite/gas/i386/optimize-2.d | 1 +
gas/testsuite/gas/i386/optimize-2.s | 2 ++
gas/testsuite/gas/i386/optimize-2b.d | 1 +
gas/testsuite/gas/i386/x86-64-optimize-3.d | 2 ++
gas/testsuite/gas/i386/x86-64-optimize-3.s | 3 +++
gas/testsuite/gas/i386/x86-64-optimize-3b.d | 2 ++
opcodes/i386-opc.tbl | 2 +-
opcodes/i386-tbl.h | 4 ++--
9 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 8ebeaf1ab84..bc02f8e0abf 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4620,6 +4620,33 @@ optimize_encoding (void)
i.op[1].regs = i.op[0].regs;
}
}
+ else if (optimize_for_space
+ && i.tm.base_opcode == 0x59
+ && i.tm.opcode_space == SPACE_0F38
+ && i.operands == i.reg_operands
+ && i.tm.opcode_modifier.vex
+ && !(i.op[0].regs->reg_flags & RegRex)
+ && i.op[0].regs->reg_type.bitfield.xmmword
+ && i.vec_encoding != vex_encoding_vex3)
+ {
+ /* Optimize: -Os:
+ vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0x6c;
+ i.tm.opcode_modifier.vexvvvv = 1;
+
+ ++i.operands;
+ ++i.reg_operands;
+ ++i.tm.operands;
+
+ i.op[2].regs = i.op[0].regs;
+ i.types[2] = i.types[0];
+ i.flags[2] = i.flags[0];
+ i.tm.operand_types[2] = i.tm.operand_types[0];
+
+ swap_2_operands (1, 2);
+ }
}
/* Return non-zero for load instruction. */
diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d
index 60a9069c6a7..41056fb6ef1 100644
--- a/gas/testsuite/gas/i386/optimize-2.d
+++ b/gas/testsuite/gas/i386/optimize-2.d
@@ -164,4 +164,5 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2
+[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: c5 .* vpunpcklqdq %xmm2,%xmm2,%xmm0
#pass
diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s
index 3cf41b1f59c..29399ae8473 100644
--- a/gas/testsuite/gas/i386/optimize-2.s
+++ b/gas/testsuite/gas/i386/optimize-2.s
@@ -184,3 +184,5 @@ _start:
pcmpeqq %xmm2, %xmm2
vpcmpeqq %xmm2, %xmm2, %xmm0
vpcmpeqq %ymm2, %ymm2, %ymm0
+
+ vpbroadcastq %xmm2, %xmm0
diff --git a/gas/testsuite/gas/i386/optimize-2b.d b/gas/testsuite/gas/i386/optimize-2b.d
index 0624cb482d5..d9e83198fb6 100644
--- a/gas/testsuite/gas/i386/optimize-2b.d
+++ b/gas/testsuite/gas/i386/optimize-2b.d
@@ -165,4 +165,5 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2
+[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: c4 .* vpbroadcastq %xmm2,%xmm0
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d
index fc5ec5e43d0..23b9305d25c 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-3.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d
@@ -205,4 +205,6 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pcmpeqd %xmm12,%xmm12
+[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0
+[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0
+ +[a-f0-9]+: c5 .* vpunpcklqdq %xmm2,%xmm2,%xmm0
+ +[a-f0-9]+: c4 .* vpbroadcastq %xmm12,%xmm0
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s
index d2bcbce3bf4..bab99cf1eae 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-3.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s
@@ -229,3 +229,6 @@ _start:
pcmpeqq %xmm12, %xmm12
vpcmpeqq %xmm12, %xmm12, %xmm0
vpcmpeqq %ymm12, %ymm12, %ymm0
+
+ vpbroadcastq %xmm2, %xmm0
+ vpbroadcastq %xmm12, %xmm0
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3b.d b/gas/testsuite/gas/i386/x86-64-optimize-3b.d
index abd48dce3a8..929c6f70e8b 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d
@@ -206,4 +206,6 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pcmpeqq %xmm12,%xmm12
+[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0
+[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0
+ +[a-f0-9]+: c4 .* vpbroadcastq %xmm2,%xmm0
+ +[a-f0-9]+: c4 .* vpbroadcastq %xmm12,%xmm0
#pass
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index f9abffb4b9f..b6263f88605 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1734,7 +1734,7 @@ vbroadcastsd, 0x6619, AVX2, Modrm|Vex=2|Space0F38|VexW=1|NoSuf, { RegXMM, RegYMM
vbroadcastss, 0x6618, AVX2, Modrm|Vex|Space0F38|VexW=1|NoSuf, { RegXMM, RegXMM|RegYMM }
vpblendd, 0x6602, AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpbroadcast<bw>, 0x6678 | <bw:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf, { <bw:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
-vpbroadcast<dq>, 0x6658 | <dq:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf, { <dq:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
+vpbroadcast<dq>, 0x6658 | <dq:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf|Optimize, { <dq:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
vperm2i128, 0x6646, AVX2, Modrm|Vex=2|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
vpermd, 0x6636, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|NoSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
vpermpd, 0x6601, AVX2, Modrm|Vex=2|Space0F3A|VexW1|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM }
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index 4779cc958db..43bcd688852 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -34844,7 +34844,7 @@ static const insn_template i386_optab[] =
1, 1, 1, 0, 0, 0 } } } },
{ MN_vpbroadcastd, 0x58 | 0, 2, SPACE_0F38, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -34886,7 +34886,7 @@ static const insn_template i386_optab[] =
1, 1, 1, 0, 0, 0 } } } },
{ MN_vpbroadcastq, 0x58 | 1, 2, SPACE_0F38, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-07-04 15:07 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-04 15:07 [binutils-gdb] x86: optimize 128-bit VPBROADCASTQ to VPUNPCKLQDQ Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).