From: Jan Beulich <jbeulich@suse.com>
To: Binutils <binutils@sourceware.org>
Cc: "H.J. Lu" <hjl.tools@gmail.com>, Lili Cui <lili.cui@intel.com>
Subject: [PATCH 4/4] x86/APX: optimize certain XOR and SUB forms
Date: Fri, 16 Feb 2024 10:59:25 +0100 [thread overview]
Message-ID: <263f41dd-b7bf-42a5-92a4-3732c53e276e@suse.com> (raw)
In-Reply-To: <3098e797-3749-40ee-802c-ea8a6f63914c@suse.com>
While most of the logic in optimize_encoding() already covers APX by way
of the earlier NDD->REX2 conversion, there is a remaining set of cases
that needs to be handled separately.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4693,6 +4693,34 @@ optimize_encoding (void)
}
}
}
+ else if (i.reg_operands == 3
+ && i.op[0].regs == i.op[1].regs
+ && i.encoding != encoding_evex
+ && (i.tm.mnem_off == MN_xor
+ || i.tm.mnem_off == MN_sub))
+ {
+ /* Optimize: -O:
+ xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd
+ xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd
+ xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd
+ xorq %rN, %rN, %rM -> xorl %rMd, %rMd
+ subb %rNb, %rNb, %rMb -> subl %rMd, %rMd
+ subw %rNw, %rNw, %rMw -> subl %rMd, %rMd
+ subl %rNd, %rNd, %rMd -> subl %rMd, %rMd
+ subq %rN, %rN, %rM -> subl %rMd, %rMd
+ */
+ i.tm.opcode_space = SPACE_BASE;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.size = SIZE32;
+ i.types[0].bitfield.byte = 0;
+ i.types[0].bitfield.word = 0;
+ i.types[0].bitfield.dword = 1;
+ i.types[0].bitfield.qword = 0;
+ i.op[0].regs = i.op[2].regs;
+ i.types[1] = i.types[0];
+ i.op[1].regs = i.op[2].regs;
+ i.reg_operands = 2;
+ }
else if (optimize > 1
&& !optimize_for_space
&& i.reg_operands == 2
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.d
@@ -71,4 +71,28 @@ Disassembly of section .text:
+[a-f0-9]+: 48 0f ba f0 1f btr \$0x1f,%rax
+[a-f0-9]+: 66 0f ba e8 0f bts \$0xf,%ax
+[a-f0-9]+: 48 0f ba e8 1f bts \$0x1f,%rax
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 48 31 d1 xor %rdx,%rcx
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: 48 29 d1 sub %rdx,%rcx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 58 31 d1 xor %r18,%r17
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: d5 58 29 d1 sub %r18,%r17
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 62 f4 75 18 31 d1 xor %dx,%cx,%cx
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: 62 f4 75 18 29 d1 sub %dx,%cx,%cx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: 62 ec 74 10 30 d1 xor %r18b,%r17b,%r17b
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 62 ec 74 10 28 d1 sub %r18b,%r17b,%r17b
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
#pass
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.s
@@ -65,3 +65,27 @@ _start:
btr $31, %rax
bts $15, %ax
bts $31, %rax
+ xor %rcx, %rcx, %rcx
+ xor %rdx, %rcx, %rcx
+ xor %rdx, %rdx, %rcx
+ sub %rcx, %rcx, %rcx
+ sub %rdx, %rcx, %rcx
+ sub %rdx, %rdx, %rcx
+ xor %r17, %r17, %r17
+ xor %r18, %r17, %r17
+ xor %r18, %r18, %r17
+ sub %r17, %r17, %r17
+ sub %r18, %r17, %r17
+ sub %r18, %r18, %r17
+ xor %cx, %cx, %cx
+ xor %dx, %cx, %cx
+ xor %dx, %dx, %cx
+ sub %cx, %cx, %cx
+ sub %dx, %cx, %cx
+ sub %dx, %dx, %cx
+ xor %r17b, %r17b, %r17b
+ xor %r18b, %r17b, %r17b
+ xor %r18b, %r18b, %r17b
+ sub %r17b, %r17b, %r17b
+ sub %r18b, %r17b, %r17b
+ sub %r18b, %r18b, %r17b
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -320,7 +320,7 @@ inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf
inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
+sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
@@ -366,7 +366,7 @@ or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm3
or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
prev parent reply other threads:[~2024-02-16 9:59 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-16 9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
2024-02-16 9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
2024-02-18 5:59 ` Cui, Lili
2024-02-19 7:54 ` Jan Beulich
2024-02-20 9:19 ` Cui, Lili
2024-02-16 9:58 ` [PATCH 2/4] x86/APX: respect {vex}/{vex3} Jan Beulich
2024-02-18 7:55 ` Cui, Lili
2024-02-19 8:00 ` Jan Beulich
2024-02-20 10:12 ` Cui, Lili
2024-02-20 10:30 ` Jan Beulich
2024-02-20 15:59 ` Michael Matz
2024-02-20 16:52 ` H.J. Lu
2024-02-16 9:58 ` [PATCH 3/4] x86/APX: correct .insn opcode space determination when REX2 is needed Jan Beulich
2024-02-16 9:59 ` Jan Beulich [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=263f41dd-b7bf-42a5-92a4-3732c53e276e@suse.com \
--to=jbeulich@suse.com \
--cc=binutils@sourceware.org \
--cc=hjl.tools@gmail.com \
--cc=lili.cui@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for URLs of read-only IMAP folder(s) and NNTP newsgroup(s).