public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: Binutils <binutils@sourceware.org>
Cc: "H.J. Lu" <hjl.tools@gmail.com>, Lili Cui <lili.cui@intel.com>
Subject: [PATCH 4/4] x86/APX: optimize certain XOR and SUB forms
Date: Fri, 16 Feb 2024 10:59:25 +0100	[thread overview]
Message-ID: <263f41dd-b7bf-42a5-92a4-3732c53e276e@suse.com> (raw)
In-Reply-To: <3098e797-3749-40ee-802c-ea8a6f63914c@suse.com>

While most of the logic in optimize_encoding() already covers APX by way
of the earlier NDD->REX2 conversion, there is a remaining set of cases
which needs to be handled separately.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4693,6 +4693,34 @@ optimize_encoding (void)
 	    }
 	}
     }
+  else if (i.reg_operands == 3
+	   && i.op[0].regs == i.op[1].regs
+	   && i.encoding != encoding_evex
+	   && (i.tm.mnem_off == MN_xor
+	       || i.tm.mnem_off == MN_sub))
+    {
+      /* Optimize: -O:
+	   xorb %rNb, %rNb, %rMb  -> xorl %rMd, %rMd
+	   xorw %rNw, %rNw, %rMw  -> xorl %rMd, %rMd
+	   xorl %rNd, %rNd, %rMd  -> xorl %rMd, %rMd
+	   xorq %rN,  %rN,  %rM   -> xorl %rMd, %rMd
+	   subb %rNb, %rNb, %rMb  -> subl %rMd, %rMd
+	   subw %rNw, %rNw, %rMw  -> subl %rMd, %rMd
+	   subl %rNd, %rNd, %rMd  -> subl %rMd, %rMd
+	   subq %rN,  %rN,  %rM   -> subl %rMd, %rMd
+        */
+      i.tm.opcode_space = SPACE_BASE;
+      i.tm.opcode_modifier.evex = 0;
+      i.tm.opcode_modifier.size = SIZE32;
+      i.types[0].bitfield.byte = 0;
+      i.types[0].bitfield.word = 0;
+      i.types[0].bitfield.dword = 1;
+      i.types[0].bitfield.qword = 0;
+      i.op[0].regs = i.op[2].regs;
+      i.types[1] = i.types[0];
+      i.op[1].regs = i.op[2].regs;
+      i.reg_operands = 2;
+    }
   else if (optimize > 1
 	   && !optimize_for_space
 	   && i.reg_operands == 2
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.d
@@ -71,4 +71,28 @@ Disassembly of section .text:
  +[a-f0-9]+:	48 0f ba f0 1f       	btr    \$0x1f,%rax
  +[a-f0-9]+:	66 0f ba e8 0f       	bts    \$0xf,%ax
  +[a-f0-9]+:	48 0f ba e8 1f       	bts    \$0x1f,%rax
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	48 31 d1             	xor    %rdx,%rcx
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	48 29 d1             	sub    %rdx,%rcx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	d5 58 31 d1          	xor    %r18,%r17
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
+ +[a-f0-9]+:	d5 58 29 d1          	sub    %r18,%r17
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	62 f4 75 18 31 d1    	xor    %dx,%cx,%cx
+ +[a-f0-9]+:	31 c9                	xor    %ecx,%ecx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	62 f4 75 18 29 d1    	sub    %dx,%cx,%cx
+ +[a-f0-9]+:	29 c9                	sub    %ecx,%ecx
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	62 ec 74 10 30 d1    	xor    %r18b,%r17b,%r17b
+ +[a-f0-9]+:	d5 50 31 c9          	xor    %r17d,%r17d
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
+ +[a-f0-9]+:	62 ec 74 10 28 d1    	sub    %r18b,%r17b,%r17b
+ +[a-f0-9]+:	d5 50 29 c9          	sub    %r17d,%r17d
 #pass
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.s
@@ -65,3 +65,27 @@ _start:
 	btr	$31, %rax
 	bts	$15, %ax
 	bts	$31, %rax
+	xor	%rcx, %rcx, %rcx
+	xor	%rdx, %rcx, %rcx
+	xor	%rdx, %rdx, %rcx
+	sub	%rcx, %rcx, %rcx
+	sub	%rdx, %rcx, %rcx
+	sub	%rdx, %rdx, %rcx
+	xor	%r17, %r17, %r17
+	xor	%r18, %r17, %r17
+	xor	%r18, %r18, %r17
+	sub	%r17, %r17, %r17
+	sub	%r18, %r17, %r17
+	sub	%r18, %r18, %r17
+	xor	%cx, %cx, %cx
+	xor	%dx, %cx, %cx
+	xor	%dx, %dx, %cx
+	sub	%cx, %cx, %cx
+	sub	%dx, %cx, %cx
+	sub	%dx, %dx, %cx
+	xor	%r17b, %r17b, %r17b
+	xor	%r18b, %r17b, %r17b
+	xor	%r18b, %r18b, %r17b
+	sub	%r17b, %r17b, %r17b
+	sub	%r18b, %r17b, %r17b
+	sub	%r18b, %r18b, %r17b
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -320,7 +320,7 @@ inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf
 inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
 inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 
-sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
+sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
 sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
@@ -366,7 +366,7 @@ or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm3
 or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
 or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 
-xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
 xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }


      parent reply	other threads:[~2024-02-16  9:59 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-16  9:56 [PATCH 0/4] x86/APX: misc adjustments Jan Beulich
2024-02-16  9:57 ` [PATCH 1/4] x86: rename vec_encoding and vex_encoding_* Jan Beulich
2024-02-18  5:59   ` Cui, Lili
2024-02-19  7:54     ` Jan Beulich
2024-02-20  9:19       ` Cui, Lili
2024-02-16  9:58 ` [PATCH 2/4] x86/APX: respect {vex}/{vex3} Jan Beulich
2024-02-18  7:55   ` Cui, Lili
2024-02-19  8:00     ` Jan Beulich
2024-02-20 10:12       ` Cui, Lili
2024-02-20 10:30         ` Jan Beulich
2024-02-20 15:59           ` Michael Matz
2024-02-20 16:52             ` H.J. Lu
2024-02-16  9:58 ` [PATCH 3/4] x86/APX: correct .insn opcode space determination when REX2 is needed Jan Beulich
2024-02-16  9:59 ` Jan Beulich [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=263f41dd-b7bf-42a5-92a4-3732c53e276e@suse.com \
    --to=jbeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=hjl.tools@gmail.com \
    --cc=lili.cui@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).