public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 0/3] x86: small adjustments to optimization logic
@ 2023-01-27 11:34 Jan Beulich
  2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:34 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

1: respect {nooptimize} for LEA
2: respect {nooptimize} when building VEX prefix
3: drop LOCK from XCHG when optimizing

Jan

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] x86: respect {nooptimize} for LEA
  2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
@ 2023-01-27 11:35 ` Jan Beulich
  2023-01-27 11:35 ` [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix Jan Beulich
  2023-01-27 11:36 ` [PATCH 3/3] x86: drop LOCK from XCHG when optimizing Jan Beulich
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:35 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Dropping a meaningless segment prefix occurs outside of
optimize_encoding() and hence needs an explicit check for {nooptimize},
i.e. for a request to avoid any optimizations.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8159,7 +8159,7 @@ process_operands (void)
     {
       if (!quiet_warnings)
 	as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
-      if (optimize)
+      if (optimize && !i.no_optimize)
 	{
 	  i.seg[0] = NULL;
 	  i.prefix[SEG_PREFIX] = 0;
--- a/gas/testsuite/gas/i386/lea-optimize.d
+++ b/gas/testsuite/gas/i386/lea-optimize.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
 0+ <start>:
 [ 	]*[0-9a-f]+:[ 	]+8d 04 08[ 	]+lea[ 	]+\(%eax,%ecx(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+8d 04 08[ 	]+lea[ 	]+\(%eax,%ecx(,1)?\),%eax
+[ 	]*[0-9a-f]+:[ 	]+26 8d 04 01[ 	]+lea[ 	]+%es:\(%ecx,%eax(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+8d 48 01[ 	]+lea[ 	]+0x1\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+8d 88 00 00 00 00[ 	]+lea[ 	]+0x0\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+8d 0c 25 00 00 00 00[ 	]+lea[ 	]+0x0\(,(%eiz)?(,1)?\),%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
 [ 	]*[0-9a-f]+:[ 	]+8b c8[ 	]+mov[ 	]+%eax,%ecx
 [ 	]*[0-9a-f]+:[ 	]+8b c8[ 	]+mov[ 	]+%eax,%ecx
 [ 	]*[0-9a-f]+:[ 	]+8b c8[ 	]+mov[ 	]+%eax,%ecx
+[ 	]*[0-9a-f]+:[ 	]+64 8d 08[ 	]+lea[ 	]+%fs:\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+0f b7 c6[ 	]+movzwl[ 	]+%si,%eax
 [ 	]*[0-9a-f]+:[ 	]+0f b7 f6[ 	]+movzwl[ 	]+%si,%esi
 [ 	]*[0-9a-f]+:[ 	]+0f b7 c6[ 	]+movzwl[ 	]+%si,%eax
--- a/gas/testsuite/gas/i386/lea.d
+++ b/gas/testsuite/gas/i386/lea.d
@@ -9,6 +9,7 @@ Disassembly of section .text:
 0+ <start>:
 [ 	]*[0-9a-f]+:[ 	]+36 8d 04 08[ 	]+lea[ 	]+%ss:\(%eax,%ecx(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+36 8d 04 08[ 	]+lea[ 	]+%ss:\(%eax,%ecx(,1)?\),%eax
+[ 	]*[0-9a-f]+:[ 	]+26 8d 04 01[ 	]+lea[ 	]+%es:\(%ecx,%eax(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+8d 48 01[ 	]+lea[ 	]+0x1\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+8d 88 00 00 00 00[ 	]+lea[ 	]+0x0\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+8d 0c 25 00 00 00 00[ 	]+lea[ 	]+0x0\(,(%eiz)?(,1)?\),%ecx
@@ -20,6 +21,7 @@ Disassembly of section .text:
 [ 	]*[0-9a-f]+:[ 	]+8d 08[ 	]+lea[ 	]+\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+8d 08[ 	]+lea[ 	]+\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+65 8d 08[ 	]+lea[ 	]+%gs:\(%eax\),%ecx
+[ 	]*[0-9a-f]+:[ 	]+64 8d 08[ 	]+lea[ 	]+%fs:\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 8d 04[ 	]+lea[ 	]+\(%si\),%eax
 [ 	]*[0-9a-f]+:[ 	]+67 8d 34[ 	]+lea[ 	]+\(%si\),%esi
 [ 	]*[0-9a-f]+:[ 	]+67 8d 04[ 	]+lea[ 	]+\(%si\),%eax
--- a/gas/testsuite/gas/i386/lea.e
+++ b/gas/testsuite/gas/i386/lea.e
@@ -1,4 +1,6 @@
 .*: Assembler messages:
 .*:3: Warning: .* `lea' .*
 .*:4: Warning: .* `lea' .*
-.*:19: Warning: .* `lea' .*
+.*:5: Warning: .* `lea' .*
+.*:20: Warning: .* `lea' .*
+.*:21: Warning: .* `lea' .*
--- a/gas/testsuite/gas/i386/lea.s
+++ b/gas/testsuite/gas/i386/lea.s
@@ -2,6 +2,7 @@
 start:
 	lea	%ss:(%eax,%ecx), %eax
 	ss lea	(%eax,%ecx), %eax
+	{nooptimize} es lea (%ecx,%eax), %eax
 
 	.allow_index_reg
 	lea	1(%eax), %ecx
@@ -17,6 +18,7 @@ start:
 	lea	(%eax), %ecx
 	lea	1-1(%eax), %ecx
 	lea	%gs:(%eax), %ecx
+	{nooptimize} lea %fs:(%eax), %ecx
 
 	lea	(%si), %eax
 	lea	(%si), %esi
--- a/gas/testsuite/gas/i386/lea16-optimize.d
+++ b/gas/testsuite/gas/i386/lea16-optimize.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
 0+ <start>:
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 04 08[ 	]+lea[ 	]+\(%eax,%ecx(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 04 08[ 	]+lea[ 	]+\(%eax,%ecx(,1)?\),%eax
+[ 	]*[0-9a-f]+:[ 	]+26 67 66 8d 04 01[ 	]+lea[ 	]+%es:\(%ecx,%eax(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 48 01[ 	]+lea[ 	]+0x1\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 88 00 00 00 00[ 	]+lea[ 	]+0x0\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 0c 25 00 00 00 00[ 	]+addr32 lea[ 	]+0x0,%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
 [ 	]*[0-9a-f]+:[ 	]+66 8b c8[ 	]+mov[ 	]+%eax,%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8b c8[ 	]+mov[ 	]+%eax,%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8b c8[ 	]+mov[ 	]+%eax,%ecx
+[ 	]*[0-9a-f]+:[ 	]+64 67 66 8d 08[ 	]+lea[ 	]+%fs:\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8d 04[ 	]+lea[ 	]+\(%si\),%eax
 [ 	]*[0-9a-f]+:[ 	]+66 8d 34[ 	]+lea[ 	]+\(%si\),%esi
 [ 	]*[0-9a-f]+:[ 	]+66 8d 04[ 	]+lea[ 	]+\(%si\),%eax
--- a/gas/testsuite/gas/i386/lea16-optimize2.d
+++ b/gas/testsuite/gas/i386/lea16-optimize2.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
 0+ <start>:
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 04 08[ 	]+lea[ 	]+\(%eax,%ecx(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 04 08[ 	]+lea[ 	]+\(%eax,%ecx(,1)?\),%eax
+[ 	]*[0-9a-f]+:[ 	]+26 67 66 8d 04 01[ 	]+lea[ 	]+%es:\(%ecx,%eax(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 48 01[ 	]+lea[ 	]+0x1\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 88 00 00 00 00[ 	]+lea[ 	]+0x0\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 0c 25 00 00 00 00[ 	]+addr32 lea[ 	]+0x0,%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
 [ 	]*[0-9a-f]+:[ 	]+66 8b c8[ 	]+mov[ 	]+%eax,%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8b c8[ 	]+mov[ 	]+%eax,%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8b c8[ 	]+mov[ 	]+%eax,%ecx
+[ 	]*[0-9a-f]+:[ 	]+64 67 66 8d 08[ 	]+lea[ 	]+%fs:\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8d 04[ 	]+lea[ 	]+\(%si\),%eax
 [ 	]*[0-9a-f]+:[ 	]+66 8d 34[ 	]+lea[ 	]+\(%si\),%esi
 [ 	]*[0-9a-f]+:[ 	]+66 8d 04[ 	]+lea[ 	]+\(%si\),%eax
--- a/gas/testsuite/gas/i386/lea16.d
+++ b/gas/testsuite/gas/i386/lea16.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
 0+ <start>:
 [ 	]*[0-9a-f]+:[ 	]+36 67 66 8d 04 08[ 	]+lea[ 	]+%ss:\(%eax,%ecx(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+36 67 66 8d 04 08[ 	]+lea[ 	]+%ss:\(%eax,%ecx(,1)?\),%eax
+[ 	]*[0-9a-f]+:[ 	]+26 67 66 8d 04 01[ 	]+lea[ 	]+%es:\(%ecx,%eax(,1)?\),%eax
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 48 01[ 	]+lea[ 	]+0x1\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 88 00 00 00 00[ 	]+lea[ 	]+0x0\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 0c 25 00 00 00 00[ 	]+addr32 lea[ 	]+0x0,%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 08[ 	]+lea[ 	]+\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+67 66 8d 08[ 	]+lea[ 	]+\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+65 67 66 8d 08[ 	]+lea[ 	]+%gs:\(%eax\),%ecx
+[ 	]*[0-9a-f]+:[ 	]+64 67 66 8d 08[ 	]+lea[ 	]+%fs:\(%eax\),%ecx
 [ 	]*[0-9a-f]+:[ 	]+66 8d 04[ 	]+lea[ 	]+\(%si\),%eax
 [ 	]*[0-9a-f]+:[ 	]+66 8d 34[ 	]+lea[ 	]+\(%si\),%esi
 [ 	]*[0-9a-f]+:[ 	]+66 8d 04[ 	]+lea[ 	]+\(%si\),%eax


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix
  2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
  2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
@ 2023-01-27 11:35 ` Jan Beulich
  2023-01-27 11:36 ` [PATCH 3/3] x86: drop LOCK from XCHG when optimizing Jan Beulich
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:35 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Swapping operands for commutative insns occurs outside of
optimize_encoding() and hence needs an explicit check for {nooptimize},
i.e. for a request to avoid any optimizations.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3624,7 +3624,8 @@ build_vex_prefix (const insn_template *t
       && i.reg_operands == i.operands - i.imm_operands
       && i.tm.opcode_modifier.vex
       && i.tm.opcode_modifier.commutative
-      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
+      && (i.tm.opcode_modifier.sse2avx
+	  || (optimize > 1 && !i.no_optimize))
       && i.rex == REX_B
       && i.vex.register_specifier
       && !(i.vex.register_specifier->reg_flags & RegRex))
--- a/gas/testsuite/gas/i386/x86-64-optimize-4.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-4.d
@@ -9,6 +9,8 @@ Disassembly of section .text:
 
 0+ <_start>:
  +[a-f0-9]+:	a9 7f 00 00 00       	test   \$0x7f,%eax
+ +[a-f0-9]+:	c4 c1 61 db e4       	vpand  %xmm12,%xmm3,%xmm4
+ +[a-f0-9]+:	c5 91 db e2          	vpand  %xmm2,%xmm13,%xmm4
  +[a-f0-9]+:	62 f1 7d 28 6f d1    	vmovdqa32 %ymm1,%ymm2
  +[a-f0-9]+:	62 f1 fd 28 6f d1    	vmovdqa64 %ymm1,%ymm2
  +[a-f0-9]+:	62 f1 7f 08 6f d1    	vmovdqu8 %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/x86-64-optimize-4.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-4.s
@@ -5,6 +5,9 @@
 _start:
 	{nooptimize} testl $0x7f, %eax
 
+	{nooptimize} vpand	%xmm12, %xmm3, %xmm4
+	{nooptimize} vpand	%xmm2, %xmm13, %xmm4
+
 	{nooptimize} vmovdqa32	%ymm1, %ymm2
 	{nooptimize} vmovdqa64	%ymm1, %ymm2
 	{nooptimize} vmovdqu8	%xmm1, %xmm2


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 3/3] x86: drop LOCK from XCHG when optimizing
  2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
  2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
  2023-01-27 11:35 ` [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix Jan Beulich
@ 2023-01-27 11:36 ` Jan Beulich
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:36 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu

Like with segment overrides on LEA, optimize away the redundant LOCK
prefix on XCHG: with a memory operand the insn is locked regardless of
whether the prefix is present.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5179,14 +5179,20 @@ md_assemble (char *line)
 
   /* Check for lock without a lockable instruction.  Destination operand
      must be memory unless it is xchg (0x86).  */
-  if (i.prefix[LOCK_PREFIX]
-      && (i.tm.opcode_modifier.prefixok < PrefixLock
+  if (i.prefix[LOCK_PREFIX])
+    {
+      if (i.tm.opcode_modifier.prefixok < PrefixLock
 	  || i.mem_operands == 0
 	  || (i.tm.base_opcode != 0x86
-	      && !(i.flags[i.operands - 1] & Operand_Mem))))
-    {
-      as_bad (_("expecting lockable instruction after `lock'"));
-      return;
+	      && !(i.flags[i.operands - 1] & Operand_Mem)))
+	{
+	  as_bad (_("expecting lockable instruction after `lock'"));
+	  return;
+	}
+
+      /* Zap the redundant prefix from XCHG when optimizing.  */
+      if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
+	i.prefix[LOCK_PREFIX] = 0;
     }
 
   if (is_any_vex_encoding (&i.tm)
--- a/gas/testsuite/gas/i386/optimize-2.d
+++ b/gas/testsuite/gas/i386/optimize-2.d
@@ -22,6 +22,8 @@ Disassembly of section .text:
  +[a-f0-9]+:	08 e4                	or     %ah,%ah
  +[a-f0-9]+:	66 09 ed             	or     %bp,%bp
  +[a-f0-9]+:	09 f6                	or     %esi,%esi
+ +[a-f0-9]+:	87 0a                	xchg   %ecx,\(%edx\)
+ +[a-f0-9]+:	87 11                	xchg   %edx,\(%ecx\)
  +[a-f0-9]+:	c5 f1 55 e9          	vandnpd %xmm1,%xmm1,%xmm5
  +[a-f0-9]+:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
  +[a-f0-9]+:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/optimize-2.s
+++ b/gas/testsuite/gas/i386/optimize-2.s
@@ -20,6 +20,9 @@ _start:
 	or	%bp, %bp
 	or	%esi, %esi
 
+	lock xchg %ecx, (%edx)
+	lock xchg (%ecx), %edx
+
 	vandnpd	%zmm1, %zmm1, %zmm5
 
 	vmovdqa32	%xmm1, %xmm2
--- a/gas/testsuite/gas/i386/optimize-2b.d
+++ b/gas/testsuite/gas/i386/optimize-2b.d
@@ -23,6 +23,8 @@ Disassembly of section .text:
  +[a-f0-9]+:	84 e4                	test   %ah,%ah
  +[a-f0-9]+:	66 85 ed             	test   %bp,%bp
  +[a-f0-9]+:	85 f6                	test   %esi,%esi
+ +[a-f0-9]+:	87 0a                	xchg   %ecx,\(%edx\)
+ +[a-f0-9]+:	87 11                	xchg   %edx,\(%ecx\)
  +[a-f0-9]+:	c5 f1 55 e9          	vandnpd %xmm1,%xmm1,%xmm5
  +[a-f0-9]+:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
  +[a-f0-9]+:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/optimize-3.d
+++ b/gas/testsuite/gas/i386/optimize-3.d
@@ -9,6 +9,8 @@ Disassembly of section .text:
 
 0+ <_start>:
  +[a-f0-9]+:	a9 7f 00 00 00       	test   \$0x7f,%eax
+ +[a-f0-9]+:	f0 87 0a             	lock xchg %ecx,\(%edx\)
+ +[a-f0-9]+:	f0 87 11             	lock xchg %edx,\(%ecx\)
  +[a-f0-9]+:	62 f1 7d 28 6f d1    	vmovdqa32 %ymm1,%ymm2
  +[a-f0-9]+:	62 f1 fd 28 6f d1    	vmovdqa64 %ymm1,%ymm2
  +[a-f0-9]+:	62 f1 7f 08 6f d1    	vmovdqu8 %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/optimize-3.s
+++ b/gas/testsuite/gas/i386/optimize-3.s
@@ -5,6 +5,9 @@
 _start:
 	{nooptimize} testl $0x7f, %eax
 
+	{nooptimize} lock xchg	%ecx, (%edx)
+	{nooptimize} lock xchg	(%ecx), %edx
+
 	{nooptimize} vmovdqa32	%ymm1, %ymm2
 	{nooptimize} vmovdqa64	%ymm1, %ymm2
 	{nooptimize} vmovdqu8	%xmm1, %xmm2


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-01-27 11:36 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
2023-01-27 11:35 ` [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix Jan Beulich
2023-01-27 11:36 ` [PATCH 3/3] x86: drop LOCK from XCHG when optimizing Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).