* [PATCH 0/3] x86: small adjustments to optimization logic
@ 2023-01-27 11:34 Jan Beulich
2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:34 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu
1: respect {nooptimize} for LEA
2: respect {nooptimize} when building VEX prefix
3: drop LOCK from XCHG when optimizing
Jan
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] x86: respect {nooptimize} for LEA
2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
@ 2023-01-27 11:35 ` Jan Beulich
2023-01-27 11:35 ` [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix Jan Beulich
2023-01-27 11:36 ` [PATCH 3/3] x86: drop LOCK from XCHG when optimizing Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:35 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu
Dropping a meaningless segment prefix occurs outside of
optimize_encoding() and hence needs explicit checking for a request to
avoid any optimizations.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8159,7 +8159,7 @@ process_operands (void)
{
if (!quiet_warnings)
as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
- if (optimize)
+ if (optimize && !i.no_optimize)
{
i.seg[0] = NULL;
i.prefix[SEG_PREFIX] = 0;
--- a/gas/testsuite/gas/i386/lea-optimize.d
+++ b/gas/testsuite/gas/i386/lea-optimize.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
0+ <start>:
[ ]*[0-9a-f]+:[ ]+8d 04 08[ ]+lea[ ]+\(%eax,%ecx(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+8d 04 08[ ]+lea[ ]+\(%eax,%ecx(,1)?\),%eax
+[ ]*[0-9a-f]+:[ ]+26 8d 04 01[ ]+lea[ ]+%es:\(%ecx,%eax(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+8d 48 01[ ]+lea[ ]+0x1\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+8d 88 00 00 00 00[ ]+lea[ ]+0x0\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+8d 0c 25 00 00 00 00[ ]+lea[ ]+0x0\(,(%eiz)?(,1)?\),%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+8b c8[ ]+mov[ ]+%eax,%ecx
[ ]*[0-9a-f]+:[ ]+8b c8[ ]+mov[ ]+%eax,%ecx
[ ]*[0-9a-f]+:[ ]+8b c8[ ]+mov[ ]+%eax,%ecx
+[ ]*[0-9a-f]+:[ ]+64 8d 08[ ]+lea[ ]+%fs:\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+0f b7 c6[ ]+movzwl[ ]+%si,%eax
[ ]*[0-9a-f]+:[ ]+0f b7 f6[ ]+movzwl[ ]+%si,%esi
[ ]*[0-9a-f]+:[ ]+0f b7 c6[ ]+movzwl[ ]+%si,%eax
--- a/gas/testsuite/gas/i386/lea.d
+++ b/gas/testsuite/gas/i386/lea.d
@@ -9,6 +9,7 @@ Disassembly of section .text:
0+ <start>:
[ ]*[0-9a-f]+:[ ]+36 8d 04 08[ ]+lea[ ]+%ss:\(%eax,%ecx(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+36 8d 04 08[ ]+lea[ ]+%ss:\(%eax,%ecx(,1)?\),%eax
+[ ]*[0-9a-f]+:[ ]+26 8d 04 01[ ]+lea[ ]+%es:\(%ecx,%eax(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+8d 48 01[ ]+lea[ ]+0x1\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+8d 88 00 00 00 00[ ]+lea[ ]+0x0\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+8d 0c 25 00 00 00 00[ ]+lea[ ]+0x0\(,(%eiz)?(,1)?\),%ecx
@@ -20,6 +21,7 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+8d 08[ ]+lea[ ]+\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+8d 08[ ]+lea[ ]+\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+65 8d 08[ ]+lea[ ]+%gs:\(%eax\),%ecx
+[ ]*[0-9a-f]+:[ ]+64 8d 08[ ]+lea[ ]+%fs:\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 8d 04[ ]+lea[ ]+\(%si\),%eax
[ ]*[0-9a-f]+:[ ]+67 8d 34[ ]+lea[ ]+\(%si\),%esi
[ ]*[0-9a-f]+:[ ]+67 8d 04[ ]+lea[ ]+\(%si\),%eax
--- a/gas/testsuite/gas/i386/lea.e
+++ b/gas/testsuite/gas/i386/lea.e
@@ -1,4 +1,6 @@
.*: Assembler messages:
.*:3: Warning: .* `lea' .*
.*:4: Warning: .* `lea' .*
-.*:19: Warning: .* `lea' .*
+.*:5: Warning: .* `lea' .*
+.*:20: Warning: .* `lea' .*
+.*:21: Warning: .* `lea' .*
--- a/gas/testsuite/gas/i386/lea.s
+++ b/gas/testsuite/gas/i386/lea.s
@@ -2,6 +2,7 @@
start:
lea %ss:(%eax,%ecx), %eax
ss lea (%eax,%ecx), %eax
+ {nooptimize} es lea (%ecx,%eax), %eax
.allow_index_reg
lea 1(%eax), %ecx
@@ -17,6 +18,7 @@ start:
lea (%eax), %ecx
lea 1-1(%eax), %ecx
lea %gs:(%eax), %ecx
+ {nooptimize} lea %fs:(%eax), %ecx
lea (%si), %eax
lea (%si), %esi
--- a/gas/testsuite/gas/i386/lea16-optimize.d
+++ b/gas/testsuite/gas/i386/lea16-optimize.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
0+ <start>:
[ ]*[0-9a-f]+:[ ]+67 66 8d 04 08[ ]+lea[ ]+\(%eax,%ecx(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+67 66 8d 04 08[ ]+lea[ ]+\(%eax,%ecx(,1)?\),%eax
+[ ]*[0-9a-f]+:[ ]+26 67 66 8d 04 01[ ]+lea[ ]+%es:\(%ecx,%eax(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+67 66 8d 48 01[ ]+lea[ ]+0x1\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 88 00 00 00 00[ ]+lea[ ]+0x0\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 0c 25 00 00 00 00[ ]+addr32 lea[ ]+0x0,%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+66 8b c8[ ]+mov[ ]+%eax,%ecx
[ ]*[0-9a-f]+:[ ]+66 8b c8[ ]+mov[ ]+%eax,%ecx
[ ]*[0-9a-f]+:[ ]+66 8b c8[ ]+mov[ ]+%eax,%ecx
+[ ]*[0-9a-f]+:[ ]+64 67 66 8d 08[ ]+lea[ ]+%fs:\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+66 8d 04[ ]+lea[ ]+\(%si\),%eax
[ ]*[0-9a-f]+:[ ]+66 8d 34[ ]+lea[ ]+\(%si\),%esi
[ ]*[0-9a-f]+:[ ]+66 8d 04[ ]+lea[ ]+\(%si\),%eax
--- a/gas/testsuite/gas/i386/lea16-optimize2.d
+++ b/gas/testsuite/gas/i386/lea16-optimize2.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
0+ <start>:
[ ]*[0-9a-f]+:[ ]+67 66 8d 04 08[ ]+lea[ ]+\(%eax,%ecx(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+67 66 8d 04 08[ ]+lea[ ]+\(%eax,%ecx(,1)?\),%eax
+[ ]*[0-9a-f]+:[ ]+26 67 66 8d 04 01[ ]+lea[ ]+%es:\(%ecx,%eax(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+67 66 8d 48 01[ ]+lea[ ]+0x1\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 88 00 00 00 00[ ]+lea[ ]+0x0\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 0c 25 00 00 00 00[ ]+addr32 lea[ ]+0x0,%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+66 8b c8[ ]+mov[ ]+%eax,%ecx
[ ]*[0-9a-f]+:[ ]+66 8b c8[ ]+mov[ ]+%eax,%ecx
[ ]*[0-9a-f]+:[ ]+66 8b c8[ ]+mov[ ]+%eax,%ecx
+[ ]*[0-9a-f]+:[ ]+64 67 66 8d 08[ ]+lea[ ]+%fs:\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+66 8d 04[ ]+lea[ ]+\(%si\),%eax
[ ]*[0-9a-f]+:[ ]+66 8d 34[ ]+lea[ ]+\(%si\),%esi
[ ]*[0-9a-f]+:[ ]+66 8d 04[ ]+lea[ ]+\(%si\),%eax
--- a/gas/testsuite/gas/i386/lea16.d
+++ b/gas/testsuite/gas/i386/lea16.d
@@ -10,6 +10,7 @@ Disassembly of section .text:
0+ <start>:
[ ]*[0-9a-f]+:[ ]+36 67 66 8d 04 08[ ]+lea[ ]+%ss:\(%eax,%ecx(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+36 67 66 8d 04 08[ ]+lea[ ]+%ss:\(%eax,%ecx(,1)?\),%eax
+[ ]*[0-9a-f]+:[ ]+26 67 66 8d 04 01[ ]+lea[ ]+%es:\(%ecx,%eax(,1)?\),%eax
[ ]*[0-9a-f]+:[ ]+67 66 8d 48 01[ ]+lea[ ]+0x1\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 88 00 00 00 00[ ]+lea[ ]+0x0\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 0c 25 00 00 00 00[ ]+addr32 lea[ ]+0x0,%ecx
@@ -21,6 +22,7 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+67 66 8d 08[ ]+lea[ ]+\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+67 66 8d 08[ ]+lea[ ]+\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+65 67 66 8d 08[ ]+lea[ ]+%gs:\(%eax\),%ecx
+[ ]*[0-9a-f]+:[ ]+64 67 66 8d 08[ ]+lea[ ]+%fs:\(%eax\),%ecx
[ ]*[0-9a-f]+:[ ]+66 8d 04[ ]+lea[ ]+\(%si\),%eax
[ ]*[0-9a-f]+:[ ]+66 8d 34[ ]+lea[ ]+\(%si\),%esi
[ ]*[0-9a-f]+:[ ]+66 8d 04[ ]+lea[ ]+\(%si\),%eax
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix
2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
@ 2023-01-27 11:35 ` Jan Beulich
2023-01-27 11:36 ` [PATCH 3/3] x86: drop LOCK from XCHG when optimizing Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:35 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu
Swapping operands for commutative insns occurs outside of
optimize_encoding() and hence needs explicit checking for a request to
avoid any optimizations.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3624,7 +3624,8 @@ build_vex_prefix (const insn_template *t
&& i.reg_operands == i.operands - i.imm_operands
&& i.tm.opcode_modifier.vex
&& i.tm.opcode_modifier.commutative
- && (i.tm.opcode_modifier.sse2avx || optimize > 1)
+ && (i.tm.opcode_modifier.sse2avx
+ || (optimize > 1 && !i.no_optimize))
&& i.rex == REX_B
&& i.vex.register_specifier
&& !(i.vex.register_specifier->reg_flags & RegRex))
--- a/gas/testsuite/gas/i386/x86-64-optimize-4.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-4.d
@@ -9,6 +9,8 @@ Disassembly of section .text:
0+ <_start>:
+[a-f0-9]+: a9 7f 00 00 00 test \$0x7f,%eax
+ +[a-f0-9]+: c4 c1 61 db e4 vpand %xmm12,%xmm3,%xmm4
+ +[a-f0-9]+: c5 91 db e2 vpand %xmm2,%xmm13,%xmm4
+[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2
+[a-f0-9]+: 62 f1 fd 28 6f d1 vmovdqa64 %ymm1,%ymm2
+[a-f0-9]+: 62 f1 7f 08 6f d1 vmovdqu8 %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/x86-64-optimize-4.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-4.s
@@ -5,6 +5,9 @@
_start:
{nooptimize} testl $0x7f, %eax
+ {nooptimize} vpand %xmm12, %xmm3, %xmm4
+ {nooptimize} vpand %xmm2, %xmm13, %xmm4
+
{nooptimize} vmovdqa32 %ymm1, %ymm2
{nooptimize} vmovdqa64 %ymm1, %ymm2
{nooptimize} vmovdqu8 %xmm1, %xmm2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 3/3] x86: drop LOCK from XCHG when optimizing
2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
2023-01-27 11:35 ` [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix Jan Beulich
@ 2023-01-27 11:36 ` Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2023-01-27 11:36 UTC (permalink / raw)
To: Binutils; +Cc: H.J. Lu
Like with segment overrides on LEA, optimize away the redundant LOCK
prefix on XCHG: XCHG with a memory operand is always implicitly locked.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5179,14 +5179,20 @@ md_assemble (char *line)
/* Check for lock without a lockable instruction. Destination operand
must be memory unless it is xchg (0x86). */
- if (i.prefix[LOCK_PREFIX]
- && (i.tm.opcode_modifier.prefixok < PrefixLock
+ if (i.prefix[LOCK_PREFIX])
+ {
+ if (i.tm.opcode_modifier.prefixok < PrefixLock
|| i.mem_operands == 0
|| (i.tm.base_opcode != 0x86
- && !(i.flags[i.operands - 1] & Operand_Mem))))
- {
- as_bad (_("expecting lockable instruction after `lock'"));
- return;
+ && !(i.flags[i.operands - 1] & Operand_Mem)))
+ {
+ as_bad (_("expecting lockable instruction after `lock'"));
+ return;
+ }
+
+ /* Zap the redundant prefix from XCHG when optimizing. */
+ if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
+ i.prefix[LOCK_PREFIX] = 0;
}
if (is_any_vex_encoding (&i.tm)
--- a/gas/testsuite/gas/i386/optimize-2.d
+++ b/gas/testsuite/gas/i386/optimize-2.d
@@ -22,6 +22,8 @@ Disassembly of section .text:
+[a-f0-9]+: 08 e4 or %ah,%ah
+[a-f0-9]+: 66 09 ed or %bp,%bp
+[a-f0-9]+: 09 f6 or %esi,%esi
+ +[a-f0-9]+: 87 0a xchg %ecx,\(%edx\)
+ +[a-f0-9]+: 87 11 xchg %edx,\(%ecx\)
+[a-f0-9]+: c5 f1 55 e9 vandnpd %xmm1,%xmm1,%xmm5
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/optimize-2.s
+++ b/gas/testsuite/gas/i386/optimize-2.s
@@ -20,6 +20,9 @@ _start:
or %bp, %bp
or %esi, %esi
+ lock xchg %ecx, (%edx)
+ lock xchg (%ecx), %edx
+
vandnpd %zmm1, %zmm1, %zmm5
vmovdqa32 %xmm1, %xmm2
--- a/gas/testsuite/gas/i386/optimize-2b.d
+++ b/gas/testsuite/gas/i386/optimize-2b.d
@@ -23,6 +23,8 @@ Disassembly of section .text:
+[a-f0-9]+: 84 e4 test %ah,%ah
+[a-f0-9]+: 66 85 ed test %bp,%bp
+[a-f0-9]+: 85 f6 test %esi,%esi
+ +[a-f0-9]+: 87 0a xchg %ecx,\(%edx\)
+ +[a-f0-9]+: 87 11 xchg %edx,\(%ecx\)
+[a-f0-9]+: c5 f1 55 e9 vandnpd %xmm1,%xmm1,%xmm5
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/optimize-3.d
+++ b/gas/testsuite/gas/i386/optimize-3.d
@@ -9,6 +9,8 @@ Disassembly of section .text:
0+ <_start>:
+[a-f0-9]+: a9 7f 00 00 00 test \$0x7f,%eax
+ +[a-f0-9]+: f0 87 0a lock xchg %ecx,\(%edx\)
+ +[a-f0-9]+: f0 87 11 lock xchg %edx,\(%ecx\)
+[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2
+[a-f0-9]+: 62 f1 fd 28 6f d1 vmovdqa64 %ymm1,%ymm2
+[a-f0-9]+: 62 f1 7f 08 6f d1 vmovdqu8 %xmm1,%xmm2
--- a/gas/testsuite/gas/i386/optimize-3.s
+++ b/gas/testsuite/gas/i386/optimize-3.s
@@ -5,6 +5,9 @@
_start:
{nooptimize} testl $0x7f, %eax
+ {nooptimize} lock xchg %ecx, (%edx)
+ {nooptimize} lock xchg (%ecx), %edx
+
{nooptimize} vmovdqa32 %ymm1, %ymm2
{nooptimize} vmovdqa64 %ymm1, %ymm2
{nooptimize} vmovdqu8 %xmm1, %xmm2
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-01-27 11:36 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-27 11:34 [PATCH 0/3] x86: small adjustments to optimization logic Jan Beulich
2023-01-27 11:35 ` [PATCH 1/3] x86: respect {nooptimize} for LEA Jan Beulich
2023-01-27 11:35 ` [PATCH 2/3] x86-64: respect {nooptimize} when building VEX prefix Jan Beulich
2023-01-27 11:36 ` [PATCH 3/3] x86: drop LOCK from XCHG when optimizing Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).