* [PATCH, i386]: Fix PR 58945, Improve atomic_compare_and_swap*_doubleword pattern
@ 2015-03-31 17:13 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2015-03-31 17:13 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]
Hello!
As shown in the PR, the attached patch substantially improves the generated
code when a cmpxchg{8,16}b insn is involved. The following testcase:
--cut here--
__int128_t i;
int main()
{
__atomic_store_16(&i, -1, 0);
if (i != -1)
__builtin_abort();
return 0;
}
--cut here--
compiles with -O2 -mcx16 to:
movq i(%rip), %rax
movq $-1, %rcx
movq i+8(%rip), %rdx
.L2:
movq %rcx, %rbx
lock cmpxchg16b i(%rip)
jne .L2
whereas without the patch, the compiler generated:
movq i(%rip), %rsi
movq $-1, %rcx
movq i+8(%rip), %rdi
.L2:
movq %rsi, %rax
movq %rdi, %rdx
movq %rcx, %rbx
lock cmpxchg16b i(%rip)
movq %rdx, %rdi
movq %rax, %rsi
jne .L2
2015-03-31 Uros Bizjak <ubizjak@gmail.com>
PR target/58945
* config/i386/sync.md (atomic_compare_and_swap<dwi>_doubleword):
Do not split operands 0 and operands 2 to halfmode.
(atomic_compare_and_swap<mode>): Update for
atomic_compare_and_swap<dwi>_doubleword changes.
The patch was bootstrapped and regression tested on x86_64-linux-gnu
{,-m32} and has been committed to mainline.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 2941 bytes --]
Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md (revision 221786)
+++ config/i386/sync.md (working copy)
@@ -351,21 +351,12 @@
else
{
machine_mode hmode = <CASHMODE>mode;
- rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n;
- lo_o = operands[1];
- lo_e = operands[3];
- lo_n = operands[4];
- hi_o = gen_highpart (hmode, lo_o);
- hi_e = gen_highpart (hmode, lo_e);
- hi_n = gen_highpart (hmode, lo_n);
- lo_o = gen_lowpart (hmode, lo_o);
- lo_e = gen_lowpart (hmode, lo_e);
- lo_n = gen_lowpart (hmode, lo_n);
-
emit_insn
(gen_atomic_compare_and_swap<mode>_doubleword
- (lo_o, hi_o, operands[2], lo_e, hi_e, lo_n, hi_n, operands[6]));
+ (operands[1], operands[2], operands[3],
+ gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
+ operands[6]));
}
ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
@@ -389,31 +380,26 @@
"lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
;; For double-word compare and swap, we are obliged to play tricks with
-;; the input newval (op5:op6) because the Intel register numbering does
+;; the input newval (op3:op4) because the Intel register numbering does
;; not match the gcc register numbering, so the pair must be CX:BX.
-;; That said, in order to take advantage of possible lower-subreg opts,
-;; treat all of the integral operands in the same way.
(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])
(define_insn "atomic_compare_and_swap<dwi>_doubleword"
- [(set (match_operand:DWIH 0 "register_operand" "=a")
- (unspec_volatile:DWIH
- [(match_operand:<DWI> 2 "memory_operand" "+m")
- (match_operand:DWIH 3 "register_operand" "0")
- (match_operand:DWIH 4 "register_operand" "1")
- (match_operand:DWIH 5 "register_operand" "b")
- (match_operand:DWIH 6 "register_operand" "c")
- (match_operand:SI 7 "const_int_operand")]
+ [(set (match_operand:<DWI> 0 "register_operand" "=A")
+ (unspec_volatile:<DWI>
+ [(match_operand:<DWI> 1 "memory_operand" "+m")
+ (match_operand:<DWI> 2 "register_operand" "0")
+ (match_operand:DWIH 3 "register_operand" "b")
+ (match_operand:DWIH 4 "register_operand" "c")
+ (match_operand:SI 5 "const_int_operand")]
UNSPECV_CMPXCHG))
- (set (match_operand:DWIH 1 "register_operand" "=d")
- (unspec_volatile:DWIH [(const_int 0)] UNSPECV_CMPXCHG))
- (set (match_dup 2)
+ (set (match_dup 1)
(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
"TARGET_CMPXCHG<doublemodesuffix>B"
- "lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2")
+ "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")
;; For operand 2 nonmemory_operand predicate is used instead of
;; register_operand to allow combiner to better optimize atomic
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-03-31 17:13 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-31 17:13 [PATCH, i386]: Fix PR 58945, Improve atomic_compare_and_swap*_doubleword pattern Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).