public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/redhat/heads/gcc-8-branch)] i386: Fix atomic FP peepholes [PR100182]
@ 2021-05-14 14:55 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-05-14 14:55 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:33cec5d35b0c96d4711b4549ee5f78487856038a

commit 33cec5d35b0c96d4711b4549ee5f78487856038a
Author: Uros Bizjak <ubizjak@gmail.com>
Date:   Fri Apr 23 17:29:29 2021 +0200

    i386: Fix atomic FP peepholes [PR100182]
    
    64-bit loads to/stores from x87 and SSE registers are also atomic on 32-bit
    targets, so there is no need for additional atomic moves to a temporary
    register.
    
    The previously introduced load peephole2 patterns assumed that there would
    be no additional loads from the load location outside the peepholed
    sequence, and wrongly removed the source location initialization.
    
    On the other hand, the previously introduced store peephole2 patterns
    assumed that there would be no additional loads from the stored location
    outside the peepholed sequence, and wrongly removed the destination location
    initialization.  Note that we can't use a plain x87 FST instruction to
    initialize the destination location because FST converts the value to the
    double-precision format, changing bits during the move.
    
    The patch restores removed initializations in load and store patterns.
    Additionally, plain x87 FST in store peephole2 patterns is prevented by
    limiting the store operand source to SSE registers.
    
    2021-04-23  Uroš Bizjak  <ubizjak@gmail.com>
    
    gcc/
            PR target/100182
            * config/i386/sync.md (FILD_ATOMIC/FIST_ATOMIC FP load peephole2):
            Copy operand 3 to operand 4.  Use sse_reg_operand
            as operand 3 predicate.
            (FILD_ATOMIC/FIST_ATOMIC FP load peephole2 with mem blockage): Ditto.
            (LDX_ATOMIC/STX_ATOMIC FP load peephole2): Ditto.
            (LDX_ATOMIC/STX_ATOMIC FP load peephole2 with mem blockage): Ditto.
            (FILD_ATOMIC/FIST_ATOMIC FP store peephole2):
            Copy operand 1 to operand 0.
            (FILD_ATOMIC/FIST_ATOMIC FP store peephole2 with mem blockage): Ditto.
            (LDX_ATOMIC/STX_ATOMIC FP store peephole2): Ditto.
            (LDX_ATOMIC/STX_ATOMIC FP store peephole2 with mem blockage): Ditto.
    
    gcc/testsuite/
    
            PR target/100182
            * gcc.target/i386/pr100182.c: New test.
            * gcc.target/i386/pr71245-1.c (dg-final): Xfail scan-assembler-not.
            * gcc.target/i386/pr71245-2.c (dg-final): Ditto.
    
    (cherry picked from commit d2324a5ab3ff097864ae6828cb1db4dd013c70d1)

Diff:
---
 gcc/config/i386/sync.md                   | 24 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr100182.c  | 30 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr71245-1.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr71245-2.c |  2 +-
 4 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 618397c2cd2..3e9a5f06b23 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -219,12 +219,13 @@
    (set (match_operand:DI 2 "memory_operand")
 	(unspec:DI [(match_dup 0)]
 		   UNSPEC_FIST_ATOMIC))
-   (set (match_operand:DF 3 "any_fp_register_operand")
+   (set (match_operand:DF 3 "sse_reg_operand")
 	(match_operand:DF 4 "memory_operand"))]
   "!TARGET_64BIT
    && peep2_reg_dead_p (2, operands[0])
    && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
-  [(set (match_dup 3) (match_dup 5))]
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 4) (match_dup 3))]
   "operands[5] = gen_lowpart (DFmode, operands[1]);")
 
 (define_peephole2
@@ -236,7 +237,7 @@
 		   UNSPEC_FIST_ATOMIC))
    (set (mem:BLK (scratch:SI))
 	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
-   (set (match_operand:DF 3 "any_fp_register_operand")
+   (set (match_operand:DF 3 "sse_reg_operand")
 	(match_operand:DF 4 "memory_operand"))]
   "!TARGET_64BIT
    && peep2_reg_dead_p (2, operands[0])
@@ -244,6 +245,7 @@
   [(const_int 0)]
 {
   emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
+  emit_move_insn (operands[4], operands[3]);
   emit_insn (gen_memory_blockage ());
   DONE;
 })
@@ -255,12 +257,13 @@
    (set (match_operand:DI 2 "memory_operand")
 	(unspec:DI [(match_dup 0)]
 		   UNSPEC_STX_ATOMIC))
-   (set (match_operand:DF 3 "any_fp_register_operand")
+   (set (match_operand:DF 3 "sse_reg_operand")
 	(match_operand:DF 4 "memory_operand"))]
   "!TARGET_64BIT
    && peep2_reg_dead_p (2, operands[0])
    && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
-  [(set (match_dup 3) (match_dup 5))]
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 4) (match_dup 3))]
   "operands[5] = gen_lowpart (DFmode, operands[1]);")
 
 (define_peephole2
@@ -272,7 +275,7 @@
 		   UNSPEC_STX_ATOMIC))
    (set (mem:BLK (scratch:SI))
 	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
-   (set (match_operand:DF 3 "any_fp_register_operand")
+   (set (match_operand:DF 3 "sse_reg_operand")
 	(match_operand:DF 4 "memory_operand"))]
   "!TARGET_64BIT
    && peep2_reg_dead_p (2, operands[0])
@@ -280,6 +283,7 @@
   [(const_int 0)]
 {
   emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
+  emit_move_insn (operands[4], operands[3]);
   emit_insn (gen_memory_blockage ());
   DONE;
 })
@@ -383,7 +387,8 @@
   "!TARGET_64BIT
    && peep2_reg_dead_p (3, operands[2])
    && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
-  [(set (match_dup 5) (match_dup 1))]
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 5) (match_dup 1))]
   "operands[5] = gen_lowpart (DFmode, operands[4]);")
 
 (define_peephole2
@@ -402,6 +407,7 @@
    && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
   [(const_int 0)]
 {
+  emit_move_insn (operands[0], operands[1]);
   emit_insn (gen_memory_blockage ());
   emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
   DONE;
@@ -419,7 +425,8 @@
   "!TARGET_64BIT
    && peep2_reg_dead_p (3, operands[2])
    && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
-  [(set (match_dup 5) (match_dup 1))]
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 5) (match_dup 1))]
   "operands[5] = gen_lowpart (DFmode, operands[4]);")
 
 (define_peephole2
@@ -438,6 +445,7 @@
    && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
   [(const_int 0)]
 {
+  emit_move_insn (operands[0], operands[1]);
   emit_insn (gen_memory_blockage ());
   emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
   DONE;
diff --git a/gcc/testsuite/gcc.target/i386/pr100182.c b/gcc/testsuite/gcc.target/i386/pr100182.c
new file mode 100644
index 00000000000..2f92a04db73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100182.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target ia32 } } */
+/* { dg-options "-O2 -march=i686" } */
+
+struct S { double _M_fp; };
+union U { double d; unsigned long long int l; };
+
+void
+__attribute__((noipa))
+foo (void)
+{
+  struct S a0, a1;
+  union U u;
+  double d0, d1;
+  a0._M_fp = 0.0;
+  a1._M_fp = 1.0;
+  __atomic_store_8 (&a0._M_fp, __atomic_load_8 (&a1._M_fp, __ATOMIC_SEQ_CST), __ATOMIC_SEQ_CST);
+  u.l = __atomic_load_8 (&a0._M_fp, __ATOMIC_SEQ_CST);
+  d0 = u.d;
+  u.l = __atomic_load_8 (&a1._M_fp, __ATOMIC_SEQ_CST);
+  d1 = u.d;
+  if (d0 != d1)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  foo ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-1.c b/gcc/testsuite/gcc.target/i386/pr71245-1.c
index be0b7602a8c..02c0dcb80b6 100644
--- a/gcc/testsuite/gcc.target/i386/pr71245-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr71245-1.c
@@ -19,4 +19,4 @@ void foo_d (void)
   __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
 }
 
-/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
+/* { dg-final { scan-assembler-not "(fistp|fild)" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-2.c b/gcc/testsuite/gcc.target/i386/pr71245-2.c
index 65c139849d5..bf37a8cbb71 100644
--- a/gcc/testsuite/gcc.target/i386/pr71245-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr71245-2.c
@@ -19,4 +19,4 @@ void foo_d (void)
   __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
 }
 
-/* { dg-final { scan-assembler-not "movlps" } } */
+/* { dg-final { scan-assembler-not "movlps" { xfail *-*-* } } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-05-14 14:55 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-14 14:55 [gcc(refs/vendors/redhat/heads/gcc-8-branch)] i386: Fix atomic FP peepholes [PR100182] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).