public inbox for gcc-patches@gcc.gnu.org
* [PATCH] ARM: Block predication on atomics [PR111235]
@ 2023-09-07 14:06 Wilco Dijkstra
  2023-09-26 22:52 ` Ramana Radhakrishnan
  0 siblings, 1 reply; 7+ messages in thread
From: Wilco Dijkstra @ 2023-09-07 14:06 UTC (permalink / raw)
  To: Richard Earnshaw, Kyrylo Tkachov; +Cc: GCC Patches

The v7 memory ordering model allows reordering of conditional atomic instructions.
To avoid this, make all atomic patterns unconditional.  Expand atomic loads and
stores for all architectures so the memory access can be wrapped into an UNSPEC.
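
To illustrate (a sketch along the lines of the new test; the comments describe
the intended behaviour rather than actual compiler output, and the exact code
generated depends on target options):

#include <stdatomic.h>

long long f (_Atomic long long *p, long long x)
{
  /* With -march=armv7-a the relaxed 64-bit atomic load is emitted via
     LDREXD (or LDRD on LPAE targets).  Because it is only reached
     conditionally, it could previously be predicated, and the v7 memory
     model allows such a conditional atomic access to be reordered with
     surrounding accesses.  With this patch the access stays unconditional
     and a branch is emitted instead.  */
  if (x > 100)
    x = atomic_load_explicit (p, memory_order_relaxed);
  return x + 1;
}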

Passes regress/bootstrap, OK for commit?

gcc/ChangeLog:
	PR target/111235
	* config/arm/constraints.md: Remove Pf constraint.
	* config/arm/sync.md (arm_atomic_load<mode>): Add new pattern.
	(arm_atomic_load_acquire<mode>): Likewise.
	(arm_atomic_store<mode>): Likewise.
	(arm_atomic_store_release<mode>): Likewise.
	(atomic_load<mode>): Always expand atomic loads explicitly.
	(atomic_store<mode>): Always expand atomic stores explicitly.
	(arm_atomic_loaddi2_ldrd): Remove predication.
	(arm_load_exclusive<mode>): Likewise.
	(arm_load_acquire_exclusive<mode>): Likewise.
	(arm_load_exclusivesi): Likewise.
	(arm_load_acquire_exclusivesi): Likewise.
	(arm_load_exclusivedi): Likewise.
	(arm_load_acquire_exclusivedi): Likewise.
	(arm_store_exclusive<mode>): Likewise.
	(arm_store_release_exclusivedi): Likewise.
	(arm_store_release_exclusive<mode>): Likewise.
	* config/arm/unspecs.md: Add VUNSPEC_LDR and VUNSPEC_STR.

gcc/testsuite/ChangeLog:
	PR target/111235
	* gcc.target/arm/pr111235.c: Add new test.

---

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 05a4ebbdd67601d7b92aa44a619d17634cc69f17..d7c4a1b0cd785f276862048005e6cfa57cdcb20d 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -36,7 +36,7 @@
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
 ;;		     Rg, Ri
-;; in all states: Pf, Pg
+;; in all states: Pg
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Up, Uf, Ux, Ul
@@ -239,13 +239,6 @@ (define_constraint "Pe"
   (and (match_code "const_int")
        (match_test "TARGET_THUMB1 && ival >= 256 && ival <= 510")))
 
-(define_constraint "Pf"
-  "Memory models except relaxed, consume or release ones."
-  (and (match_code "const_int")
-       (match_test "!is_mm_relaxed (memmodel_from_int (ival))
-		    && !is_mm_consume (memmodel_from_int (ival))
-		    && !is_mm_release (memmodel_from_int (ival))")))
-
 (define_constraint "Pg"
   "@internal In Thumb-2 state a constant in range 1 to 32"
   (and (match_code "const_int")
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 7626bf3c443285dc63b4c4367b11a879a99c93c6..2210810f67f37ce043b8fdc73b4f21b54c5b1912 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -62,68 +62,110 @@ (define_insn "*memory_barrier"
    (set_attr "conds" "unconditional")
    (set_attr "predicable" "no")])
 
-(define_insn "atomic_load<mode>"
-  [(set (match_operand:QHSI 0 "register_operand" "=r,r,l")
+(define_insn "arm_atomic_load<mode>"
+  [(set (match_operand:QHSI 0 "register_operand" "=r,l")
     (unspec_volatile:QHSI
-      [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q,Q,Q")
-       (match_operand:SI 2 "const_int_operand" "n,Pf,n")]	;; model
+      [(match_operand:QHSI 1 "memory_operand" "m,m")]
+      VUNSPEC_LDR))]
+  ""
+  "ldr<sync_sfx>\t%0, %1"
+  [(set_attr "arch" "32,any")])
+
+(define_insn "arm_atomic_load_acquire<mode>"
+  [(set (match_operand:QHSI 0 "register_operand" "=r")
+    (unspec_volatile:QHSI
+      [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q")]
       VUNSPEC_LDA))]
   "TARGET_HAVE_LDACQ"
-  {
-    if (aarch_mm_needs_acquire (operands[2]))
-      {
-	if (TARGET_THUMB1)
-	  return "lda<sync_sfx>\t%0, %1";
-	else
-	  return "lda<sync_sfx>%?\t%0, %1";
-      }
-    else
-      {
-	if (TARGET_THUMB1)
-	  return "ldr<sync_sfx>\t%0, %1";
-	else
-	  return "ldr<sync_sfx>%?\t%0, %1";
-      }
-  }
-  [(set_attr "arch" "32,v8mb,any")
-   (set_attr "predicable" "yes")])
+  "lda<sync_sfx>\t%0, %C1"
+)
 
-(define_insn "atomic_store<mode>"
-  [(set (match_operand:QHSI 0 "memory_operand" "=Q,Q,Q")
+(define_insn "arm_atomic_store<mode>"
+  [(set (match_operand:QHSI 0 "memory_operand" "=m,m")
+    (unspec_volatile:QHSI
+      [(match_operand:QHSI 1 "register_operand" "r,l")]
+      VUNSPEC_STR))]
+  ""
+  "str<sync_sfx>\t%1, %0";
+  [(set_attr "arch" "32,any")])
+
+(define_insn "arm_atomic_store_release<mode>"
+  [(set (match_operand:QHSI 0 "arm_sync_memory_operand" "=Q")
     (unspec_volatile:QHSI
-      [(match_operand:QHSI 1 "general_operand" "r,r,l")
-       (match_operand:SI 2 "const_int_operand" "n,Pf,n")]	;; model
+      [(match_operand:QHSI 1 "register_operand" "r")]
       VUNSPEC_STL))]
   "TARGET_HAVE_LDACQ"
-  {
-    if (aarch_mm_needs_release (operands[2]))
-      {
-	if (TARGET_THUMB1)
-	  return "stl<sync_sfx>\t%1, %0";
-	else
-	  return "stl<sync_sfx>%?\t%1, %0";
-      }
-    else
-      {
-	if (TARGET_THUMB1)
-	  return "str<sync_sfx>\t%1, %0";
-	else
-	  return "str<sync_sfx>%?\t%1, %0";
-      }
-  }
-  [(set_attr "arch" "32,v8mb,any")
-   (set_attr "predicable" "yes")])
+  "stl<sync_sfx>\t%1, %C0")
+
+
+(define_expand "atomic_load<mode>"
+  [(match_operand:QHSI 0 "register_operand")		;; val out
+   (match_operand:QHSI 1 "arm_sync_memory_operand")	;; memory
+   (match_operand:SI 2 "const_int_operand")]		;; model
+  ""
+{
+  memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+  if (TARGET_HAVE_LDACQ && !is_mm_relaxed (model))
+    {
+      emit_insn (gen_arm_atomic_load_acquire<mode> (operands[0], operands[1]));
+      DONE;
+    }
+
+  /* The seq_cst model needs a barrier before the load to block reordering with
+     earlier accesses.  */
+  if (is_mm_seq_cst (model))
+    expand_mem_thread_fence (model);
+
+  emit_insn (gen_arm_atomic_load<mode> (operands[0], operands[1]));
+
+  /* All non-relaxed models need a barrier after the load when load-acquire
+     instructions are not available.  */
+  if (!is_mm_relaxed (model))
+    expand_mem_thread_fence (model);
+
+  DONE;
+})
+
+(define_expand "atomic_store<mode>"
+  [(match_operand:QHSI 0 "arm_sync_memory_operand")	;; memory
+   (match_operand:QHSI 1 "register_operand")		;; store value
+   (match_operand:SI 2 "const_int_operand")]		;; model
+  ""
+{
+  memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+  if (TARGET_HAVE_LDACQ && !is_mm_relaxed (model))
+    {
+      emit_insn (gen_arm_atomic_store_release<mode> (operands[0], operands[1]));
+      DONE;
+    }
+
+  /* All non-relaxed models need a barrier before the store when store-release
+     instructions are not available.  */
+  if (!is_mm_relaxed (model))
+    expand_mem_thread_fence (model);
+
+  emit_insn (gen_arm_atomic_store<mode> (operands[0], operands[1]));
+
+  /* The seq_cst model needs a barrier after the store to block reordering with
+     later accesses.  */
+  if (is_mm_seq_cst (model))
+    expand_mem_thread_fence (model);
+
+  DONE;
+})
 
 ;; An LDRD instruction usable by the atomic_loaddi expander on LPAE targets
 
 (define_insn "arm_atomic_loaddi2_ldrd"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec_volatile:DI
-	  [(match_operand:DI 1 "arm_sync_memory_operand" "Q")]
+	  [(match_operand:DI 1 "memory_operand" "m")]
 	    VUNSPEC_LDRD_ATOMIC))]
   "ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_LPAE"
-  "ldrd%?\t%0, %H0, %C1"
-  [(set_attr "predicable" "yes")])
+  "ldrd\t%0, %H0, %1"
+)
 
 ;; There are three ways to expand this depending on the architecture
 ;; features available.  As for the barriers, a load needs a barrier
@@ -152,6 +194,11 @@ (define_expand "atomic_loaddi"
       DONE;
     }
 
+  /* The seq_cst model needs a barrier before the load to block reordering with
+     earlier accesses.  */
+  if (is_mm_seq_cst (model))
+    expand_mem_thread_fence (model);
+
   /* On LPAE targets LDRD and STRD accesses to 64-bit aligned
      locations are 64-bit single-copy atomic.  We still need barriers in the
      appropriate places to implement the ordering constraints.  */
@@ -160,7 +207,6 @@ (define_expand "atomic_loaddi"
   else
     emit_insn (gen_arm_load_exclusivedi (operands[0], operands[1]));
 
-
   /* All non-relaxed models need a barrier after the load when load-acquire
      instructions are not available.  */
   if (!is_mm_relaxed (model))
@@ -446,54 +492,42 @@ (define_insn_and_split "atomic_nand_fetch<mode>"
   [(set_attr "arch" "32,v8mb")])
 
 (define_insn "arm_load_exclusive<mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
         (zero_extend:SI
 	  (unspec_volatile:NARROW
-	    [(match_operand:NARROW 1 "mem_noofs_operand" "Ua,Ua")]
+	    [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
 	    VUNSPEC_LL)))]
   "TARGET_HAVE_LDREXBH"
-  "@
-   ldrex<sync_sfx>%?\t%0, %C1
-   ldrex<sync_sfx>\t%0, %C1"
-  [(set_attr "arch" "32,v8mb")
-   (set_attr "predicable" "yes")])
+  "ldrex<sync_sfx>\t%0, %C1"
+)
 
 (define_insn "arm_load_acquire_exclusive<mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
         (zero_extend:SI
 	  (unspec_volatile:NARROW
-	    [(match_operand:NARROW 1 "mem_noofs_operand" "Ua,Ua")]
+	    [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
 	    VUNSPEC_LAX)))]
   "TARGET_HAVE_LDACQ"
-  "@
-   ldaex<sync_sfx>%?\\t%0, %C1
-   ldaex<sync_sfx>\\t%0, %C1"
-  [(set_attr "arch" "32,v8mb")
-   (set_attr "predicable" "yes")])
+  "ldaex<sync_sfx>\\t%0, %C1"
+)
 
 (define_insn "arm_load_exclusivesi"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec_volatile:SI
-	  [(match_operand:SI 1 "mem_noofs_operand" "Ua,Ua")]
+	  [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
 	  VUNSPEC_LL))]
   "TARGET_HAVE_LDREX"
-  "@
-   ldrex%?\t%0, %C1
-   ldrex\t%0, %C1"
-  [(set_attr "arch" "32,v8mb")
-   (set_attr "predicable" "yes")])
+  "ldrex\t%0, %C1"
+)
 
 (define_insn "arm_load_acquire_exclusivesi"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec_volatile:SI
-	  [(match_operand:SI 1 "mem_noofs_operand" "Ua,Ua")]
+	  [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
 	  VUNSPEC_LAX))]
   "TARGET_HAVE_LDACQ"
-  "@
-   ldaex%?\t%0, %C1
-   ldaex\t%0, %C1"
-  [(set_attr "arch" "32,v8mb")
-   (set_attr "predicable" "yes")])
+  "ldaex\t%0, %C1"
+)
 
 (define_insn "arm_load_exclusivedi"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -501,8 +535,8 @@ (define_insn "arm_load_exclusivedi"
 	  [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
 	  VUNSPEC_LL))]
   "TARGET_HAVE_LDREXD"
-  "ldrexd%?\t%0, %H0, %C1"
-  [(set_attr "predicable" "yes")])
+  "ldrexd\t%0, %H0, %C1"
+)
 
 (define_insn "arm_load_acquire_exclusivedi"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -510,8 +544,8 @@ (define_insn "arm_load_acquire_exclusivedi"
 	  [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
 	  VUNSPEC_LAX))]
   "TARGET_HAVE_LDACQEXD && ARM_DOUBLEWORD_ALIGN"
-  "ldaexd%?\t%0, %H0, %C1"
-  [(set_attr "predicable" "yes")])
+  "ldaexd\t%0, %H0, %C1"
+)
 
 (define_insn "arm_store_exclusive<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -530,14 +564,11 @@ (define_insn "arm_store_exclusive<mode>"
 	   Note that the 1st register always gets the
 	   lowest word in memory.  */
 	gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2);
-	return "strexd%?\t%0, %2, %H2, %C1";
+	return "strexd\t%0, %2, %H2, %C1";
       }
-    if (TARGET_THUMB1)
-      return "strex<sync_sfx>\t%0, %2, %C1";
-    else
-      return "strex<sync_sfx>%?\t%0, %2, %C1";
+    return "strex<sync_sfx>\t%0, %2, %C1";
   }
-  [(set_attr "predicable" "yes")])
+)
 
 (define_insn "arm_store_release_exclusivedi"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -550,20 +581,16 @@ (define_insn "arm_store_release_exclusivedi"
   {
     /* See comment in arm_store_exclusive<mode> above.  */
     gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2);
-    return "stlexd%?\t%0, %2, %H2, %C1";
+    return "stlexd\t%0, %2, %H2, %C1";
   }
-  [(set_attr "predicable" "yes")])
+)
 
 (define_insn "arm_store_release_exclusive<mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
+  [(set (match_operand:SI 0 "s_register_operand" "=&r")
 	(unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
-   (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua,Ua")
+   (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua")
 	(unspec_volatile:QHSI
-	  [(match_operand:QHSI 2 "s_register_operand" "r,r")]
+	  [(match_operand:QHSI 2 "s_register_operand" "r")]
 	  VUNSPEC_SLX))]
   "TARGET_HAVE_LDACQ"
-  "@
-   stlex<sync_sfx>%?\t%0, %2, %C1
-   stlex<sync_sfx>\t%0, %2, %C1"
-  [(set_attr "arch" "32,v8mb")
-   (set_attr "predicable" "yes")])
+  "stlex<sync_sfx>\t%0, %2, %C1")
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index dccda283573208bd5db4f04c10ae9dbbfdda49dd..ba86ce0992f2b14a5e65ac09784b3fb3539de035 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -221,7 +221,9 @@ (define_c_enum "unspecv" [
   VUNSPEC_SC		; Represent a store-register-exclusive.
   VUNSPEC_LAX		; Represent a load-register-acquire-exclusive.
   VUNSPEC_SLX		; Represent a store-register-release-exclusive.
-  VUNSPEC_LDA		; Represent a store-register-acquire.
+  VUNSPEC_LDR		; Represent a load-register-relaxed.
+  VUNSPEC_LDA		; Represent a load-register-acquire.
+  VUNSPEC_STR		; Represent a store-register-relaxed.
   VUNSPEC_STL		; Represent a store-register-release.
   VUNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
   VUNSPEC_SET_FPSCR	; Represent assign of FPSCR content.
diff --git a/gcc/testsuite/gcc.target/arm/pr111235.c b/gcc/testsuite/gcc.target/arm/pr111235.c
new file mode 100644
index 0000000000000000000000000000000000000000..923b231afa888d326bcdc0ecabbcf8ba223d365a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr111235.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c11 -O" } */
+/* { dg-require-effective-target arm_arch_v7a_ok } */
+/* { dg-add-options arm_arch_v7a } */
+
+#include <stdatomic.h>
+
+int t0 (int *p, int x)
+{
+  if (x > 100)
+    x = atomic_load_explicit (p, memory_order_relaxed);
+  return x + 1;
+}
+
+long long t1 (long long *p, int x)
+{
+  if (x > 100)
+    x = atomic_load_explicit (p, memory_order_relaxed);
+  return x + 1;
+}
+
+void t2 (int *p, int x)
+{
+  if (x > 100)
+    atomic_store_explicit (p, x, memory_order_relaxed);
+}
+
+void t3 (long long *p, long long x)
+{
+  if (x > 100)
+    atomic_store_explicit (p, x, memory_order_relaxed);
+}
+
+
+/* { dg-final { scan-assembler-times "ldrexd\tr\[0-9\]+, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dmb\tish" 1 } } */
+


end of thread, other threads:[~2023-10-20 16:47 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-07 14:06 [PATCH] ARM: Block predication on atomics [PR111235] Wilco Dijkstra
2023-09-26 22:52 ` Ramana Radhakrishnan
2023-09-27 19:40   ` [PATCH v2] " Wilco Dijkstra
2023-09-30 20:36     ` Ramana Radhakrishnan
2023-10-02 17:12       ` Wilco Dijkstra
2023-10-20 16:47         ` Richard Earnshaw
2023-10-03  8:23       ` Maxim Kuvyrkov
