public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-7092] [nvptx] Fix .local atomic regressions
@ 2022-02-08  9:01 Tom de Vries
  0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2022-02-08  9:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:04b54cc486cc6fcc40380445e500eaf46d7901dc

commit r12-7092-g04b54cc486cc6fcc40380445e500eaf46d7901dc
Author: Tom de Vries <tdevries@suse.de>
Date:   Thu Feb 3 14:00:02 2022 +0100

    [nvptx] Fix .local atomic regressions
    
    In PR target/104364, two problems were reported:
    - in muniform-simt mode, an atom.cas insn is no longer executed in the
      "master lane" only.
    - in msoft-stack mode, an __atomic_compare_exchange_n on stack memory is
      translated assuming it accesses local memory, while that's not the case.
    
    Fix these by:
    - ensuring that all insns with the atomic attribute are also predicable, so
      that the validate_change in nvptx_reorg_uniform_simt succeeds, and
      asserting that it does, and
    - guarding the local atomics implementation with a new function
      nvptx_mem_local_p that correctly handles msoft-stack.
    
    Tested on x86_64 with nvptx accelerator.
    
    gcc/ChangeLog:
    
    2022-02-04  Tom de Vries  <tdevries@suse.de>
    
            PR target/104364
            * config/nvptx/nvptx-protos.h (nvptx_mem_local_p): Declare.
            * config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Assert that
            change is validated.
            (nvptx_mem_local_p): New function.
            * config/nvptx/nvptx.md: Use nvptx_mem_local_p.
            (define_c_enum "unspecv"): Add UNSPECV_CAS_LOCAL.
            (define_insn "atomic_compare_and_swap<mode>_1_local"): New
            non-atomic, non-predicable define_insn, factored out of ...
            (define_insn "atomic_compare_and_swap<mode>_1"): ... here.
            Make predicable again.
            (define_expand "atomic_compare_and_swap<mode>"): Use
            atomic_compare_and_swap<mode>_1_local.
    
    gcc/testsuite/ChangeLog:
    
    2022-02-04  Tom de Vries  <tdevries@suse.de>
    
            PR target/104364
            * gcc.target/nvptx/softstack-2.c: New test.
            * gcc.target/nvptx/uniform-simt-1.c: New test.

Diff:
---
 gcc/config/nvptx/nvptx-protos.h                 |  1 +
 gcc/config/nvptx/nvptx.cc                       | 25 +++++++++-
 gcc/config/nvptx/nvptx.md                       | 63 +++++++++++++------------
 gcc/testsuite/gcc.target/nvptx/softstack-2.c    | 11 +++++
 gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c | 18 +++++++
 5 files changed, 87 insertions(+), 31 deletions(-)

diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index 3d6ad148cb4..a846e341917 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -59,5 +59,6 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
 extern const char *nvptx_output_simt_exit (rtx);
 extern const char *nvptx_output_red_partition (rtx, rtx);
 extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int);
+extern bool nvptx_mem_local_p (rtx);
 #endif
 #endif
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index b3bb97c3c14..2a694926b7a 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -3150,7 +3150,8 @@ nvptx_reorg_uniform_simt ()
       rtx pred = nvptx_get_unisimt_predicate ();
       pred = gen_rtx_NE (BImode, pred, const0_rtx);
       pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
-      validate_change (insn, &PATTERN (insn), pat, false);
+      bool changed_p = validate_change (insn, &PATTERN (insn), pat, false);
+      gcc_assert (changed_p);
     }
 }
 
@@ -6894,6 +6895,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type)
   return default_libc_has_function (fn_class, type);
 }
 
+bool
+nvptx_mem_local_p (rtx mem)
+{
+  gcc_assert (GET_CODE (mem) == MEM);
+
+  struct address_info info;
+  decompose_mem_address (&info, mem);
+
+  if (info.base != NULL && REG_P (*info.base)
+      && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+    {
+      if (TARGET_SOFT_STACK)
+	{
+	  /* Frame-related doesn't mean local.  */
+	}
+      else
+	return true;
+    }
+
+  return false;
+}
+
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
 
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 92768dd9e95..d64dbfd8b33 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -54,6 +54,7 @@
 (define_c_enum "unspecv" [
    UNSPECV_LOCK
    UNSPECV_CAS
+   UNSPECV_CAS_LOCAL
    UNSPECV_XCHG
    UNSPECV_BARSYNC
    UNSPECV_WARPSYNC
@@ -1771,8 +1772,14 @@
    (match_operand:SI 7 "const_int_operand")]		;; failure model
   ""
 {
-  emit_insn (gen_atomic_compare_and_swap<mode>_1
-    (operands[1], operands[2], operands[3], operands[4], operands[6]));
+  if (nvptx_mem_local_p (operands[2]))
+    emit_insn (gen_atomic_compare_and_swap<mode>_1_local
+		(operands[1], operands[2], operands[3], operands[4],
+		 operands[6]));
+  else
+    emit_insn (gen_atomic_compare_and_swap<mode>_1
+		(operands[1], operands[2], operands[3], operands[4],
+		 operands[6]));
 
   rtx cond = gen_reg_rtx (BImode);
   emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
@@ -1780,23 +1787,18 @@
   DONE;
 })
 
-(define_insn "atomic_compare_and_swap<mode>_1"
+(define_insn "atomic_compare_and_swap<mode>_1_local"
   [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
 	(unspec_volatile:SDIM
 	  [(match_operand:SDIM 1 "memory_operand" "+m")
 	   (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
 	   (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
 	   (match_operand:SI 4 "const_int_operand")]
-	  UNSPECV_CAS))
+	  UNSPECV_CAS_LOCAL))
    (set (match_dup 1)
-	(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+	(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
   ""
   {
-    struct address_info info;
-    decompose_mem_address (&info, operands[1]);
-    if (info.base != NULL && REG_P (*info.base)
-	&& REGNO_PTR_FRAME_P (REGNO (*info.base)))
-      {
 	output_asm_insn ("{", NULL);
 	output_asm_insn ("\\t"	      ".reg.pred"  "\\t" "%%eq_p;", NULL);
 	output_asm_insn ("\\t"	      ".reg%t0"	   "\\t" "%%val;", operands);
@@ -1807,13 +1809,26 @@
 	output_asm_insn ("\\t"	      "mov%t0"	   "\\t" "%0,%%val;", operands);
 	output_asm_insn ("}", NULL);
 	return "";
-      }
+  }
+  [(set_attr "predicable" "false")])
+
+(define_insn "atomic_compare_and_swap<mode>_1"
+  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+	(unspec_volatile:SDIM
+	  [(match_operand:SDIM 1 "memory_operand" "+m")
+	   (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
+	   (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
+	   (match_operand:SI 4 "const_int_operand")]
+	  UNSPECV_CAS))
+   (set (match_dup 1)
+	(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+  ""
+  {
     const char *t
-      = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
+      = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
     return nvptx_output_atomic_insn (t, operands, 1, 4);
   }
-  [(set_attr "atomic" "true")
-   (set_attr "predicable" "false")])
+  [(set_attr "atomic" "true")])
 
 (define_insn "atomic_exchange<mode>"
   [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")	;; output
@@ -1825,10 +1840,7 @@
 	(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))]	;; input
   ""
   {
-    struct address_info info;
-    decompose_mem_address (&info, operands[1]);
-    if (info.base != NULL && REG_P (*info.base)
-	&& REGNO_PTR_FRAME_P (REGNO (*info.base)))
+    if (nvptx_mem_local_p (operands[1]))
       {
 	output_asm_insn ("{", NULL);
 	output_asm_insn ("\\t"	 ".reg%t0"  "\\t" "%%val;", operands);
@@ -1855,10 +1867,7 @@
 	(match_dup 1))]
   ""
   {
-    struct address_info info;
-    decompose_mem_address (&info, operands[1]);
-    if (info.base != NULL && REG_P (*info.base)
-	&& REGNO_PTR_FRAME_P (REGNO (*info.base)))
+    if (nvptx_mem_local_p (operands[1]))
       {
 	output_asm_insn ("{", NULL);
 	output_asm_insn ("\\t"	 ".reg%t0"  "\\t" "%%val;", operands);
@@ -1888,10 +1897,7 @@
 	(match_dup 1))]
   ""
   {
-    struct address_info info;
-    decompose_mem_address (&info, operands[1]);
-    if (info.base != NULL && REG_P (*info.base)
-	&& REGNO_PTR_FRAME_P (REGNO (*info.base)))
+    if (nvptx_mem_local_p (operands[1]))
       {
 	output_asm_insn ("{", NULL);
 	output_asm_insn ("\\t"	 ".reg%t0"  "\\t" "%%val;", operands);
@@ -1924,10 +1930,7 @@
 	(match_dup 1))]
   "<MODE>mode == SImode || TARGET_SM35"
   {
-    struct address_info info;
-    decompose_mem_address (&info, operands[1]);
-    if (info.base != NULL && REG_P (*info.base)
-	&& REGNO_PTR_FRAME_P (REGNO (*info.base)))
+    if (nvptx_mem_local_p (operands[1]))
       {
 	output_asm_insn ("{", NULL);
 	output_asm_insn ("\\t"	 ".reg.b%T0"    "\\t" "%%val;", operands);
diff --git a/gcc/testsuite/gcc.target/nvptx/softstack-2.c b/gcc/testsuite/gcc.target/nvptx/softstack-2.c
new file mode 100644
index 00000000000..cccfda947d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/softstack-2.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -msoft-stack" } */
+
+int
+f (void)
+{
+  int a = 0;
+  return __sync_lock_test_and_set (&a, 1);
+}
+
+/* { dg-final { scan-assembler-times "atom.exch" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c
new file mode 100644
index 00000000000..1bc0adae014
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -muniform-simt" } */
+
+enum memmodel
+{
+  MEMMODEL_RELAXED = 0,
+};
+
+int a = 0;
+
+int
+f (void)
+{
+  int expected = 1;
+  return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED,
+				      MEMMODEL_RELAXED);
+}
+
+/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-02-08  9:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-08  9:01 [gcc r12-7092] [nvptx] Fix .local atomic regressions Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).