public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/autopar_devel] amdgcn: fix vcc clobber in vector load/store
@ 2020-08-22 21:16 Giuliano Belinassi
  0 siblings, 0 replies; only message in thread
From: Giuliano Belinassi @ 2020-08-22 21:16 UTC (permalink / raw)
  To: gcc-cvs

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 16513 bytes --]

https://gcc.gnu.org/g:552209a0adae51f88bc02ffecdb66a6385d35dd6

commit 552209a0adae51f88bc02ffecdb66a6385d35dd6
Author: Andrew Stubbs <ams@codesourcery.com>
Date:   Wed May 13 16:05:54 2020 +0100

    amdgcn: fix vcc clobber in vector load/store
    
    This switches the code that expands scalar addresses to vectors of addresses
    from using VCC to using CC_SAVE_REG, for the lo-part to hi-part carry values.
    These were fine in code expanded in earlier passes, but addresses expanded
    late, such as for stack spills or reloads, could clobber live VCC values,
    causing execution failures.
    
    This is the first target-specific testcase for GCN, so the new .exp file is
    included.
    
    2020-05-14  Andrew Stubbs  <ams@codesourcery.com>
    
            gcc/
            * config/gcn/gcn-valu.md (add<mode>3_zext_dup): Change to a
            define_expand, and rename the original to ...
            (add<mode>3_vcc_zext_dup): ... this, and add a custom VCC operand.
            (add<mode>3_zext_dup_exec): Likewise, with ...
            (add<mode>3_vcc_zext_dup_exec): ... this.
            (add<mode>3_zext_dup2): Likewise, with ...
            (add<mode>3_vcc_zext_dup2): ... this.
            (add<mode>3_zext_dup2_exec): Likewise, with ...
            (add<mode>3_vcc_zext_dup2_exec): ... this.
            * config/gcn/gcn.c (gcn_expand_scalar_to_vector_address): Switch
            addv64di3_zext* calls to use addv64di3_vcc_zext*.
    
            gcc/testsuite/
            * gcc.target/gcn/gcn.exp: New file.
            * gcc.target/gcn/vcc-clobber.c: New file.

Diff:
---
 gcc/ChangeLog                              |  14 +++
 gcc/config/gcn/gcn-valu.md                 | 161 +++++++++++++++++++++--------
 gcc/config/gcn/gcn.c                       |  13 +--
 gcc/testsuite/ChangeLog                    |   5 +
 gcc/testsuite/gcc.target/gcn/gcn.exp       |  42 ++++++++
 gcc/testsuite/gcc.target/gcn/vcc-clobber.c |  33 ++++++
 6 files changed, 217 insertions(+), 51 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2d6b6c07a3b..27b4b04f9b0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2020-05-14  Andrew Stubbs  <ams@codesourcery.com>
+
+	* config/gcn/gcn-valu.md (add<mode>3_zext_dup): Change to a
+	define_expand, and rename the original to ...
+	(add<mode>3_vcc_zext_dup): ... this, and add a custom VCC operand.
+	(add<mode>3_zext_dup_exec): Likewise, with ...
+	(add<mode>3_vcc_zext_dup_exec): ... this.
+	(add<mode>3_zext_dup2): Likewise, with ...
+	(add<mode>3_vcc_zext_dup2): ... this.
+	(add<mode>3_zext_dup2_exec): Likewise, with ...
+	(add<mode>3_vcc_zext_dup2_exec): ... this.
+	* config/gcn/gcn.c (gcn_expand_scalar_to_vector_address): Switch
+	addv64di3_zext* calls to use addv64di3_vcc_zext*.
+
 2020-05-14  Uroš Bizjak  <ubizjak@gmail.com>
 
 	PR target/95046
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index d3badb4059c..a43d6b6c6f3 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1379,135 +1379,206 @@
   [(set_attr "type" "vmult")
    (set_attr "length" "8")])
 
-(define_insn_and_split "add<mode>3_zext_dup"
-  [(set (match_operand:V_DI 0 "register_operand"    "= v,  v")
+(define_insn_and_split "add<mode>3_vcc_zext_dup"
+  [(set (match_operand:V_DI 0 "register_operand"    "=   v,   v")
 	(plus:V_DI
 	  (zero_extend:V_DI
 	    (vec_duplicate:<VnSI>
-	      (match_operand:SI 1 "gcn_alu_operand" "BSv,ASv")))
-	  (match_operand:V_DI 2 "gcn_alu_operand"   "vDA,vDb")))
-   (clobber (reg:DI VCC_REG))]
+	      (match_operand:SI 1 "gcn_alu_operand" "  BSv, ASv")))
+	  (match_operand:V_DI 2 "gcn_alu_operand"   "  vDA, vDb")))
+   (set (match_operand:DI 3 "register_operand"	    "=SgcV,SgcV")
+	(ltu:DI (plus:V_DI 
+		  (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
+		  (match_dup 2))
+		(match_dup 1)))]
   ""
   "#"
   "gcn_can_split_p  (<MODE>mode, operands[0])
    && gcn_can_split_p (<MODE>mode, operands[2])"
   [(const_int 0)]
   {
-    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
     emit_insn (gen_add<vnsi>3_vcc_dup
 		(gcn_operand_part (<MODE>mode, operands[0], 0),
 		 gcn_operand_part (DImode, operands[1], 0),
 		 gcn_operand_part (<MODE>mode, operands[2], 0),
-		 vcc));
+		 operands[3]));
     emit_insn (gen_addc<vnsi>3
 		(gcn_operand_part (<MODE>mode, operands[0], 1),
 		 gcn_operand_part (<MODE>mode, operands[2], 1),
-		 const0_rtx, vcc, vcc));
+		 const0_rtx, operands[3], operands[3]));
     DONE;
   }
   [(set_attr "type" "vmult")
    (set_attr "length" "8")])
 
-(define_insn_and_split "add<mode>3_zext_dup_exec"
-  [(set (match_operand:V_DI 0 "register_operand"		 "= v,  v")
+(define_expand "add<mode>3_zext_dup"
+  [(match_operand:V_DI 0 "register_operand")
+   (match_operand:SI 1 "gcn_alu_operand")
+   (match_operand:V_DI 2 "gcn_alu_operand")]
+  ""
+  {
+    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+    emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
+					    operands[2], vcc));
+    DONE;
+  })
+
+(define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
+  [(set (match_operand:V_DI 0 "register_operand"		"=   v,   v")
 	(vec_merge:V_DI
 	  (plus:V_DI
 	    (zero_extend:V_DI
 	      (vec_duplicate:<VnSI>
-		(match_operand:SI 1 "gcn_alu_operand"		 "ASv,BSv")))
-	    (match_operand:V_DI 2 "gcn_alu_operand"		 "vDb,vDA"))
-	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
-	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "  e,  e")))
-   (clobber (reg:DI VCC_REG))]
+		(match_operand:SI 1 "gcn_alu_operand"		"  ASv, BSv")))
+	    (match_operand:V_DI 2 "gcn_alu_operand"		"  vDb, vDA"))
+	  (match_operand:V_DI 4 "gcn_register_or_unspec_operand" "  U0,  U0")
+	  (match_operand:DI 5 "gcn_exec_reg_operand"		"    e,   e")))
+   (set (match_operand:DI 3 "register_operand"			"=SgcV,SgcV")
+	(and:DI
+	  (ltu:DI (plus:V_DI 
+		    (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
+		    (match_dup 2))
+		  (match_dup 1))
+	  (match_dup 5)))]
   ""
   "#"
   "gcn_can_split_p  (<MODE>mode, operands[0])
    && gcn_can_split_p (<MODE>mode, operands[2])
-   && gcn_can_split_p (<MODE>mode, operands[3])"
+   && gcn_can_split_p (<MODE>mode, operands[4])"
   [(const_int 0)]
   {
-    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
     emit_insn (gen_add<vnsi>3_vcc_dup_exec
 		(gcn_operand_part (<MODE>mode, operands[0], 0),
 		 gcn_operand_part (DImode, operands[1], 0),
 		 gcn_operand_part (<MODE>mode, operands[2], 0),
-		 vcc,
-		 gcn_operand_part (<MODE>mode, operands[3], 0),
-		 operands[4]));
+		 operands[3],
+		 gcn_operand_part (<MODE>mode, operands[4], 0),
+		 operands[5]));
     emit_insn (gen_addc<vnsi>3_exec
 		(gcn_operand_part (<MODE>mode, operands[0], 1),
 		 gcn_operand_part (<MODE>mode, operands[2], 1),
-		 const0_rtx, vcc, vcc,
-		 gcn_operand_part (<MODE>mode, operands[3], 1),
-		 operands[4]));
+		 const0_rtx, operands[3], operands[3],
+		 gcn_operand_part (<MODE>mode, operands[4], 1),
+		 operands[5]));
     DONE;
   }
   [(set_attr "type" "vmult")
    (set_attr "length" "8")])
 
-(define_insn_and_split "add<mode>3_zext_dup2"
-  [(set (match_operand:V_DI 0 "register_operand"		      "=  v")
+(define_expand "add<mode>3_zext_dup_exec"
+  [(match_operand:V_DI 0 "register_operand")
+   (match_operand:SI 1 "gcn_alu_operand")
+   (match_operand:V_DI 2 "gcn_alu_operand")
+   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
+   (match_operand:DI 4 "gcn_exec_reg_operand")]
+  ""
+  {
+    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+    emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
+						 operands[2], vcc, operands[3],
+						 operands[4]));
+    DONE;
+  })
+
+(define_insn_and_split "add<mode>3_vcc_zext_dup2"
+  [(set (match_operand:V_DI 0 "register_operand"		    "=   v")
 	(plus:V_DI
 	  (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
-	  (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"   "DbSv"))))
-   (clobber (reg:DI VCC_REG))]
+	  (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
+   (set (match_operand:DI 3 "register_operand"			    "=SgcV")
+	(ltu:DI (plus:V_DI 
+		  (zero_extend:V_DI (match_dup 1))
+		  (vec_duplicate:V_DI (match_dup 2)))
+		(match_dup 1)))]
   ""
   "#"
   "gcn_can_split_p (<MODE>mode, operands[0])"
   [(const_int 0)]
   {
-    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
     emit_insn (gen_add<vnsi>3_vcc_dup
 		(gcn_operand_part (<MODE>mode, operands[0], 0),
 		 gcn_operand_part (DImode, operands[2], 0),
 		 operands[1],
-		 vcc));
+		 operands[3]));
     rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
     emit_insn (gen_vec_duplicate<vnsi>
 		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
-    emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, vcc, vcc));
+    emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
+				operands[3]));
     DONE;
   }
   [(set_attr "type" "vmult")
    (set_attr "length" "8")])
 
-(define_insn_and_split "add<mode>3_zext_dup2_exec"
-  [(set (match_operand:V_DI 0 "register_operand"		       "= v")
+(define_expand "add<mode>3_zext_dup2"
+  [(match_operand:V_DI 0 "register_operand")
+   (match_operand:<VnSI> 1 "gcn_alu_operand")
+   (match_operand:DI 2 "gcn_alu_operand")]
+  ""
+  {
+    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+    emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
+					     operands[2], vcc));
+    DONE;
+  })
+
+(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
+  [(set (match_operand:V_DI 0 "register_operand"		     "=   v")
 	(vec_merge:V_DI
 	  (plus:V_DI
 	    (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
 	    (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"  "BSv")))
-	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand"       " U0")
-	  (match_operand:DI 4 "gcn_exec_reg_operand"		       "  e")))
-   (clobber (reg:DI VCC_REG))]
+	  (match_operand:V_DI 4 "gcn_register_or_unspec_operand"     "   U0")
+	  (match_operand:DI 5 "gcn_exec_reg_operand"		     "    e")))
+   (set (match_operand:DI 3 "register_operand"			     "=SgcV")
+	(and:DI
+	  (ltu:DI (plus:V_DI 
+		    (zero_extend:V_DI (match_dup 1))
+		    (vec_duplicate:V_DI (match_dup 2)))
+		  (match_dup 1))
+	  (match_dup 5)))]
   ""
   "#"
   "gcn_can_split_p  (<MODE>mode, operands[0])
-   && gcn_can_split_p (<MODE>mode, operands[3])"
+   && gcn_can_split_p (<MODE>mode, operands[4])"
   [(const_int 0)]
   {
-    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
     emit_insn (gen_add<vnsi>3_vcc_dup_exec
 		(gcn_operand_part (<MODE>mode, operands[0], 0),
 		 gcn_operand_part (DImode, operands[2], 0),
 		 operands[1],
-		 vcc,
-		 gcn_operand_part (<MODE>mode, operands[3], 0),
-		 operands[4]));
+		 operands[3],
+		 gcn_operand_part (<MODE>mode, operands[4], 0),
+		 operands[5]));
     rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
     emit_insn (gen_vec_duplicate<vnsi>_exec
 		(dsthi, gcn_operand_part (DImode, operands[2], 1),
-		 gcn_operand_part (<MODE>mode, operands[3], 1),
-		 operands[4]));
+		 gcn_operand_part (<MODE>mode, operands[4], 1),
+		 operands[5]));
     emit_insn (gen_addc<vnsi>3_exec
-		(dsthi, dsthi, const0_rtx, vcc, vcc,
-		 gcn_operand_part (<MODE>mode, operands[3], 1),
-		 operands[4]));
+		(dsthi, dsthi, const0_rtx, operands[3], operands[3],
+		 gcn_operand_part (<MODE>mode, operands[4], 1),
+		 operands[5]));
     DONE;
   }
   [(set_attr "type" "vmult")
    (set_attr "length" "8")])
 
+(define_expand "add<mode>3_zext_dup2_exec"
+  [(match_operand:V_DI 0 "register_operand")
+   (match_operand:<VnSI> 1 "gcn_alu_operand")
+   (match_operand:DI 2 "gcn_alu_operand")
+   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
+   (match_operand:DI 4 "gcn_exec_reg_operand")]
+  ""
+  {
+    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+    emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
+						  operands[2], vcc,
+						  operands[3], operands[4]));
+    DONE;
+  })
+
 (define_insn_and_split "add<mode>3_sext_dup2"
   [(set (match_operand:V_DI 0 "register_operand"		      "= v")
 	(plus:V_DI
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 38b5b98c7c8..39eb8fd283f 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -1786,9 +1786,10 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
 
   if (AS_FLAT_P (as))
     {
+      rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
+
       if (REG_P (tmp))
 	{
-	  rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
 	  rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
 	  rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
 	  rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
@@ -1809,17 +1810,17 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
 					      vcc, vcc, undef_v64si, exec));
 	    }
 	  else
-	    emit_insn (gen_addv64di3_zext_dup (tmp, mem_base_lo, tmp));
+	    emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
 	}
       else
 	{
 	  tmp = gen_reg_rtx (V64DImode);
 	  if (exec)
-	    emit_insn (gen_addv64di3_zext_dup2_exec (tmp, tmplo, mem_base,
-						     gcn_gen_undef (V64DImode),
-						     exec));
+	    emit_insn (gen_addv64di3_vcc_zext_dup2_exec
+		       (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
+			exec));
 	  else
-	    emit_insn (gen_addv64di3_zext_dup2 (tmp, tmplo, mem_base));
+	    emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
 	}
 
       new_base = tmp;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d82df8ea961..7319b2026cd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-05-14  Andrew Stubbs  <ams@codesourcery.com>
+
+	* gcc.target/gcn/gcn.exp: New file.
+	* gcc.target/gcn/vcc-clobber.c: New file.
+
 2020-05-14  Uroš Bizjak  <ubizjak@gmail.com>
 
 	PR target/95046
diff --git a/gcc/testsuite/gcc.target/gcn/gcn.exp b/gcc/testsuite/gcc.target/gcn/gcn.exp
new file mode 100644
index 00000000000..0e799e8bc80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/gcn.exp
@@ -0,0 +1,42 @@
+# Specific regression driver for amdgcn.
+# Copyright (C) 2020 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an amdgcn target.
+if ![istarget amdgcn*-*-*] then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
+	"" $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/gcn/vcc-clobber.c b/gcc/testsuite/gcc.target/gcn/vcc-clobber.c
new file mode 100644
index 00000000000..e52733cf1e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/vcc-clobber.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Test that gcn_expand_scalar_to_vector_address does not clobber VCC.
+   If it does then spills and reloads will be unsafe, leading to unexpected
+   conditional branch behaviour.  */
+
+extern void abort ();
+
+__attribute__((vector_size(256))) int vec[2] = {{0}, {0}};
+
+int
+main()
+{
+  long vcc = 0;
+
+  /* Load a known value into VCC.  The memory barrier ensures that the vector
+     load must happen after this point.  */
+  asm volatile ("s_mov_b32 vcc_lo, 0x12345689\n\t"
+		"s_mov_b32 vcc_hi, 0xabcdef0"
+		::: "memory");
+
+  /* Compiler inserts vector load here.  */
+
+  /* Consume the arbitrary vector, and return the current value of VCC.  */
+  asm volatile ("; no-op" : "=cV"(vcc) : "v"(vec[0]), "v"(vec[1]));
+
+  /* The value should match the initialized value.  */
+  if (vcc != 0xabcdef012345689)
+    abort ();
+
+  return 0;
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-22 21:16 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-22 21:16 [gcc/devel/autopar_devel] amdgcn: fix vcc clobber in vector load/store Giuliano Belinassi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).