public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5969] rs6000: Do not allow combining of multiple assemble quads [PR103548]
@ 2021-12-14 21:00 Peter Bergner
  0 siblings, 0 replies; only message in thread
From: Peter Bergner @ 2021-12-14 21:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:15c02ab2569b3c4e27d6f133c013b15a9fa70177

commit r12-5969-g15c02ab2569b3c4e27d6f133c013b15a9fa70177
Author: Peter Bergner <bergner@linux.ibm.com>
Date:   Tue Dec 14 14:50:41 2021 -0600

    rs6000: Do not allow combining of multiple assemble quads [PR103548]
    
    The compiler will gladly CSE the result of two __builtin_mma_build_acc
    calls with the same four vector arguments, leading to illegal MMA
    code being generated.  The fix here is to make the mma_assemble_acc
    pattern use an unspec_volatile to stop the CSE from happening.
    
    2021-12-14  Peter Bergner  <bergner@linux.ibm.com>
    
    gcc/
            PR target/103548
            * config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE): Rename unspec from this...
            (UNSPEC_VSX_ASSEMBLE): ...to this.
            (UNSPECV_MMA_ASSEMBLE): New unspecv.
            (vsx_assemble_pair): Use UNSPEC_VSX_ASSEMBLE.
            (*vsx_assemble_pair): Likewise.
            (mma_assemble_acc): Use UNSPECV_MMA_ASSEMBLE.
            (*mma_assemble_acc): Likewise.
            * config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle
            UNSPEC_VOLATILE.  Use UNSPEC_VSX_ASSEMBLE and UNSPECV_MMA_ASSEMBLE.
    
    gcc/testsuite/
            PR target/103548
            * gcc.target/powerpc/mma-builtin-10-pair.c: New test.
            * gcc.target/powerpc/mma-builtin-10-quad.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md                           | 38 ++++++++++++----------
 gcc/config/rs6000/rs6000.c                         |  6 ++--
 .../gcc.target/powerpc/mma-builtin-10-pair.c       | 21 ++++++++++++
 .../gcc.target/powerpc/mma-builtin-10-quad.c       | 23 +++++++++++++
 4 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index fa081608c4c..8a262054d5f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -29,7 +29,7 @@
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE
+  [UNSPEC_VSX_ASSEMBLE
    UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
@@ -94,7 +94,8 @@
   ])
 
 (define_c_enum "unspecv"
-  [UNSPECV_MMA_XXSETACCZ
+  [UNSPECV_MMA_ASSEMBLE
+   UNSPECV_MMA_XXSETACCZ
   ])
 
 ;; MMA instructions with 1 accumulator argument
@@ -333,7 +334,7 @@
 {
   rtx src = gen_rtx_UNSPEC (OOmode,
 			    gen_rtvec (2, operands[1], operands[2]),
-			    UNSPEC_MMA_ASSEMBLE);
+			    UNSPEC_VSX_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
@@ -345,7 +346,7 @@
   [(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
 	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
 		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
-		    UNSPEC_MMA_ASSEMBLE))]
+		   UNSPEC_VSX_ASSEMBLE))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
@@ -353,7 +354,7 @@
 {
   rtx src = gen_rtx_UNSPEC (OOmode,
 			    gen_rtvec (2, operands[1], operands[2]),
-			    UNSPEC_MMA_ASSEMBLE);
+			    UNSPEC_VSX_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
@@ -399,10 +400,10 @@
    (match_operand:V16QI 4 "mma_assemble_input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (XOmode,
-			    gen_rtvec (4, operands[1], operands[2],
-				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE);
+  rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
+			    	     gen_rtvec (4, operands[1], operands[2],
+				       		operands[3], operands[4]),
+			    	     UNSPECV_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
@@ -412,21 +413,22 @@
 
 (define_insn_and_split "*mma_assemble_acc"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		    UNSPEC_MMA_ASSEMBLE))]
+	(unspec_volatile:XO
+	  [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+	   (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+	  UNSPECV_MMA_ASSEMBLE))]
   "TARGET_MMA
    && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (XOmode,
-			    gen_rtvec (4, operands[1], operands[2],
-				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE);
+  rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
+			    	     gen_rtvec (4, operands[1], operands[2],
+				       		operands[3], operands[4]),
+			    	     UNSPECV_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 70df511ff98..9fc1577be40 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -27071,9 +27071,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  return;
 	}
 
-      if (GET_CODE (src) == UNSPEC)
+      if (GET_CODE (src) == UNSPEC
+	  || GET_CODE (src) == UNSPEC_VOLATILE)
 	{
-	  gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
+	  gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
+		      || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
 	  gcc_assert (REG_P (dst));
 	  if (GET_MODE (src) == XOmode)
 	    gcc_assert (FP_REGNO_P (REGNO (dst)));
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
new file mode 100644
index 00000000000..d8748d8e7d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
@@ -0,0 +1,21 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+typedef unsigned char  vec_t __attribute__((vector_size(16)));
+
+void
+foo (__vector_pair *dst, vec_t *src)
+{
+  __vector_pair pair0, pair1;
+  /* Adjacent loads should be combined into one lxvp instruction
+     and identical build pairs should be combined.  */
+  __builtin_vsx_build_pair (&pair0, src[0], src[1]);
+  __builtin_vsx_build_pair (&pair1, src[0], src[1]);
+  dst[0] = pair0;
+  dst[2] = pair1;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
new file mode 100644
index 00000000000..02342c76f5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+typedef unsigned char  vec_t __attribute__((vector_size(16)));
+
+void
+foo (__vector_quad *dst, vec_t *src)
+{
+  __vector_quad quad0, quad1;
+  /* Adjacent loads should be combined into two lxvp instructions
+     and identical build accs should not be combined.  */
+  __builtin_mma_build_acc (&quad0, src[0], src[1], src[2], src[3]);
+  __builtin_mma_build_acc (&quad1, src[0], src[1], src[2], src[3]);
+  dst[0] = quad0;
+  dst[2] = quad1;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-12-14 21:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-14 21:00 [gcc r12-5969] rs6000: Do not allow combining of multiple assemble quads [PR103548] Peter Bergner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).