public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5969] rs6000: Do not allow combining of multiple assemble quads [PR103548]
@ 2021-12-14 21:00 Peter Bergner
0 siblings, 0 replies; only message in thread
From: Peter Bergner @ 2021-12-14 21:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:15c02ab2569b3c4e27d6f133c013b15a9fa70177
commit r12-5969-g15c02ab2569b3c4e27d6f133c013b15a9fa70177
Author: Peter Bergner <bergner@linux.ibm.com>
Date: Tue Dec 14 14:50:41 2021 -0600
rs6000: Do not allow combining of multiple assemble quads [PR103548]
The compiler will gladly CSE the result of two __builtin_mma_build_acc
calls with the same four vector arguments, leading to illegal MMA
code being generated. The fix here is to make the mma_assemble_acc
pattern use an unspec_volatile to stop the CSE from happening.
2021-12-14 Peter Bergner <bergner@linux.ibm.com>
gcc/
PR target/103548
* config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE): Rename unspec from this...
(UNSPEC_VSX_ASSEMBLE): ...to this.
(UNSPECV_MMA_ASSEMBLE): New unspecv.
(vsx_assemble_pair): Use UNSPEC_VSX_ASSEMBLE.
(*vsx_assemble_pair): Likewise.
(mma_assemble_acc): Use UNSPECV_MMA_ASSEMBLE.
(*mma_assemble_acc): Likewise.
* config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle
UNSPEC_VOLATILE. Use UNSPEC_VSX_ASSEMBLE and UNSPECV_MMA_ASSEMBLE.
gcc/testsuite/
PR target/103548
* gcc.target/powerpc/mma-builtin-10-pair.c: New test.
* gcc.target/powerpc/mma-builtin-10-quad.c: New test.
Diff:
---
gcc/config/rs6000/mma.md | 38 ++++++++++++----------
gcc/config/rs6000/rs6000.c | 6 ++--
.../gcc.target/powerpc/mma-builtin-10-pair.c | 21 ++++++++++++
.../gcc.target/powerpc/mma-builtin-10-quad.c | 23 +++++++++++++
4 files changed, 68 insertions(+), 20 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index fa081608c4c..8a262054d5f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -29,7 +29,7 @@
;; Constants for creating unspecs
(define_c_enum "unspec"
- [UNSPEC_MMA_ASSEMBLE
+ [UNSPEC_VSX_ASSEMBLE
UNSPEC_MMA_EXTRACT
UNSPEC_MMA_PMXVBF16GER2
UNSPEC_MMA_PMXVBF16GER2NN
@@ -94,7 +94,8 @@
])
(define_c_enum "unspecv"
- [UNSPECV_MMA_XXSETACCZ
+ [UNSPECV_MMA_ASSEMBLE
+ UNSPECV_MMA_XXSETACCZ
])
;; MMA instructions with 1 accumulator argument
@@ -333,7 +334,7 @@
{
rtx src = gen_rtx_UNSPEC (OOmode,
gen_rtvec (2, operands[1], operands[2]),
- UNSPEC_MMA_ASSEMBLE);
+ UNSPEC_VSX_ASSEMBLE);
emit_move_insn (operands[0], src);
DONE;
})
@@ -345,7 +346,7 @@
[(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
- UNSPEC_MMA_ASSEMBLE))]
+ UNSPEC_VSX_ASSEMBLE))]
"TARGET_MMA"
"#"
"&& reload_completed"
@@ -353,7 +354,7 @@
{
rtx src = gen_rtx_UNSPEC (OOmode,
gen_rtvec (2, operands[1], operands[2]),
- UNSPEC_MMA_ASSEMBLE);
+ UNSPEC_VSX_ASSEMBLE);
rs6000_split_multireg_move (operands[0], src);
DONE;
})
@@ -399,10 +400,10 @@
(match_operand:V16QI 4 "mma_assemble_input_operand")]
"TARGET_MMA"
{
- rtx src = gen_rtx_UNSPEC (XOmode,
- gen_rtvec (4, operands[1], operands[2],
- operands[3], operands[4]),
- UNSPEC_MMA_ASSEMBLE);
+ rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
+ gen_rtvec (4, operands[1], operands[2],
+ operands[3], operands[4]),
+ UNSPECV_MMA_ASSEMBLE);
emit_move_insn (operands[0], src);
DONE;
})
@@ -412,21 +413,22 @@
(define_insn_and_split "*mma_assemble_acc"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
- (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
- (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
- (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
- UNSPEC_MMA_ASSEMBLE))]
+ (unspec_volatile:XO
+ [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+ (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+ (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+ (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+ UNSPECV_MMA_ASSEMBLE))]
"TARGET_MMA
&& fpr_reg_operand (operands[0], XOmode)"
"#"
"&& reload_completed"
[(const_int 0)]
{
- rtx src = gen_rtx_UNSPEC (XOmode,
- gen_rtvec (4, operands[1], operands[2],
- operands[3], operands[4]),
- UNSPEC_MMA_ASSEMBLE);
+ rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
+ gen_rtvec (4, operands[1], operands[2],
+ operands[3], operands[4]),
+ UNSPECV_MMA_ASSEMBLE);
rs6000_split_multireg_move (operands[0], src);
DONE;
})
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 70df511ff98..9fc1577be40 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -27071,9 +27071,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
return;
}
- if (GET_CODE (src) == UNSPEC)
+ if (GET_CODE (src) == UNSPEC
+ || GET_CODE (src) == UNSPEC_VOLATILE)
{
- gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
+ gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
+ || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
gcc_assert (REG_P (dst));
if (GET_MODE (src) == XOmode)
gcc_assert (FP_REGNO_P (REGNO (dst)));
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
new file mode 100644
index 00000000000..d8748d8e7d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
@@ -0,0 +1,21 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+foo (__vector_pair *dst, vec_t *src)
+{
+ __vector_pair pair0, pair1;
+ /* Adjacent loads should be combined into one lxvp instruction
+ and identical build pairs should be combined. */
+ __builtin_vsx_build_pair (&pair0, src[0], src[1]);
+ __builtin_vsx_build_pair (&pair1, src[0], src[1]);
+ dst[0] = pair0;
+ dst[2] = pair1;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
new file mode 100644
index 00000000000..02342c76f5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+foo (__vector_quad *dst, vec_t *src)
+{
+ __vector_quad quad0, quad1;
+ /* Adjacent loads should be combined into two lxvp instructions
+ and identical build accs should not be combined. */
+ __builtin_mma_build_acc (&quad0, src[0], src[1], src[2], src[3]);
+ __builtin_mma_build_acc (&quad1, src[0], src[1], src[2], src[3]);
+ dst[0] = quad0;
+ dst[2] = quad1;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-12-14 21:00 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-14 21:00 [gcc r12-5969] rs6000: Do not allow combining of multiple assemble quads [PR103548] Peter Bergner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).