public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/dmf003)] Make MMA insns support dmr registers.
@ 2022-11-05 2:58 Michael Meissner
0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2022-11-05 2:58 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:fbb3f89e02ee33d188e4c5080901f34de5b2499e
commit fbb3f89e02ee33d188e4c5080901f34de5b2499e
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Nov 4 22:57:54 2022 -0400
Make MMA insns support dmr registers.
2022-11-04 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (mma_<acc>): New define_expand to handle
mma_<acc> for dense math and non dense math.
(mma_<acc> insn): Restrict to non dense math.
(mma_xxsetaccz): Convert to define_expand to handle non dense math and
dense math.
(mma_xxsetaccz_p10): Rename from mma_xxsetaccz and restrict usage to non
dense math.
(mma_xxsetaccz_dm): Dense math version of mma_xxsetaccz.
(mma_<vv>): Add support for dense math.
(mma_<avv>): Likewise.
(mma_<pv>): Likewise.
(mma_<apv>): Likewise.
(mma_<vvi4i4i8>): Likewise.
(mma_<avvi4i4i8>): Likewise.
(mma_<vvi4i4i2>): Likewise.
(mma_<avvi4i4i2>): Likewise.
(mma_<vvi4i4>): Likewise.
(mma_<avvi4i4>): Likewise.
(mma_<pvi4i2>): Likewise.
(mma_<apvi4i2>): Likewise.
(mma_<vvi4i4i4>): Likewise.
(mma_<avvi4i4i4>): Likewise.
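* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
__PPC_DMR__ if dense math is enabled.
* config/rs6000/rs6000.cc (print_operand): Make %A handle only DMRs if
dense math and only FPRs if not dense math.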
(rs6000_split_multireg_move): Do not generate accumulator prime or
de-prime instructions if dense math.
gcc/testsuite/
* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok): New
target test.
Diff:
---
gcc/config/rs6000/mma.md | 247 ++++++++++++++--------
gcc/config/rs6000/rs6000-c.cc | 3 +
gcc/config/rs6000/rs6000.cc | 35 +--
gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 +++++++++++++++++
gcc/testsuite/lib/target-supports.exp | 19 ++
5 files changed, 389 insertions(+), 109 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index fe2e9c9e63e..835f34e8e00 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -545,190 +545,249 @@
"dmxxextfdmr256 %0,%1,2"
[(set_attr "type" "mma")])
-(define_insn "mma_<acc>"
+;; MMA instructions that do not use their accumulators as an input, still must
+;; not allow their vector operands to overlap the registers used by the
+;; accumulator. We enforce this by marking the output as early clobber. If we
+;; have dense math, we don't need the whole prime/de-prime action, so just make
+;; these instructions be NOPs.
+
+(define_expand "mma_<acc>"
+ [(set (match_operand:XO 0 "register_operand")
+ (unspec:XO [(match_operand:XO 1 "register_operand")]
+ MMA_ACC))]
+ "TARGET_MMA"
+{
+ if (TARGET_DENSE_MATH)
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ /* Generate the prime/de-prime code. */
+})
+
+(define_insn "*mma_<acc>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
- "TARGET_MMA"
+ "TARGET_MMA && !TARGET_DENSE_MATH"
"<acc> %A0"
[(set_attr "type" "mma")])
;; We can't have integer constants in XOmode so we wrap this in an
-;; UNSPEC_VOLATILE.
+;; UNSPEC_VOLATILE for the non-dense math case. For dense math, we don't need
+;; to disable optimization and we can do a normal UNSPEC.
-(define_insn "mma_xxsetaccz"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+(define_expand "mma_xxsetaccz"
+ [(set (match_operand:XO 0 "register_operand")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
"TARGET_MMA"
+{
+ if (TARGET_DENSE_MATH)
+ {
+ emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "*mma_xxsetaccz_p10"
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+ (unspec_volatile:XO [(const_int 0)]
+ UNSPECV_MMA_XXSETACCZ))]
+ "TARGET_MMA && !TARGET_DENSE_MATH"
"xxsetaccz %A0"
[(set_attr "type" "mma")])
+
+(define_insn "mma_xxsetaccz_dm"
+ [(set (match_operand:XO 0 "dmr_operand" "=wD")
+ (unspec:XO [(const_int 0)]
+ UNSPECV_MMA_XXSETACCZ))]
+ "TARGET_DENSE_MATH"
+ "dmsetaccz %0"
+ [(set_attr "type" "mma")])
+
(define_insn "mma_<vv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_VV))]
"TARGET_MMA"
"<vv> %A0,%x1,%x2"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_AVV))]
"TARGET_MMA"
"<avv> %A0,%x2,%x3"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<pv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_PV))]
"TARGET_MMA"
"<pv> %A0,%x1,%x2"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<apv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:OO 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_APV))]
"TARGET_MMA"
"<apv> %A0,%x2,%x3"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
MMA_AVVI4I4I8))]
"TARGET_MMA"
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4))]
"TARGET_MMA"
"<vvi4i4> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4))]
"TARGET_MMA"
"<avvi4i4> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<pvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
MMA_PVI4I2))]
"TARGET_MMA"
"<pvi4i2> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<apvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:OO 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_APVI4I2))]
"TARGET_MMA"
"<apvi4i2> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 3b781639be1..296aa3b1630 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -600,6 +600,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
/* Tell the user if we support the MMA instructions. */
if ((flags & OPTION_MASK_MMA) != 0)
rs6000_define_or_undefine_macro (define_p, "__MMA__");
+ /* Tell the user if we support the dense math instructions. */
+ if ((flags & OPTION_MASK_DENSE_MATH) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__PPC_DMR__");
/* Whether pc-relative code is being generated. */
if ((flags & OPTION_MASK_PCREL) != 0)
rs6000_define_or_undefine_macro (define_p, "__PCREL__");
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 03f43f70a62..caa908e7e2c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -13909,8 +13909,13 @@ print_operand (FILE *file, rtx x, int code)
overlapping with the FPR registers. */
if (!REG_P (x))
output_operand_lossage ("invalid %%A value");
- else if (TARGET_DENSE_MATH && DMR_REGNO_P (REGNO (x)))
- fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
+ else if (TARGET_DENSE_MATH)
+ {
+ if (DMR_REGNO_P (REGNO (x)))
+ fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
+ else
+ output_operand_lossage ("%%A operand is not a DMR");
+ }
else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
output_operand_lossage ("invalid %%A value");
else
@@ -27298,7 +27303,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
- if (TARGET_MMA
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27330,9 +27335,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst2, src2));
}
- /* If we are writing an accumulator register, we have to
- prime it after we've written it. */
- if (TARGET_MMA
+ /* If we are writing an accumulator register that overlaps with the
+ FPR registers, we have to prime it after we've written it. */
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
@@ -27401,9 +27406,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst_i, op));
}
- /* We are writing an accumulator register, so we have to
- prime it after we've written it. */
- if (GET_MODE (src) == XOmode)
+ /* On systems without dense math where accumulators overlap with the
+ vector registers, we have to prime it after we've written it. */
+ if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
emit_insn (gen_mma_xxmtacc (dst, dst));
return;
@@ -27414,9 +27419,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
- /* If we are reading an accumulator register, we have to
- deprime it before we can access it. */
- if (TARGET_MMA
+ /* If we are reading an accumulator register and we don't have dense
+ math, we have to deprime it before we can access it. */
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27444,7 +27449,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
- if (TARGET_MMA
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
}
@@ -27581,7 +27586,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
- if (TARGET_MMA && REG_P (src)
+ if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27613,7 +27618,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
- if (TARGET_MMA && REG_P (dst)
+ if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..51733d6f641
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math. */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J) \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+ rowC[0] += result[0]; \
+ rowC = (v4sf_t *) &CO[1*ldc+J]; \
+ rowC[0] += result[1]; \
+ rowC = (v4sf_t *) &CO[2*ldc+J]; \
+ rowC[0] += result[2]; \
+ rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+ __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+ v4sf_t result[4];
+ v4sf_t *rowC;
+ for (int l = 0; l < n; l += 4)
+ {
+ double *CO;
+ double *AO;
+ AO = A;
+ CO = C;
+ C += m * 4;
+ for (int j = 0; j < m; j += 16)
+ {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+ {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+ }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+ }
+ B += k * 4;
+ }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+ for (int j = 0; j < column; j++)
+ {
+ for (int i = 0; i < row; i++)
+ {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+ }
+ }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+ for (int j = 0; j < column; j++)
+ for (int i = 0; i < row; i++)
+ matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+ printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+ for (int i = 0; i < row; i++)
+ {
+ for (int j = 0; j < column; j++)
+ {
+ printf ("%f ", matrix[j * row + i]);
+ }
+ printf ("\n");
+ }
+ printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+ int rowsA, colsB, common;
+ int i, j, k;
+ int ret = 0;
+
+ for (int t = 16; t <= 128; t += 16)
+ {
+ for (int t1 = 4; t1 <= 16; t1 += 4)
+ {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B, C);
+
+ for (i = 0; i < colsB; i++)
+ {
+ for (j = 0; j < rowsA; j++)
+ {
+ D[i * rowsA + j] = 0;
+ for (k = 0; k < common; k++)
+ {
+ D[i * rowsA + j] +=
+ A[k * rowsA + j] * B[k + common * i];
+ }
+ }
+ }
+ for (i = 0; i < colsB; i++)
+ {
+ for (j = 0; j < rowsA; j++)
+ {
+ for (k = 0; k < common; k++)
+ {
+ if (D[i * rowsA + j] != C[i * rowsA + j])
+ {
+ printf ("Error %d,%d,%d\n",i,j,k);
+ ret++;
+ }
+ }
+ }
+ }
+ if (ret)
+ {
+ print ("A", A, rowsA, common);
+ print ("B", B, common, colsB);
+ print ("C", C, rowsA, colsB);
+ print ("D", D, rowsA, colsB);
+ }
+ }
+ }
+
+#ifdef VERBOSE
+ if (ret)
+ printf ("DM double test fail: %d errors\n",ret);
+ else
+ printf ("DM double test success: 0 DM errors\n");
+#else
+ if (ret)
+ abort();
+#endif
+
+ return ret;
+}
+
+/* { dg-final { scan-assembler-times {\mdmsetaccz\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mdmxvf64gerpp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 11 } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 0e45a2baed5..6826d150e65 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6534,6 +6534,25 @@ proc check_effective_target_power10_ok { } {
}
}
+# Return 1 if this is a PowerPC target supporting -mcpu=future or -mdense-math
+# which enables the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+ return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+ __vector_quad vq;
+ void test (void)
+ {
+ #ifndef __PPC_DMR__
+ #error "target does not have dense math support."
+ #else
+ /* Make sure we have dense math support. */
+ __vector_quad dmr;
+ __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+ vq = dmr;
+ #endif
+ }
+ } "-mcpu=future"]
+}
+
# Return 1 if this is a PowerPC target supporting -mfloat128 via either
# software emulation on power7/power8 systems or hardware support on power9.
^ permalink raw reply [flat|nested] 3+ messages in thread
* [gcc(refs/users/meissner/heads/dmf003)] Make MMA insns support dmr registers.
@ 2022-11-05 3:25 Michael Meissner
0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2022-11-05 3:25 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:abfc3f312686cd84ea1a0fad6e97fe450fb48168
commit abfc3f312686cd84ea1a0fad6e97fe450fb48168
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Nov 4 23:24:50 2022 -0400
Make MMA insns support dmr registers.
2022-11-04 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (mma_<acc>): New define_expand to handle
mma_<acc> for dense math and non dense math.
(mma_<acc> insn): Restrict to non dense math.
(mma_xxsetaccz): Convert to define_expand to handle non dense math and
dense math.
(mma_xxsetaccz_p10): Rename from mma_xxsetaccz and restrict usage to non
dense math.
(mma_xxsetaccz_dm): Dense math version of mma_xxsetaccz.
(mma_<vv>): Add support for dense math.
(mma_<avv>): Likewise.
(mma_<pv>): Likewise.
(mma_<apv>): Likewise.
(mma_<vvi4i4i8>): Likewise.
(mma_<avvi4i4i8>): Likewise.
(mma_<vvi4i4i2>): Likewise.
(mma_<avvi4i4i2>): Likewise.
(mma_<vvi4i4>): Likewise.
(mma_<avvi4i4>): Likewise.
(mma_<pvi4i2>): Likewise.
(mma_<apvi4i2>): Likewise.
(mma_<vvi4i4i4>): Likewise.
(mma_<avvi4i4i4>): Likewise.
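* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
__PPC_DMR__ if dense math is enabled.
* config/rs6000/rs6000.cc (print_operand): Make %A handle only DMRs if
dense math and only FPRs if not dense math.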
(rs6000_split_multireg_move): Do not generate accumulator prime or
de-prime instructions if dense math.
Diff:
---
gcc/config/rs6000/mma.md | 247 ++++++++++++++++++++++++++----------------
gcc/config/rs6000/rs6000-c.cc | 3 +
gcc/config/rs6000/rs6000.cc | 35 +++---
3 files changed, 176 insertions(+), 109 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index fe2e9c9e63e..835f34e8e00 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -545,190 +545,249 @@
"dmxxextfdmr256 %0,%1,2"
[(set_attr "type" "mma")])
-(define_insn "mma_<acc>"
+;; MMA instructions that do not use their accumulators as an input, still must
+;; not allow their vector operands to overlap the registers used by the
+;; accumulator. We enforce this by marking the output as early clobber. If we
+;; have dense math, we don't need the whole prime/de-prime action, so just make
+;; these instructions be NOPs.
+
+(define_expand "mma_<acc>"
+ [(set (match_operand:XO 0 "register_operand")
+ (unspec:XO [(match_operand:XO 1 "register_operand")]
+ MMA_ACC))]
+ "TARGET_MMA"
+{
+ if (TARGET_DENSE_MATH)
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ /* Generate the prime/de-prime code. */
+})
+
+(define_insn "*mma_<acc>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
- "TARGET_MMA"
+ "TARGET_MMA && !TARGET_DENSE_MATH"
"<acc> %A0"
[(set_attr "type" "mma")])
;; We can't have integer constants in XOmode so we wrap this in an
-;; UNSPEC_VOLATILE.
+;; UNSPEC_VOLATILE for the non-dense math case. For dense math, we don't need
+;; to disable optimization and we can do a normal UNSPEC.
-(define_insn "mma_xxsetaccz"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+(define_expand "mma_xxsetaccz"
+ [(set (match_operand:XO 0 "register_operand")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
"TARGET_MMA"
+{
+ if (TARGET_DENSE_MATH)
+ {
+ emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "*mma_xxsetaccz_p10"
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+ (unspec_volatile:XO [(const_int 0)]
+ UNSPECV_MMA_XXSETACCZ))]
+ "TARGET_MMA && !TARGET_DENSE_MATH"
"xxsetaccz %A0"
[(set_attr "type" "mma")])
+
+(define_insn "mma_xxsetaccz_dm"
+ [(set (match_operand:XO 0 "dmr_operand" "=wD")
+ (unspec:XO [(const_int 0)]
+ UNSPECV_MMA_XXSETACCZ))]
+ "TARGET_DENSE_MATH"
+ "dmsetaccz %0"
+ [(set_attr "type" "mma")])
+
(define_insn "mma_<vv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_VV))]
"TARGET_MMA"
"<vv> %A0,%x1,%x2"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_AVV))]
"TARGET_MMA"
"<avv> %A0,%x2,%x3"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<pv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_PV))]
"TARGET_MMA"
"<pv> %A0,%x1,%x2"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<apv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:OO 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_APV))]
"TARGET_MMA"
"<apv> %A0,%x2,%x3"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
MMA_AVVI4I4I8))]
"TARGET_MMA"
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4))]
"TARGET_MMA"
"<vvi4i4> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4))]
"TARGET_MMA"
"<avvi4i4> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<pvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
MMA_PVI4I2))]
"TARGET_MMA"
"<pvi4i2> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<apvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:OO 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_APVI4I2))]
"TARGET_MMA"
"<apvi4i2> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 3b781639be1..296aa3b1630 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -600,6 +600,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
/* Tell the user if we support the MMA instructions. */
if ((flags & OPTION_MASK_MMA) != 0)
rs6000_define_or_undefine_macro (define_p, "__MMA__");
+ /* Tell the user if we support the dense math instructions. */
+ if ((flags & OPTION_MASK_DENSE_MATH) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__PPC_DMR__");
/* Whether pc-relative code is being generated. */
if ((flags & OPTION_MASK_PCREL) != 0)
rs6000_define_or_undefine_macro (define_p, "__PCREL__");
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 03f43f70a62..caa908e7e2c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -13909,8 +13909,13 @@ print_operand (FILE *file, rtx x, int code)
overlapping with the FPR registers. */
if (!REG_P (x))
output_operand_lossage ("invalid %%A value");
- else if (TARGET_DENSE_MATH && DMR_REGNO_P (REGNO (x)))
- fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
+ else if (TARGET_DENSE_MATH)
+ {
+ if (DMR_REGNO_P (REGNO (x)))
+ fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
+ else
+ output_operand_lossage ("%%A operand is not a DMR");
+ }
else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
output_operand_lossage ("invalid %%A value");
else
@@ -27298,7 +27303,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
- if (TARGET_MMA
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27330,9 +27335,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst2, src2));
}
- /* If we are writing an accumulator register, we have to
- prime it after we've written it. */
- if (TARGET_MMA
+ /* If we are writing an accumulator register that overlaps with the
+ FPR registers, we have to prime it after we've written it. */
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
@@ -27401,9 +27406,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst_i, op));
}
- /* We are writing an accumulator register, so we have to
- prime it after we've written it. */
- if (GET_MODE (src) == XOmode)
+ /* On systems without dense math where accumulators overlap with the
+ vector registers, we have to prime it after we've written it. */
+ if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
emit_insn (gen_mma_xxmtacc (dst, dst));
return;
@@ -27414,9 +27419,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
- /* If we are reading an accumulator register, we have to
- deprime it before we can access it. */
- if (TARGET_MMA
+ /* If we are reading an accumulator register and we don't have dense
+ math, we have to deprime it before we can access it. */
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27444,7 +27449,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
- if (TARGET_MMA
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
}
@@ -27581,7 +27586,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
- if (TARGET_MMA && REG_P (src)
+ if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27613,7 +27618,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
- if (TARGET_MMA && REG_P (dst)
+ if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
^ permalink raw reply [flat|nested] 3+ messages in thread
* [gcc(refs/users/meissner/heads/dmf003)] Make MMA insns support dmr registers.
@ 2022-11-04 21:10 Michael Meissner
0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2022-11-04 21:10 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:de32c95595d363ead5f3e613212b569582d0bbd1
commit de32c95595d363ead5f3e613212b569582d0bbd1
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Nov 4 17:06:51 2022 -0400
Make MMA insns support dmr registers.
2022-11-04 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (mma_<acc>): New define_expand to handle
mma_<acc> for dense math and non dense math.
(mma_<acc> insn): Restrict to non dense math.
(mma_xxsetaccz): Convert to define_expand to handle non dense math and
dense math.
(mma_xxsetaccz_p10): Rename from mma_xxsetaccz and restrict usage to non
dense math.
(mma_xxsetaccz_dm): Dense math version of mma_xxsetaccz.
(mma_<vv>): Add support for dense math.
(mma_<avv>): Likewise.
(mma_<pv>): Likewise.
(mma_<apv>): Likewise.
(mma_<vvi4i4i8>): Likewise.
(mma_<avvi4i4i8>): Likewise.
(mma_<vvi4i4i2>): Likewise.
(mma_<avvi4i4i2>): Likewise.
(mma_<vvi4i4>): Likewise.
(mma_<avvi4i4>): Likewise.
(mma_<pvi4i2>): Likewise.
(mma_<apvi4i2>): Likewise.
(mma_<vvi4i4i4>): Likewise.
(mma_<avvi4i4i4>): Likewise.
* config/rs6000/rs6000.cc (print_operand): Make %A handle only DMRs if
dense math and only FPRs if not dense math.
(rs6000_split_multireg_move): Do not generate accumulator prime or
de-prime instructions if dense math.
Diff:
---
gcc/config/rs6000/mma.md | 247 +++++++++++++++++++++++++++-----------------
gcc/config/rs6000/rs6000.cc | 35 ++++---
2 files changed, 173 insertions(+), 109 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index fe2e9c9e63e..835f34e8e00 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -545,190 +545,249 @@
"dmxxextfdmr256 %0,%1,2"
[(set_attr "type" "mma")])
-(define_insn "mma_<acc>"
+;; MMA instructions that do not use their accumulators as an input, still must
+;; not allow their vector operands to overlap the registers used by the
+;; accumulator. We enforce this by marking the output as early clobber. If we
+;; have dense math, we don't need the whole prime/de-prime action, so just make
+;; these instructions be NOPs.
+
+(define_expand "mma_<acc>"
+ [(set (match_operand:XO 0 "register_operand")
+ (unspec:XO [(match_operand:XO 1 "register_operand")]
+ MMA_ACC))]
+ "TARGET_MMA"
+{
+ if (TARGET_DENSE_MATH)
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ /* Generate the prime/de-prime code. */
+})
+
+(define_insn "*mma_<acc>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
- "TARGET_MMA"
+ "TARGET_MMA && !TARGET_DENSE_MATH"
"<acc> %A0"
[(set_attr "type" "mma")])
;; We can't have integer constants in XOmode so we wrap this in an
-;; UNSPEC_VOLATILE.
+;; UNSPEC_VOLATILE for the non-dense math case. For dense math, we don't need
+;; to disable optimization and we can do a normal UNSPEC.
-(define_insn "mma_xxsetaccz"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+(define_expand "mma_xxsetaccz"
+ [(set (match_operand:XO 0 "register_operand")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
"TARGET_MMA"
+{
+ if (TARGET_DENSE_MATH)
+ {
+ emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "*mma_xxsetaccz_p10"
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+ (unspec_volatile:XO [(const_int 0)]
+ UNSPECV_MMA_XXSETACCZ))]
+ "TARGET_MMA && !TARGET_DENSE_MATH"
"xxsetaccz %A0"
[(set_attr "type" "mma")])
+
+(define_insn "mma_xxsetaccz_dm"
+ [(set (match_operand:XO 0 "dmr_operand" "=wD")
+ (unspec:XO [(const_int 0)]
+ UNSPECV_MMA_XXSETACCZ))]
+ "TARGET_DENSE_MATH"
+ "dmsetaccz %0"
+ [(set_attr "type" "mma")])
+
(define_insn "mma_<vv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_VV))]
"TARGET_MMA"
"<vv> %A0,%x1,%x2"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_AVV))]
"TARGET_MMA"
"<avv> %A0,%x2,%x3"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<pv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_PV))]
"TARGET_MMA"
"<pv> %A0,%x1,%x2"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<apv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:OO 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_APV))]
"TARGET_MMA"
"<apv> %A0,%x2,%x3"
- [(set_attr "type" "mma")])
+ [(set_attr "type" "mma")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
MMA_AVVI4I4I8))]
"TARGET_MMA"
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4))]
"TARGET_MMA"
"<vvi4i4> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4))]
"TARGET_MMA"
"<avvi4i4> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<pvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
MMA_PVI4I2))]
"TARGET_MMA"
"<pvi4i2> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<apvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:OO 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_APVI4I2))]
"TARGET_MMA"
"<apvi4i2> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<vvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
(define_insn "mma_<avvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
- (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
- (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n,n")
- (match_operand:SI 5 "const_0_to_15_operand" "n,n")
- (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
+ (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+ (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
- (set_attr "prefixed" "yes")])
+ (set_attr "prefixed" "yes")
+ (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 03f43f70a62..caa908e7e2c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -13909,8 +13909,13 @@ print_operand (FILE *file, rtx x, int code)
overlapping with the FPR registers. */
if (!REG_P (x))
output_operand_lossage ("invalid %%A value");
- else if (TARGET_DENSE_MATH && DMR_REGNO_P (REGNO (x)))
- fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
+ else if (TARGET_DENSE_MATH)
+ {
+ if (DMR_REGNO_P (REGNO (x)))
+ fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
+ else
+ output_operand_lossage ("%%A operand is not a DMR");
+ }
else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
output_operand_lossage ("invalid %%A value");
else
@@ -27298,7 +27303,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
- if (TARGET_MMA
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27330,9 +27335,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst2, src2));
}
- /* If we are writing an accumulator register, we have to
- prime it after we've written it. */
- if (TARGET_MMA
+ /* If we are writing an accumulator register that overlaps with the
+ FPR registers, we have to prime it after we've written it. */
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
@@ -27401,9 +27406,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst_i, op));
}
- /* We are writing an accumulator register, so we have to
- prime it after we've written it. */
- if (GET_MODE (src) == XOmode)
+ /* On systems without dense math, where accumulators overlap with the
+ vector registers, we have to prime the accumulator after we've written it. */
+ if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
emit_insn (gen_mma_xxmtacc (dst, dst));
return;
@@ -27414,9 +27419,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
- /* If we are reading an accumulator register, we have to
- deprime it before we can access it. */
- if (TARGET_MMA
+ /* If we are reading an accumulator register and we don't have dense
+ math, we have to deprime it before we can access it. */
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27444,7 +27449,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
- if (TARGET_MMA
+ if (TARGET_MMA && !TARGET_DENSE_MATH
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
}
@@ -27581,7 +27586,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
- if (TARGET_MMA && REG_P (src)
+ if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27613,7 +27618,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
- if (TARGET_MMA && REG_P (dst)
+ if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-11-05 3:25 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-05 2:58 [gcc(refs/users/meissner/heads/dmf003)] Make MMA insns support dmr registers Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2022-11-05 3:25 Michael Meissner
2022-11-04 21:10 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).