public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/dmf001)] Implement DMF mma_assemble_acc and mma_disassemble_acc
@ 2022-10-11 23:17 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2022-10-11 23:17 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:8c57aac65cf34a2d6782f2234ebf8ef0b0066323
commit 8c57aac65cf34a2d6782f2234ebf8ef0b0066323
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Oct 11 19:17:40 2022 -0400
Implement DMF mma_assemble_acc and mma_disassemble_acc
2022-10-11 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE_ACC_DMF): New unspec.
(movxo_dmf): Use 'mma' type, not 'vecmove'.
(mma_assemble_acc): Split handling for non-DMF and DMF usage.
(mma_assemble_acc_p10): Rename from *mma_assemble_acc; handles the
non-DMF case.
(mma_assemble_acc_dmf): Build the accumulator from two vector pairs.
(*mma_disassemble_acc_p10): Rename from *mma_disassemble_acc; handles the
non-DMF case.
(*mma_disassemble_acc_dmf): New insn; on DMF, extract using dmxxexttdmr256.
Diff:
---
gcc/config/rs6000/mma.md | 81 +++++++++++++++++++++++++++++++-----------------
1 file changed, 53 insertions(+), 28 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index e198cd15350..a7ddb66c3de 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,6 +91,7 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
+ UNSPEC_MMA_ASSEMBLE_ACC_DMF
])
(define_c_enum "unspecv"
@@ -374,7 +375,7 @@
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
- [(set_attr "type" "vecload,vecstore,veclogical,vecmove,vecmove,vecmove")
+ [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
(set_attr "length" "*,*,16,*,*,*")
(set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
@@ -445,25 +446,38 @@
})
(define_expand "mma_assemble_acc"
- [(match_operand:XO 0 "fpr_reg_operand")
+ [(match_operand:XO 0 "register_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
(match_operand:V16QI 2 "mma_assemble_input_operand")
(match_operand:V16QI 3 "mma_assemble_input_operand")
(match_operand:V16QI 4 "mma_assemble_input_operand")]
"TARGET_MMA"
{
- rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
- gen_rtvec (4, operands[1], operands[2],
- operands[3], operands[4]),
- UNSPECV_MMA_ASSEMBLE);
- emit_move_insn (operands[0], src);
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx op4 = operands[4];
+
+ if (TARGET_DMF)
+ {
+ rtx vpair1 = gen_reg_rtx (OOmode);
+ rtx vpair2 = gen_reg_rtx (OOmode);
+ emit_insn (gen_vsx_assemble_pair (vpair1, op1, op2));
+ emit_insn (gen_vsx_assemble_pair (vpair2, op3, op4));
+ emit_insn (gen_mma_assemble_acc_dmf (op0, vpair1, vpair2));
+ }
+
+ else
+ emit_insn (gen_mma_assemble_acc_p10 (op0, op1, op2, op3, op4));
+
DONE;
})
;; We cannot update the four output registers atomically, so mark the output
-;; as an early clobber so we don't accidentally clobber the input operands. */
+;; as an early clobber so we don't accidentally clobber the input operands.
-(define_insn_and_split "*mma_assemble_acc"
+(define_insn_and_split "mma_assemble_acc_p10"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec_volatile:XO
[(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
@@ -471,7 +485,7 @@
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
UNSPECV_MMA_ASSEMBLE))]
- "TARGET_MMA
+ "TARGET_MMA && !TARGET_DMF
&& fpr_reg_operand (operands[0], XOmode)"
"#"
"&& reload_completed"
@@ -485,28 +499,30 @@
DONE;
})
+;; On a system with DMF, we build the accumulators from two vector pairs.
+
+(define_insn "mma_assemble_acc_dmf"
+ [(set (match_operand:XO 0 "dmf_operand" "=wD")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa")]
+ UNSPEC_MMA_ASSEMBLE_ACC_DMF))]
+ "TARGET_MMA && TARGET_DMF"
+ "dmxxinstdmr512 %0,%1,%2,0"
+ [(set_attr "type" "mma")])
+
(define_expand "mma_disassemble_acc"
- [(match_operand:V16QI 0 "mma_disassemble_output_operand")
- (match_operand:XO 1 "fpr_reg_operand")
- (match_operand 2 "const_0_to_3_operand")]
- "TARGET_MMA"
-{
- rtx src;
- int regoff = INTVAL (operands[2]);
- src = gen_rtx_UNSPEC (V16QImode,
- gen_rtvec (2, operands[1], GEN_INT (regoff)),
- UNSPEC_MMA_EXTRACT);
- emit_move_insn (operands[0], src);
- DONE;
-})
+ [(set (match_operand:V16QI 0 "register_operand")
+ (unspec:V16QI [(match_operand:XO 1 "register_operand")
+ (match_operand 2 "const_0_to_3_operand")]
+ UNSPEC_MMA_EXTRACT))]
+ "TARGET_MMA")
-(define_insn_and_split "*mma_disassemble_acc"
+(define_insn_and_split "*mma_disassemble_acc_p10"
[(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
- (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
- (match_operand 2 "const_0_to_3_operand")]
+ (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
+ (match_operand 2 "const_0_to_3_operand")]
UNSPEC_MMA_EXTRACT))]
- "TARGET_MMA
- && fpr_reg_operand (operands[1], XOmode)"
+ "TARGET_MMA"
"#"
"&& reload_completed"
[(const_int 0)]
@@ -518,6 +534,15 @@
DONE;
})
+(define_insn "*mma_disassemble_acc_dmf"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (unspec:V16QI [(match_operand:XO 1 "dmf_operand" "wD")
+ (match_operand 2 "const_0_to_3_operand")]
+ UNSPEC_MMA_EXTRACT))]
+ "TARGET_DMF"
+ "dmxxexttdmr256 %0,%1,2"
+ [(set_attr "type" "mma")])
+
;; MMA instructions that do not use their accumulators as an input, still must
;; not allow their vector operands to overlap the registers used by the
;; accumulator. We enforce this by marking the output as early clobber. If we
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-10-11 23:17 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-11 23:17 [gcc(refs/users/meissner/heads/dmf001)] Implement DMF mma_assemble_acc and mma_disassemble_acc Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).