public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/dmf001)] Implement DMF mma_assemble_acc and mma_disassemble_acc
@ 2022-10-11 23:17 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2022-10-11 23:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8c57aac65cf34a2d6782f2234ebf8ef0b0066323

commit 8c57aac65cf34a2d6782f2234ebf8ef0b0066323
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Oct 11 19:17:40 2022 -0400

    Implement DMF mma_assemble_acc and mma_disassemble_acc
    
    2022-10-11   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE_ACC_DMF): New unspec.
            (movxo_dmf): Use 'mma' type, not 'vecmove'.
            (mma_assemble_acc): Split handling for non-DMF and DMF usage.
            (mma_assemble_acc_p10): Rename from mma_assemble_acc to handle non-DMF
            case.
            (mma_assemble_acc_dmf): Build the accumulator from two vector pairs.
            (mma_disassemble_acc_p10): Rename from mma_disassemble_acc to handle non-DMF
            case.
            (mma_disassemble_acc_dmf): Implement on DMF to use dmxxexttdmr256.

Diff:
---
 gcc/config/rs6000/mma.md | 81 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index e198cd15350..a7ddb66c3de 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,6 +91,7 @@
    UNSPEC_MMA_XVI8GER4SPP
    UNSPEC_MMA_XXMFACC
    UNSPEC_MMA_XXMTACC
+   UNSPEC_MMA_ASSEMBLE_ACC_DMF
   ])
 
 (define_c_enum "unspecv"
@@ -374,7 +375,7 @@
   rs6000_split_multireg_move (operands[0], operands[1]);
   DONE;
 }
-  [(set_attr "type" "vecload,vecstore,veclogical,vecmove,vecmove,vecmove")
+  [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
    (set_attr "length" "*,*,16,*,*,*")
    (set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
 
@@ -445,25 +446,38 @@
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:XO 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "register_operand")
    (match_operand:V16QI 1 "mma_assemble_input_operand")
    (match_operand:V16QI 2 "mma_assemble_input_operand")
    (match_operand:V16QI 3 "mma_assemble_input_operand")
    (match_operand:V16QI 4 "mma_assemble_input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
-			    	     gen_rtvec (4, operands[1], operands[2],
-				       		operands[3], operands[4]),
-			    	     UNSPECV_MMA_ASSEMBLE);
-  emit_move_insn (operands[0], src);
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+  rtx op4 = operands[4];
+
+  if (TARGET_DMF)
+    {
+      rtx vpair1 = gen_reg_rtx (OOmode);
+      rtx vpair2 = gen_reg_rtx (OOmode);
+      emit_insn (gen_vsx_assemble_pair (vpair1, op1, op2));
+      emit_insn (gen_vsx_assemble_pair (vpair2, op3, op4));
+      emit_insn (gen_mma_assemble_acc_dmf (op0, vpair1, vpair2));
+    }
+
+  else
+    emit_insn (gen_mma_assemble_acc_p10 (op0, op1, op2, op3, op4));
+
   DONE;
 })
 
 ;; We cannot update the four output registers atomically, so mark the output
-;; as an early clobber so we don't accidentally clobber the input operands.  */
+;; as an early clobber so we don't accidentally clobber the input operands.
 
-(define_insn_and_split "*mma_assemble_acc"
+(define_insn_and_split "mma_assemble_acc_p10"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
 	(unspec_volatile:XO
 	  [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
@@ -471,7 +485,7 @@
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
+  "TARGET_MMA && !TARGET_DMF
    && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
@@ -485,28 +499,30 @@
   DONE;
 })
 
+;; On a system with DMF, we build an accumulator from two vector pairs.
+
+(define_insn "mma_assemble_acc_dmf"
+ [(set (match_operand:XO 0 "dmf_operand" "=wD")
+       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		   (match_operand:OO 2 "vsx_register_operand" "wa")]
+		  UNSPEC_MMA_ASSEMBLE_ACC_DMF))]
+ "TARGET_MMA && TARGET_DMF"
+ "dmxxinstdmr512 %0,%1,%2,0"
+ [(set_attr "type" "mma")])
+
 (define_expand "mma_disassemble_acc"
-  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
-   (match_operand:XO 1 "fpr_reg_operand")
-   (match_operand 2 "const_0_to_3_operand")]
-  "TARGET_MMA"
-{
-  rtx src;
-  int regoff = INTVAL (operands[2]);
-  src = gen_rtx_UNSPEC (V16QImode,
-			gen_rtvec (2, operands[1], GEN_INT (regoff)),
-			UNSPEC_MMA_EXTRACT);
-  emit_move_insn (operands[0], src);
-  DONE;
-})
+  [(set (match_operand:V16QI 0 "register_operand")
+	(unspec:V16QI [(match_operand:XO 1 "register_operand")
+		       (match_operand 2 "const_0_to_3_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA")
 
-(define_insn_and_split "*mma_disassemble_acc"
+(define_insn_and_split "*mma_disassemble_acc_p10"
   [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
-       (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
-		      (match_operand 2 "const_0_to_3_operand")]
+	(unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
+		       (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -518,6 +534,15 @@
   DONE;
 })
 
+(define_insn "*mma_disassemble_acc_dmf"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+	(unspec:V16QI [(match_operand:XO 1 "dmf_operand" "wD")
+		       (match_operand 2 "const_0_to_3_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_DMF"
+  "dmxxexttdmr256 %0,%1,2"
+  [(set_attr "type" "mma")])
+
 ;; MMA instructions that do not use their accumulators as an input, still must
 ;; not allow their vector operands to overlap the registers used by the
 ;; accumulator.  We enforce this by marking the output as early clobber.  If we

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-10-11 23:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-11 23:17 [gcc(refs/users/meissner/heads/dmf001)] Implement DMF mma_assemble_acc and mma_disassemble_acc Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).