From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id B90C53858404; Tue, 11 Oct 2022 23:17:54 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B90C53858404 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1665530274; bh=RKhdraYGdL/jYG1GwGVammJWsPoyk5mWeRk/pe/VeBw=; h=From:To:Subject:Date:From; b=X428h+t9YplMDTIac7ocMgWin1C0zgO/LEN84gJu5/4/VIADF16SPejSKggbruEfU gQkD19CcaGkfNCim7eKm4fsqkMCv8ltVjz75PK1S/3cTggRIF4zWJ8uuHIEIvhhrXq tiM2uy8r+d7x1+cLMGQsNMEViWcHq9ohGMdUd+Cg= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/dmf001)] Implement DMF mma_assemble_acc and mma_disassemble_acc X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/dmf001 X-Git-Oldrev: 9af78e29a3a16ec1ab33cf31c2c5c33ce7e325a7 X-Git-Newrev: 8c57aac65cf34a2d6782f2234ebf8ef0b0066323 Message-Id: <20221011231754.B90C53858404@sourceware.org> Date: Tue, 11 Oct 2022 23:17:54 +0000 (GMT) List-Id: https://gcc.gnu.org/g:8c57aac65cf34a2d6782f2234ebf8ef0b0066323 commit 8c57aac65cf34a2d6782f2234ebf8ef0b0066323 Author: Michael Meissner Date: Tue Oct 11 19:17:40 2022 -0400 Implement DMF mma_assemble_acc and mma_disassemble_acc 2022-10-11 Michael Meissner gcc/ * config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE_ACC_DMF): New unspec. (movxo_dmf): Use 'mma' type, not 'vecmove'. (mma_assemble_acc): Split handling for non-DMF and DMF usage. (mma_assemble_acc_p10): Rename from mma_assemble_acc to handle non-DMF case. (mma_assemble_acc_dmf): Build the accumulator from two vector pairs. (mma_disassemble_acc_p10): Rename from mma_disassemble_acc to handle non-DMF case. (mma_disassemble_acc_dmf): Implement on DMF to use dmxxexttdmr256. Diff: --- gcc/config/rs6000/mma.md | 81 +++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index e198cd15350..a7ddb66c3de 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -91,6 +91,7 @@ UNSPEC_MMA_XVI8GER4SPP UNSPEC_MMA_XXMFACC UNSPEC_MMA_XXMTACC + UNSPEC_MMA_ASSEMBLE_ACC_DMF ]) (define_c_enum "unspecv" @@ -374,7 +375,7 @@ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } - [(set_attr "type" "vecload,vecstore,veclogical,vecmove,vecmove,vecmove") + [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma") (set_attr "length" "*,*,16,*,*,*") (set_attr "max_prefixed_insns" "2,2,*,*,*,*")]) @@ -445,25 +446,38 @@ }) (define_expand "mma_assemble_acc" - [(match_operand:XO 0 "fpr_reg_operand") + [(match_operand:XO 0 "register_operand") (match_operand:V16QI 1 "mma_assemble_input_operand") (match_operand:V16QI 2 "mma_assemble_input_operand") (match_operand:V16QI 3 "mma_assemble_input_operand") (match_operand:V16QI 4 "mma_assemble_input_operand")] "TARGET_MMA" { - rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode, - gen_rtvec (4, operands[1], operands[2], - operands[3], operands[4]), - UNSPECV_MMA_ASSEMBLE); - emit_move_insn (operands[0], src); + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx op4 = operands[4]; + + if (TARGET_DMF) + { + rtx vpair1 = gen_reg_rtx (OOmode); + rtx vpair2 = gen_reg_rtx (OOmode); + emit_insn (gen_vsx_assemble_pair (vpair1, op1, op2)); + emit_insn (gen_vsx_assemble_pair (vpair2, op3, op4)); + emit_insn (gen_mma_assemble_acc_dmf (op0, vpair1, vpair2)); + } + + else + emit_insn (gen_mma_assemble_acc_p10 (op0, op1, op2, op3, op4)); + DONE; }) ;; We cannot update the four output registers atomically, so mark the output -;; as an early clobber so we don't accidentally clobber the input operands. */ +;; as an early clobber so we don't accidentally clobber the input operands. -(define_insn_and_split "*mma_assemble_acc" +(define_insn_and_split "mma_assemble_acc_p10" [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") (unspec_volatile:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") @@ -471,7 +485,7 @@ (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] UNSPECV_MMA_ASSEMBLE))] - "TARGET_MMA + "TARGET_MMA && !TARGET_DMF && fpr_reg_operand (operands[0], XOmode)" "#" "&& reload_completed" @@ -485,28 +499,30 @@ DONE; }) +;; On a system with DMF, we build the accumulators from two vector pairs. + +(define_insn "mma_assemble_acc_dmf" + [(set (match_operand:XO 0 "dmf_operand" "=wD") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa") + (match_operand:OO 2 "vsx_register_operand" "wa")] + UNSPEC_MMA_ASSEMBLE_ACC_DMF))] + "TARGET_MMA && TARGET_DMF" + "dmxxinstdmr512 %0,%1,%2,0" + [(set_attr "type" "mma")]) + (define_expand "mma_disassemble_acc" - [(match_operand:V16QI 0 "mma_disassemble_output_operand") - (match_operand:XO 1 "fpr_reg_operand") - (match_operand 2 "const_0_to_3_operand")] - "TARGET_MMA" -{ - rtx src; - int regoff = INTVAL (operands[2]); - src = gen_rtx_UNSPEC (V16QImode, - gen_rtvec (2, operands[1], GEN_INT (regoff)), - UNSPEC_MMA_EXTRACT); - emit_move_insn (operands[0], src); - DONE; -}) + [(set (match_operand:V16QI 0 "register_operand") + (unspec:V16QI [(match_operand:XO 1 "register_operand") + (match_operand 2 "const_0_to_3_operand")] + UNSPEC_MMA_EXTRACT))] + "TARGET_MMA") -(define_insn_and_split "*mma_disassemble_acc" +(define_insn_and_split "*mma_disassemble_acc_p10" [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") - (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") - (match_operand 2 "const_0_to_3_operand")] + (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") + (match_operand 2 "const_0_to_3_operand")] UNSPEC_MMA_EXTRACT))] - "TARGET_MMA - && fpr_reg_operand (operands[1], XOmode)" + "TARGET_MMA" "#" "&& reload_completed" [(const_int 0)] @@ -518,6 +534,15 @@ DONE; }) +(define_insn "*mma_disassemble_acc_dmf" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:XO 1 "dmf_operand" "wD") + (match_operand 2 "const_0_to_3_operand")] + UNSPEC_MMA_EXTRACT))] + "TARGET_DMF" + "dmxxexttdmr256 %0,%1,2" + [(set_attr "type" "mma")]) + ;; MMA instructions that do not use their accumulators as an input, still must ;; not allow their vector operands to overlap the registers used by the ;; accumulator. We enforce this by marking the output as early clobber. If we