From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 9A9293858C41; Tue, 5 Mar 2024 07:46:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9A9293858C41 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1709624810; bh=jad5ad3fRlf7A+ja2F1B5ONaC8bhBsCRZZVphY6R310=; h=From:To:Subject:Date:From; b=Ul/XNRcvr4eKzOaIZVvZa7YQdB+ef+hSNAlNgeXQgYYz7tPcSqKv3iYC0rMqBBMLf VfOk4YWYAq8FSIcAePCI3xipHxVyaZjsZv3W3UP4HrqGZ5lGXHD0VnNUZtn5YT6Vvd 6QSqY94bA9TK2ciIAeVL+XsY4C+4m6yYXxJu/nk4= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work161-dmf X-Git-Oldrev: 6cfa27bc3c5f84443a8f7bbd213a4f7d24642dd0 X-Git-Newrev: 1c3fac525978339e5838ebac6f9d8a31ae953d04 Message-Id: <20240305074650.9A9293858C41@sourceware.org> Date: Tue, 5 Mar 2024 07:46:50 +0000 (GMT) List-Id: https://gcc.gnu.org/g:1c3fac525978339e5838ebac6f9d8a31ae953d04 commit 1c3fac525978339e5838ebac6f9d8a31ae953d04 Author: Michael Meissner Date: Tue Mar 5 02:44:36 2024 -0500 PowerPC: Switch to dense math names for all MMA operations. This patch changes the assembler instruction names for MMA instructions from the original name used in power10 to the new name when used with the dense math system. I.e. xvf64gerpp becomes dmxvf64gerpp. The assembler will emit the same bits for either spelling. The patches have been tested on both little and big endian systems. Can I check it into the master branch? For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the instruction. However, the prefixed instructions have a 'pm' prefix, and we add the 'dm' prefix afterwards. To prevent having two sets of parallel int attributes, we remove the "pm" prefix from the instruction string in the attributes, and add it later, both in the insn name and in the output template. For example, previously we had (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) ;; ... (define_insn "mma_" [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] MMA_VVI4I4I8))] "TARGET_MMA" " %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) And now we have: (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) ;; ... (define_insn "mma_pm" [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] MMA_VVI4I4I8))] "TARGET_MMA" "@ pmdm %A0,%x1,%x2,%3,%4,%5 pm %A0,%x1,%x2,%3,%4,%5 pm %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) 2024-03-05 Michael Meissner gcc/ * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a "pm" prefix. (avvi4i4i8): Likewise. (vvi4i4i2): Likewise. (avvi4i4i2): Likewise. (vvi4i4): Likewise. (avvi4i4): Likewise. (pvi4i2): Likewise. (apvi4i2): Likewise. (vvi4i4i4): Likewise. (avvi4i4i4): Likewise. (mma_xxsetaccz): Add support for running on DMF systems, generating the dense math instruction and using the dense math accumulators. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_pm): Add support for running on DMF systems, generating the dense math instruction and using the dense math accumulators. Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm' prefixes based on whether we have the original MMA specification or if we have dense math support. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. gcc/testsuite/ * gcc.target/powerpc/dm-double-test.c: New test. * lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New target test. Diff: --- gcc/config/rs6000/mma.md | 368 +++++++++++++--------- gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++ gcc/testsuite/lib/target-supports.exp | 25 +- 3 files changed, 433 insertions(+), 154 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 8799f4137fa..1a93c60418f 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -224,44 +224,48 @@ (UNSPEC_MMA_XVF64GERNP "xvf64gernp") (UNSPEC_MMA_XVF64GERNN "xvf64gernn")]) -(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) +;; Do not include the "pm" prefix in these instructions. If we have MMA but we +;; don't have dense math register support we want to issue the instruction with +;; a "pm" prefix, but if we have dense math registers, we want to issue it with +;; a "pmdm" prefix. I.e. pmxvi4ger8 vs. pmdmxvi4ger8 +(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) -(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")]) +(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "xvi4ger8pp")]) -(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "pmxvi16ger2") - (UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s") - (UNSPEC_MMA_PMXVF16GER2 "pmxvf16ger2") - (UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")]) +(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "xvi16ger2") + (UNSPEC_MMA_PMXVI16GER2S "xvi16ger2s") + (UNSPEC_MMA_PMXVF16GER2 "xvf16ger2") + (UNSPEC_MMA_PMXVBF16GER2 "xvbf16ger2")]) -(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") - (UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp") - (UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp") - (UNSPEC_MMA_PMXVF16GER2PN "pmxvf16ger2pn") - (UNSPEC_MMA_PMXVF16GER2NP "pmxvf16ger2np") - (UNSPEC_MMA_PMXVF16GER2NN "pmxvf16ger2nn") - (UNSPEC_MMA_PMXVBF16GER2PP "pmxvbf16ger2pp") - (UNSPEC_MMA_PMXVBF16GER2PN "pmxvbf16ger2pn") - (UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np") - (UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")]) +(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "xvi16ger2pp") + (UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp") + (UNSPEC_MMA_PMXVF16GER2PP "xvf16ger2pp") + (UNSPEC_MMA_PMXVF16GER2PN "xvf16ger2pn") + (UNSPEC_MMA_PMXVF16GER2NP "xvf16ger2np") + (UNSPEC_MMA_PMXVF16GER2NN "xvf16ger2nn") + (UNSPEC_MMA_PMXVBF16GER2PP "xvbf16ger2pp") + (UNSPEC_MMA_PMXVBF16GER2PN "xvbf16ger2pn") + (UNSPEC_MMA_PMXVBF16GER2NP "xvbf16ger2np") + (UNSPEC_MMA_PMXVBF16GER2NN "xvbf16ger2nn")]) -(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "pmxvf32ger")]) +(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "xvf32ger")]) -(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "pmxvf32gerpp") - (UNSPEC_MMA_PMXVF32GERPN "pmxvf32gerpn") - (UNSPEC_MMA_PMXVF32GERNP "pmxvf32gernp") - (UNSPEC_MMA_PMXVF32GERNN "pmxvf32gernn")]) +(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "xvf32gerpp") + (UNSPEC_MMA_PMXVF32GERPN "xvf32gerpn") + (UNSPEC_MMA_PMXVF32GERNP "xvf32gernp") + (UNSPEC_MMA_PMXVF32GERNN "xvf32gernn")]) -(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "pmxvf64ger")]) +(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "xvf64ger")]) -(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "pmxvf64gerpp") - (UNSPEC_MMA_PMXVF64GERPN "pmxvf64gerpn") - (UNSPEC_MMA_PMXVF64GERNP "pmxvf64gernp") - (UNSPEC_MMA_PMXVF64GERNN "pmxvf64gernn")]) +(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "xvf64gerpp") + (UNSPEC_MMA_PMXVF64GERPN "xvf64gerpn") + (UNSPEC_MMA_PMXVF64GERNP "xvf64gernp") + (UNSPEC_MMA_PMXVF64GERNN "xvf64gernn")]) -(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "pmxvi8ger4")]) +(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "xvi8ger4")]) -(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp") - (UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")]) +(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "xvi8ger4pp") + (UNSPEC_MMA_PMXVI8GER4SPP "xvi8ger4spp")]) ;; Vector pair support. OOmode can only live in VSRs. @@ -542,178 +546,236 @@ ;; UNSPEC_VOLATILE for the non-dense math case. (define_insn "mma_xxsetaccz" - [(set (match_operand:XO 0 "accumulator_operand" "=wD") + [(set (match_operand:XO 0 "accumulator_operand" "=wD,wD") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" - "xxsetaccz %A0" - [(set_attr "type" "mma")]) + "@ + dmsetdmrz %A0 + xxsetaccz %A0" + [(set_attr "type" "mma") + (set_attr "isa" "dm,not_dm")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] MMA_VV))] "TARGET_MMA" - " %A0,%x1,%x2" - [(set_attr "type" "mma")]) + "@ + dm %A0,%x1,%x2 + %A0,%x1,%x2 + %A0,%x1,%x2" + [(set_attr "type" "mma") + (set_attr "isa" "dm,not_dm,not_dm")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")] MMA_AVV))] "TARGET_MMA" - " %A0,%x2,%x3" - [(set_attr "type" "mma")]) + "@ + dm %A0,%x2,%x3 + %A0,%x2,%x3 + %A0,%x2,%x3" + [(set_attr "type" "mma") + (set_attr "isa" "dm,not_dm,not_dm")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] MMA_PV))] "TARGET_MMA" - " %A0,%x1,%x2" - [(set_attr "type" "mma")]) + "@ + dm %A0,%x1,%x2 + %A0,%x1,%x2 + %A0,%x1,%x2" + [(set_attr "type" "mma") + (set_attr "isa" "dm,not_dm,not_dm")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:OO 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")] MMA_APV))] "TARGET_MMA" - " %A0,%x2,%x3" - [(set_attr "type" "mma")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "u8bit_cint_operand" "n,n")] + "@ + dm %A0,%x2,%x3 + %A0,%x2,%x3 + %A0,%x2,%x3" + [(set_attr "type" "mma") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] MMA_VVI4I4I8))] "TARGET_MMA" - " %A0,%x1,%x2,%3,%4,%5" + "@ + pmdm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n") - (match_operand:SI 6 "u8bit_cint_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")] MMA_AVVI4I4I8))] "TARGET_MMA" - " %A0,%x2,%x3,%4,%5,%6" + "@ + pmdm %A0,%x2,%x3,%4,%5,%6 + pm %A0,%x2,%x3,%4,%5,%6 + pm %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_3_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] MMA_VVI4I4I2))] "TARGET_MMA" - " %A0,%x1,%x2,%3,%4,%5" + "@ + pmdm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n") - (match_operand:SI 6 "const_0_to_3_operand" "n,n")] +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")] MMA_AVVI4I4I2))] "TARGET_MMA" - " %A0,%x2,%x3,%4,%5,%6" + " + pmdm %A0,%x2,%x3,%4,%5,%6 + pm %A0,%x2,%x3,%4,%5,%6 + pm %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")] MMA_VVI4I4))] "TARGET_MMA" - " %A0,%x1,%x2,%3,%4" + "@ + pmdm %A0,%x1,%x2,%3,%4 + pm %A0,%x1,%x2,%3,%4 + pm %A0,%x1,%x2,%3,%4" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] MMA_AVVI4I4))] "TARGET_MMA" - " %A0,%x2,%x3,%4,%5" + "@ + pmdm %A0,%x2,%x3,%4,%5 + pm %A0,%x2,%x3,%4,%5 + pm %A0,%x2,%x3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n") - (match_operand:SI 4 "const_0_to_3_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")] MMA_PVI4I2))] "TARGET_MMA" - " %A0,%x1,%x2,%3,%4" + "@ + pmdm %A0,%x1,%x2,%3,%4 + pm %A0,%x1,%x2,%3,%4 + pm %A0,%x1,%x2,%3,%4" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:OO 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_3_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] MMA_APVI4I2))] "TARGET_MMA" - " %A0,%x2,%x3,%4,%5" + "@ + pmdm %A0,%x2,%x3,%4,%5 + pm %A0,%x2,%x3,%4,%5 + pm %A0,%x2,%x3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] MMA_VVI4I4I4))] "TARGET_MMA" - " %A0,%x1,%x2,%3,%4,%5" + "@ + pmdm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) - -(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") - (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n") - (match_operand:SI 6 "const_0_to_15_operand" "n,n")] + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +(define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")] MMA_AVVI4I4I4))] "TARGET_MMA" - " %A0,%x2,%x3,%4,%5,%6" + "@ + pmdm %A0,%x2,%x3,%4,%5,%6 + pm %A0,%x2,%x3,%4,%5,%6 + pm %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") - (set_attr "prefixed" "yes")]) + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c new file mode 100644 index 00000000000..66c19779585 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c @@ -0,0 +1,194 @@ +/* Test derived from mma-double-1.c, modified for dense math. */ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_dense_math_ok } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +#include +#include +#include + +typedef unsigned char vec_t __attribute__ ((vector_size (16))); +typedef double v4sf_t __attribute__ ((vector_size (16))); +#define SAVE_ACC(ACC, ldc, J) \ + __builtin_mma_disassemble_acc (result, ACC); \ + rowC = (v4sf_t *) &CO[0*ldc+J]; \ + rowC[0] += result[0]; \ + rowC = (v4sf_t *) &CO[1*ldc+J]; \ + rowC[0] += result[1]; \ + rowC = (v4sf_t *) &CO[2*ldc+J]; \ + rowC[0] += result[2]; \ + rowC = (v4sf_t *) &CO[3*ldc+J]; \ + rowC[0] += result[3]; + +void +DM (int m, int n, int k, double *A, double *B, double *C) +{ + __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; + v4sf_t result[4]; + v4sf_t *rowC; + for (int l = 0; l < n; l += 4) + { + double *CO; + double *AO; + AO = A; + CO = C; + C += m * 4; + for (int j = 0; j < m; j += 16) + { + double *BO = B; + __builtin_mma_xxsetaccz (&acc0); + __builtin_mma_xxsetaccz (&acc1); + __builtin_mma_xxsetaccz (&acc2); + __builtin_mma_xxsetaccz (&acc3); + __builtin_mma_xxsetaccz (&acc4); + __builtin_mma_xxsetaccz (&acc5); + __builtin_mma_xxsetaccz (&acc6); + __builtin_mma_xxsetaccz (&acc7); + unsigned long i; + + for (i = 0; i < k; i++) + { + vec_t *rowA = (vec_t *) & AO[i * 16]; + __vector_pair rowB; + vec_t *rb = (vec_t *) & BO[i * 4]; + __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); + __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); + __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); + __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]); + __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]); + __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]); + __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]); + __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]); + } + SAVE_ACC (&acc0, m, 0); + SAVE_ACC (&acc2, m, 4); + SAVE_ACC (&acc1, m, 2); + SAVE_ACC (&acc3, m, 6); + SAVE_ACC (&acc4, m, 8); + SAVE_ACC (&acc6, m, 12); + SAVE_ACC (&acc5, m, 10); + SAVE_ACC (&acc7, m, 14); + AO += k * 16; + BO += k * 4; + CO += 16; + } + B += k * 4; + } +} + +void +init (double *matrix, int row, int column) +{ + for (int j = 0; j < column; j++) + { + for (int i = 0; i < row; i++) + { + matrix[j * row + i] = (i * 16 + 2 + j) / 0.123; + } + } +} + +void +init0 (double *matrix, double *matrix1, int row, int column) +{ + for (int j = 0; j < column; j++) + for (int i = 0; i < row; i++) + matrix[j * row + i] = matrix1[j * row + i] = 0; +} + + +void +print (const char *name, const double *matrix, int row, int column) +{ + printf ("Matrix %s has %d rows and %d columns:\n", name, row, column); + for (int i = 0; i < row; i++) + { + for (int j = 0; j < column; j++) + { + printf ("%f ", matrix[j * row + i]); + } + printf ("\n"); + } + printf ("\n"); +} + +int +main (int argc, char *argv[]) +{ + int rowsA, colsB, common; + int i, j, k; + int ret = 0; + + for (int t = 16; t <= 128; t += 16) + { + for (int t1 = 4; t1 <= 16; t1 += 4) + { + rowsA = t; + colsB = t1; + common = 1; + /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */ + double A[rowsA * common]; + double B[common * colsB]; + double C[rowsA * colsB]; + double D[rowsA * colsB]; + + + init (A, rowsA, common); + init (B, common, colsB); + init0 (C, D, rowsA, colsB); + DM (rowsA, colsB, common, A, B, C); + + for (i = 0; i < colsB; i++) + { + for (j = 0; j < rowsA; j++) + { + D[i * rowsA + j] = 0; + for (k = 0; k < common; k++) + { + D[i * rowsA + j] += + A[k * rowsA + j] * B[k + common * i]; + } + } + } + for (i = 0; i < colsB; i++) + { + for (j = 0; j < rowsA; j++) + { + for (k = 0; k < common; k++) + { + if (D[i * rowsA + j] != C[i * rowsA + j]) + { + printf ("Error %d,%d,%d\n",i,j,k); + ret++; + } + } + } + } + if (ret) + { + print ("A", A, rowsA, common); + print ("B", B, common, colsB); + print ("C", C, rowsA, colsB); + print ("D", D, rowsA, colsB); + } + } + } + +#ifdef VERBOSE + if (ret) + printf ("DM double test fail: %d errors\n",ret); + else + printf ("DM double test success: 0 DM errors\n"); +#else + if (ret) + abort(); +#endif + + return ret; +} + +/* { dg-final { scan-assembler {\mdmsetdmrz\M} } } */ +/* { dg-final { scan-assembler {\mdmxvf64gerpp\M} } } */ +/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */ + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index e23d3ec8b3c..a46447efbee 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7117,7 +7117,30 @@ proc check_effective_target_power11_ok { } { } } "-mcpu=power11"] } else { - return 0 + return 0; + } +} + +# Return 1 if this is a PowerPC target supporting -mcpu=future which enables +# the dense math operations. +proc check_effective_target_powerpc_dense_math_ok { } { + if { ([istarget powerpc*-*-*]) } { + return [check_no_compiler_messages powerpc_dense_math_ok object { + __vector_quad vq; + int main (void) { + #ifndef __PPC_DMR__ + #error "target does not have dense math support." + #else + /* Make sure we have dense math support. */ + __vector_quad dmr; + __asm__ ("dmsetaccz %A0" : "=wD" (dmr)); + vq = dmr; + #endif + return 0; + } + } "-mcpu=future"] + } else { + return 0; } }