public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-02-29 19:28 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-02-29 19:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c0ecbb5f9b62e65a763e24ab9c7df9289b45922f

commit c0ecbb5f9b62e65a763e24ab9c7df9289b45922f
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Feb 29 14:25:58 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original names used on power10 to the new names used with the dense math
    system.  For example, xvf64gerpp becomes dmxvf64gerpp.  The assembler emits
    the same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions already have a 'pm' prefix,
    and the 'dm' prefix goes after it, giving a 'pmdm' prefix.  To avoid having
    two parallel sets of int attributes, we remove the "pm" prefix from the
    instruction string in the attributes and add it back later, both in the insn
    name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-02-29   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_<vv>): Add support for running on DMF systems, generating the dense
            math instruction and using the dense math accumulators.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok):
            New effective target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 147 ++++++++++------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  19 +++
 3 files changed, 308 insertions(+), 52 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ac26de93143..df329b00f15 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instruction names.  If we have MMA
+;; but do not have dense math register support, we want to issue the instruction
+;; with a "pm" prefix, but if we have dense math registers, we want to issue it
+;; with a "pmdm" prefix, e.g. pmxvi4ger8 vs. pmdmxvi4ger8.
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -622,7 +626,10 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -643,7 +650,10 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -654,11 +664,14 @@
 		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4i8>"
+(define_insn "mma_pm<vvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -667,12 +680,15 @@
 		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4i8>"
+(define_insn "mma_pm<avvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -682,12 +698,15 @@
 		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4i2>"
+(define_insn "mma_pm<vvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -696,12 +715,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4i2>"
+(define_insn "mma_pm<avvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -711,12 +733,15 @@
 		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4>"
+(define_insn "mma_pm<vvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -724,12 +749,15 @@
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4>"
+(define_insn "mma_pm<avvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -738,12 +766,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<pvi4i2>"
+(define_insn "mma_pm<pvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -751,12 +782,15 @@
 		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<apvi4i2>"
+(define_insn "mma_pm<apvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
@@ -765,12 +799,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4i4>"
+(define_insn "mma_pm<vvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -779,12 +816,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4i4>"
+(define_insn "mma_pm<avvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -794,7 +834,10 @@
 		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..96033b950eb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7121,6 +7121,25 @@ proc check_effective_target_power11_ok { } {
     }
 }
 
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		}
+	} "-mcpu=future"]
+}
+
 # Return 1 if this is a PowerPC target supporting -mfloat128 via either
 # software emulation on power7/power8 systems or hardware support on power9.


* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-06  0:36 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-06  0:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f6f63c432351eae3d363fde51d2d1dc5ed68e901

commit f6f63c432351eae3d363fde51d2d1dc5ed68e901
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 19:36:25 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original names used on power10 to the new names used with the dense math
    system.  For example, xvf64gerpp becomes dmxvf64gerpp.  The assembler emits
    the same bits for either spelling.
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions already have a 'pm' prefix,
    and the 'dm' prefix goes after it, giving a 'pmdm' prefix.  To avoid having
    two parallel sets of int attributes, we remove the "pm" prefix from the
    instruction string in the attributes and add it back later, both in the insn
    name and in the output template.
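
    As a minimal sketch (mirroring the mma_pm<vvi4i4i8> hunk below; the
    attribute, insn, and macro names are the existing ones from mma.md), the
    pmxvi4ger8 pattern now chooses the "pm" or "pmdm" spelling in its output
    template:

      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])

      (define_insn "mma_pm<vvi4i4i8>"
        ;; ... operands as before ...
        "TARGET_MMA"
      {
        return (TARGET_DENSE_MATH
                ? "pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
                : "pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5");
      }
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")])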
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_xxsetaccz): Add support for running on DMF systems, generating the
            dense math instruction and using the dense math accumulators.
            (mma_<vv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 161 +++++++++++++++++++++++++++++++----------------
 1 file changed, 107 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2ce613b46cc..f3870eac51a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,47 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -546,7 +549,9 @@
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
+{
+  return TARGET_DENSE_MATH ? "dmsetdmrz %A0" : "xxsetaccz %A0";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
@@ -555,7 +560,9 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
+{
+  return TARGET_DENSE_MATH ? "dm<vv> %A0,%x1,%x2" : "<vv> %A0,%x1,%x2";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
@@ -565,7 +572,9 @@
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
+{
+  return TARGET_DENSE_MATH ? "dm<avv> %A0,%x2,%x3" : "<avv> %A0,%x2,%x3";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
@@ -574,7 +583,9 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
+{
+  return TARGET_DENSE_MATH ? "dm<pv> %A0,%x1,%x2" : "<pv> %A0,%x1,%x2";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
@@ -584,10 +595,12 @@
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
+{
+  return TARGET_DENSE_MATH ? "dm<apv> %A0,%x2,%x3" : "<apv> %A0,%x2,%x3";
+}
   [(set_attr "type" "mma")])
 
-(define_insn "mma_<vvi4i4i8>"
+(define_insn "mma_pm<vvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -596,11 +609,15 @@
 		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+          : "pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i8>"
+(define_insn "mma_pm<avvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -610,11 +627,15 @@
 		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+          : "pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<vvi4i4i2>"
+(define_insn "mma_pm<vvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -623,11 +644,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+          : "pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
+(define_insn "mma_pm<avvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -637,11 +662,15 @@
 		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+          : "pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<vvi4i4>"
+(define_insn "mma_pm<vvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -649,11 +678,15 @@
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4> %A0,%x1,%x2,%3,%4"
+          : "pm<vvi4i4> %A0,%x1,%x2,%3,%4");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4>"
+(define_insn "mma_pm<avvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -662,11 +695,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4> %A0,%x2,%x3,%4,%5"
+          : "pm<avvi4i4> %A0,%x2,%x3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<pvi4i2>"
+(define_insn "mma_pm<pvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -674,11 +711,15 @@
 		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<pvi4i2> %A0,%x1,%x2,%3,%4"
+          : "pm<pvi4i2> %A0,%x1,%x2,%3,%4");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<apvi4i2>"
+(define_insn "mma_pm<apvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
@@ -687,11 +728,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<apvi4i2> %A0,%x2,%x3,%4,%5"
+          : "pm<apvi4i2> %A0,%x2,%x3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<vvi4i4i4>"
+(define_insn "mma_pm<vvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -700,11 +745,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+          : "pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i4>"
+(define_insn "mma_pm<avvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -714,6 +763,10 @@
 		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+          : "pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])


* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05 20:34 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05 20:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d6a52b18452ee2d086094e78dde287a44a53f91a

commit d6a52b18452ee2d086094e78dde287a44a53f91a
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 15:33:12 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original names used on power10 to the new names used with the dense math
    system.  For example, xvf64gerpp becomes dmxvf64gerpp.  The assembler emits
    the same bits for either spelling.
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions already have a 'pm' prefix,
    and the 'dm' prefix goes after it, giving a 'pmdm' prefix.  To avoid having
    two parallel sets of int attributes, we remove the "pm" prefix from the
    instruction string in the attributes and add it back later, both in the insn
    name and in the output template.
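
    As a minimal sketch of the mma_xxsetaccz change (taken from the hunk below,
    with the operand pattern elided), the non-prefixed insns pick the dense math
    mnemonic at output time:

      (define_insn "mma_xxsetaccz"
        ;; ... accumulator destination as before ...
        "TARGET_MMA"
      {
        return TARGET_DENSE_MATH ? "dmsetdmrz %A0" : "xxsetaccz %A0";
      }
        [(set_attr "type" "mma")])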
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_xxsetaccz): Add support for running on DMF systems, generating the
            dense math instruction and using the dense math accumulators.
            (mma_<vv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 161 +++++++++++++++++++++++++++++++----------------
 1 file changed, 107 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2ce613b46cc..f3870eac51a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,47 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -546,7 +549,9 @@
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
+{
+  return TARGET_DENSE_MATH ? "dmsetdmrz %A0" : "xxsetaccz %A0";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
@@ -555,7 +560,9 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
+{
+  return TARGET_DENSE_MATH ? "dm<vv> %A0,%x1,%x2" : "<vv> %A0,%x1,%x2";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
@@ -565,7 +572,9 @@
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
+{
+  return TARGET_DENSE_MATH ? "dm<avv> %A0,%x2,%x3" : "<avv> %A0,%x2,%x3";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
@@ -574,7 +583,9 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
+{
+  return TARGET_DENSE_MATH ? "dm<pv> %A0,%x1,%x2" : "<pv> %A0,%x1,%x2";
+}
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
@@ -584,10 +595,12 @@
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
+{
+  return TARGET_DENSE_MATH ? "dm<apv> %A0,%x2,%x3" : "<apv> %A0,%x2,%x3";
+}
   [(set_attr "type" "mma")])
 
-(define_insn "mma_<vvi4i4i8>"
+(define_insn "mma_pm<vvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -596,11 +609,15 @@
 		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+          : "pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i8>"
+(define_insn "mma_pm<avvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -610,11 +627,15 @@
 		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+          : "pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<vvi4i4i2>"
+(define_insn "mma_pm<vvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -623,11 +644,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+          : "pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
+(define_insn "mma_pm<avvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -637,11 +662,15 @@
 		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+          : "pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<vvi4i4>"
+(define_insn "mma_pm<vvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -649,11 +678,15 @@
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4> %A0,%x1,%x2,%3,%4"
+          : "pm<vvi4i4> %A0,%x1,%x2,%3,%4");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4>"
+(define_insn "mma_pm<avvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -662,11 +695,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4> %A0,%x2,%x3,%4,%5"
+          : "pm<avvi4i4> %A0,%x2,%x3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<pvi4i2>"
+(define_insn "mma_pm<pvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -674,11 +711,15 @@
 		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<pvi4i2> %A0,%x1,%x2,%3,%4"
+          : "pm<pvi4i2> %A0,%x1,%x2,%3,%4");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<apvi4i2>"
+(define_insn "mma_pm<apvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
@@ -687,11 +728,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<apvi4i2> %A0,%x2,%x3,%4,%5"
+          : "pm<apvi4i2> %A0,%x2,%x3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<vvi4i4i4>"
+(define_insn "mma_pm<vvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -700,11 +745,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+          : "pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i4>"
+(define_insn "mma_pm<avvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -714,6 +763,10 @@
 		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+{
+  return (TARGET_DENSE_MATH
+          ? "pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+          : "pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6");
+}
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])


* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05  7:46 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05  7:46 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1c3fac525978339e5838ebac6f9d8a31ae953d04

commit 1c3fac525978339e5838ebac6f9d8a31ae953d04
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 02:44:36 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original names used on power10 to the new names used with the dense math
    system.  For example, xvf64gerpp becomes dmxvf64gerpp.  The assembler emits
    the same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions already have a 'pm' prefix,
    and the 'dm' prefix goes after it, giving a 'pmdm' prefix.  To avoid having
    two parallel sets of int attributes, we remove the "pm" prefix from the
    instruction string in the attributes and add it back later, both in the insn
    name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_xxsetaccz): Add support for running on DMF systems, generating the
            dense math instruction and using the dense math accumulators.
            (mma_<vv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok):
            New effective target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 368 +++++++++++++---------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  25 +-
 3 files changed, 433 insertions(+), 154 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8799f4137fa..1a93c60418f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instruction names.  If we have MMA
+;; but do not have dense math register support, we want to issue the instruction
+;; with a "pm" prefix, but if we have dense math registers, we want to issue it
+;; with a "pmdm" prefix, e.g. pmxvi4ger8 vs. pmdmxvi4ger8.
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -542,178 +546,236 @@
 ;; UNSPEC_VOLATILE for the non-dense math case.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
-  [(set_attr "type" "mma")])
+  "@
+   dmsetdmrz %A0
+   xxsetaccz %A0"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
+  "@
+   dm<avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
-
-(define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+(define_insn "mma_pm<avvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<pvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<apvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..a46447efbee 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7117,7 +7117,30 @@ proc check_effective_target_power11_ok { } {
 	    }
 	} "-mcpu=power11"]
     } else {
-	return 0
+	return 0;
+    }
+}
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+    if { ([istarget powerpc*-*-*]) } {
+	return [check_no_compiler_messages powerpc_dense_math_ok object {
+	    __vector_quad vq;
+	    int main (void) {
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		return 0;
+	    }
+	} "-mcpu=future"]
+    } else {
+	return 0;
     }
 }

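Not part of the patch: a minimal sketch of how a follow-on test could consume
the new effective target, reusing the directives from dm-double-test.c above
and the dmsetdmrz expectation that follows from the mma_xxsetaccz change.

  /* { dg-do compile } */
  /* { dg-require-effective-target powerpc_dense_math_ok } */
  /* { dg-options "-mdejagnu-cpu=future -O2" } */

  #include <altivec.h>

  /* Per the mma_xxsetaccz pattern above, this built-in is expected to emit
     dmsetdmrz when the dense math accumulators are in use.  */
  void
  zero_acc (__vector_quad *acc)
  {
    __builtin_mma_xxsetaccz (acc);
  }

  /* { dg-final { scan-assembler {\mdmsetdmrz\M} } } */
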
^ permalink raw reply	[flat|nested] 10+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05  6:00 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05  6:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:523a88d85eb35d276d53e675049f2bb474c10d62

commit 523a88d85eb35d276d53e675049f2bb474c10d62
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 01:00:12 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original name used in power10 to the new name when used with the dense math
    system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
    same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions have a 'pm' prefix, and we add
    the 'dm' prefix afterwards.  To prevent having two sets of parallel int
    attributes, we remove the "pm" prefix from the instruction string in the
    attributes, and add it later, both in the insn name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_xxsetaccz): Add support for running on DMF systems, generating the
            dense math instruction and using the dense math accumulators.
            (mma_<vv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp
            (check_effective_target_powerpc_dense_math_ok): New target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 368 +++++++++++++---------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  23 ++
 3 files changed, 432 insertions(+), 153 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8799f4137fa..1a93c60418f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instructions.  If we have MMA but we
+;; don't have dense math register support we want to issue the instruction with
+;; a "pm" prefix, but if we have dense math registers, we want to issue it with
+;; a "pmdm" prefix.  I.e. pmxvi4ger8 vs. pmdmxvi4ger8
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -542,178 +546,236 @@
 ;; UNSPEC_VOLATILE for the non-dense math case.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
-  [(set_attr "type" "mma")])
+  "@
+   dmsetdmrz %A0
+   xxsetaccz %A0"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
+  "@
+   dm<avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
-
-(define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+(define_insn "mma_pm<avvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<pvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<apvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..54742a95142 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7116,6 +7116,29 @@ proc check_effective_target_power11_ok { } {
 		return 0;
 	    }
 	} "-mcpu=power11"]
+    } else {
+	return 0;
+    }
+}
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		return 0;
+	    }
+	} "-mcpu=power11"]
     } else {
 	return 0
     }

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05  5:42 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05  5:42 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:388a5b747ceb76aa124b634280fd8ba30ae7dd5e

commit 388a5b747ceb76aa124b634280fd8ba30ae7dd5e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 00:41:38 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original name used in power10 to the new name when used with the dense math
    system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
    same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions have a 'pm' prefix, and we add
    the 'dm' prefix afterwards.  To prevent having two sets of parallel int
    attributes, we remove the "pm" prefix from the instruction string in the
    attributes, and add it later, both in the insn name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_xxsetaccz): Add support for running on DMF systems, generating the
            dense math instruction and using the dense math accumulators.
            (mma_<vv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok):
            New target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 368 +++++++++++++---------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  23 ++
 3 files changed, 432 insertions(+), 153 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8799f4137fa..1a93c60418f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instructions.  If we have MMA but we
+;; don't have dense math register support we want to issue the instruction with
+;; a "pm" prefix, but if we have dense math registers, we want to issue it with
+;; a "pmdm" prefix.  I.e. pmxvi4ger8 vs. pmdmxvi4ger8
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -542,178 +546,236 @@
 ;; UNSPEC_VOLATILE for the non-dense math case.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
-  [(set_attr "type" "mma")])
+  "@
+   dmsetdmrz %A0
+   xxsetaccz %A0"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
+  "@
+   dm<avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
-
-(define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+(define_insn "mma_pm<avvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<pvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<apvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..54742a95142 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7116,6 +7116,29 @@ proc check_effective_target_power11_ok { } {
 		return 0;
 	    }
 	} "-mcpu=power11"]
+    } else {
+	return 0;
+    }
+}
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		return 0;
+	    }
+	} "-mcpu=power11"]
     } else {
 	return 0
     }
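
As a rough illustration of the user-visible effect (a minimal sketch, assuming
the standard MMA built-ins and the -mcpu=future option used by the new test;
the function name ger_example is made up for the example): the built-ins
themselves do not change, only the mnemonics the compiler prints differ once
dense-math support is enabled.

  /* Plain MMA (e.g. -mcpu=power10 -O2) is expected to emit
     xxsetaccz / xvi4ger8 / pmxvi4ger8, while with dense math (e.g.
     -mcpu=future -O2) the same built-ins print
     dmsetdmrz / dmxvi4ger8 / pmdmxvi4ger8.  */
  #include <altivec.h>

  void
  ger_example (__vector_quad *acc,
               __vector unsigned char a, __vector unsigned char b)
  {
    __builtin_mma_xxsetaccz (acc);                        /* zero the accumulator */
    __builtin_mma_xvi4ger8 (acc, a, b);                   /* non-prefixed form */
    __builtin_mma_pmxvi4ger8 (acc, a, b, 0xf, 0xf, 0xff); /* prefixed (masked) form */
  }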

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05  5:22 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05  5:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:917b19af811794338abad89319820e3716eaa898

commit 917b19af811794338abad89319820e3716eaa898
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 00:22:17 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original name used in power10 to the new name when used with the dense math
    system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
    same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions have a 'pm' prefix, and we add
    the 'dm' prefix afterwards.  To prevent having two sets of parallel int
    attributes, we remove the "pm" prefix from the instruction string in the
    attributes, and add it later, both in the insn name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_xxsetaccz): Add support for running on DMF systems, generating the
            dense math instruction and using the dense math accumulators.
            (mma_<vv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok):
            New target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 368 +++++++++++++---------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  23 ++
 3 files changed, 432 insertions(+), 153 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8799f4137fa..1a93c60418f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instructions.  If we have MMA but we
+;; don't have dense math register support we want to issue the instruction with
+;; a "pm" prefix, but if we have dense math registers, we want to issue it with
+;; a "pmdm" prefix.  I.e. pmxvi4ger8 vs. pmdmxvi4ger8
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -542,178 +546,236 @@
 ;; UNSPEC_VOLATILE for the non-dense math case.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
-  [(set_attr "type" "mma")])
+  "@
+   dmsetdmrz %A0
+   xxsetaccz %A0"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
+  "@
+   dm<avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
-
-(define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+(define_insn "mma_pm<avvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<pvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<apvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..54742a95142 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7116,6 +7116,29 @@ proc check_effective_target_power11_ok { } {
 		return 0;
 	    }
 	} "-mcpu=power11"]
+    } else {
+	return 0;
+    }
+}
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		return 0;
+	    }
+	} "-mcpu=power11"]
     } else {
 	return 0
     }

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05  4:49 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05  4:49 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:04ca59c2cfd673f7cfd1a3aa55a316d4aeda9ae9

commit 04ca59c2cfd673f7cfd1a3aa55a316d4aeda9ae9
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Mar 4 23:49:20 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original name used in power10 to the new name when used with the dense math
    system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
    same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions have a 'pm' prefix, and we add
    the 'dm' prefix afterwards.  To prevent having two sets of parallel int
    attributes, we remove the "pm" prefix from the instruction string in the
    attributes, and add it later, both in the insn name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-03-04   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_<vv>): Add support for running on DMF systems, generating the dense
            math instruction and using the dense math accumulators.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp
            (check_effective_target_powerpc_dense_math_ok): New effective
            target test.

Diff:
---
 gcc/config/rs6000/mma.md                      | 155 ++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-builtin.cc           |  17 +++
 gcc/config/rs6000/rs6000-call.cc              |  10 +-
 gcc/config/rs6000/rs6000-modes.def            |   4 +
 gcc/config/rs6000/rs6000.cc                   | 123 +++++++++++++++-----
 gcc/config/rs6000/rs6000.h                    |   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++++++++++
 7 files changed, 343 insertions(+), 35 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 1a93c60418f..f13a7a2f877 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,6 +91,11 @@
    UNSPEC_MMA_XVI8GER4SPP
    UNSPEC_MMA_XXMFACC
    UNSPEC_MMA_XXMTACC
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -779,3 +784,153 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
+
+\f
+;; TDOmode (i.e. __dmr).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+	(match_operand:TDO 1 "input_operand"))]
+  "TARGET_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand" "=wa,m,wa,wD,wD,wa")
+	(match_operand:TDO 1 "input_operand" "m,wa,wa,wa,wD,wD"))]
+  "TARGET_DENSE_MATH
+   && (gpc_reg_operand (operands[0], TDOmode)
+       || gpc_reg_operand (operands[1], TDOmode))"
+  "@
+   #
+   #
+   #
+   #
+   dmmr %0,%1
+   #"
+  "&& reload_completed
+   && (!dmr_operand (operands[0], TDOmode) || !dmr_operand (operands[1], TDOmode))"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+
+  if (REG_P (op0) && REG_P (op1))
+    {
+      int regno0 = REGNO (op0);
+      int regno1 = REGNO (op1);
+
+      if (DMR_REGNO_P (regno0) && VSX_REGNO_P (regno1))
+	{
+	  rtx op1_upper = gen_rtx_REG (XOmode, regno1);
+	  rtx op1_lower = gen_rtx_REG (XOmode, regno1 + 4);
+	  emit_insn (gen_movtdo_insert512_upper (op0, op1_upper));
+	  emit_insn (gen_movtdo_insert512_lower (op0, op0, op1_lower));
+	  DONE;
+	}
+
+      else if (VSX_REGNO_P (regno0) && DMR_REGNO_P (regno1))
+	{
+	  rtx op0_upper = gen_rtx_REG (XOmode, regno0);
+	  rtx op0_lower = gen_rtx_REG (XOmode, regno0 + 4);
+	  emit_insn (gen_movtdo_extract512 (op0_upper, op1, const0_rtx));
+	  emit_insn (gen_movtdo_extract512 (op0_lower, op1, const1_rtx));
+	  DONE;
+	}
+
+      else
+	gcc_assert (VSX_REGNO_P (regno0) && VSX_REGNO_P (regno1));
+    }
+
+  rs6000_split_multireg_move (operands[0], operands[1]);
+  DONE;
+}
+  [(set_attr "type" "vecload,vecstore,vecmove,vecmove,vecmove,vecmove")
+   (set_attr "length" "*,*,32,8,*,8")
+   (set_attr "max_prefixed_insns" "4,4,*,*,*,*")])
+
+;; Move from VSX registers to DMR registers via two insert 512 bit
+;; instructions.
+(define_insn "movtdo_insert512_upper"
+  [(set (match_operand:TDO 0 "dmr_operand" "=wD")
+	(unspec:TDO [(match_operand:XO 1 "vsx_register_operand" "wa")]
+		    UNSPEC_DM_INSERT512_UPPER))]
+  "TARGET_DENSE_MATH"
+  "dmxxinstdmr512 %0,%1,%Y1,0"
+  [(set_attr "type" "mma")])
+
+(define_insn "movtdo_insert512_lower"
+  [(set (match_operand:TDO 0 "dmr_operand" "=wD")
+	(unspec:TDO [(match_operand:TDO 1 "dmr_operand" "0")
+		     (match_operand:XO 2 "vsx_register_operand" "wa")]
+		    UNSPEC_DM_INSERT512_LOWER))]
+  "TARGET_DENSE_MATH"
+  "dmxxinstdmr512 %0,%2,%Y2,1"
+  [(set_attr "type" "mma")])
+
+;; Move from DMR registers to VSX registers via two extract 512 bit
+;; instructions.
+(define_insn "movtdo_extract512"
+  [(set (match_operand:XO 0 "vsx_register_operand" "=wa")
+	(unspec:XO [(match_operand:TDO 1 "dmr_operand" "wD")
+		    (match_operand 2 "const_0_to_1_operand" "n")]
+		   UNSPEC_DM_EXTRACT512))]
+  "TARGET_DENSE_MATH"
+  "dmxxextfdmr512 %0,%Y0,%1,%2"
+  [(set_attr "type" "mma")])
+
+;; Reload DMR registers from memory
+(define_insn_and_split "reload_dmr_from_memory"
+  [(set (match_operand:TDO 0 "dmr_operand" "=wD")
+	(unspec:TDO [(match_operand:TDO 1 "memory_operand" "m")]
+		    UNSPEC_DMR_RELOAD_FROM_MEMORY))
+   (clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))]
+  "TARGET_DENSE_MATH"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx mem_upper = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 0 : 64);
+  rtx mem_lower = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 64 : 0);
+
+  emit_move_insn (tmp, mem_upper);
+  emit_insn (gen_movtdo_insert512_upper (dest, tmp));
+
+  emit_move_insn (tmp, mem_lower);
+  emit_insn (gen_movtdo_insert512_lower (dest, dest, tmp));
+  DONE;
+}
+  [(set_attr "length" "16")
+   (set_attr "max_prefixed_insns" "2")
+   (set_attr "type" "vecload")])
+
+;; Reload dense math registers to memory
+(define_insn_and_split "reload_dmr_to_memory"
+  [(set (match_operand:TDO 0 "memory_operand" "=m")
+	(unspec:TDO [(match_operand:TDO 1 "dmr_operand" "wD")]
+		    UNSPEC_DMR_RELOAD_TO_MEMORY))
+   (clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))]
+  "TARGET_DENSE_MATH"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx mem_upper = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 0 : 64);
+  rtx mem_lower = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 64 : 0);
+
+  emit_insn (gen_movtdo_extract512 (tmp, src, const0_rtx));
+  emit_move_insn (mem_upper, tmp);
+
+  emit_insn (gen_movtdo_extract512 (tmp, src, const1_rtx));
+  emit_move_insn (mem_lower, tmp);
+  DONE;
+}
+  [(set_attr "length" "16")
+   (set_attr "max_prefixed_insns" "2")])
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index cf96ec6a869..976a42a74cd 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -495,6 +495,8 @@ const char *rs6000_type_string (tree type_node)
     return "__vector_pair";
   else if (type_node == vector_quad_type_node)
     return "__vector_quad";
+  else if (type_node == dmr_type_node)
+    return "__dmr";
 
   return "unknown";
 }
@@ -781,6 +783,21 @@ rs6000_init_builtins (void)
   t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST);
   ptr_vector_quad_type_node = build_pointer_type (t);
 
+  /* For TDOmode (1,024 bit dense math accumulators), don't use an alignment of
+     1,024, use 512.  TDOmode loads and stores are always broken up into 2
+     vector pair loads or stores.  In addition, we don't have support for
+     aligning the stack to 1,024 bits.  */
+  dmr_type_node = make_node (OPAQUE_TYPE);
+  SET_TYPE_MODE (dmr_type_node, TDOmode);
+  TYPE_SIZE (dmr_type_node) = bitsize_int (GET_MODE_BITSIZE (TDOmode));
+  TYPE_PRECISION (dmr_type_node) = GET_MODE_BITSIZE (TDOmode);
+  TYPE_SIZE_UNIT (dmr_type_node) = size_int (GET_MODE_SIZE (TDOmode));
+  SET_TYPE_ALIGN (dmr_type_node, 512);
+  TYPE_USER_ALIGN (dmr_type_node) = 0;
+  lang_hooks.types.register_builtin_type (dmr_type_node, "__dmr");
+  t = build_qualified_type (dmr_type_node, TYPE_QUAL_CONST);
+  ptr_dmr_type_node = build_pointer_type (t);
+
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
 
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 8c590903c86..5cda8375902 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -437,14 +437,15 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
   if (cfun
       && !cfun->machine->mma_return_type_error
       && TREE_TYPE (cfun->decl) == fntype
-      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+      && OPAQUE_MODE_P (TYPE_MODE (type)))
     {
       /* Record we have now handled function CFUN, so the next time we
 	 are called, we do not re-report the same error.  */
       cfun->machine->mma_return_type_error = true;
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
+      error ("invalid use of %s type %qs as a function return value",
+	     (TYPE_MODE (type) == TDOmode) ? "dense math" : "MMA",
 	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
     }
 
@@ -1632,11 +1633,12 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == OOmode || mode == XOmode)
+  if (OPAQUE_MODE_P (mode))
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA operand of type %qs as a function parameter",
+      error ("invalid use of %s operand of type %qs as a function parameter",
+	     (mode == TDOmode) ? "dense math" : "MMA",
 	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
       return NULL_RTX;
     }
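
For illustration only (not part of the patch), this is the kind of code the
extended checks are meant to reject; with dense math enabled both functions
should now get the "dense math" wording of the existing MMA diagnostics.
The function names here are hypothetical:

  /* Returning a __dmr: "invalid use of dense math type '__dmr' as a
     function return value".  */
  __dmr
  ret_dmr (__dmr *p)
  {
    return *p;
  }

  /* Passing a __dmr by value: "invalid use of dense math operand of type
     '__dmr' as a function parameter".  */
  void
  take_dmr (__dmr d, __dmr *p)
  {
    *p = d;
  }
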
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index 094b246c834..43d839bf30c 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -86,3 +86,7 @@ PARTIAL_INT_MODE (TI, 128, PTI);
 /* Modes used by __vector_pair and __vector_quad.  */
 OPAQUE_MODE (OO, 32);
 OPAQUE_MODE (XO, 64);
+
+/* Mode used by __dmr.  */
+OPAQUE_MODE (TDO, 128);
+
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index e83c507a8d5..c6c61a6f1f7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1836,7 +1836,8 @@ rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
      128-bit floating point that can go in vector registers, which has VSX
      memory addressing.  */
   if (FP_REGNO_P (regno))
-    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
+    reg_size = (VECTOR_MEM_VSX_P (mode)
+		|| VECTOR_ALIGNMENT_P (mode)
 		? UNITS_PER_VSX_WORD
 		: UNITS_PER_FP_WORD);
 
@@ -1870,9 +1871,9 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
   /* On ISA 3.1 (power10), MMA accumulator modes need FPR registers divisible
      by 4.
 
-     If dense math is enabled, allow all VSX registers plus the DMR registers.
-     We need to make sure we don't cross between the boundary of FPRs and
-     traditional Altiviec registers.  */
+     If dense math is enabled, allow all VSX registers plus the dense math
+     registers.  We need to make sure we don't cross the boundary between
+     FPRs and traditional Altivec registers.  */
   if (mode == XOmode)
     {
       if (TARGET_MMA)
@@ -1894,7 +1895,27 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
 	return 0;
     }
 
-  /* No other types other than XOmode can go in DMRs.  */
+  /* Dense math register modes need DMR registers or VSX registers divisible by
+     2.  We need to make sure we don't cross the boundary between FPRs and
+     traditional Altivec registers.  */
+  if (mode == TDOmode)
+    {
+      if (!TARGET_DENSE_MATH)
+	return 0;
+
+      if (DMR_REGNO_P (regno))
+	return 1;
+
+      if (FP_REGNO_P (regno))
+	return ((regno & 1) == 0 && regno <= LAST_FPR_REGNO - 7);
+
+      if (ALTIVEC_REGNO_P (regno))
+	return ((regno & 1) == 0 && regno <= LAST_ALTIVEC_REGNO - 7);
+
+      return 0;
+    }
+
+  /* No other types other than XOmode or TDOmode can go in DMRs.  */
   if (DMR_REGNO_P (regno))
     return 0;
 
@@ -2002,9 +2023,11 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow OOmode (vector pair, restricted to even VSX
-   registers) or XOmode (vector quad, restricted to FPR registers divisible
-   by 4) to tie with other modes.
+   Similarly, don't allow OOmode (vector pair), XOmode (vector quad), or
+   TDOmode (dense math register) to tie with other modes.  Vector pairs are
+   restricted to even/odd VSX registers.  Without dense math, vector quads are
+   limited to FPR registers divisible by 4.  With dense math, vector quads are
+   limited to even VSX registers or DMR registers.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
    128-bit floating point on VSX systems ties with other vectors.  */
@@ -2013,7 +2036,8 @@ static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
   if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
-      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
+      || mode1 == TDOmode || mode2 == PTImode || mode2 == OOmode
+      || mode2 == XOmode || mode2 == TDOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2304,6 +2328,7 @@ rs6000_debug_reg_global (void)
     V4DFmode,
     OOmode,
     XOmode,
+    TDOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2669,7 +2694,7 @@ rs6000_setup_reg_addr_masks (void)
 	  /* Special case DMR registers.  */
 	  if (rc == RELOAD_REG_DMR)
 	    {
-	      if (TARGET_DENSE_MATH && m2 == XOmode)
+	      if (TARGET_DENSE_MATH && (m2 == XOmode || m2 == TDOmode))
 		{
 		  addr_mask = RELOAD_REG_VALID;
 		  reg_addr[m].addr_mask[rc] = addr_mask;
@@ -2776,10 +2801,10 @@ rs6000_setup_reg_addr_masks (void)
 
 	  /* Vector pairs can do both indexed and offset loads if the
 	     instructions are enabled, otherwise they can only do offset loads
-	     since it will be broken into two vector moves.  Vector quads can
-	     only do offset loads.  */
+	     since it will be broken into two vector moves.  Vector quads and
+	     dense math 1,024 bit registers can only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == OOmode || m2 == XOmode))
+		   && (m2 == OOmode || m2 == XOmode || m2 == TDOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
@@ -3007,6 +3032,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[XOmode] = 512;
     }
 
+  /* Add support for 1,024 bit DMR registers.  */
+  if (TARGET_DENSE_MATH)
+    {
+      rs6000_vector_unit[TDOmode] = VECTOR_NONE;
+      rs6000_vector_mem[TDOmode] = VECTOR_VSX;
+      rs6000_vector_align[TDOmode] = 512;
+    }
+
   /* Register class constraints for the constraints that depend on compile
      switches. When the VSX code was added, different constraints were added
      based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
@@ -3219,6 +3252,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	}
     }
 
+  if (TARGET_DENSE_MATH)
+    {
+      reg_addr[TDOmode].reload_load = CODE_FOR_reload_dmr_from_memory;
+      reg_addr[TDOmode].reload_store = CODE_FOR_reload_dmr_to_memory;
+    }
+
   /* Precalculate HARD_REGNO_NREGS.  */
   for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
     for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -8694,12 +8733,15 @@ reg_offset_addressing_ok_p (machine_mode mode)
 	return mode_supports_dq_form (mode);
       break;
 
-      /* The vector pair/quad types support offset addressing if the
-	 underlying vectors support offset addressing.  */
+      /* The vector pair/quad types and the dense math types support offset
+	 addressing if the underlying vectors support offset addressing.  */
     case E_OOmode:
     case E_XOmode:
       return TARGET_MMA;
 
+    case E_TDOmode:
+      return TARGET_DENSE_MATH;
+
     case E_SDmode:
       /* If we can do direct load/stores of SDmode, restrict it to reg+reg
 	 addressing for the LFIWZX and STFIWX instructions.  */
@@ -11248,6 +11290,12 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
+    case E_TDOmode:
+      if (CONST_INT_P (operands[1]))
+	error ("%qs is an opaque type, and you cannot set it to constants",
+	       "__dmr");
+      break;
+
     case E_SImode:
     case E_DImode:
       /* Use default pattern for address of ELF small data */
@@ -12711,7 +12759,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
 
   /* We can transfer between VSX registers and DMR registers without needing
      extra registers.  */
-  if (TARGET_DENSE_MATH && mode == XOmode
+  if (TARGET_DENSE_MATH && (mode == XOmode || mode == TDOmode)
       && ((to_type == DMR_REG_TYPE && from_type == VSX_REG_TYPE)
 	  || (to_type == VSX_REG_TYPE && from_type == DMR_REG_TYPE)))
     return true;
@@ -13512,6 +13560,9 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
       if (mode == XOmode)
 	return TARGET_MMA_DENSE_MATH ? VSX_REGS : FLOAT_REGS;
 
+      if (mode == TDOmode)
+	return VSX_REGS;
+
       if (GET_MODE_CLASS (mode) == MODE_INT)
 	return GENERAL_REGS;
     }
@@ -20728,6 +20779,8 @@ rs6000_mangle_type (const_tree type)
     return "u13__vector_pair";
   if (type == vector_quad_type_node)
     return "u13__vector_quad";
+  if (type == dmr_type_node)
+    return "u5__dmr";
 
   /* For all other types, use the default mangling.  */
   return NULL;
@@ -22853,6 +22906,10 @@ rs6000_dmr_register_move_cost (machine_mode mode, reg_class_t rclass)
       if (mode == XOmode)
 	return reg_move_base;
 
+      /* __dmr (i.e. TDOmode) is transferred in 2 instructions.  */
+      else if (mode == TDOmode)
+	return reg_move_base * 2;
+
       else
 	return reg_move_base * 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode);
     }
@@ -27540,9 +27597,10 @@ rs6000_split_multireg_move (rtx dst, rtx src)
   mode = GET_MODE (dst);
   nregs = hard_regno_nregs (reg, mode);
 
-  /* If we have a vector quad register for MMA, and this is a load or store,
-     see if we can use vector paired load/stores.  */
-  if (mode == XOmode && TARGET_MMA
+  /* If we have a vector quad register for MMA or DMR register for dense math,
+     and this is a load or store, see if we can use vector paired
+     load/stores.  */
+  if ((mode == XOmode || mode == TDOmode) && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
       reg_mode = OOmode;
@@ -27550,7 +27608,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == OOmode || mode == XOmode)
+  else if (mode == OOmode || mode == XOmode || mode == TDOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -27596,13 +27654,13 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register
-     modes, so if we have to load or store the registers, we have to be
-     careful to properly swap them if we're in little endian mode
-     below.  This means the last register gets the first memory
-     location.  We also need to be careful of using the right register
-     numbers if we are splitting XO to OO.  */
-  if (mode == OOmode || mode == XOmode)
+  /* The __vector_pair, __vector_quad, and __dmr modes are multi-register
+     modes, so if we have to load or store the registers, we have to be careful
+     to properly swap them if we're in little endian mode below.  This means
+     the last register gets the first memory location.  We also need to be
+     careful of using the right register numbers if we are splitting XO to
+     OO.  */
+  if (mode == OOmode || mode == XOmode || mode == TDOmode)
     {
       nregs = hard_regno_nregs (reg, mode);
       int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
@@ -27739,7 +27797,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	 overlap.  */
       int i;
       /* XO/OO are opaque so cannot use subregs. */
-      if (mode == OOmode || mode == XOmode )
+      if (mode == OOmode || mode == XOmode || mode == TDOmode)
 	{
 	  for (i = nregs - 1; i >= 0; i--)
 	    {
@@ -27913,7 +27971,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	    continue;
 
 	  /* XO/OO are opaque so cannot use subregs. */
-	  if (mode == OOmode || mode == XOmode )
+	  if (mode == OOmode || mode == XOmode || mode == TDOmode)
 	    {
 	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
 	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
@@ -28895,7 +28953,8 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      /* Do not allow conversions to/from XOmode, OOmode, and TDOmode
+	 types.  */
       if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
       if (tomode == XOmode)
@@ -28904,6 +28963,10 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 	return N_("invalid conversion from type %<__vector_pair%>");
       if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
+      if (frommode == TDOmode)
+	return N_("invalid conversion from type %<__dmr%>");
+      if (tomode == TDOmode)
+	return N_("invalid conversion to type %<__dmr%>");
     }
 
   /* Conversion allowed.  */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index f5d144cbb12..67ef3d3a7d0 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1009,7 +1009,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
+  (FLOAT128_VECTOR_P (MODE) || OPAQUE_MODE_P (MODE))
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2298,6 +2298,7 @@ enum rs6000_builtin_type_index
   RS6000_BTI_const_str,		 /* pointer to const char * */
   RS6000_BTI_vector_pair,	 /* unsigned 256-bit types (vector pair).  */
   RS6000_BTI_vector_quad,	 /* unsigned 512-bit types (vector quad).  */
+  RS6000_BTI_dmr,		 /* unsigned 1,024-bit types (dmr).  */
   RS6000_BTI_const_ptr_void,     /* const pointer to void */
   RS6000_BTI_ptr_V16QI,
   RS6000_BTI_ptr_V1TI,
@@ -2336,6 +2337,7 @@ enum rs6000_builtin_type_index
   RS6000_BTI_ptr_dfloat128,
   RS6000_BTI_ptr_vector_pair,
   RS6000_BTI_ptr_vector_quad,
+  RS6000_BTI_ptr_dmr,
   RS6000_BTI_ptr_long_long,
   RS6000_BTI_ptr_long_long_unsigned,
   RS6000_BTI_MAX
@@ -2393,6 +2395,7 @@ enum rs6000_builtin_type_index
 #define const_str_type_node		 (rs6000_builtin_types[RS6000_BTI_const_str])
 #define vector_pair_type_node		 (rs6000_builtin_types[RS6000_BTI_vector_pair])
 #define vector_quad_type_node		 (rs6000_builtin_types[RS6000_BTI_vector_quad])
+#define dmr_type_node			 (rs6000_builtin_types[RS6000_BTI_dmr])
 #define pcvoid_type_node		 (rs6000_builtin_types[RS6000_BTI_const_ptr_void])
 #define ptr_V16QI_type_node		 (rs6000_builtin_types[RS6000_BTI_ptr_V16QI])
 #define ptr_V1TI_type_node		 (rs6000_builtin_types[RS6000_BTI_ptr_V1TI])
@@ -2431,6 +2434,7 @@ enum rs6000_builtin_type_index
 #define ptr_dfloat128_type_node		 (rs6000_builtin_types[RS6000_BTI_ptr_dfloat128])
 #define ptr_vector_pair_type_node	 (rs6000_builtin_types[RS6000_BTI_ptr_vector_pair])
 #define ptr_vector_quad_type_node	 (rs6000_builtin_types[RS6000_BTI_ptr_vector_quad])
+#define ptr_dmr_type_node		 (rs6000_builtin_types[RS6000_BTI_ptr_dmr])
 #define ptr_long_long_integer_type_node	 (rs6000_builtin_types[RS6000_BTI_ptr_long_long])
 #define ptr_long_long_unsigned_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long_unsigned])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-1024bit.c b/gcc/testsuite/gcc.target/powerpc/dm-1024bit.c
new file mode 100644
index 00000000000..0a9884ddf63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-1024bit.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test basic load/store for __dmr type.  */
+
+#ifndef CONSTRAINT
+#if defined(USE_D)
+#define CONSTRAINT "d"
+
+#elif defined(USE_V)
+#define CONSTRAINT "v"
+
+#elif defined(USE_WA)
+#define CONSTRAINT "wa"
+
+#else
+#define CONSTRAINT "wD"
+#endif
+#endif
+const char constraint[] = CONSTRAINT;
+
+void foo_mem_asm (__dmr *p, __dmr *q)
+{
+  /* 4 LXVP instructions.  */
+  __dmr vq = *p;
+
+  /* 2 DMXXINSTDMR512 instructions to transfer VSX to DMR.  */
+  __asm__ ("# foo (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq));
+  /* 2 DMXXEXTFDMR512 instructions to transfer DMR to VSX.  */
+
+  /* 4 STXVP instructions.  */
+  *q = vq;
+}
+
+void foo_mem_asm2 (__dmr *p, __dmr *q)
+{
+  /* 4 LXVP instructions.  */
+  __dmr vq = *p;
+  __dmr vq2;
+  __dmr vq3;
+
+  /* 2 DMXXINSTDMR512 instructions to transfer VSX to DMR.  */
+  __asm__ ("# foo1 (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq));
+  /* 2 DMXXEXTFDMR512 instructions to transfer DMR to VSX.  */
+
+  vq2 = vq;
+  __asm__ ("# foo2 (wa) %0" : "+wa" (vq2));
+
+  /* 4 STXVP instructions.  */
+  *q = vq2;
+}
+
+void foo_mem (__dmr *p, __dmr *q)
+{
+  /* 4 LXVP, 4 STXVP instructions, no DMR transfer.  */
+  *q = *p;
+}
+
+/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M}           12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}          12 } } */
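
A usage note, not part of the patch: the test defaults to the accumulator
constraint "wD", but the CONSTRAINT machinery above also lets the same source
be compiled with -DUSE_D, -DUSE_V, or -DUSE_WA so the inline asm operand goes
through the "d", "v", or "wa" register constraints instead, which is a
convenient way to exercise the DMR/VSX transfer patterns by hand.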

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-03-05  3:59 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-03-05  3:59 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0756633a3b385858deea8afec1e49f42a2b6ec4a

commit 0756633a3b385858deea8afec1e49f42a2b6ec4a
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Mar 4 22:59:07 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original name used in power10 to the new name when used with the dense math
    system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
    same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions have a 'pm' prefix, and we add
    the 'dm' prefix afterwards.  To prevent having two sets of parallel int
    attributes, we remove the "pm" prefix from the instruction string in the
    attributes, and add it later, both in the insn name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-03-04   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_<vv>): Add support for running on DMF systems, generating the dense
            math instruction and using the dense math accumulators.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp
            (check_effective_target_powerpc_dense_math_ok): New effective
            target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 368 +++++++++++++---------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  23 ++
 3 files changed, 432 insertions(+), 153 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8799f4137fa..1a93c60418f 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instructions.  If we have MMA but we
+;; don't have dense math register support we want to issue the instruction with
+;; a "pm" prefix, but if we have dense math registers, we want to issue it with
+;; a "pmdm" prefix.  I.e. pmxvi4ger8 vs. pmdmxvi4ger8
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -542,178 +546,236 @@
 ;; UNSPEC_VOLATILE for the non-dense math case.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
-  "xxsetaccz %A0"
-  [(set_attr "type" "mma")])
+  "@
+   dmsetdmrz %A0
+   xxsetaccz %A0"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
+  "@
+   dm<avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3
+   <avv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
-  [(set_attr "type" "mma")])
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
-  [(set_attr "type" "mma")])
-
-(define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
+  [(set_attr "type" "mma")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i8>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
-(define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
+(define_insn "mma_pm<avvi4i4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<pvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<apvi4i2>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<vvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
-
-(define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
-		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
-		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
-		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
-		    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
+
+(define_insn "mma_pm<avvi4i4i4>"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
-   (set_attr "prefixed" "yes")])
+   (set_attr "prefixed" "yes")
+   (set_attr "isa" "dm,not_dm,not_dm")])
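
As a small illustration (not part of the patch), the same MMA builtins now
pick up the dense math spellings whenever the accumulator is allocated to a
DMR register; the dm-double-test.c test added below checks this on a full
matrix-multiply kernel.  A minimal sketch, with a made-up function name:

  /* With -mcpu=future this should assemble to dmsetdmrz and dmxvf64gerpp;
     on power10 the same source keeps xxsetaccz and xvf64gerpp.  */
  void
  accumulate (__vector_quad *acc, __vector_pair *rowB,
	      __vector unsigned char *rowA)
  {
    __builtin_mma_xxsetaccz (acc);
    __builtin_mma_xvf64gerpp (acc, *rowB, *rowA);
  }
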
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
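
Judging from the scan-assembler patterns above, the dense math spellings this test
expects are dmsetdmrz (for __builtin_mma_xxsetaccz), dmxvf64gerpp (for
__builtin_mma_xvf64gerpp) and dmxxextfdmr512 (for __builtin_mma_disassemble_acc).
A stripped-down sketch of the inner kernel, for illustration only; the function
and type names (rank1_update, v2df_t) are made up for the example:

  typedef unsigned char vec_t __attribute__ ((vector_size (16)));
  typedef double v2df_t __attribute__ ((vector_size (16)));

  /* Hypothetical example; compile with an MMA/dense-math -mcpu level.  */
  void
  rank1_update (double *c, vec_t a, __vector_pair b)
  {
    __vector_quad acc;
    v2df_t rows[4];

    __builtin_mma_xxsetaccz (&acc);              /* dmsetdmrz on dense math.  */
    __builtin_mma_xvf64gerpp (&acc, b, a);       /* dmxvf64gerpp on dense math.  */
    __builtin_mma_disassemble_acc (rows, &acc);  /* dmxxextfdmr512 on dense math.  */

    for (int i = 0; i < 4; i++)
      ((v2df_t *) c)[i] += rows[i];
  }
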
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..54742a95142 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7116,6 +7116,29 @@ proc check_effective_target_power11_ok { } {
 		return 0;
 	    }
 	} "-mcpu=power11"]
+    } else {
+	return 0;
+    }
+}
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+    if { ([istarget powerpc*-*-*]) } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		}
+	} "-mcpu=future"]
     } else {
 	return 0
     }

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations.
@ 2024-02-29 19:18 Michael Meissner
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Meissner @ 2024-02-29 19:18 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c0f4153dd7c5d8d05df3b360fd6fcbaa9d5d44d9

commit c0f4153dd7c5d8d05df3b360fd6fcbaa9d5d44d9
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Feb 29 13:54:17 2024 -0500

    PowerPC: Switch to dense math names for all MMA operations.
    
    This patch changes the assembler instruction names for MMA instructions from
    the original name used in power10 to the new name when used with the dense math
    system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
    same bits for either spelling.
    
    The patches have been tested on both little and big endian systems.  Can I check
    it into the master branch?
    
    For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
    instruction.  However, the prefixed instructions have a 'pm' prefix, and we add
    the 'dm' prefix afterwards.  To prevent having two sets of parallel int
    attributes, we remove the "pm" prefix from the instruction string in the
    attributes, and add it later, both in the insn name and in the output template.
    
    For example, previously we had
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    And now we have:
    
      (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
    
      ;; ...
    
      (define_insn "mma_pm<vvi4i4i8>"
        [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
              (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
                          (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
                          (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
                          MMA_VVI4I4I8))]
        "TARGET_MMA"
        "@
         pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
         pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
        [(set_attr "type" "mma")
         (set_attr "prefixed" "yes")
         (set_attr "isa" "dm,not_dm,not_dm")])
    
    2024-02-29   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a
            "pm" prefix.
            (avvi4i4i8): Likewise.
            (vvi4i4i2): Likewise.
            (avvi4i4i2): Likewise.
            (vvi4i4): Likewise.
            (avvi4i4): Likewise.
            (pvi4i2): Likewise.
            (apvi4i2): Likewise.
            (vvi4i4i4): Likewise.
            (avvi4i4i4): Likewise.
            (mma_<vv>): Add support for running on DMF systems, generating the dense
            math instruction and using the dense math accumulators.
            (mma_<pv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_pm<vvi4i4i8>): Add support for running on DMF systems, generating
            the dense math instruction and using the dense math accumulators.
            Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
            prefixes based on whether we have the original MMA specification or if
            we have dense math support.
            (mma_pm<avvi4i4i8>): Likewise.
            (mma_pm<vvi4i4i2>): Likewise.
            (mma_pm<avvi4i4i2>): Likewise.
            (mma_pm<vvi4i4>): Likewise.
            (mma_pm<avvi4i4>): Likewise.
            (mma_pm<pvi4i2>): Likewise.
            (mma_pm<apvi4i2>): Likewise.
            (mma_pm<vvi4i4i4>): Likewise.
            (mma_pm<avvi4i4i4>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/dm-double-test.c: New test.
            * lib/target-supports.exp
            (check_effective_target_powerpc_dense_math_ok): New target test.

Diff:
---
 gcc/config/rs6000/mma.md                          | 147 ++++++++++------
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp             |  19 +++
 3 files changed, 308 insertions(+), 52 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ac26de93143..df329b00f15 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,48 @@
 				 (UNSPEC_MMA_XVF64GERNP		"xvf64gernp")
 				 (UNSPEC_MMA_XVF64GERNN		"xvf64gernn")])
 
-(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
+;; Do not include the "pm" prefix in these instructions.  If we have MMA but we
+;; don't have dense math register support we want to issue the instruction with
+;; a "pm" prefix, but if we have dense math registers, we want to issue it with
+;; a "pmdm" prefix.  I.e. pmxvi4ger8 vs. pmdmxvi4ger8
+(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
 
-(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
-				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
-				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
-				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"xvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"xvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"xvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"xvbf16ger2")])
 
-(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
-				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
-				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
-				 (UNSPEC_MMA_PMXVF16GER2PN	"pmxvf16ger2pn")
-				 (UNSPEC_MMA_PMXVF16GER2NP	"pmxvf16ger2np")
-				 (UNSPEC_MMA_PMXVF16GER2NN	"pmxvf16ger2nn")
-				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmxvbf16ger2pp")
-				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmxvbf16ger2pn")
-				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
-				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"xvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"xvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"xvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"xvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"xvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"xvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"xvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"xvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"xvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"xvbf16ger2nn")])
 
-(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
+(define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"xvf32ger")])
 
-(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
-				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
-				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
-				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
+(define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"xvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"xvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"xvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"xvf32gernn")])
 
-(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
+(define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"xvf64ger")])
 
-(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
-				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
-				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
-				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
+(define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"xvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"xvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"xvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"xvf64gernn")])
 
-(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
+(define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"xvi8ger4")])
 
-(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
-				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"xvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -622,7 +626,10 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -643,7 +650,10 @@
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -654,11 +664,14 @@
 		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4i8>"
+(define_insn "mma_pm<vvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -667,12 +680,15 @@
 		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4i8>"
+(define_insn "mma_pm<avvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -682,12 +698,15 @@
 		    (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
 		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4i2>"
+(define_insn "mma_pm<vvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -696,12 +715,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4i2>"
+(define_insn "mma_pm<avvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -711,12 +733,15 @@
 		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4>"
+(define_insn "mma_pm<vvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -724,12 +749,15 @@
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4
+   pm<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4>"
+(define_insn "mma_pm<avvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -738,12 +766,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5
+   pm<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<pvi4i2>"
+(define_insn "mma_pm<pvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -751,12 +782,15 @@
 		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   pmdm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4
+   pm<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<apvi4i2>"
+(define_insn "mma_pm<apvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
@@ -765,12 +799,15 @@
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   pmdm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5
+   pm<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<vvi4i4i4>"
+(define_insn "mma_pm<vvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -779,12 +816,15 @@
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
-(define_insn "mma_<avvi4i4i4>"
+(define_insn "mma_pm<avvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
 	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
@@ -794,7 +834,10 @@
 		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
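
For the non-prefixed mma_<vv>/mma_<pv>/mma_<apv> templates in this diff, a hedged
sketch of what the "@" alternatives mean in practice (ger_step is a made-up name;
the built-in is the one used by dm-double-test.c below):

  typedef unsigned char vec_t __attribute__ ((vector_size (16)));

  /* Hypothetical example; the call site is identical on power10 and on a
     dense math target, only the printed mnemonic changes.  */
  void
  ger_step (__vector_quad *acc, __vector_pair b, vec_t a)
  {
    /* Alternative 0 (dense math accumulator): dmxvf64gerpp
       alternatives 1 and 2 (original MMA):    xvf64gerpp  */
    __builtin_mma_xvf64gerpp (acc, b, a);
  }
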
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e23d3ec8b3c..96033b950eb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7121,6 +7121,25 @@ proc check_effective_target_power11_ok { } {
     }
 }
 
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		}
+	} "-mcpu=future"]
+}
+
 # Return 1 if this is a PowerPC target supporting -mfloat128 via either
 # software emulation on power7/power8 systems or hardware support on power9.

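
A testcase selects this support with the usual DejaGnu directives; dm-double-test.c
above uses exactly these:

  /* { dg-do compile } */
  /* { dg-require-effective-target powerpc_dense_math_ok } */
  /* { dg-options "-mdejagnu-cpu=future -O2" } */
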
^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-03-06  0:36 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-29 19:28 [gcc(refs/users/meissner/heads/work161-dmf)] PowerPC: Switch to dense math names for all MMA operations Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2024-03-06  0:36 Michael Meissner
2024-03-05 20:34 Michael Meissner
2024-03-05  7:46 Michael Meissner
2024-03-05  6:00 Michael Meissner
2024-03-05  5:42 Michael Meissner
2024-03-05  5:22 Michael Meissner
2024-03-05  4:49 Michael Meissner
2024-03-05  3:59 Michael Meissner
2024-02-29 19:18 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).