public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
@ 2024-03-01 23:34 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2024-03-01 23:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1a2eb4d6e20fafa7722d09957894e3ca87439aa6

commit 1a2eb4d6e20fafa7722d09957894e3ca87439aa6
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Mar 1 18:33:14 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-01   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_<vv>): Use accumulator_operand and the wD
            constraint.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 +++
 gcc/config/rs6000/mma.md         | 40 ++++++++++++++++++++--------------------
 gcc/config/rs6000/predicates.md  | 15 +++++++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 ++++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 +++++
 6 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..d8c4138f196 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,7 +532,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +615,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +627,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +640,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +652,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +665,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +678,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
@ 2024-03-05 18:41 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2024-03-05 18:41 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ae5ed2ec7666741a05b95e50bec7a02327102dac

commit ae5ed2ec7666741a05b95e50bec7a02327102dac
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 5 13:41:28 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-05   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_disassemble_acc): Likewise.
            (mma_<vv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 +++
 gcc/config/rs6000/mma.md         | 54 +++++++++++++++++++---------------------
 gcc/config/rs6000/predicates.md  | 15 +++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 +++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 ++++
 6 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..49cf5f8fe43 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -452,8 +452,7 @@
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[0], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -486,8 +485,7 @@
        (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
 		      (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -504,8 +502,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +613,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +625,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +638,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +650,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +663,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +676,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
@ 2024-03-05  4:55 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2024-03-05  4:55 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:999ce37be42b1433f91fcd0b02bb336bec763a70

commit 999ce37be42b1433f91fcd0b02bb336bec763a70
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Mar 4 23:55:25 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-04   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_disassemble_acc): Likewise.
            (mma_<vv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 +++
 gcc/config/rs6000/mma.md         | 54 +++++++++++++++++++---------------------
 gcc/config/rs6000/predicates.md  | 15 +++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 +++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 ++++
 6 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..49cf5f8fe43 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -452,8 +452,7 @@
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[0], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -486,8 +485,7 @@
        (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
 		      (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -504,8 +502,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +613,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +625,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +638,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +650,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +663,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +676,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
@ 2024-03-04 23:41 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2024-03-04 23:41 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5f42591bf71d90d22d866d5d156122415f3b9bdd

commit 5f42591bf71d90d22d866d5d156122415f3b9bdd
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Mar 4 14:54:16 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-04   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_disassemble_acc): Likewise.
            (mma_<vv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 +++
 gcc/config/rs6000/mma.md         | 54 +++++++++++++++++++---------------------
 gcc/config/rs6000/predicates.md  | 15 +++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 +++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 ++++
 6 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..49cf5f8fe43 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -452,8 +452,7 @@
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[0], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -486,8 +485,7 @@
        (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
 		      (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -504,8 +502,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +613,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +625,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +638,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +650,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +663,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +676,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
@ 2024-03-02  5:11 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2024-03-02  5:11 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:10e4c8ccdf4ffb381cefafa5a74268df8bbb30fe

commit 10e4c8ccdf4ffb381cefafa5a74268df8bbb30fe
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Sat Mar 2 00:11:23 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-01   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_disassemble_acc): Likewise.
            (mma_<vv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 +++
 gcc/config/rs6000/mma.md         | 54 +++++++++++++++++++---------------------
 gcc/config/rs6000/predicates.md  | 15 +++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 +++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 ++++
 6 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..49cf5f8fe43 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -452,8 +452,7 @@
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[0], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -486,8 +485,7 @@
        (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
 		      (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -504,8 +502,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +613,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +625,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +638,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +650,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +663,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +676,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
@ 2024-03-02  1:46 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2024-03-02  1:46 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:24f1926b5b62c0ff4aff7b7c6d8bb67ae2cb21cc

commit 24f1926b5b62c0ff4aff7b7c6d8bb67ae2cb21cc
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Mar 1 20:46:33 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-01   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_assemble_acc) Use accumulator_operand and
            the wD constraint.
            (mma_disassemble_acc): Likewise.
            (mma_<vv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 ++
 gcc/config/rs6000/mma.md         | 62 +++++++++++++++++++---------------------
 gcc/config/rs6000/predicates.md  | 15 ++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 ++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 ++++
 6 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..a2e54a519fa 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -426,7 +426,7 @@
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:XO 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "accumulator_operand")
    (match_operand:V16QI 1 "mma_assemble_input_operand")
    (match_operand:V16QI 2 "mma_assemble_input_operand")
    (match_operand:V16QI 3 "mma_assemble_input_operand")
@@ -445,15 +445,14 @@
 ;; as an early clobber so we don't accidentally clobber the input operands.  */
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
 	(unspec_volatile:XO
 	  [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[0], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -468,7 +467,7 @@
 
 (define_expand "mma_disassemble_acc"
   [(match_operand:V16QI 0 "mma_disassemble_output_operand")
-   (match_operand:XO 1 "fpr_reg_operand")
+   (match_operand:XO 1 "accumulator_operand")
    (match_operand 2 "const_0_to_3_operand")]
   "TARGET_MMA"
 {
@@ -483,11 +482,10 @@
 
 (define_insn_and_split "*mma_disassemble_acc"
   [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
-       (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
+       (unspec:V16QI [(match_operand:XO 1 "accumulator_operand" "d")
 		      (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -504,8 +502,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +613,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +625,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +638,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +650,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +663,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +676,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-03-05 18:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-01 23:34 [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint Michael Meissner
2024-03-02  1:46 Michael Meissner
2024-03-02  5:11 Michael Meissner
2024-03-04 23:41 Michael Meissner
2024-03-05  4:55 Michael Meissner
2024-03-05 18:41 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).