public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work161-dmf)] Add wD constraint.
Date: Sat,  2 Mar 2024 01:46:55 +0000 (GMT)	[thread overview]
Message-ID: <20240302014655.831463858D32@sourceware.org> (raw)

https://gcc.gnu.org/g:24f1926b5b62c0ff4aff7b7c6d8bb67ae2cb21cc

commit 24f1926b5b62c0ff4aff7b7c6d8bb67ae2cb21cc
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Mar 1 20:46:33 2024 -0500

    Add wD constraint.
    
    This patch adds a new constraint ('wD') that matches the accumulator registers
    that overlap with VSX registers 0..31 on power10.  Future patches will add the
    support for a separate accumulator register class that will be used when the
    support for dense math registes is added.
    
    2024-03-01   Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/constraints.md (wD): New constraint.
            * config/rs6000/mma.md (mma_assemble_acc) Use accumulator_operand and
            the wD constraint.
            (mma_disassemble_acc): Likewise.
            (mma_<vv>): Likewise.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4): Likewise.
            * config/rs6000/predicates.md (accumulator_operand): New predicate.
            * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register
            class for the 'wD' constraint.
            (rs6000_init_hard_regno_mode_ok): Set the 'wD' register constraint
            class.
            * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for
            the 'wD' constraint.
            * doc/md.texi (PowerPC constraints): Document the 'wD' constraint.

Diff:
---
 gcc/config/rs6000/constraints.md |  3 ++
 gcc/config/rs6000/mma.md         | 62 +++++++++++++++++++---------------------
 gcc/config/rs6000/predicates.md  | 15 ++++++++++
 gcc/config/rs6000/rs6000.cc      |  7 ++++-
 gcc/config/rs6000/rs6000.h       |  1 +
 gcc/doc/md.texi                  |  5 ++++
 6 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..277a30a8245 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,6 +107,9 @@
        (match_test "TARGET_P8_VECTOR")
        (match_operand 0 "s5bit_cint_operand")))
 
+(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
+  "Accumulator register.")
+
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..a2e54a519fa 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -426,7 +426,7 @@
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:XO 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "accumulator_operand")
    (match_operand:V16QI 1 "mma_assemble_input_operand")
    (match_operand:V16QI 2 "mma_assemble_input_operand")
    (match_operand:V16QI 3 "mma_assemble_input_operand")
@@ -445,15 +445,14 @@
 ;; as an early clobber so we don't accidentally clobber the input operands.  */
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
 	(unspec_volatile:XO
 	  [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
 	   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
 	  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[0], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -468,7 +467,7 @@
 
 (define_expand "mma_disassemble_acc"
   [(match_operand:V16QI 0 "mma_disassemble_output_operand")
-   (match_operand:XO 1 "fpr_reg_operand")
+   (match_operand:XO 1 "accumulator_operand")
    (match_operand 2 "const_0_to_3_operand")]
   "TARGET_MMA"
 {
@@ -483,11 +482,10 @@
 
 (define_insn_and_split "*mma_disassemble_acc"
   [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
-       (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
+       (unspec:V16QI [(match_operand:XO 1 "accumulator_operand" "d")
 		      (match_operand 2 "const_0_to_3_operand")]
 		      UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA
-   && fpr_reg_operand (operands[1], XOmode)"
+  "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -504,8 +502,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
 	(unspec_volatile:XO [(const_int 0)]
 			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
 		    MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
 		    MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -615,7 +613,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -627,8 +625,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -640,7 +638,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -652,8 +650,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -665,7 +663,7 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
 	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -678,8 +676,8 @@
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+	(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
 		    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
 		    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3..2ef4256c72b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,6 +186,21 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
+;; overlap with the FPRs.
+(define_predicate "accumulator_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+    return 0;
+
+  if (!HARD_REGISTER_P (op))
+    return 1;
+
+  int r = REGNO (op);
+  return FP_REGNO_P (r) && (r & 3) == 0;
+})
+
 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
   (match_operand 0 "register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b1ad49d3734..8baaacad0f2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2322,6 +2322,7 @@ rs6000_debug_reg_global (void)
 	   "wr reg_class = %s\n"
 	   "wx reg_class = %s\n"
 	   "wA reg_class = %s\n"
+	   "wD reg_class = %s\n"
 	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2329,7 +2330,8 @@ rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
 
   nl = "\n";
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2986,6 +2988,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
+  if (TARGET_MMA)
+    rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
+
   /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 79ce1a8cbf1..fc3bd006c47 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1201,6 +1201,7 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wr,		/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,		/* BASE_REGS if 64-bit.  */
+  RS6000_CONSTRAINT_wD,		/* Accumulator regs if MMA/Dense Math.  */
   RS6000_CONSTRAINT_MAX
 };
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b7e6507754..ea1b8c9157b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3441,6 +3441,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
 @item wA
 Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
 
+@item wD
+Accumulator register if @option{-mma} is used; otherwise,
+@code{NO_REGS}.  For @option{-mcpu=power10} the accumulator registers
+overlap with VSX vector registers 0..31.
+
 @item wB
 Signed 5-bit constant integer that can be loaded into an Altivec register.

             reply	other threads:[~2024-03-02  1:46 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-02  1:46 Michael Meissner [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-03-05 18:41 Michael Meissner
2024-03-05  4:55 Michael Meissner
2024-03-04 23:41 Michael Meissner
2024-03-02  5:11 Michael Meissner
2024-03-01 23:34 Michael Meissner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240302014655.831463858D32@sourceware.org \
    --to=meissner@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).