public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/dmf001)] Add initial DMR register support for XOmode.
@ 2022-10-10 21:49 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2022-10-10 21:49 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4e378e5f0595acc7ecbf55e25e372a4b196560e9

commit 4e378e5f0595acc7ecbf55e25e372a4b196560e9
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Oct 10 17:48:57 2022 -0400

    Add initial DMR register support for XOmode.
    
    2022-10-10   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (movxo_p10): Rename from movxo, and limit its use
            to non-DMF systems.
            (movxo_dmf): New insn.
            (mma_<acc>): NOP prime/de-prime usage when -mdmf is used.
            * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Allow
            XOmode in DMR registers or Altivec registers if -mdmf.
            (rs6000_setup_reg_addr_masks): Add support to allow TDOmode in DMF
            registers, and also in Altivec registers if -mdmf.  TDOmode registers
            can do offset addressing but not indexed addressing.
            (rs6000_init_hard_regno_mode_ok): Add support for TDOmode.
            (reg_offset_addressing_ok_p): Likewise.
            (rs6000_emit_move): Warn about using SUBREGs with TDOmode.
            (rs6000_preferred_reload_class): DMF registers can't be loaded or
            stored.  With -mdmf, XOmode can go in Altivec registers.  TDOmode needs
            to use VSX registers for load/store.
            (rs6000_compute_pressure_classes): Add DMF registers to the pressure
            classses.
            (rs6000_split_multireg_move): If -mdmf, we don't need to prime or
            de-prime the acculators.  Add initial support for TDOmode.
            (rs6000_invalid_conversion): Warn about converting __dmr types.
            * config/rs6000/rs6000.h (DMF_REG_CLASS_P): New macro.

Diff:
---
 gcc/config/rs6000/mma.md    |  55 ++++++++++++++++++---
 gcc/config/rs6000/rs6000.cc | 114 ++++++++++++++++++++++++++++++++------------
 2 files changed, 133 insertions(+), 36 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 032f4263cb0..cb3f83fb1cc 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -333,10 +333,10 @@
     gcc_unreachable ();
 })
 
-(define_insn_and_split "*movxo"
+(define_insn_and_split "*movxo_p10"
   [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
 	(match_operand:XO 1 "input_operand" "m,d,d"))]
-  "TARGET_MMA
+  "TARGET_MMA && !TARGET_DMF
    && (gpc_reg_operand (operands[0], XOmode)
        || gpc_reg_operand (operands[1], XOmode))"
   "@
@@ -353,6 +353,31 @@
    (set_attr "length" "*,*,16")
    (set_attr "max_prefixed_insns" "2,2,*")])
 
+(define_insn_and_split "*movxo_dmf"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=wa,m, wa,wD,wD,wa")
+	(match_operand:XO 1 "input_operand"          "m,wa,wa,wa,wD,wD"))]
+  "TARGET_DMF
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
+  "@
+   #
+   #
+   #
+   dmxxinstdmr512 %0,%1,%Y1,0
+   dmmr %0,%1
+   dmxxexttdmr512 %1,%0,%Y0,0"
+  "&& reload_completed
+   && !dmf_operand (operands[0], XOmode)
+   && !dmf_operand (operands[1], XOmode)"
+  [(const_int 0)]
+{
+  rs6000_split_multireg_move (operands[0], operands[1]);
+  DONE;
+}
+  [(set_attr "type" "vecload,vecstore,veclogical,vecmove,vecmove,vecmove")
+   (set_attr "length" "*,*,16,*,*,*")
+   (set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
+
 (define_expand "vsx_assemble_pair"
   [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "mma_assemble_input_operand")
@@ -493,11 +518,29 @@
   DONE;
 })
 
-;; MMA instructions that do not use their accumulators as an input, still
-;; must not allow their vector operands to overlap the registers used by
-;; the accumulator.  We enforce this by marking the output as early clobber.
+;; MMA instructions that do not use their accumulators as an input, still must
+;; not allow their vector operands to overlap the registers used by the
+;; accumulator.  We enforce this by marking the output as early clobber.  If we
+;; have DMF, we don't need the whole prime/de-prime action, so just make thse
+;; instructions be NOPs.
+
+(define_expand "mma_<acc>"
+  [(set (match_operand:XO 0 "fpr_reg_operand")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand")]
+		   MMA_ACC))]
+  "TARGET_MMA"
+{
+  if (TARGET_DMF)
+    {
+      if (!rtx_equal_p (operands[0], operands[1]))
+	emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+
+  /* Generate the prime/de-prime code.  */
+})
 
-(define_insn "mma_<acc>"
+(define_insn "*mma_<acc>"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
 	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index eb94bb27c78..92713407c4f 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1854,11 +1854,20 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
   if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4 or they need DMF
-     registers.  */
+  /* On power10, MMA accumulator modes need FPR registers divisible by 4.  If
+     DMF is enabled, allow all VSX registers plus the DMF registers.  */
   if (mode == XOmode)
-    return ((TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0)
-	    || (TARGET_DMF && DMF_REGNO_P (regno)));
+    {
+      if (TARGET_DMF)
+	return (DMF_REGNO_P (regno)
+		|| (VSX_REGNO_P (regno) && (regno & 3) == 0));
+
+      else if (TARGET_MMA)
+	return (FP_REGNO_P (regno) && (regno & 3) == 0);
+
+      else
+	return 0;
+    }
 
   /* DMF register modes need DMF registers or VSX registers divisible by 4.  */
   if (mode == TDOmode)
@@ -2646,12 +2655,22 @@ rs6000_setup_reg_addr_masks (void)
 	  /* Special case DMF registers.  */
 	  if (rc == RELOAD_REG_DMF)
 	    {
-	      if (TARGET_DMF && (m2 == XOmode || m2 == TDOmode))
+	      if (TARGET_DMF && m2 == XOmode)
 		{
 		  addr_mask = RELOAD_REG_VALID | RELOAD_REG_NO_MEMORY;
 		  reg_addr[m].addr_mask[rc] = addr_mask;
 		  any_addr_mask |= addr_mask;
 		}
+	      else
+		reg_addr[m].addr_mask[rc] = 0;
+
+	      continue;
+	    }
+
+	  /* At the moment, don't enable TDOmode.  */
+	  if (m2 == TDOmode)
+	    {
+	      reg_addr[m].addr_mask[rc] = 0;
 	      continue;
 	    }
 
@@ -2750,10 +2769,10 @@ rs6000_setup_reg_addr_masks (void)
 
 	  /* Vector pairs can do both indexed and offset loads if the
 	     instructions are enabled, otherwise they can only do offset loads
-	     since it will be broken into two vector moves.  Vector quads can
-	     only do offset loads.  */
+	     since it will be broken into two vector moves.  Vector quads and
+	     1,024 bit DMR values can only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == OOmode || m2 == XOmode))
+		   && (m2 == OOmode || m2 == XOmode || m2 == TDOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
@@ -2981,6 +3000,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[XOmode] = 512;
     }
 
+  /* Add support for 1,024 bit DMR registers.  */
+  if (TARGET_DMF)
+    {
+      rs6000_vector_unit[TDOmode] = VECTOR_NONE;
+      rs6000_vector_mem[TDOmode] = VECTOR_VSX;
+      rs6000_vector_align[TDOmode] = 512;
+    }
+
   /* Register class constraints for the constraints that depend on compile
      switches. When the VSX code was added, different constraints were added
      based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
@@ -8662,10 +8689,11 @@ reg_offset_addressing_ok_p (machine_mode mode)
 	return mode_supports_dq_form (mode);
       break;
 
-      /* The vector pair/quad types support offset addressing if the
-	 underlying vectors support offset addressing.  */
+      /* The vector pair/quad types and the DMF types support offset addressing
+	 if the underlying vectors support offset addressing.  */
     case E_OOmode:
     case E_XOmode:
+    case E_TDOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10943,6 +10971,12 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
+    case E_TDOmode:
+      if (CONST_INT_P (operands[1]))
+	error ("%qs is an opaque type, and you cannot set it to constants",
+	       "__dmr");
+      break;
+
     case E_SImode:
     case E_DImode:
       /* Use default pattern for address of ELF small data */
@@ -13130,6 +13164,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
   machine_mode mode = GET_MODE (x);
   bool is_constant = CONSTANT_P (x);
 
+  /* DMF registers can't be loaded or stored.  */
+  if (rclass == DMF_REGS)
+    return NO_REGS;
+
   /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
      reload class for it.  */
   if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
@@ -13226,7 +13264,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
 	return VSX_REGS;
 
       if (mode == XOmode)
-	return FLOAT_REGS;
+	return TARGET_DMF ? VSX_REGS : FLOAT_REGS;
+
+      if (mode == TDOmode)
+	return VSX_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
 	return GENERAL_REGS;
@@ -13351,6 +13392,11 @@ rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
   else
     regno = -1;
 
+  /* DMF registers don't have loads or stores.  We have to go through the VSX
+     registers to load XOmode (vector quad) and TDOmode (dmf 1024 bit).  */
+  if (TARGET_DMF && rclass == DMF_REGS)
+    return VSX_REGS;
+
   /* If we have VSX register moves, prefer moving scalar values between
      Altivec registers and GPR by going via an FPR (and then via memory)
      instead of reloading the secondary memory address for Altivec moves.  */
@@ -23794,6 +23840,8 @@ rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
       if (TARGET_HARD_FLOAT)
 	pressure_classes[n++] = FLOAT_REGS;
     }
+  if (TARGET_DMF)
+    pressure_classes[n++] = DMF_REGS;
   pressure_classes[n++] = CR_REGS;
   pressure_classes[n++] = SPECIAL_REGS;
 
@@ -27135,7 +27183,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == XOmode && TARGET_MMA
+  if ((mode == XOmode || mode == TDOmode) && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
       reg_mode = OOmode;
@@ -27143,7 +27191,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == OOmode || mode == XOmode)
+  else if (mode == OOmode || mode == XOmode || mode == TDOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -27195,7 +27243,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
      below.  This means the last register gets the first memory
      location.  We also need to be careful of using the right register
      numbers if we are splitting XO to OO.  */
-  if (mode == OOmode || mode == XOmode)
+  if (mode == OOmode || mode == XOmode || mode == TDOmode)
     {
       nregs = hard_regno_nregs (reg, mode);
       int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
@@ -27206,7 +27254,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
-	  if (TARGET_MMA
+	  if (TARGET_MMA && !TARGET_DMF
 	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
@@ -27238,9 +27286,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
 
-	  /* If we are writing an accumulator register, we have to
-	     prime it after we've written it.  */
-	  if (TARGET_MMA
+	  /* If we are writing an accumulator register that overlaps with the
+	     FPR registers, we have to prime it after we've written it.  */
+	  if (TARGET_MMA && !TARGET_DMF
 	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
@@ -27254,7 +27302,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 		      || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
 	  gcc_assert (REG_P (dst));
 	  if (GET_MODE (src) == XOmode)
-	    gcc_assert (FP_REGNO_P (REGNO (dst)));
+	    gcc_assert ((TARGET_DMF
+			 ? VSX_REGNO_P (REGNO (dst))
+			 : FP_REGNO_P (REGNO (dst))));
 	  if (GET_MODE (src) == OOmode)
 	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
 
@@ -27307,9 +27357,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, op));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  if (GET_MODE (src) == XOmode)
+	  /* On systems without DMF where accumulators overlap with the vector
+	     registers, we have to prime it after we've written it.  */
+	  if (GET_MODE (src) == XOmode && !TARGET_DMF)
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -27320,9 +27370,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
     {
-      /* If we are reading an accumulator register, we have to
-	 deprime it before we can access it.  */
-      if (TARGET_MMA
+      /* If we are reading an accumulator register and we don't have DMF, we
+	 have to deprime it before we can access it.  */
+      if (TARGET_MMA && !TARGET_DMF
 	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
@@ -27330,7 +27380,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	 overlap.  */
       int i;
       /* XO/OO are opaque so cannot use subregs. */
-      if (mode == OOmode || mode == XOmode )
+      if (mode == OOmode || mode == XOmode || mode == TDOmode )
 	{
 	  for (i = nregs - 1; i >= 0; i--)
 	    {
@@ -27350,7 +27400,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
-      if (TARGET_MMA
+      if (TARGET_MMA && !TARGET_DMF
 	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
@@ -27487,7 +27537,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
-      if (TARGET_MMA && REG_P (src)
+      if (TARGET_MMA && !TARGET_DMF && REG_P (src)
 	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
@@ -27504,7 +27554,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	    continue;
 
 	  /* XO/OO are opaque so cannot use subregs. */
-	  if (mode == OOmode || mode == XOmode )
+	  if (mode == OOmode || mode == XOmode || mode == TDOmode )
 	    {
 	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
 	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
@@ -27519,7 +27569,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
-      if (TARGET_MMA && REG_P (dst)
+      if (TARGET_MMA && !TARGET_DMF && REG_P (dst)
 	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
@@ -28432,6 +28482,10 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 	return N_("invalid conversion from type %<__vector_pair%>");
       if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
+      if (frommode == TDOmode)
+	return N_("invalid conversion from type %<__dmr%>");
+      if (tomode == TDOmode)
+	return N_("invalid conversion to type %<__dmr%>");
     }
 
   /* Conversion allowed.  */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-10-10 21:49 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-10 21:49 [gcc(refs/users/meissner/heads/dmf001)] Add initial DMR register support for XOmode Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).