[gcc(refs/users/meissner/heads/work129-vpair)] First attempt at vector

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/users/meissner/heads/work129-vpair)] First attempt at vector_size(32).
@ 2023-08-01  3:06 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2023-08-01  3:06 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:75baf4ddd9bfa6a7c8f2a2a94e4b7a9fc8480da7

commit 75baf4ddd9bfa6a7c8f2a2a94e4b7a9fc8480da7
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Jul 31 23:05:37 2023 -0400

    First attempt at vector_size(32).
    
    2023-07-31  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
            __VECTOR_PAIR__ if -mvector-pair.
            * config/rs6000/rs6000-cpus.def (OTHER_POWER10_MASKS): Add
            -mvector-pair.
            (POWERPC_MASKS): Likewise.
            * config/rs6000/rs6000.cc (struct vpair_mode_info): Vector pair
            support.
            (VPAIR_MODE_INIT): Likewise.
            (vpair_modes): Likewise.
            (rs6000_hard_regno_mode_ok_uncached): Use VECTOR_PAIR_MODE_P instead of
            testing against OOmode.
            (rs6000_modes_tieable_p): Likewise.
            (rs6000_setup_reg_addr_masks): Likewise.
            (rs6000_init_hard_regno_mode_ok): Vector pair support.
            (rs6000_option_override_internal): Likewise.
            (rs6000_expand_vector_extract): Likewise.
            (reg_offset_addressing_ok_p): Likewise.
            (rs6000_emit_move): Likewise.
            (rs6000_preferred_reload_class): Likewise.
            (rs6000_opt_masks): Likewise.
            (rs6000_split_vpair_constant): Likewise.
            (rs6000_split_multireg_move): Likewise.
            (rs6000_invalid_conversion): Likewise.
            * config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Likewise.
            (VECTOR_PAIR_MODE_P): Likewise.
            * config/rs6000/rs6000.md (RELOAD): Add vector pair modes.
            (vector-pair.md): Include.
            * config/rs6000/rs6000.opt (-mvector-pair): New switch.
            * config/rs6000/vector-pair.md: New file.
            * config/rs6000/vsx.md (V4SF_V8SF): New mode iterator.
            (vsx_extract_v4df): New insn.
            (vsx_extract_<mode>): New insn.

Diff:
---
 gcc/config/rs6000/rs6000-c.cc     |   3 +
 gcc/config/rs6000/rs6000-cpus.def |   4 +-
 gcc/config/rs6000/rs6000.cc       | 234 ++++++++++++++++++++++++++++++--
 gcc/config/rs6000/rs6000.h        |   8 +-
 gcc/config/rs6000/rs6000.md       |   2 +
 gcc/config/rs6000/rs6000.opt      |   4 +
 gcc/config/rs6000/t-rs6000        |   1 +
 gcc/config/rs6000/vector-pair.md  | 271 ++++++++++++++++++++++++++++++++++++++
 gcc/config/rs6000/vsx.md          |  58 +++++++-
 9 files changed, 566 insertions(+), 19 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 65be0ac43e2..3ab352275cd 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -601,6 +601,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
   /* Whether pc-relative code is being generated.  */
   if ((flags & OPTION_MASK_PCREL) != 0)
     rs6000_define_or_undefine_macro (define_p, "__PCREL__");
+  /* Whether vector pair arithmetic is enabled.  */
+  if ((flags & OPTION_MASK_VECTOR_PAIR) != 0)
+    rs6000_define_or_undefine_macro (define_p, "__VECTOR_PAIR__");
   /* Tell the user -mrop-protect is in play.  */
   if (rs6000_rop_protect)
     rs6000_define_or_undefine_macro (define_p, "__ROP_PROTECT__");
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 4f350da378c..72447fbc7df 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -80,7 +80,8 @@
 #define OTHER_POWER10_MASKS	(OPTION_MASK_MMA			\
 				 | OPTION_MASK_PCREL			\
 				 /* | OPTION_MASK_PCREL_OPT */		\
-				 | OPTION_MASK_PREFIXED)
+				 | OPTION_MASK_PREFIXED			\
+				 | OPTION_MASK_VECTOR_PAIR)
 
 #define ISA_3_1_MASKS_SERVER	(ISA_3_0_MASKS_SERVER			\
 				 | OPTION_MASK_POWER10			\
@@ -157,6 +158,7 @@
 				 | OPTION_MASK_RECIP_PRECISION		\
 				 | OPTION_MASK_SOFT_FLOAT		\
 				 | OPTION_MASK_STRICT_ALIGN_OPTIONAL	\
+				 | OPTION_MASK_VECTOR_PAIR		\
 				 | OPTION_MASK_VSX)
 
 #endif
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..bb5bf415c32 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -361,6 +361,33 @@ struct rs6000_reg_addr {
 
 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
 
+/* Vector pair modes that are supported with -mvector-pair.  */
+struct vpair_mode_info {
+  machine_mode vp_mode;			/* vector mode to use.  */
+  enum insn_code reload_di_load;	/* reload input handler, 64-bit.  */
+  enum insn_code reload_di_store;	/* reload output handler, 64-bit.  */
+  enum insn_code reload_si_load;	/* reload input handler, 32-bit.  */
+  enum insn_code reload_si_store;	/* reload output handler, 32-bit.  */
+};
+
+#define VPAIR_MODE_INIT(UC_TYPE, LC_TYPE)				\
+  {									\
+    UC_TYPE,								\
+    CODE_FOR_reload_ ## LC_TYPE ## _di_load,				\
+    CODE_FOR_reload_ ## LC_TYPE ## _di_store,				\
+    CODE_FOR_reload_ ## LC_TYPE ## _si_load,				\
+    CODE_FOR_reload_ ## LC_TYPE ## _si_store,				\
+  }
+
+static const struct vpair_mode_info vpair_modes[] = {
+  VPAIR_MODE_INIT (V32QImode, v32qi),
+  VPAIR_MODE_INIT (V16HImode, v16hi),
+  VPAIR_MODE_INIT (V8SImode,  v8si),
+  VPAIR_MODE_INIT (V4DImode,  v4di),
+  VPAIR_MODE_INIT (V8SFmode,  v8sf),
+  VPAIR_MODE_INIT (V4DFmode,  v4df),
+};
+
 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
 static inline bool
 mode_supports_pre_incdec_p (machine_mode mode)
@@ -1842,7 +1869,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
      registers.  */
-  if (mode == OOmode)
+  if (VECTOR_PAIR_MODE_P (mode))
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
   /* MMA accumulator modes need FPR registers divisible by 4.  */
@@ -1963,8 +1990,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
-      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
+  if (mode1 == PTImode || VECTOR_PAIR_MODE_P (mode1) || mode1 == XOmode
+      || mode2 == PTImode || VECTOR_PAIR_MODE_P (mode2) || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2712,13 +2739,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == OOmode || m2 == XOmode))
+		   && (VECTOR_PAIR_MODE_P (m2) || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == OOmode)
+		  if (VECTOR_PAIR_MODE_P (m2))
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2936,6 +2963,17 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[XOmode] = 512;
     }
 
+  if (TARGET_VECTOR_PAIR)
+    {
+      for (size_t i = 0; i < ARRAY_SIZE (vpair_modes); i++)
+	{
+	  machine_mode vpair_mode = vpair_modes[i].vp_mode;
+	  rs6000_vector_unit[vpair_mode] = VECTOR_NONE;
+	  rs6000_vector_mem[vpair_mode] = VECTOR_VSX;
+	  rs6000_vector_align[vpair_mode] = 256;
+	}
+    }
+
   /* Register class constraints for the constraints that depend on compile
      switches. When the VSX code was added, different constraints were added
      based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
@@ -3067,6 +3105,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
 		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
+
+	      if (TARGET_VECTOR_PAIR)
+		{
+		  for (size_t i = 0; i < ARRAY_SIZE (vpair_modes); i++)
+		    {
+		      machine_mode vm = vpair_modes[i].vp_mode;
+		      reg_addr[vm].reload_store
+			= vpair_modes[i].reload_di_store;
+		      reg_addr[vm].reload_load
+			= vpair_modes[i].reload_di_load;
+		    }
+		}
 	    }
 	}
       else
@@ -3124,6 +3174,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
 	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
 	    }
+
+	  if (TARGET_VECTOR_PAIR)
+	    {
+	      for (size_t i = 0; i < ARRAY_SIZE (vpair_modes); i++)
+		{
+		  machine_mode vm = vpair_modes[i].vp_mode;
+		  reg_addr[vm].reload_store
+		    = vpair_modes[i].reload_si_store;
+		  reg_addr[vm].reload_load
+		    = vpair_modes[i].reload_si_load;
+		}
+	    }
 	}
 
       reg_addr[DFmode].scalar_in_vmx_p = true;
@@ -4424,6 +4486,16 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_MMA;
     }
 
+  /* -mvector-pair requires -mmma.  If MMA is not available, turn off vector
+     pair support and give an error if -mvector-pair was explicitly used.  */
+  if (!TARGET_MMA && TARGET_VECTOR_PAIR)
+    {
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_VECTOR_PAIR) != 0)
+	error ("%qs requires %qs", "-mvector-pair", "-mmma");
+
+      rs6000_isa_flags &= ~OPTION_MASK_VECTOR_PAIR;
+    }
+
   if (!TARGET_PCREL && TARGET_PCREL_OPT)
     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
 
@@ -7628,6 +7700,50 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
 	      return;
 	    }
 	  break;
+#if 0
+	case E_V4DImode:
+	  if (TARGET_MMA)
+	    {
+	      emit_insn (gen_vsx_extract_v4di (target, vec, elt));
+	      return;
+	    }
+	  break;
+	case E_V8SImode:
+	  if (TARGET_MMA)
+	    {
+	      emit_insn (gen_vsx_extract_v8si (target, vec, elt));
+	      return;
+	    }
+	  break;
+	case E_V16HImode:
+	  if (TARGET_MMA)
+	    {
+	      emit_insn (gen_vsx_extract_v16hi (target, vec, elt));
+	      return;
+	    }
+	  break;
+	case E_V32HQmode:
+	  if (TARGET_MMA)
+	    {
+	      emit_insn (gen_vsx_extract_v32qi (target, vec, elt));
+	      return;
+	    }
+	  break;
+#endif
+	case E_V4DFmode:
+	  if (TARGET_MMA)
+	    {
+	      emit_insn (gen_vsx_extract_v4df (target, vec, elt));
+	      return;
+	    }
+	  break;
+	case E_V8SFmode:
+	  if (TARGET_MMA)
+	    {
+	      emit_insn (gen_vsx_extract_v8sf (target, vec, elt));
+	      return;
+	    }
+	  break;
 	}
     }
   else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
@@ -8669,6 +8785,12 @@ reg_offset_addressing_ok_p (machine_mode mode)
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
     case E_OOmode:
+    case E_V8SFmode:
+    case E_V4DFmode:
+    case E_V32QImode:
+    case E_V16HImode:
+    case E_V8SImode:
+    case E_V4DImode:
     case E_XOmode:
       return TARGET_MMA;
 
@@ -10955,11 +11077,17 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
+    case E_V32QImode:
     case E_V16QImode:
+    case E_V16HImode:
     case E_V8HImode:
+    case E_V8SFmode:
     case E_V4SFmode:
+    case E_V8SImode:
     case E_V4SImode:
+    case E_V4DFmode:
     case E_V2DFmode:
+    case E_V4DImode:
     case E_V2DImode:
     case E_V1TImode:
       if (CONSTANT_P (operands[1])
@@ -10971,7 +11099,7 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
     case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you cannot set it to other values",
-	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
+	       (VECTOR_PAIR_MODE_P (mode)) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -13216,7 +13344,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == OOmode)
+      if (VECTOR_PAIR_MODE_P (mode))
 	return VSX_REGS;
 
       if (mode == XOmode)
@@ -24245,6 +24373,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true  },
   { "string",			0,				false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
+  { "vector-pair",		OPTION_MASK_VECTOR_PAIR,	false, true  },
   { "vsx",			OPTION_MASK_VSX,		false, true  },
 #ifdef OPTION_MASK_64BIT
 #if TARGET_AIX_OS
@@ -27151,6 +27280,54 @@ rs6000_split_logical (rtx operands[3],
   return;
 }
 
+/* Split OP, a constant for a mode held in a vector register pair, into two
+   constants (*HIGH and *LOW) that each fit in a single vector register.
+   Return true if OP was split; otherwise *HIGH and *LOW are NULL_RTX.  */
+
+bool
+rs6000_split_vpair_constant (rtx op, rtx *high, rtx *low)
+{
+  *high = *low = NULL_RTX;
+  if (!CONST_VECTOR_P (op))
+    return false;
+
+  if (GET_MODE (op) == V8SFmode)
+    {
+      rtvec hi_vec = gen_rtvec (4,
+				CONST_VECTOR_ELT (op, 0),
+				CONST_VECTOR_ELT (op, 1),
+				CONST_VECTOR_ELT (op, 2),
+				CONST_VECTOR_ELT (op, 3));
+
+      rtvec lo_vec = gen_rtvec (4,
+				CONST_VECTOR_ELT (op, 4),
+				CONST_VECTOR_ELT (op, 5),
+				CONST_VECTOR_ELT (op, 6),
+				CONST_VECTOR_ELT (op, 7));
+
+      *high = gen_rtx_CONST_VECTOR (V4SFmode, hi_vec);
+      *low = gen_rtx_CONST_VECTOR (V4SFmode, lo_vec);
+      return true;
+    }
+
+  else if (GET_MODE (op) == V4DFmode)
+    {
+      rtvec hi_vec = gen_rtvec (2,
+				CONST_VECTOR_ELT (op, 0),
+				CONST_VECTOR_ELT (op, 1));
+
+      rtvec lo_vec = gen_rtvec (2,
+				CONST_VECTOR_ELT (op, 2),
+				CONST_VECTOR_ELT (op, 3));
+
+      *high = gen_rtx_CONST_VECTOR (V2DFmode, hi_vec);
+      *low = gen_rtx_CONST_VECTOR (V2DFmode, lo_vec);
+      return true;
+    }
+
+  return false;
+}
+
 /* Emit instructions to move SRC to DST.  Called by splitters for
    multi-register moves.  It will emit at most one instruction for
    each register that is accessed; that is, it won't emit li/lis pairs
@@ -27169,6 +27346,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
   int reg_mode_size;
   /* The number of registers that will be moved.  */
   int nregs;
+  /* Hi/lo values for splitting vector pair constants.  */
+  rtx vpair_hi, vpair_lo;
 
   reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
   mode = GET_MODE (dst);
@@ -27184,7 +27363,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == OOmode || mode == XOmode)
+  else if (VECTOR_PAIR_MODE_P (mode) || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -27197,6 +27376,29 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
 
+  /* Handle vector pair constants.  */
+  if (CONST_VECTOR_P (src) && VECTOR_PAIR_MODE_P (mode) && TARGET_MMA
+      && rs6000_split_vpair_constant (src, &vpair_hi, &vpair_lo)
+      && VSX_REGNO_P (reg))
+    {
+      reg_mode = GET_MODE (vpair_hi);
+      rtx reg_hi = gen_rtx_REG (reg_mode, reg);
+      rtx reg_lo = gen_rtx_REG (reg_mode, reg + 1);
+
+      emit_move_insn (reg_hi, vpair_hi);
+
+      /* 0.0 is easy.  For other constants, copy the high register into the low
+	 register if the two sets of constants are equal.  This means we won't
+	 be doing back to back prefixed load immediate instructions.  */
+      if (rtx_equal_p (vpair_hi, vpair_lo)
+	  && !rtx_equal_p (vpair_hi, CONST0_RTX (reg_mode)))
+	emit_move_insn (reg_lo, reg_hi);
+      else
+	emit_move_insn (reg_lo, vpair_lo);
+      
+      return;
+    }
+      
   /* TDmode residing in FP registers is special, since the ISA requires that
      the lower-numbered word of a register pair is always the most significant
      word, even in little-endian mode.  This does not match the usual subreg
@@ -27236,7 +27438,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
      below.  This means the last register gets the first memory
      location.  We also need to be careful of using the right register
      numbers if we are splitting XO to OO.  */
-  if (mode == OOmode || mode == XOmode)
+  if (VECTOR_PAIR_MODE_P (mode) || mode == XOmode)
     {
       nregs = hard_regno_nregs (reg, mode);
       int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
@@ -27296,7 +27498,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  gcc_assert (REG_P (dst));
 	  if (GET_MODE (src) == XOmode)
 	    gcc_assert (FP_REGNO_P (REGNO (dst)));
-	  if (GET_MODE (src) == OOmode)
+	  if (VECTOR_PAIR_MODE_P (GET_MODE (src)))
 	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
 
 	  int nvecs = XVECLEN (src, 0);
@@ -27371,7 +27573,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	 overlap.  */
       int i;
       /* XO/OO are opaque so cannot use subregs. */
-      if (mode == OOmode || mode == XOmode )
+      if (VECTOR_PAIR_MODE_P (mode) || mode == XOmode )
 	{
 	  for (i = nregs - 1; i >= 0; i--)
 	    {
@@ -27545,7 +27747,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	    continue;
 
 	  /* XO/OO are opaque so cannot use subregs. */
-	  if (mode == OOmode || mode == XOmode )
+	  if (VECTOR_PAIR_MODE_P (mode) || mode == XOmode )
 	    {
 	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
 	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
@@ -28536,6 +28738,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 	return N_("invalid conversion from type %<__vector_pair%>");
       if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
+      if (frommode == V8SFmode)
+	return N_("invalid conversion from type %<V8SF%>");
+      if (tomode == V8SFmode)
+	return N_("invalid conversion to type %<V8SF%>");
+      if (frommode == V4DFmode)
+	return N_("invalid conversion from type %<V4DF%>");
+      if (tomode == V4DFmode)
+	return N_("invalid conversion to type %<V4DF%>");
     }
 
   /* Conversion allowed.  */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..916c69457d4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -993,7 +993,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
+  (FLOAT128_VECTOR_P (MODE) || VECTOR_PAIR_MODE_P (MODE) || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -1006,6 +1006,12 @@ enum data_align { align_abi, align_opt, align_both };
   (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE)			\
    || (MODE) == V2DImode || (MODE) == V1TImode)
 
+/* Whether a mode is held in paired vector registers.  */
+#define VECTOR_PAIR_MODE_P(MODE)					\
+  ((MODE) == OOmode							\
+   || (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode	\
+   || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
 /* Post-reload, we can't use any new AltiVec registers, as we already
    emitted the vrsave mask.  */
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index be615c3584e..a8693512511 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -800,6 +800,7 @@
 ;; Reload iterator for creating the function to allocate a base register to
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
+			      V32QI V16HI V8SI V4DI V8SF V4DF
 			      SF SD SI DF DD DI TI PTI KF IF TF
 			      OO XO])
 
@@ -15778,6 +15779,7 @@
 (include "vsx.md")
 (include "altivec.md")
 (include "mma.md")
+(include "vector-pair.md")
 (include "dfp.md")
 (include "crypto.md")
 (include "htm.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index bde6d3ff664..3df90600729 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -597,6 +597,10 @@ mmma
 Target Mask(MMA) Var(rs6000_isa_flags)
 Generate (do not generate) MMA instructions.
 
+mvector-pair
+Target Mask(VECTOR_PAIR) Var(rs6000_isa_flags)
+Generate (do not generate) vector pair instructions for vector_size(32).
+
 mrelative-jumptables
 Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
 
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index f183b42ce1d..5fc89499795 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -128,6 +128,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
 	$(srcdir)/config/rs6000/vsx.md \
 	$(srcdir)/config/rs6000/altivec.md \
 	$(srcdir)/config/rs6000/mma.md \
+	$(srcdir)/config/rs6000/vector-pair.md \
 	$(srcdir)/config/rs6000/crypto.md \
 	$(srcdir)/config/rs6000/htm.md \
 	$(srcdir)/config/rs6000/dfp.md \
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
new file mode 100644
index 00000000000..7369c1e4792
--- /dev/null
+++ b/gcc/config/rs6000/vector-pair.md
@@ -0,0 +1,271 @@
+;; Vector pair arithmetic and logical instruction support.
+;; Copyright (C) 2020-2023 Free Software Foundation, Inc.
+;; Contributed by Peter Bergner <bergner@linux.ibm.com> and
+;;		  Michael Meissner <meissner@linux.ibm.com>
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; This file adds support for doing vector operations on pairs of vector
+;; registers.  Most of the instructions use vector pair instructions to load
+;; and possibly store registers, but splitting the operation after register
+;; allocation to do 2 separate operations.  The second scheduler pass can
+;; interleave other instructions between these pairs of instructions if
+;; possible.
+
+;; Iterator for all vector pair modes
+(define_mode_iterator VPAIR [V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; Iterator for the integer vector pair modes
+(define_mode_iterator VPAIR_INT [V32QI V16HI V8SI V4DI])
+
+;; Iterator for the floating point vector pair modes
+(define_mode_iterator VPAIR_FP [V8SF V4DF])
+
+;; Iterator doing unary/binary arithmetic on vector pairs.  Split it into
+;; integer and floating point operations.
+(define_code_iterator VPAIR_INT_UNARY   [neg abs not])
+(define_code_iterator VPAIR_INT_BINARY  [plus minus smin smax])
+(define_code_iterator VPAIR_INT_LOGICAL [and ior xor])
+
+(define_code_iterator VPAIR_FP_UNARY  [neg abs sqrt])
+(define_code_iterator VPAIR_FP_BINARY [plus minus mult div copysign smin smax])
+
+;; Map each RTL code to the operation name used in the insn pattern name.
+(define_code_attr vpair_op [(abs      "abs")
+			    (and      "and")
+			    (copysign "copysign")
+			    (div      "div")
+			    (ior      "ior")
+			    (minus    "sub")
+			    (mult     "mul")
+			    (not      "one_cmpl")
+			    (neg      "neg")
+			    (plus     "add")
+			    (smin     "smin")
+			    (smax     "smax")
+			    (sqrt     "sqrt")
+			    (umin     "umin")
+			    (umax     "umax")
+			    (xor      "xor")])
+
+;; Map vector pair to vector
+(define_mode_attr VPAIR_VECT [(V32QI "V16QI")
+			      (V16HI "V8HI")
+			      (V8SI  "V4SI")
+			      (V4DI  "V2DI")
+			      (V8SF  "V4SF")
+			      (V4DF  "V2DF")])
+
+
+;; Vector pair move support.
+(define_expand "mov<mode>"
+  [(set (match_operand:VPAIR 0 "nonimmediate_operand")
+	(match_operand:VPAIR 1 "input_operand"))]
+  "TARGET_VECTOR_PAIR"
+{
+  rs6000_emit_move (operands[0], operands[1], <MODE>mode);
+  DONE;
+})
+
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:VPAIR 0 "nonimmediate_operand" "=wa,m, wa,wa,wa")
+	(match_operand:VPAIR 1 "input_operand"          "m,wa,wa,j, eP"))]
+  "TARGET_VECTOR_PAIR
+   && (gpc_reg_operand (operands[0], <MODE>mode)
+       || gpc_reg_operand (operands[1], <MODE>mode))"
+  "@
+   lxvp%X1 %x0,%1
+   stxvp%X0 %x1,%0
+   #
+   #
+   #"
+  "&& reload_completed
+   && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+  [(const_int 0)]
+{
+  rs6000_split_multireg_move (operands[0], operands[1]);
+  DONE;
+}
+  [(set_attr "type" "vecload,vecstore,veclogical,vecperm,vecperm")
+   (set_attr "size" "256")
+   (set_attr "length" "*,*,8,8,40")])
+
+\f
+;; Vector pair floating point arithmetic unary operations
+(define_insn_and_split "<vpair_op><mode>2"
+  [(set (match_operand:VPAIR_FP 0 "vsx_register_operand" "=wa")
+	(VPAIR_FP_UNARY:VPAIR_FP
+	 (match_operand:VPAIR_FP 1 "vsx_register_operand" "wa")))]
+  "TARGET_VECTOR_PAIR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (VPAIR_FP_UNARY:<VPAIR_VECT> (match_dup 3)))
+   (set (match_dup 4) (VPAIR_FP_UNARY:<VPAIR_VECT> (match_dup 5)))]
+{
+  machine_mode vmode = <VPAIR_VECT>mode;
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+
+  operands[2] = gen_rtx_REG (vmode, reg0);
+  operands[3] = gen_rtx_REG (vmode, reg1);
+  operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[5] = gen_rtx_REG (vmode, reg1 + 1);
+}
+  [(set_attr "length" "8")])
+
+;; Optimize negative absolute value (both floating point and integer)
+(define_insn_and_split "nabs<mode>2"
+  [(set (match_operand:VPAIR 0 "vsx_register_operand" "=wa")
+	(neg:VPAIR
+	 (abs:VPAIR
+	  (match_operand:VPAIR 1 "vsx_register_operand" "wa"))))]
+  "TARGET_VECTOR_PAIR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2)
+	(neg:<VPAIR_VECT>
+	 (abs:<VPAIR_VECT> (match_dup 3))))
+   (set (match_dup 4)
+	(neg:<VPAIR_VECT>
+	 (abs:<VPAIR_VECT> (match_dup 5))))]
+{
+  machine_mode vmode = <VPAIR_VECT>mode;
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+
+  operands[2] = gen_rtx_REG (vmode, reg0);
+  operands[3] = gen_rtx_REG (vmode, reg1);
+  operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[5] = gen_rtx_REG (vmode, reg1 + 1);
+}
+  [(set_attr "length" "8")])
+
+;; Vector pair floating point arithmetic binary operations
+(define_insn_and_split "<vpair_op><mode>3"
+  [(set (match_operand:VPAIR_FP 0 "vsx_register_operand" "=wa")
+	(VPAIR_FP_BINARY:VPAIR_FP
+	 (match_operand:VPAIR_FP 1 "vsx_register_operand" "wa")
+	 (match_operand:VPAIR_FP 2 "vsx_register_operand" "wa")))]
+  "TARGET_VECTOR_PAIR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3)
+	(VPAIR_FP_BINARY:<VPAIR_VECT> (match_dup 4)
+				      (match_dup 5)))
+   (set (match_dup 6)
+	(VPAIR_FP_BINARY:<VPAIR_VECT> (match_dup 7)
+				      (match_dup 8)))]
+{
+  machine_mode vmode = <VPAIR_VECT>mode;
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  unsigned reg2 = reg_or_subregno (operands[2]);
+
+  operands[3] = gen_rtx_REG (vmode, reg0);
+  operands[4] = gen_rtx_REG (vmode, reg1);
+  operands[5] = gen_rtx_REG (vmode, reg2);
+
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg2 + 1);
+}
+  [(set_attr "length" "8")])
+
+\f
+;; Vector pair integer arithmetic unary operations
+(define_insn_and_split "<vpair_op><mode>2"
+  [(set (match_operand:VPAIR_INT 0 "vsx_register_operand" "=wa")
+	(VPAIR_INT_UNARY:VPAIR_INT
+	 (match_operand:VPAIR_INT 1 "vsx_register_operand" "wa")))]
+  "TARGET_VECTOR_PAIR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (VPAIR_INT_UNARY:<VPAIR_VECT> (match_dup 3)))
+   (set (match_dup 4) (VPAIR_INT_UNARY:<VPAIR_VECT> (match_dup 5)))]
+{
+  machine_mode vmode = <VPAIR_VECT>mode;
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+
+  operands[2] = gen_rtx_REG (vmode, reg0);
+  operands[3] = gen_rtx_REG (vmode, reg1);
+  operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[5] = gen_rtx_REG (vmode, reg1 + 1);
+}
+  [(set_attr "length" "8")])
+
+;; Vector pair integer arithmetic binary operations
+(define_insn_and_split "<vpair_op><mode>3"
+  [(set (match_operand:VPAIR_INT 0 "vsx_register_operand" "=v")
+	(VPAIR_INT_BINARY:VPAIR_INT
+	 (match_operand:VPAIR_INT 1 "vsx_register_operand" "v")
+	 (match_operand:VPAIR_INT 2 "vsx_register_operand" "v")))]
+  "TARGET_VECTOR_PAIR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3)
+	(VPAIR_INT_BINARY:<VPAIR_VECT> (match_dup 4)
+				      (match_dup 5)))
+   (set (match_dup 6)
+	(VPAIR_INT_BINARY:<VPAIR_VECT> (match_dup 7)
+				      (match_dup 8)))]
+{
+  machine_mode vmode = <VPAIR_VECT>mode;
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  unsigned reg2 = reg_or_subregno (operands[2]);
+
+  operands[3] = gen_rtx_REG (vmode, reg0);
+  operands[4] = gen_rtx_REG (vmode, reg1);
+  operands[5] = gen_rtx_REG (vmode, reg2);
+
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg2 + 1);
+}
+  [(set_attr "length" "8")])
+
+;; Vector pair integer arithmetic logical operations
+(define_insn_and_split "<vpair_op><mode>3"
+  [(set (match_operand:VPAIR_INT 0 "vsx_register_operand" "=wa")
+	(VPAIR_INT_LOGICAL:VPAIR_INT
+	 (match_operand:VPAIR_INT 1 "vsx_register_operand" "wa")
+	 (match_operand:VPAIR_INT 2 "vsx_register_operand" "wa")))]
+  "TARGET_VECTOR_PAIR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3)
+	(VPAIR_INT_LOGICAL:<VPAIR_VECT> (match_dup 4)
+					(match_dup 5)))
+   (set (match_dup 6)
+	(VPAIR_INT_LOGICAL:<VPAIR_VECT> (match_dup 7)
+					(match_dup 8)))]
+{
+  machine_mode vmode = <VPAIR_VECT>mode;
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  unsigned reg2 = reg_or_subregno (operands[2]);
+
+  operands[3] = gen_rtx_REG (vmode, reg0);
+  operands[4] = gen_rtx_REG (vmode, reg1);
+  operands[5] = gen_rtx_REG (vmode, reg2);
+
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg2 + 1);
+}
+  [(set_attr "length" "8")])
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 78df3a815ef..521215ed6ba 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -255,6 +255,10 @@
 (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
 (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
 
+;; Iterator for V4SF and V8SF extracts
+(define_mode_iterator V4SF_V8SF [(V4SF "VECTOR_UNIT_VSX_P (V4SFmode)")
+				 (V8SF "TARGET_MMA")])
+
 ;; Longer vec int modes for rotate/mask ops
 ;; and Vector Integer Multiply/Divide/Modulo Instructions
 (define_mode_iterator VIlong [V2DI V4SI])
@@ -3545,22 +3549,66 @@
 }
   [(set_attr "type" "fpload,load")])
 
-;; Extract a SF element from V4SF
-(define_insn_and_split "vsx_extract_v4sf"
+;; Extract DF from vector pair
+(define_insn "vsx_extract_v4df"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
+	(vec_select:DF
+	 (match_operand:V4DF 1 "gpc_reg_operand" "wa")
+	 (parallel
+	  [(match_operand:QI 2 "const_0_to_3_operand" "n")])))]
+  "TARGET_MMA"
+{
+  unsigned int r = reg_or_subregno (operands[1]);
+  HOST_WIDE_INT index = INTVAL (operands[2]);
+  if ((BYTES_BIG_ENDIAN && index > 1)
+      || (!BYTES_BIG_ENDIAN && index < 2))
+    r++;
+
+  operands[3] = gen_rtx_REG (DFmode, r);
+  if ((index % 2) == 0)
+    {
+      /* value is in the high part of the register.  */
+      if (r == reg_or_subregno (operands[0]))
+	return ASM_COMMENT_START " vec_extract to same register (%x0)";
+
+      return "xxlor %x0,%x3,%x3";
+    }
+  else
+    /* value is in the low part of the register.  */
+    return "xxpermdi %x0,%x3,%x3,3";
+}
+  [(set_attr "type" "vecperm")])
+
+;; Extract a SF element from V4SF or V8SF
+(define_insn_and_split "vsx_extract_<mode>"
   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
 	(vec_select:SF
-	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
+	 (match_operand:V4SF_V8SF 1 "vsx_register_operand" "wa")
 	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
-   (clobber (match_scratch:V4SF 3 "=0"))]
+   (clobber (match_scratch:V4SF 3 "=wa"))]
   "VECTOR_UNIT_VSX_P (V4SFmode)"
   "#"
-  "&& 1"
+  "&& (<MODE>mode == V4SFmode || reload_completed)"
   [(const_int 0)]
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
   rtx op2 = operands[2];
   rtx op3 = operands[3];
+
+  /* If this is V8SFmode, select the right vector registers.  */
+  if (<MODE>mode == V8SFmode)
+    {
+      unsigned int r = reg_or_subregno (op1);
+      HOST_WIDE_INT index = INTVAL (op2);
+      if ((BYTES_BIG_ENDIAN && index > 3)
+          || (!BYTES_BIG_ENDIAN && index < 4))
+        r++;
+
+      operands[1] = op1 = gen_rtx_REG (V4SFmode, r);
+      operands[2] = op2 = GEN_INT (index & 0x3);
+    }
+
   rtx tmp;
   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-01  3:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-01  3:06 [gcc(refs/users/meissner/heads/work129-vpair)] First attempt at vector_size(32) Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).