public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work045)] Generate XXSPLTIW for V8HI, V4SI and V4SF constants.
@ 2021-04-06 23:46 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2021-04-06 23:46 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ba1d45128f78ef20730e6ddb75003b1225e541e6

commit ba1d45128f78ef20730e6ddb75003b1225e541e6
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Apr 6 19:45:40 2021 -0400

    Generate XXSPLTIW for V8HI, V4SI and V4SF constants.
    
    This patch generates the XXSPLTIW instruction on ISA 3.1 (power10) for
    vector short, vector int, and vector float constants.
    
    gcc/
    2021-04-06  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/predicates.md (xxspltiw_operand): New predicate.
            (easy_vector_constant): If we can generate XXSPLTIW, return true.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            support for -mxxspltiw.
            (xxspltib_constant_p): If we can use XXSPLTIW, don't return true
            for doing XXSPLTIB and an extend.
            (xxspltiw_constant_p): New function.
            (output_vec_const_move): Add support for generating XXSPLTIW.
            (rs6000_opt_masks): Add -mxxspltiw.
            * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
            * config/rs6000/vsx.md (XXSPLTIW): New iterator.
            (*xxspltiw<mode>): New insns to generate XXSPLTIW.

Diff:
---
 gcc/config/rs6000/predicates.md   |  22 ++++++
 gcc/config/rs6000/rs6000-protos.h |   1 +
 gcc/config/rs6000/rs6000.c        | 144 ++++++++++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000.opt      |   4 ++
 gcc/config/rs6000/vsx.md          |  19 +++++
 5 files changed, 190 insertions(+)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e21bc745f72..dc23f62a3af 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -640,6 +640,25 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a VEC_DUPLICATE or CONST_VECTOR that the ISA 3.1
+;; XXSPLTIW instruction can load.  Do not return 1 if the splat value is in
+;; the range -16..15, which a smaller VSPLTIS{H,W} instruction can load.
+(define_predicate "xxspltiw_operand"
+  (match_code "vec_duplicate,const_vector")
+{
+  HOST_WIDE_INT value = 0;
+
+  if (!xxspltiw_constant_p (op, mode, &value))
+    return false;
+
+  /* xxspltiw_constant_p returns V8HI as (element | (element << 16)).  Undo
+     this to see if the value is in the range -16..15.  */
+  if (mode == V8HImode)
+    value = ((value & 0xffff) ^ 0x8000) - 0x8000;
+
+  return !EASY_VECTOR_15 (value);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -653,6 +672,9 @@
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (xxspltiw_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 8ac30905013..eff72af8814 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 
 extern bool easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool xxspltiw_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index dc38c093c53..b02ab4b2a3b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4476,6 +4476,10 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_MMA;
     }
 
+  if (TARGET_POWER10 && TARGET_VSX
+      && (rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
+    rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
+
   if (!TARGET_PCREL && TARGET_PCREL_OPT)
     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
 
@@ -6460,6 +6464,12 @@ xxspltib_constant_p (rtx op,
   else if (IN_RANGE (value, -1, 0))
     *num_insns_ptr = 1;
 
+  /* If XXSPLTIW is available, don't return true if we can use that
+     instruction instead of doing 2 instructions. */
+  else if (TARGET_XXSPLTIW
+	   && (mode == V4SImode || mode == V8HImode))
+    return false;
+
   else
     *num_insns_ptr = 2;
 
@@ -6467,6 +6477,117 @@ xxspltib_constant_p (rtx op,
   return true;
 }
 
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTIW instruction, possibly with a sign extension.  Only V8HImode,
+   V4SImode and V4SFmode are accepted; if MODE is VOIDmode, OP's mode is used.
+
+   On success, store the word immediate to splat in *CONSTANT_PTR (for
+   V8HImode, the halfword element replicated in both halves of the word).
+   On failure, *CONSTANT_PTR is left as 0.  */
+
+bool
+xxspltiw_constant_p (rtx op, machine_mode mode, HOST_WIDE_INT *constant_ptr)
+{
+  *constant_ptr = 0;
+
+  if (!TARGET_XXSPLTIW)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (mode != V8HImode && mode != V4SImode && mode != V4SFmode)
+    return false;
+
+  /* Find the splatted scalar; VEC_DUPLICATE holds it as operand 0.  */
+  rtx element = op;
+  if (GET_CODE (op) == VEC_DUPLICATE)
+    element = XEXP (op, 0);
+
+  else if (GET_CODE (op) == CONST_VECTOR)
+    {
+      size_t nunits = GET_MODE_NUNITS (mode);
+      element = CONST_VECTOR_ELT (op, 0);
+
+      for (size_t i = 1; i < nunits; i++)
+	if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+	  return false;
+    }
+
+  HOST_WIDE_INT value;
+  if (CONST_INT_P (element))
+    {
+      value = INTVAL (element);
+      if (!SIGNED_INTEGER_NBIT_P (value, 32))
+	return false;
+
+      /* Replicate the halfword into both halves of the word immediate.  */
+      if (mode == V8HImode)
+	value = (value & 0xffff) | ((value & 0xffff) << 16);
+    }
+
+  else if (CONST_DOUBLE_P (element))
+    value = rs6000_const_f32_to_i32 (element);
+
+  else
+    return false;
+
+  *constant_ptr = value;
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTIDP instruction.  MODE must be V2DFmode (a VEC_DUPLICATE or a
+   CONST_VECTOR with equal elements), SFmode or DFmode, and the value must be
+   exactly representable in SFmode and not be an SFmode denormal.
+
+   Return the constant to use in the XXSPLTIDP instruction via CONSTANT_PTR;
+   it is set to 0 when false is returned.  */
+
+bool
+xxspltidp_constant_p (rtx op, machine_mode mode, long *constant_ptr)
+{
+  *constant_ptr = 0;
+
+  rtx element = op;
+  if (mode == V2DFmode)
+    {
+      /* Handle VEC_DUPLICATE and CONST_VECTOR.  */
+      if (GET_CODE (op) == VEC_DUPLICATE)
+	element = XEXP (op, 0);
+
+      else if (GET_CODE (op) == CONST_VECTOR)
+	{
+	  element = CONST_VECTOR_ELT (op, 0);
+	  if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
+	    return false;
+	}
+
+      else
+	return false;
+    }
+
+  else if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (!CONST_DOUBLE_P (element))
+    return false;
+
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (element);
+  if (!exact_real_truncate (SFmode, rv))
+    return false;
+
+  long value;
+  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
+
+  /* Test for SFmode denormal (exponent is 0, mantissa field is non-zero).  */
+  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
+    return false;
+
+  *constant_ptr = value;
+  return true;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6511,6 +6632,28 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      HOST_WIDE_INT xxspltiw_value = 0;
+      if (xxspltiw_constant_p (vec, mode, &xxspltiw_value))
+	{
+	  /* Generate the smaller VSPLTIS{H,W} if we can.  */
+	  if (dest_vmx_p && mode == V8HImode)
+	    {
+	      long hi_value = ((xxspltiw_value & 0xffff) ^ 0x8000) - 0x8000;
+	      if (IN_RANGE (hi_value, -16, 15))
+		{
+		  operands[2] = GEN_INT (hi_value);
+		  return "vspltish %0,%2";
+		}
+	    }
+
+	  operands[2] = GEN_INT (xxspltiw_value);
+	  if (dest_vmx_p && mode == V4SImode
+	      && IN_RANGE (xxspltiw_value, -16, 15))
+	    return "vspltisw %0,%2";
+
+	  return "xxspltiw %x0,%2";
+	}
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -24008,6 +24151,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "string",			0,				false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
   { "vsx",			OPTION_MASK_VSX,		false, true  },
+  { "xxspltiw",			OPTION_MASK_XXSPLTIW,		false, true  },
 #ifdef OPTION_MASK_64BIT
 #if TARGET_AIX_OS
   { "aix64",			OPTION_MASK_64BIT,		false, false },
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0dbdf753673..9c0ecf75a6d 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -619,3 +619,7 @@ Generate (do not generate) MMA instructions.
 
 mrelative-jumptables
 Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
+
+mxxspltiw
+Target Undocumented Mask(XXSPLTIW) Var(rs6000_isa_flags)
+Generate (do not generate) the XXSPLTIW instruction.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bd800c4ce0c..972f08bba1f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -209,6 +209,9 @@
 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 
+;; Iterator for loading constants with xxspltiw
+(define_mode_iterator XXSPLTIW [V8HI V4SI V4SF])
+
 ;; Vector reverse byte modes
 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
 
@@ -1159,6 +1162,22 @@
   [(set_attr "type" "vecperm")
    (set_attr "length" "8")])
 
+;; XXSPLTIW support: load an xxspltiw_operand constant into a VSX register.
+(define_insn "*xxspltiw<mode>"
+  [(set (match_operand:XXSPLTIW 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTIW 1 "xxspltiw_operand"))]
+  "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT value = 0;
+
+  if (!xxspltiw_constant_p (operands[1], <MODE>mode, &value))
+    gcc_unreachable ();
+
+  operands[2] = GEN_INT (value);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "yes")])
 
 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-04-06 23:46 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-06 23:46 [gcc(refs/users/meissner/heads/work045)] Generate XXSPLTIW for V8HI, V4SI and V4SF constants Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).