public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work048)] Add XXSPLTIW support.
@ 2021-04-15 17:48 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2021-04-15 17:48 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ad6b0b39b3ffc5f9b21888dcc427a12fafa468c2

commit ad6b0b39b3ffc5f9b21888dcc427a12fafa468c2
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 15 13:48:29 2021 -0400

    Add XXSPLTIW support.
    
    This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
    instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
    adding support for vector constants that can be used, and adding a
    VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
    
    I rewrote the XXSPLTIW built-in functions to use VEC_DUPLICATE instead of
    UNSPEC.  Because the XXSPLTIW instruction can set any VSX register, I
    moved the insns from altivec.md to vsx.md.
    
    gcc/
    2021-04-15  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/altivec.md (UNSPEC_XXSPLTIW): Delete.
            (xxspltiw_v4si): Move to vsx.md and rewrite.
            (xxspltiw_v4sf): Move to vsx.md and rewrite.
            (xxspltiw_v4sf_inst): Delete.
            * config/rs6000/predicates.md (xxspltiw_operand): New predicate.
            (easy_vector_constant): If we can use XXSPLTIW, the vector
            constant is easy.
            * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
            -mxxspltiw support.
            (POWERPC_MASKS): Add -mxxspltiw support.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            -mxxspltiw support.
            (xxspltib_constant_p): If we can generate XXSPLTIW, don't generate
            a XXSPLTIB and an extend instruction.
            (output_vec_const_move): Add support for XXSPLTIW vector
            constants.
            (rs6000_opt_masks): Add -mxxspltiw.
            * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
            * config/rs6000/vsx.md (xxspltiw_v8hi): New insn.
            (xxspltiw_v4si): Move from altivec.md and reimplement to use
            VEC_DUPLICATE.
            (xxspltiw_v4sf): Move from altivec.md and reimplement to use
            VEC_DUPLICATE.
            (XXSPLTIW): New mode iterator.
            (XXSPLTIW splitter): New insn splitter for XXSPLTIW.

Diff:
---
 gcc/config/rs6000/altivec.md      | 30 -----------------
 gcc/config/rs6000/predicates.md   | 29 +++++++++++++++++
 gcc/config/rs6000/rs6000-cpus.def |  7 ++--
 gcc/config/rs6000/rs6000.c        | 18 +++++++++--
 gcc/config/rs6000/rs6000.opt      |  4 +++
 gcc/config/rs6000/vsx.md          | 68 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 36 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1351dafbc41..708296cb14d 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -176,7 +176,6 @@
    UNSPEC_VSTRIL
    UNSPEC_SLDB
    UNSPEC_SRDB
-   UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTID
    UNSPEC_XXSPLTI32DX
    UNSPEC_XXBLEND
@@ -820,35 +819,6 @@
   "vs<SLDB_lr>dbi %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
-(define_insn "xxspltiw_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=wa")
-	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
- "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")
-  (set_attr "prefixed" "yes")])
-
-(define_expand "xxspltiw_v4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=wa")
-	(unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
-{
-  long long value = rs6000_const_f32_to_i32 (operands[1]);
-  emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
-  DONE;
-})
-
-(define_insn "xxspltiw_v4sf_inst"
-  [(set (match_operand:V4SF 0 "register_operand" "=wa")
-	(unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
- "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")
-  (set_attr "prefixed" "yes")])
-
 (define_expand "xxspltidp_v2df"
   [(set (match_operand:V2DF 0 "register_operand" )
 	(unspec:V2DF [(match_operand:SF 1 "const_double_operand")]
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e21bc745f72..bf678f429af 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -640,6 +640,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR that can be loaded with the
+;; XXSPLTIW instruction.  Do not return 1 if the constant can be generated with
+;; XXSPLTIB or VSPLTIS{H,W}
+(define_predicate "xxspltiw_operand"
+  (match_code "const_vector")
+{
+  if (!TARGET_XXSPLTIW)		/* XXSPLTIW generation is not enabled.  */
+    return false;
+
+  if (mode != V8HImode && mode != V4SImode && mode != V4SFmode)	/* Only these modes are handled.  */
+    return false;
+
+  rtx element = CONST_VECTOR_ELT (op, 0);	/* Every other element must equal this one.  */
+  for (size_t i = 1; i < GET_MODE_NUNITS (mode); i++)
+    if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+      return false;
+
+  if (element == CONST0_RTX (GET_MODE_INNER (mode)))	/* Reject the all-zero splat.  */
+    return false;
+
+  if (CONST_INT_P (element) && EASY_VECTOR_15 (INTVAL (element)))	/* Reject integers satisfying EASY_VECTOR_15.  */
+    return false;
+
+  return true;
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -653,6 +679,9 @@
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (xxspltiw_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index cbbb42c1b3a..a21a95bc7aa 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -85,7 +85,8 @@
 				 | OTHER_POWER10_MASKS			\
 				 | OPTION_MASK_P10_FUSION		\
 				 | OPTION_MASK_P10_FUSION_LD_CMPI	\
-				 | OPTION_MASK_P10_FUSION_2LOGICAL)
+				 | OPTION_MASK_P10_FUSION_2LOGICAL	\
+				 | OPTION_MASK_XXSPLTIW)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -160,8 +161,8 @@
 				 | OPTION_MASK_RECIP_PRECISION		\
 				 | OPTION_MASK_SOFT_FLOAT		\
 				 | OPTION_MASK_STRICT_ALIGN_OPTIONAL	\
-				 | OPTION_MASK_VSX)
-
+				 | OPTION_MASK_VSX			\
+				 | OPTION_MASK_XXSPLTIW)
 #endif
 
 /* This table occasionally claims that a processor does not support a
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 28dbc507c5e..23f0925674c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4479,6 +4479,12 @@ rs6000_option_override_internal (bool global_init_p)
   if (!TARGET_PCREL && TARGET_PCREL_OPT)
     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
 
+  if (TARGET_POWER10 && TARGET_VSX
+      && (rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
+    rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
+  else if (!TARGET_POWER10 || !TARGET_VSX)
+    rs6000_isa_flags &= ~OPTION_MASK_XXSPLTIW;
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
 
@@ -6448,9 +6454,11 @@ xxspltib_constant_p (rtx op,
 
   /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
      sign extend.  Special case 0/-1 to allow getting any VSX register instead
-     of an Altivec register.  */
-  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
-      && EASY_VECTOR_15 (value))
+     of an Altivec register.  Also if we can generate a XXSPLTIW instruction,
+     don't emit a XXSPLTIB and an extend instruction.  */
+  if ((mode == V4SImode || mode == V8HImode)
+      && !IN_RANGE (value, -1, 0)
+      && (EASY_VECTOR_15 (value) || TARGET_XXSPLTIW))
     return false;
 
   /* Return # of instructions and the constant byte for XXSPLTIB.  */
@@ -6511,6 +6519,9 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      if (xxspltiw_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -24008,6 +24019,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "string",			0,				false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
   { "vsx",			OPTION_MASK_VSX,		false, true  },
+  { "xxspltiw",			OPTION_MASK_XXSPLTIW,		false, true  },
 #ifdef OPTION_MASK_64BIT
 #if TARGET_AIX_OS
   { "aix64",			OPTION_MASK_64BIT,		false, false },
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0dbdf753673..b01ebd78c7f 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -619,3 +619,7 @@ Generate (do not generate) MMA instructions.
 
 mrelative-jumptables
 Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
+
+mxxspltiw
+Target Undocumented Mask(XXSPLTIW) Var(rs6000_isa_flags)
+Generate (do not generate) XXSPLTIW instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..9bad4da1e34 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6216,3 +6216,71 @@
   "TARGET_POWER10"
   "vmulld %0,%1,%2"
   [(set_attr "type" "veccomplex")])
+
+\f
+;; XXSPLTIW support
+(define_insn "*xxspltiw_v8hi"
+  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa,wa,v,wa")
+	(vec_duplicate:V8HI
+	 (match_operand:HI 1 "const_int_operand" "O,wM,wB,n")))]
+ "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT uns_value = INTVAL (operands[1]) & 0xffff;	/* Low 16 bits.  */
+  HOST_WIDE_INT sign_value = (uns_value ^ 0x8000) - 0x8000;	/* Sign-extended.  */
+
+  if (sign_value == 0)
+    return "xxspltib %x0,0";
+
+  if (sign_value == -1)
+    return "xxspltib %x0,255";
+
+  int r = reg_or_subregno (operands[0]);
+  if (ALTIVEC_REGNO_P (r) && EASY_VECTOR_15 (sign_value))
+    return "vspltish %0,%1";
+
+  operands[2] = GEN_INT ((uns_value << 16) | uns_value);	/* Halfword in both halves of the word.  */
+  return "xxspltiw %x0,%2";	/* %x0: destination may be any VSX register.  */
+}
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "xxspltiw_v4si"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+	(vec_duplicate:V4SI
+	 (match_operand:SI 1 "s32bit_cint_operand" "O,wM,wB,n")))]
+ "TARGET_XXSPLTIW"
+ "@
+  xxspltib %x0,0
+  xxspltib %x0,255
+  vspltisw %0,%1
+  xxspltiw %x0,%1"
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "xxspltiw_v4sf"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
+	(vec_duplicate:V4SF
+	 (match_operand:SF 1 "const_double_operand" "O,F")))]
+ "TARGET_XXSPLTIW"
+{
+  if (operands[1] == CONST0_RTX (SFmode))	/* Zero needs no 32-bit immediate.  */
+    return "xxspltib %x0,0";
+
+  operands[2] = GEN_INT (rs6000_const_f32_to_i32 (operands[1]));	/* Presumably the SFmode bit image; confirm in rs6000.c.  */
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecsimple")
+  (set_attr "prefixed" "*,yes")])
+
+(define_mode_iterator XXSPLTIW [V8HI V4SI V4SF])
+
+(define_split
+  [(set (match_operand:XXSPLTIW 0 "vsx_register_operand")
+	(match_operand:XXSPLTIW 1 "xxspltiw_operand"))]
+  "TARGET_XXSPLTIW"
+  [(set (match_dup 0)
+	(vec_duplicate:<MODE> (match_dup 2)))]
+{
+  operands[2] = CONST_VECTOR_ELT (operands[1], 0);	/* xxspltiw_operand ensures all elements equal element 0.  */
+})
+


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-04-15 17:48 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-15 17:48 [gcc(refs/users/meissner/heads/work048)] Add XXSPLTIW support Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).