public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work055)] Generate XXSPLTIW on power10.
@ 2021-06-09 15:54 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2021-06-09 15:54 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a9b65375ecacdc3c2b7d30e818830b628ba9f431

commit a9b65375ecacdc3c2b7d30e818830b628ba9f431
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Jun 9 11:54:30 2021 -0400

    Generate XXSPLTIW on power10.
    
    This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
    instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
    adding support for vector constants that can be used, and adding a
    VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
    
    I rewrote the XXSPLTIW built-in functions to use VEC_DUPLICATE instead of
    UNSPEC.
    
    This patch also updates the insn counts in the vec-splati-runnable.c test to
    work with the new option to use XXSPLTIW to load up some vector constants.
    
    I added 3 new tests to test loading up V8HI, V4SI, and V4SF vector
    constants.
    
    The pr86731-fwrapv.c test needed to be adjusted to account for xxspltiw
    code generation on power10.
    
    gcc/
    2021-06-09  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/predicates.md (xxspltiw_operand): New predicate.
            (easy_vector_constant): If we can use XXSPLTIW, the vector
            constant is easy.
            * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
            -mxxspltiw support.
            (POWERPC_MASKS): Add -mxxspltiw support.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            -mxxspltiw support.
            (xxspltib_constant_p): If we can generate XXSPLTIW, don't generate
            a XXSPLTIB and an extend instruction.
            (output_vec_const_move): Add support for loading up vector
            constants with XXSPLTIW.
            (rs6000_opt_masks): Add -mxxspltiw.
            * config/rs6000/rs6000.h (SIGN_EXTEND_8BIT): New macro.
            (SIGN_EXTEND_16BIT): New macro.
            (SIGN_EXTEND_32BIT): New macro.
            * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTIW): Delete.
            (xxspltiw_v8hi): New insn.
            (xxspltiw_v4si): Rewrite to generate a vector constant.
            (xxspltiw_v4sf): Rewrite to generate a vector constant.
            (xxspltiw_v4si_inst): Delete.
            (xxspltiw_v4sf_inst): Delete.
            (xxspltiw_v8hi_dup): New insn.
            (xxspltiw_v4si_dup): New insn.
            (xxspltiw_v4sf_dup): New insn.
            (XXSPLTIW): New mode iterator.
            (XXSPLTIW splitter): New insn splitter for XXSPLTIW.
    
    gcc/testsuite/
    2021-06-09  Michael Meissner  <meissner@linux.ibm.com>
    
            * gcc.target/powerpc/pr86731-fwrapv.c: Update insn counts on
            power10.
            * gcc.target/powerpc/vec-splati-runnable.c: Update insn counts.
            * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.

Diff:
---
 gcc/config/rs6000/predicates.md                    |  29 ++++
 gcc/config/rs6000/rs6000-cpus.def                  |   7 +-
 gcc/config/rs6000/rs6000.c                         |  18 ++-
 gcc/config/rs6000/rs6000.h                         |  19 +++
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           | 146 ++++++++++++++++-----
 gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv.c  |   9 +-
 .../gcc.target/powerpc/vec-splat-constant-v4sf.c   |  66 ++++++++++
 .../gcc.target/powerpc/vec-splat-constant-v4si.c   |  51 +++++++
 .../gcc.target/powerpc/vec-splat-constant-v8hi.c   |  53 ++++++++
 .../gcc.target/powerpc/vec-splati-runnable.c       |   4 +-
 11 files changed, 364 insertions(+), 42 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 121cbf14810..aa17ddc94e5 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -640,6 +640,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR that can be loaded with the
+;; XXSPLTIW instruction.  Do not return 1 if the constant can be generated with
+;; XXSPLTIB or VSPLTIS{H,W}.
+(define_predicate "xxspltiw_operand"
+  (match_code "const_vector")
+{
+  if (!TARGET_XXSPLTIW)
+    return false;
+
+  if (mode != V8HImode && mode != V4SImode && mode != V4SFmode)
+    return false;
+  /* Every element must equal element 0, i.e. the vector is a splat.  */
+  rtx element = CONST_VECTOR_ELT (op, 0);
+  for (size_t i = 1; i < GET_MODE_NUNITS (mode); i++)
+    if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+      return false;
+  /* An all-zero splat is not an XXSPLTIW constant.  */
+  if (element == CONST0_RTX (GET_MODE_INNER (mode)))
+    return false;
+  /* Neither is an integer splat that satisfies EASY_VECTOR_15.  */
+  if (CONST_INT_P (element) && EASY_VECTOR_15 (INTVAL (element)))
+    return false;
+
+  return true;
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -653,6 +679,9 @@
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (xxspltiw_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 52ce84835f7..e6c5891d334 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -88,7 +88,8 @@
 				 | OPTION_MASK_P10_FUSION_2LOGICAL	\
 				 | OPTION_MASK_P10_FUSION_LOGADD 	\
 				 | OPTION_MASK_P10_FUSION_ADDLOG	\
-				 | OPTION_MASK_P10_FUSION_2ADD)
+				 | OPTION_MASK_P10_FUSION_2ADD		\
+				 | OPTION_MASK_XXSPLTIW)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -166,8 +167,8 @@
 				 | OPTION_MASK_RECIP_PRECISION		\
 				 | OPTION_MASK_SOFT_FLOAT		\
 				 | OPTION_MASK_STRICT_ALIGN_OPTIONAL	\
-				 | OPTION_MASK_VSX)
-
+				 | OPTION_MASK_VSX			\
+				 | OPTION_MASK_XXSPLTIW)
 #endif
 
 /* This table occasionally claims that a processor does not support a
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9bb945de7bb..2ac53d74a77 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4501,6 +4501,12 @@ rs6000_option_override_internal (bool global_init_p)
   if (!TARGET_PCREL && TARGET_PCREL_OPT)
     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
 
+  if (TARGET_POWER10 && TARGET_VSX
+      && (rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
+    rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
+  else if (!TARGET_POWER10 || !TARGET_VSX)
+    rs6000_isa_flags &= ~OPTION_MASK_XXSPLTIW;
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
 
@@ -6482,9 +6488,11 @@ xxspltib_constant_p (rtx op,
 
   /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
      sign extend.  Special case 0/-1 to allow getting any VSX register instead
-     of an Altivec register.  */
-  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
-      && EASY_VECTOR_15 (value))
+     of an Altivec register.  Also if we can generate a XXSPLTIW instruction,
+     don't emit a XXSPLTIB and an extend instruction.  */
+  if ((mode == V4SImode || mode == V8HImode)
+      && !IN_RANGE (value, -1, 0)
+      && (EASY_VECTOR_15 (value) || TARGET_XXSPLTIW))
     return false;
 
   /* Return # of instructions and the constant byte for XXSPLTIB.  */
@@ -6545,6 +6553,9 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      if (xxspltiw_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -24116,6 +24127,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "string",			0,				false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
   { "vsx",			OPTION_MASK_VSX,		false, true  },
+  { "xxspltiw",			OPTION_MASK_XXSPLTIW,		false, true  },
 #ifdef OPTION_MASK_64BIT
 #if TARGET_AIX_OS
   { "aix64",			OPTION_MASK_64BIT,		false, false },
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 7131de609e3..e03f14a0405 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2612,3 +2612,22 @@ while (0)
        rs6000_asm_output_opcode (STREAM);				\
     }									\
   while (0)
+
+/* Macros to sign-extend the low 8, 16, or 32 bits of X.  */
+#if HOST_BITS_PER_CHAR == 8
+#define SIGN_EXTEND_8BIT(X) ((HOST_WIDE_INT)(signed char)(X))
+#else
+#define SIGN_EXTEND_8BIT(X) ((((X) & 0xff) ^ 0x80) - 0x80)
+#endif
+
+#if HOST_BITS_PER_SHORT == 16
+#define SIGN_EXTEND_16BIT(X) ((HOST_WIDE_INT)(short)(X))
+#else
+#define SIGN_EXTEND_16BIT(X) ((((X) & 0xffff) ^ 0x8000) - 0x8000)
+#endif
+
+#if HOST_BITS_PER_INT == 32
+#define SIGN_EXTEND_32BIT(X) ((HOST_WIDE_INT)(int)(X))
+#else
+#define SIGN_EXTEND_32BIT(X) ((((X) & 0xffffffff) ^ 0x80000000) - 0x80000000)
+#endif
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0538db387dc..38eaa36d6d8 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -639,3 +639,7 @@ Enable instructions that guard against return-oriented programming attacks.
 mprivileged
 Target Var(rs6000_privileged) Init(0)
 Generate code that will run in privileged state.
+
+mxxspltiw
+Target Undocumented Mask(XXSPLTIW) Var(rs6000_isa_flags)
+Generate (do not generate) XXSPLTIW instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 15a8c0e22d8..76e10f73dec 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -386,7 +386,6 @@
    UNSPEC_VDIVES
    UNSPEC_VDIVEU
    UNSPEC_XXEVAL
-   UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTID
    UNSPEC_XXSPLTI32DX
    UNSPEC_XXBLEND
@@ -6239,36 +6238,6 @@
   "vmulld %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-;; XXSPLTIW built-in function support
-(define_insn "xxspltiw_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=wa")
-	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
- "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")
-  (set_attr "prefixed" "yes")])
-
-(define_expand "xxspltiw_v4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=wa")
-	(unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
-{
-  long long value = rs6000_const_f32_to_i32 (operands[1]);
-  emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
-  DONE;
-})
-
-(define_insn "xxspltiw_v4sf_inst"
-  [(set (match_operand:V4SF 0 "register_operand" "=wa")
-	(unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
- "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")
-  (set_attr "prefixed" "yes")])
-
 ;; XXSPLTIDP built-in function support
 (define_expand "xxspltidp_v2df"
   [(set (match_operand:V2DF 0 "register_operand" )
@@ -6420,3 +6389,118 @@
    [(set_attr "type" "vecsimple")
     (set_attr "prefixed" "yes")])
 
+;; XXSPLTIW built-in function support.  Convert to a vector constant, which
+;; will then be optimized to the XXSPLTIW instruction.
+(define_expand "xxspltiw_v4si"
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (use (match_operand:SI 1 "s32bit_cint_operand"))]
+  "TARGET_POWER10"
+{
+  rtx op1 = operands[1];
+  rtvec rv = gen_rtvec (4, op1, op1, op1, op1);
+  emit_move_insn (operands[0], gen_rtx_CONST_VECTOR (V4SImode, rv));
+  DONE;  /* Do not also emit the (use ...) template.  */
+})
+
+(define_expand "xxspltiw_v4sf"
+  [(use (match_operand:V4SF 0 "register_operand"))
+   (use (match_operand:SF 1 "const_double_operand"))]
+  "TARGET_POWER10"
+{
+  rtx op1 = operands[1];
+  rtvec rv = gen_rtvec (4, op1, op1, op1, op1);
+  emit_move_insn (operands[0], gen_rtx_CONST_VECTOR (V4SFmode, rv));
+  DONE;  /* Do not also emit the (use ...) template.  */
+})
+
+;; XXSPLTIW support.  Add support for the XXSPLTIW built-in functions, and to
+;; use XXSPLTIW to load up vector V8HImode, V4SImode, and V4SFmode vector
+;; constants where all elements are the same.  We special case loading up
+;; integer -16..15 and floating point 0.0f, since we can use the shorter
+;; XXSPLTIB, VSPLTISH, and VSPLTISW instructions.
+
+(define_insn "*xxspltiw_v8hi_dup"
+  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa,wa,v,wa")
+	(vec_duplicate:V8HI
+	 (match_operand 1 "const_int_operand" "O,wM,wB,n")))]
+ "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT sign_value = SIGN_EXTEND_16BIT (INTVAL (operands[1]));
+
+  if (sign_value == 0)
+    return "xxspltib %x0,0";
+
+  if (sign_value == -1)
+    return "xxspltib %x0,255";
+  /* Small values in an Altivec register use VSPLTISH with operands[2].  */
+  int r = reg_or_subregno (operands[0]);
+  if (ALTIVEC_REGNO_P (r) && EASY_VECTOR_15 (sign_value))
+    {
+      operands[2] = GEN_INT (sign_value);
+      return "vspltish %0,%2";
+    }
+  /* XXSPLTIW takes a word: put the 16-bit value in both halves.  */
+  HOST_WIDE_INT uns_value = sign_value & 0xffff;
+  operands[2] = GEN_INT ((uns_value << 16) | uns_value);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "*xxspltiw_v4si_dup"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+	(vec_duplicate:V4SI
+	 (match_operand 1 "const_int_operand" "O,wM,wB,n")))]
+ "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT sign_value = SIGN_EXTEND_32BIT (INTVAL (operands[1]));
+
+  if (sign_value == 0)
+    return "xxspltib %x0,0";
+
+  if (sign_value == -1)
+    return "xxspltib %x0,255";
+
+  int r = reg_or_subregno (operands[0]);
+  if (ALTIVEC_REGNO_P (r) && EASY_VECTOR_15 (sign_value))
+    {
+      operands[2] = GEN_INT (sign_value);
+      return "vspltisw %0,%2";
+    }
+
+  /* The assembler doesn't like negative values.  */
+  operands[2] = GEN_INT (sign_value & 0xffffffff);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "xxspltiw_v4sf_dup"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
+	(vec_duplicate:V4SF
+	 (match_operand:SF 1 "const_double_operand" "O,F")))]
+ "TARGET_XXSPLTIW"
+{
+  if (operands[1] == CONST0_RTX (SFmode))
+    return "xxspltib %x0,0";
+
+  /* The assembler doesn't like negative values.  */
+  long value = rs6000_const_f32_to_i32 (operands[1]);
+  operands[2] = GEN_INT (value & 0xffffffff);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecsimple")
+  (set_attr "prefixed" "*,yes")])
+
+;; Convert vector constant to vec_duplicate.
+(define_mode_iterator XXSPLTIW [V8HI V4SI V4SF])
+
+(define_split
+  [(set (match_operand:XXSPLTIW 0 "vsx_register_operand")
+	(match_operand:XXSPLTIW 1 "xxspltiw_operand"))]
+  "TARGET_XXSPLTIW && GET_CODE (operands[1]) == CONST_VECTOR"
+  [(set (match_dup 0)
+	(vec_duplicate:<MODE> (match_dup 2)))]
+{
+  operands[2] = CONST_VECTOR_ELT (operands[1], 0);
+})
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv.c
index f312550f04d..22e43d21565 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv.c
@@ -57,7 +57,12 @@ vector signed int splats3(void)
    If folding is enabled, the vec_sl tests using vector long long type will
    generate a lvx instead of a vspltisw+vsld pair.  */
 
-/* { dg-final { scan-assembler-times {\mvspltis[bhw]\M|\mxxspltib\M} 7 } } */
-/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mvspltis[bhw]\M|\mxxspltib\M} 7 { target { ! has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 7                 { target { ! has_arch_pwr10 } } } } */
+
+/* { dg-final { scan-assembler-times {\mxxspltib\M}  2                 { target {   has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  5                 { target {   has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2                 { target {   has_arch_pwr10 } } } } */
+
 /* { dg-final { scan-assembler-times {\mlvx\M|\mlxvd2x\M} 0 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
new file mode 100644
index 00000000000..06830b02076
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SF vector constants.  */
+
+vector float
+v4sf_const_1 (void)
+{
+  return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_nan (void)
+{
+  return (vector float) { __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf ("") };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_inf (void)
+{
+  return (vector float) { __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff () };		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_m0 (void)
+{
+  return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_1 (void)
+{
+  return vec_splats (1.0f);				/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_nan (void)
+{
+  return vec_splats (__builtin_nanf (""));		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_inf (void)
+{
+  return vec_splats (__builtin_inff ());		/* XXSPLTIW.  */
+}
+
+vector float
+v8hi_splats_m0 (void)
+{
+  return vec_splats (-0.0f);				/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  8 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
new file mode 100644
index 00000000000..02d0c6d66a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VEXTSB2W) is not done.  */
+
+vector int
+v4si_const_1 (void)
+{
+  return (vector int) { 1, 1, 1, 1 };			/* VSPLTISW.  */
+}
+
+vector int
+v4si_const_126 (void)
+{
+  return (vector int) { 126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector int
+v4si_const_1023 (void)
+{
+  return (vector int) { 1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector int
+v4si_splats_1 (void)
+{
+  return vec_splats (1);				/* VSPLTISW.  */
+}
+
+vector int
+v4si_splats_126 (void)
+{
+  return vec_splats (126);				/* XXSPLTIW.  */
+}
+
+vector int
+v8hi_splats_1023 (void)
+{
+  return vec_splats (1023);				/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mvspltisw\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvextsb2w\M}    } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
new file mode 100644
index 00000000000..e6d0fab6d67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V8HI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VUPKLSB) is not done.  */
+
+vector short
+v8hi_const_1 (void)
+{
+  return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 };	/* VSPLTISH.  */
+}
+
+vector short
+v8hi_const_126 (void)
+{
+  return (vector short) { 126, 126, 126, 126,
+			  126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_const_1023 (void)
+{
+  return (vector short) { 1023, 1023, 1023, 1023,
+			  1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1 (void)
+{
+  return vec_splats ((short)1);				/* VSPLTISH.  */
+}
+
+vector short
+v8hi_splats_126 (void)
+{
+  return vec_splats ((short)126);			/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1023 (void)
+{
+  return vec_splats ((short)1023);			/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mvspltish\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvupklsb\M}     } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
index a135279b1d7..f49ef91422e 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
@@ -149,8 +149,6 @@ main (int argc, char *argv [])
   return 0;
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
-
-


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work055)] Generate XXSPLTIW on power10.
@ 2021-06-09 14:56 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2021-06-09 14:56 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5f86a4fef2b598147aef50a8644ab128689c7575

commit 5f86a4fef2b598147aef50a8644ab128689c7575
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Jun 9 10:56:21 2021 -0400

    Generate XXSPLTIW on power10.
    
    This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
    instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
    adding support for vector constants that can be used, and adding a
    VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
    
    I rewrote the XXSPLTIW built-in functions to use VEC_DUPLICATE instead of
    UNSPEC.
    
    This patch also updates the insn counts in the vec-splati-runnable.c test to
    work with the new option to use XXSPLTIW to load up some vector constants.
    
    I added 3 new tests to test loading up V8HI, V4SI, and V4SF vector
    constants.
    
    gcc/
    2021-06-09  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/predicates.md (xxspltiw_operand): New predicate.
            (easy_vector_constant): If we can use XXSPLTIW, the vector
            constant is easy.
            * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
            -mxxspltiw support.
            (POWERPC_MASKS): Add -mxxspltiw support.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            -mxxspltiw support.
            (xxspltib_constant_p): If we can generate XXSPLTIW, don't generate
            a XXSPLTIB and an extend instruction.
            (output_vec_const_move): Add support for loading up vector
            constants with XXSPLTIW.
            (rs6000_opt_masks): Add -mxxspltiw.
            * config/rs6000/rs6000.h (SIGN_EXTEND_8BIT): New macro.
            (SIGN_EXTEND_16BIT): New macro.
            (SIGN_EXTEND_32BIT): New macro.
            * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTIW): Delete.
            (xxspltiw_v8hi): New insn.
            (xxspltiw_v4si): Rewrite to generate a vector constant.
            (xxspltiw_v4sf): Rewrite to generate a vector constant.
            (xxspltiw_v4si_inst): Delete.
            (xxspltiw_v4sf_inst): Delete.
            (xxspltiw_v8hi_dup): New insn.
            (xxspltiw_v4si_dup): New insn.
            (xxspltiw_v4sf_dup): New insn.
            (XXSPLTIW): New mode iterator.
            (XXSPLTIW splitter): New insn splitter for XXSPLTIW.
    
    gcc/testsuite/
    2021-06-09  Michael Meissner  <meissner@linux.ibm.com>
    
            * gcc.target/powerpc/vec-splati-runnable.c: Update insn counts.
            * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.

Diff:
---
 gcc/config/rs6000/predicates.md                    |  29 ++++
 gcc/config/rs6000/rs6000-cpus.def                  |   7 +-
 gcc/config/rs6000/rs6000.c                         |  18 ++-
 gcc/config/rs6000/rs6000.h                         |  19 +++
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           | 146 ++++++++++++++++-----
 .../gcc.target/powerpc/vec-splat-constant-v4sf.c   |  66 ++++++++++
 .../gcc.target/powerpc/vec-splat-constant-v4si.c   |  51 +++++++
 .../gcc.target/powerpc/vec-splat-constant-v8hi.c   |  53 ++++++++
 .../gcc.target/powerpc/vec-splati-runnable.c       |   4 +-
 10 files changed, 357 insertions(+), 40 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 121cbf14810..aa17ddc94e5 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -640,6 +640,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR that can be loaded with the
+;; XXSPLTIW instruction.  Do not return 1 if the constant can be generated with
+;; XXSPLTIB or VSPLTIS{H,W}.
+(define_predicate "xxspltiw_operand"
+  (match_code "const_vector")
+{
+  if (!TARGET_XXSPLTIW)
+    return false;
+
+  if (mode != V8HImode && mode != V4SImode && mode != V4SFmode)
+    return false;
+  /* Every element must equal element 0, i.e. the vector is a splat.  */
+  rtx element = CONST_VECTOR_ELT (op, 0);
+  for (size_t i = 1; i < GET_MODE_NUNITS (mode); i++)
+    if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+      return false;
+  /* An all-zero splat is not an XXSPLTIW constant.  */
+  if (element == CONST0_RTX (GET_MODE_INNER (mode)))
+    return false;
+  /* Neither is an integer splat that satisfies EASY_VECTOR_15.  */
+  if (CONST_INT_P (element) && EASY_VECTOR_15 (INTVAL (element)))
+    return false;
+
+  return true;
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -653,6 +679,9 @@
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (xxspltiw_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 52ce84835f7..e6c5891d334 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -88,7 +88,8 @@
 				 | OPTION_MASK_P10_FUSION_2LOGICAL	\
 				 | OPTION_MASK_P10_FUSION_LOGADD 	\
 				 | OPTION_MASK_P10_FUSION_ADDLOG	\
-				 | OPTION_MASK_P10_FUSION_2ADD)
+				 | OPTION_MASK_P10_FUSION_2ADD		\
+				 | OPTION_MASK_XXSPLTIW)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -166,8 +167,8 @@
 				 | OPTION_MASK_RECIP_PRECISION		\
 				 | OPTION_MASK_SOFT_FLOAT		\
 				 | OPTION_MASK_STRICT_ALIGN_OPTIONAL	\
-				 | OPTION_MASK_VSX)
-
+				 | OPTION_MASK_VSX			\
+				 | OPTION_MASK_XXSPLTIW)
 #endif
 
 /* This table occasionally claims that a processor does not support a
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9bb945de7bb..2ac53d74a77 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4501,6 +4501,12 @@ rs6000_option_override_internal (bool global_init_p)
   if (!TARGET_PCREL && TARGET_PCREL_OPT)
     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
 
+  if (TARGET_POWER10 && TARGET_VSX
+      && (rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
+    rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
+  else if (!TARGET_POWER10 || !TARGET_VSX)
+    rs6000_isa_flags &= ~OPTION_MASK_XXSPLTIW;
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
 
@@ -6482,9 +6488,11 @@ xxspltib_constant_p (rtx op,
 
   /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
      sign extend.  Special case 0/-1 to allow getting any VSX register instead
-     of an Altivec register.  */
-  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
-      && EASY_VECTOR_15 (value))
+     of an Altivec register.  Also if we can generate a XXSPLTIW instruction,
+     don't emit a XXSPLTIB and an extend instruction.  */
+  if ((mode == V4SImode || mode == V8HImode)
+      && !IN_RANGE (value, -1, 0)
+      && (EASY_VECTOR_15 (value) || TARGET_XXSPLTIW))
     return false;
 
   /* Return # of instructions and the constant byte for XXSPLTIB.  */
@@ -6545,6 +6553,9 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      if (xxspltiw_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -24116,6 +24127,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "string",			0,				false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
   { "vsx",			OPTION_MASK_VSX,		false, true  },
+  { "xxspltiw",			OPTION_MASK_XXSPLTIW,		false, true  },
 #ifdef OPTION_MASK_64BIT
 #if TARGET_AIX_OS
   { "aix64",			OPTION_MASK_64BIT,		false, false },
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 7131de609e3..e03f14a0405 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2612,3 +2612,22 @@ while (0)
        rs6000_asm_output_opcode (STREAM);				\
     }									\
   while (0)
+
+/* Macros to sign-extend the low 8, 16, or 32 bits of X.  */
+#if HOST_BITS_PER_CHAR == 8
+#define SIGN_EXTEND_8BIT(X) ((HOST_WIDE_INT)(signed char)(X))
+#else
+#define SIGN_EXTEND_8BIT(X) ((((X) & 0xff) ^ 0x80) - 0x80)
+#endif
+
+#if HOST_BITS_PER_SHORT == 16
+#define SIGN_EXTEND_16BIT(X) ((HOST_WIDE_INT)(short)(X))
+#else
+#define SIGN_EXTEND_16BIT(X) ((((X) & 0xffff) ^ 0x8000) - 0x8000)
+#endif
+
+#if HOST_BITS_PER_INT == 32
+#define SIGN_EXTEND_32BIT(X) ((HOST_WIDE_INT)(int)(X))
+#else
+#define SIGN_EXTEND_32BIT(X) ((((X) & 0xffffffff) ^ 0x80000000) - 0x80000000)
+#endif
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0538db387dc..38eaa36d6d8 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -639,3 +639,7 @@ Enable instructions that guard against return-oriented programming attacks.
 mprivileged
 Target Var(rs6000_privileged) Init(0)
 Generate code that will run in privileged state.
+
+mxxspltiw
+Target Undocumented Mask(XXSPLTIW) Var(rs6000_isa_flags)
+Generate (do not generate) XXSPLTIW instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 15a8c0e22d8..76e10f73dec 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -386,7 +386,6 @@
    UNSPEC_VDIVES
    UNSPEC_VDIVEU
    UNSPEC_XXEVAL
-   UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTID
    UNSPEC_XXSPLTI32DX
    UNSPEC_XXBLEND
@@ -6239,36 +6238,6 @@
   "vmulld %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-;; XXSPLTIW built-in function support
-(define_insn "xxspltiw_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=wa")
-	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
- "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")
-  (set_attr "prefixed" "yes")])
-
-(define_expand "xxspltiw_v4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=wa")
-	(unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
-{
-  long long value = rs6000_const_f32_to_i32 (operands[1]);
-  emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
-  DONE;
-})
-
-(define_insn "xxspltiw_v4sf_inst"
-  [(set (match_operand:V4SF 0 "register_operand" "=wa")
-	(unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
- "TARGET_POWER10"
- "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")
-  (set_attr "prefixed" "yes")])
-
 ;; XXSPLTIDP built-in function support
 (define_expand "xxspltidp_v2df"
   [(set (match_operand:V2DF 0 "register_operand" )
@@ -6420,3 +6389,118 @@
    [(set_attr "type" "vecsimple")
     (set_attr "prefixed" "yes")])
 
+;; XXSPLTIW built-in function support.  Convert to a vector constant, which
+;; will then be optimized to the XXSPLTIW instruction.
+(define_expand "xxspltiw_v4si"
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (use (match_operand:SI 1 "s32bit_cint_operand"))]
+  "TARGET_POWER10"
+{
+  /* DONE stops the USE template above from being emitted after the move.  */
+  rtvec rv = gen_rtvec (4, operands[1], operands[1], operands[1], operands[1]);
+  emit_move_insn (operands[0], gen_rtx_CONST_VECTOR (V4SImode, rv));
+  DONE;
+})
+
+(define_expand "xxspltiw_v4sf"
+  [(use (match_operand:V4SF 0 "register_operand"))
+   (use (match_operand:SF 1 "const_double_operand"))]
+  "TARGET_POWER10"
+{
+  /* DONE stops the USE template above from being emitted after the move.  */
+  rtvec rv = gen_rtvec (4, operands[1], operands[1], operands[1], operands[1]);
+  emit_move_insn (operands[0], gen_rtx_CONST_VECTOR (V4SFmode, rv));
+  DONE;
+})
+
+;; XXSPLTIW support.  Add support for the XXSPLTIW built-in functions, and to
+;; use XXSPLTIW to load up V8HImode, V4SImode, and V4SFmode vector
+;; constants where all elements are the same.  We special case loading up
+;; integer -16..15 and floating point 0.0f, since we can use the shorter
+;; XXSPLTIB, VSPLTISH, and VSPLTISW instructions.
+
+(define_insn "*xxspltiw_v8hi_dup"
+  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa,wa,v,wa")
+	(vec_duplicate:V8HI
+	 (match_operand 1 "const_int_operand" "O,wM,wB,n")))]
+ "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT sign_value = SIGN_EXTEND_16BIT (INTVAL (operands[1]));
+
+  if (sign_value == 0)
+    return "xxspltib %x0,0";
+  if (sign_value == -1)
+    return "xxspltib %x0,255";
+
+  /* For VSPLTISH print operand 2, the sign-extended value, not operand 1.  */
+  int r = reg_or_subregno (operands[0]);
+  if (ALTIVEC_REGNO_P (r) && EASY_VECTOR_15 (sign_value))
+    {
+      operands[2] = GEN_INT (sign_value);
+      return "vspltish %0,%2";
+    }
+
+  HOST_WIDE_INT uns_value = sign_value & 0xffff;
+  operands[2] = GEN_INT ((uns_value << 16) | uns_value);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "*xxspltiw_v4si_dup"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+	(vec_duplicate:V4SI
+	 (match_operand 1 "const_int_operand" "O,wM,wB,n")))]
+ "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT splat = SIGN_EXTEND_32BIT (INTVAL (operands[1]));
+
+  /* All-zero and all-one words can use the shorter byte splat.  */
+  if (splat == 0 || splat == -1)
+    return splat == 0 ? "xxspltib %x0,0" : "xxspltib %x0,255";
+
+  /* Small values can use VSPLTISW when the target is an Altivec register.  */
+  if (ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))
+      && EASY_VECTOR_15 (splat))
+    {
+      operands[2] = GEN_INT (splat);
+      return "vspltisw %0,%2";
+    }
+
+  /* Otherwise use XXSPLTIW.  Print the immediate as an unsigned 32-bit
+     value, since the assembler doesn't like negative values.  */
+  operands[2] = GEN_INT (splat & 0xffffffff);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+  (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "xxspltiw_v4sf_dup"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
+	(vec_duplicate:V4SF
+	 (match_operand:SF 1 "const_double_operand" "O,F")))]
+ "TARGET_XXSPLTIW"
+{
+  if (operands[1] == CONST0_RTX (SFmode))
+    return "xxspltib %x0,0";
+
+  /* Mask the value to 32 bits; the assembler doesn't like negative values.  */
+  long value = rs6000_const_f32_to_i32 (operands[1]);
+  operands[2] = GEN_INT (value & 0xffffffff);
+  return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecsimple")
+  (set_attr "prefixed" "*,yes")])
+
+;; Convert a vector constant move into a VEC_DUPLICATE of its first element.
+(define_mode_iterator XXSPLTIW [V8HI V4SI V4SF])
+
+(define_split
+  [(set (match_operand:XXSPLTIW 0 "vsx_register_operand")
+	(match_operand:XXSPLTIW 1 "xxspltiw_operand"))]
+  "TARGET_XXSPLTIW && GET_CODE (operands[1]) == CONST_VECTOR"
+  [(set (match_dup 0)
+	(vec_duplicate:<MODE> (match_dup 2)))]
+{
+  operands[2] = CONST_VECTOR_ELT (operands[1], 0);
+})
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
new file mode 100644
index 00000000000..06830b02076
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SF vector constants.  */
+
+vector float
+v4sf_const_1 (void)
+{
+  return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_nan (void)
+{
+  return (vector float) { __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf ("") };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_inf (void)
+{
+  return (vector float) { __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff () };		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_m0 (void)
+{
+  return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_1 (void)
+{
+  return vec_splats (1.0f);				/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_nan (void)
+{
+  return vec_splats (__builtin_nanf (""));		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_inf (void)
+{
+  return vec_splats (__builtin_inff ());		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_m0 (void)
+{
+  return vec_splats (-0.0f);				/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  8 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
new file mode 100644
index 00000000000..02d0c6d66a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VEXTSB2W) is not done.  */
+
+vector int
+v4si_const_1 (void)
+{
+  return (vector int) { 1, 1, 1, 1 };			/* VSPLTISW.  */
+}
+
+vector int
+v4si_const_126 (void)
+{
+  return (vector int) { 126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector int
+v4si_const_1023 (void)
+{
+  return (vector int) { 1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector int
+v4si_splats_1 (void)
+{
+  return vec_splats (1);				/* VSPLTISW.  */
+}
+
+vector int
+v4si_splats_126 (void)
+{
+  return vec_splats (126);				/* XXSPLTIW.  */
+}
+
+vector int
+v4si_splats_1023 (void)
+{
+  return vec_splats (1023);				/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mvspltisw\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvextsb2w\M}    } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
new file mode 100644
index 00000000000..e6d0fab6d67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V8HI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VUPKLSB) is not done.  */
+
+vector short
+v8hi_const_1 (void)
+{
+  return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 };	/* VSPLTISH.  */
+}
+
+vector short
+v8hi_const_126 (void)
+{
+  return (vector short) { 126, 126, 126, 126,
+			  126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_const_1023 (void)
+{
+  return (vector short) { 1023, 1023, 1023, 1023,
+			  1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1 (void)
+{
+  return vec_splats ((short)1);				/* VSPLTISH.  */
+}
+
+vector short
+v8hi_splats_126 (void)
+{
+  return vec_splats ((short)126);			/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1023 (void)
+{
+  return vec_splats ((short)1023);			/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mvspltish\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvupklsb\M}     } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
index a135279b1d7..f49ef91422e 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
@@ -149,8 +149,6 @@ main (int argc, char *argv [])
   return 0;
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
-
-


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-06-09 15:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-09 15:54 [gcc(refs/users/meissner/heads/work055)] Generate XXSPLTIW on power10 Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2021-06-09 14:56 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).