public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work069)] Generate XXSPLTIW on power10.
@ 2021-09-16 20:29 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2021-09-16 20:29 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8b4857af0d3ce17cc72071cd2e65fdc1e2f47ae8

commit 8b4857af0d3ce17cc72071cd2e65fdc1e2f47ae8
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Sep 16 16:28:52 2021 -0400

    Generate XXSPLTIW on power10.
    
    This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
    instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
    adding support for vector constants that can be used, and adding a
    VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
    
    I rewrote the XXSPLTW built-in functions to use VEC_DUPLICATE instead of
    UNSPEC.
    
    This patch also updates the insn counts in the vec-splati-runnable.c test to
    work with the new option to use XXSPLTIW to load up some vector constants.
    
    I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector
    constants.
    
    At the present time, XXSPLTIW generation is disabled by default.
    
    2021-09-16  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/constraints.md (eW): New constraint.
            * config/rs6000/predicates.md (easy_vector_constant_splat_word):
            New predicate.
            (easy_vector_constant): If we can use XXSPLTIW, the vector
            constant is easy.
            * config/rs6000/rs6000-protos.h (xxspltiw_constant_immediate): New
            declaration.
            * config/rs6000/rs6000.c (xxspltiw_constant_immediate): New
            function.
            (output_vec_const_move): Add support for loading up vector
            constants with XXSPLTIW.
            (prefixed_xxsplti_p): Recognize xxspltiw instructions as
            prefixed.
            * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
            * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
            constants loaded with XXSPLTIW.
            (vsx_mov<mode>_32bit): Likewise.
            (vsx_splat_v8hi_xxspltiw): New insn.
            (vsx_splat_v4si_xxspltiw): New insn.
            (vsx_splat_v4sf_xxspltiw): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   5 +
 gcc/config/rs6000/predicates.md                    | 103 +++++++++++++++++++++
 gcc/config/rs6000/rs6000-protos.h                  |   1 +
 gcc/config/rs6000/rs6000.c                         |  98 +++++++++++++++++++-
 gcc/config/rs6000/rs6000.opt                       |   5 +
 .../gcc.target/powerpc/vec-splat-constant-v16qi.c  |  27 ++++++
 .../gcc.target/powerpc/vec-splat-constant-v4sf.c   |  67 ++++++++++++++
 .../gcc.target/powerpc/vec-splat-constant-v4si.c   |  51 ++++++++++
 .../gcc.target/powerpc/vec-splat-constant-v8hi.c   |  62 +++++++++++++
 9 files changed, 414 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index b626510f26e..56b6d061011 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -223,6 +223,11 @@
   "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
   (match_operand 0 "lxvkq_operand"))
 
+;; Vector constant that can be loaded with XXSPLTIW
+(define_constraint "eW"
+  "A vector constant that can be loaded with the XXSPLTIW instruction."
+  (match_operand 0 "easy_vector_constant_splat_word"))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 2b5273ab356..e6653cd83ae 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -723,6 +723,106 @@
   return true;
 })
 
+;; Return 1 if the operand is a constant that can be loaded with the XXSPLTIW
+;; instruction that loads up a 32-bit immediate and splats it into the vector.
+
+(define_predicate "easy_vector_constant_splat_word"
+  (match_code "const_vector")
+{
+  HOST_WIDE_INT value;
+
+  if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTIW)
+    return false;
+
+  if (!CONST_VECTOR_P (op))
+    return false;	/* Not a vector constant.  */
+
+  rtx element0 = CONST_VECTOR_ELT (op, 0);
+
+  switch (mode)
+    {
+      /* V4SImode constant vectors whose elements are all the same can be
+	 used with XXSPLTIW.  */
+    case V4SImode:
+      if (!CONST_VECTOR_DUPLICATE_P (op))
+	return false;
+
+      /* Don't return true if we can use the shorter vspltisw instruction.  */
+      value = INTVAL (element0);
+      return (!EASY_VECTOR_15 (value));
+
+      /* V4SFmode constant vectors whose elements are all the same
+	 can be used with XXSPLTIW.  */
+    case V4SFmode:
+      if (!CONST_VECTOR_DUPLICATE_P (op))
+	return false;
+
+      /* Don't return true for 0.0f, since that can be created with
+	 xxspltib or xxlxor.  */
+      return (element0 != CONST0_RTX (SFmode));
+
+      /* V8HImode constant vectors whose elements are all the same, or whose
+	 even/odd elements repeat as a pair, can be used with XXSPLTIW.  */
+    case V8HImode:
+      if (CONST_VECTOR_DUPLICATE_P (op))
+	{
+	  /* Don't return true if we can use the shorter vspltish instruction.  */
+	  value = INTVAL (element0);
+	  if (EASY_VECTOR_15 (value))
+	    return false;
+
+	  return true;
+	}
+
+      else
+	{
+	  /* Check if all even elements are the same and all odd elements are
+	     the same.  */
+	  rtx element1 = CONST_VECTOR_ELT (op, 1);
+
+	  if (!CONST_INT_P (element1))
+	    return false;
+
+	  for (size_t i = 2; i < GET_MODE_NUNITS (V8HImode); i += 2)
+	    if (!rtx_equal_p (element0, CONST_VECTOR_ELT (op, i))
+		|| !rtx_equal_p (element1, CONST_VECTOR_ELT (op, i + 1)))
+	      return false;
+
+	  return true;
+	}
+
+      /* V16QI constant vectors that have the first four elements identical to
+	 the next set of 4 elements, and so forth can generate XXSPLTIW.  */
+    case V16QImode:
+	{
+	  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
+	  if (xxspltib_constant_nosplit (op, mode))
+	    return false;
+
+	  rtx element1 = CONST_VECTOR_ELT (op, 1);
+	  rtx element2 = CONST_VECTOR_ELT (op, 2);
+	  rtx element3 = CONST_VECTOR_ELT (op, 3);
+
+	  if (!CONST_INT_P (element0) || !CONST_INT_P (element1)
+	      || !CONST_INT_P (element2) || !CONST_INT_P (element3))
+	    return false;
+
+	  for (size_t i = 4; i < GET_MODE_NUNITS (V16QImode); i += 4)
+	    if (!rtx_equal_p (element0, CONST_VECTOR_ELT (op, i))
+		|| !rtx_equal_p (element1, CONST_VECTOR_ELT (op, i + 1))
+		|| !rtx_equal_p (element2, CONST_VECTOR_ELT (op, i + 2))
+		|| !rtx_equal_p (element3, CONST_VECTOR_ELT (op, i + 3)))
+	      return false;
+
+	  return true;
+	}
+
+    default:
+      break;
+    }
+
+  return false;
+})
 
 ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
 ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -829,6 +929,9 @@
       if (easy_fp_constant_sfmode (op, mode))
 	return true;
 
+      if (easy_vector_constant_splat_word (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 1c7f532a7ee..87e76d2b487 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern HOST_WIDE_INT xxspltidp_constant_immediate (rtx, machine_mode);
+extern HOST_WIDE_INT xxspltiw_constant_immediate (rtx, machine_mode);
 extern int lxvkq_constant_p (rtx, machine_mode);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2c850aec9bb..5c92060f8e9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6751,6 +6751,82 @@ xxspltidp_constant_immediate (rtx op, machine_mode mode)
   return ret;
 }
 
+/* Return the immediate value used in the XXSPLTIW instruction.  */
+HOST_WIDE_INT
+xxspltiw_constant_immediate (rtx op, machine_mode mode)
+{
+  HOST_WIDE_INT ret;
+
+  gcc_assert (easy_vector_constant_splat_word (op, mode));
+
+  switch (mode)
+    {
+    default:
+      gcc_unreachable ();
+
+      /* V4SImode constant vectors whose elements are all the same can be
+	 used with XXSPLTIW; element 0 is used as the immediate.  */
+    case E_V4SImode:
+      gcc_assert (CONST_VECTOR_DUPLICATE_P (op));
+      ret = INTVAL (CONST_VECTOR_ELT (op, 0));
+      break;
+
+      /* V4SFmode constant vectors whose elements are all the same
+	 can be used with XXSPLTIW.  */
+    case E_V4SFmode:
+      gcc_assert (CONST_VECTOR_DUPLICATE_P (op));
+      ret = rs6000_const_f32_to_i32 (CONST_VECTOR_ELT (op, 0));
+      break;
+
+      /* V8HImode constant vectors with all of the even elements the same and
+	 all of the odd elements the same can use XXSPLTIW.  */
+    case E_V8HImode:
+      {
+	if (!rtx_equal_p (CONST_VECTOR_ELT (op, 0), CONST_VECTOR_ELT (op, 2))
+	    || !rtx_equal_p (CONST_VECTOR_ELT (op, 1), CONST_VECTOR_ELT (op, 3)))
+	  gcc_unreachable ();
+
+	HOST_WIDE_INT value0 = INTVAL (CONST_VECTOR_ELT (op, 0)) & 0xffff;
+	HOST_WIDE_INT value1 = INTVAL (CONST_VECTOR_ELT (op, 1)) & 0xffff;
+
+	if (!BYTES_BIG_ENDIAN)
+	  std::swap (value0, value1);
+
+	ret = (value0 << 16) | value1;
+      }
+      break;
+
+      /* V16QI constant vectors that have the first four elements identical to
+	 the next set of 4 elements, and so forth can generate XXSPLTIW.  */
+    case E_V16QImode:
+      {
+	rtx op0 = CONST_VECTOR_ELT (op, 0);
+	rtx op1 = CONST_VECTOR_ELT (op, 1);
+	rtx op2 = CONST_VECTOR_ELT (op, 2);
+	rtx op3 = CONST_VECTOR_ELT (op, 3);
+
+	for (size_t i = 4; i < GET_MODE_NUNITS (V16QImode); i += 4)
+	  if (!rtx_equal_p (op0, CONST_VECTOR_ELT (op, i))
+	      || !rtx_equal_p (op1, CONST_VECTOR_ELT (op, i + 1))
+	      || !rtx_equal_p (op2, CONST_VECTOR_ELT (op, i + 2))
+	      || !rtx_equal_p (op3, CONST_VECTOR_ELT (op, i + 3)))
+	    gcc_unreachable ();
+
+	HOST_WIDE_INT value0 = INTVAL (op0) & 0xff;
+	HOST_WIDE_INT value1 = INTVAL (op1) & 0xff;
+	HOST_WIDE_INT value2 = INTVAL (op2) & 0xff;
+	HOST_WIDE_INT value3 = INTVAL (op3) & 0xff;
+
+	ret = ((BYTES_BIG_ENDIAN)
+	       ? ((value0 << 24) | (value1 << 16) | (value2 << 8) | value3)
+	       : ((value3 << 24) | (value2 << 16) | (value1 << 8) | value0));
+      }
+      break;
+    }
+
+  return ret;
+}
+
 /* Return the constant that will go in the LXVKQ instruction.  */
 
 /* LXVKQ immediates.  */
@@ -6877,6 +6953,12 @@ output_vec_const_move (rtx *operands)
 	  return "xxspltidp %x0,%2";
 	}
 
+      if (easy_vector_constant_splat_word (vec, mode))
+	{
+	  operands[2] = GEN_INT (xxspltiw_constant_immediate (vec, mode));
+	  return "xxspltiw %x0,%2";
+	}
+
       if (lxvkq_operand (vec, mode))
 	{
 	  operands[2] = GEN_INT (lxvkq_constant_immediate (vec, mode));
@@ -26642,13 +26724,19 @@ prefixed_xxsplti_p (rtx_insn *insn)
 
   switch (mode)
     {
-    case DImode:
-    case DFmode:
-    case SFmode:
-    case V2DImode:
-    case V2DFmode:
+    case E_DImode:
+    case E_DFmode:
+    case E_SFmode:
+    case E_V2DImode:
+    case E_V2DFmode:
       return easy_fp_constant_sfmode (src, mode);
 
+    case E_V16QImode:
+    case E_V8HImode:
+    case E_V4SImode:
+    case E_V4SFmode:
+      return easy_vector_constant_splat_word (src, mode);
+
     default:
       break;
     }
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index dadcefdb589..4fae4dc010a 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,11 @@ mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
 Generate (do not generate) XXSPLTIDP instructions.
 
+;; Do not enable this by default at the current time.
+mxxspltiw
+Target Undocumented Var(TARGET_XXSPLTIW) Init(0) Save
+Generate (do not generate) XXSPLTIW instructions.
+
 mlxvkq
 Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
 Generate (do not generate) LXVKQ instructions.
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
new file mode 100644
index 00000000000..2707d86e6fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V16QI vector constants where the
+   first 4 elements are the same as the next 4 elements, etc.  */
+
+vector unsigned char
+v16qi_const_1 (void)
+{
+  return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1,
+				  1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB.  */
+}
+
+vector unsigned char
+v16qi_const_2 (void)
+{
+  return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4,
+				  1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}                   } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}                    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
new file mode 100644
index 00000000000..05d4ee3f5cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SF vector constants.  */
+
+vector float
+v4sf_const_1 (void)
+{
+  return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_nan (void)
+{
+  return (vector float) { __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf ("") };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_inf (void)
+{
+  return (vector float) { __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff () };		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_m0 (void)
+{
+  return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f };	/* XXSPLTIB/VSLW.  */
+}
+
+vector float
+v4sf_splats_1 (void)
+{
+  return vec_splats (1.0f);				/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_nan (void)
+{
+  return vec_splats (__builtin_nanf (""));		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_inf (void)
+{
+  return vec_splats (__builtin_inff ());		/* XXSPLTIW.  */
+}
+
+vector float
+v8hi_splats_m0 (void)
+{
+  return vec_splats (-0.0f);				/* XXSPLTIB/VSLW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M}     2 } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}      } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}       } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
new file mode 100644
index 00000000000..da909e948b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VEXTSB2W) is not done.  */
+
+vector int
+v4si_const_1 (void)
+{
+  return (vector int) { 1, 1, 1, 1 };			/* VSPLTISW.  */
+}
+
+vector int
+v4si_const_126 (void)
+{
+  return (vector int) { 126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector int
+v4si_const_1023 (void)
+{
+  return (vector int) { 1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector int
+v4si_splats_1 (void)
+{
+  return vec_splats (1);				/* VSPLTISW.  */
+}
+
+vector int
+v4si_splats_126 (void)
+{
+  return vec_splats (126);				/* XXSPLTIW.  */
+}
+
+vector int
+v8hi_splats_1023 (void)
+{
+  return vec_splats (1023);				/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mvspltisw\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvextsb2w\M}    } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
new file mode 100644
index 00000000000..290e05d4a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V8HI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VUPKLSB) is not done.  */
+
+vector short
+v8hi_const_1 (void)
+{
+  return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 };	/* VSPLTISH.  */
+}
+
+vector short
+v8hi_const_126 (void)
+{
+  return (vector short) { 126, 126, 126, 126,
+			  126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_const_1023 (void)
+{
+  return (vector short) { 1023, 1023, 1023, 1023,
+			  1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1 (void)
+{
+  return vec_splats ((short)1);				/* VSPLTISH.  */
+}
+
+vector short
+v8hi_splats_126 (void)
+{
+  return vec_splats ((short)126);			/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1023 (void)
+{
+  return vec_splats ((short)1023);			/* XXSPLTIW.  */
+}
+
+/* Test that we can optimize V8HI where all of the even elements are the same
+   and all of the odd elements are the same.  */
+vector short
+v8hi_const_1023_1000 (void)
+{
+  return (vector short) { 1023, 1000, 1023, 1000,
+			  1023, 1000, 1023, 1000 };	/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  5 } } */
+/* { dg-final { scan-assembler-times {\mvspltish\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvupklsb\M}     } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work069)] Generate XXSPLTIW on power10.
@ 2021-09-17  5:27 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2021-09-17  5:27 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:69b6ba65ddd451c45c41cef3ab949f6f24b03c4c

commit 69b6ba65ddd451c45c41cef3ab949f6f24b03c4c
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Sep 17 01:27:42 2021 -0400

    Generate XXSPLTIW on power10.
    
    This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
    instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
    adding support for vector constants that can be used, and adding a
    VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
    
    I rewrote the XXSPLTW built-in functions to use VEC_DUPLICATE instead of
    UNSPEC.
    
    This patch also updates the insn counts in the vec-splati-runnable.c test to
    work with the new option to use XXSPLTIW to load up some vector constants.
    
    I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector
    constants.
    
    At the present time, XXSPLTIW generation is disabled by default.
    
    2021-09-17  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/constraints.md (eW): New constraint.
            * config/rs6000/predicates.md (easy_vector_constant_splat_word):
            New predicate.
            (easy_vector_constant): If we can use XXSPLTIW, the vector
            constant is easy.
            * config/rs6000/rs6000-protos.h (xxspltiw_constant_immediate): New
            declaration.
            * config/rs6000/rs6000.c (xxspltiw_constant_immediate): New
            function.
            (output_vec_const_move): Add support for loading up vector
            constants with XXSPLTIW.
            (prefixed_xxsplti_p): Recognize xxspltiw instructions as
            prefixed.
            * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
            * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
            constants loaded with XXSPLTIW.
            (vsx_mov<mode>_32bit): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
            * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   5 +
 gcc/config/rs6000/predicates.md                    | 104 +++++++++++++++++++++
 gcc/config/rs6000/rs6000-protos.h                  |   1 +
 gcc/config/rs6000/rs6000.c                         |  88 +++++++++++++++++
 gcc/config/rs6000/rs6000.opt                       |   5 +
 gcc/config/rs6000/vsx.md                           |  28 +++---
 .../gcc.target/powerpc/vec-splat-constant-v16qi.c  |  27 ++++++
 .../gcc.target/powerpc/vec-splat-constant-v4sf.c   |  67 +++++++++++++
 .../gcc.target/powerpc/vec-splat-constant-v4si.c   |  51 ++++++++++
 .../gcc.target/powerpc/vec-splat-constant-v8hi.c   |  62 ++++++++++++
 10 files changed, 424 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 1700657abe9..46daeb0861c 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -228,6 +228,11 @@
   "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
   (match_operand 0 "easy_fp_constant_ieee128"))
 
+;; Vector constant that can be loaded with XXSPLTIW
+(define_constraint "eW"
+  "A vector constant that can be loaded with the XXSPLTIW instruction."
+  (match_operand 0 "easy_vector_constant_splat_word"))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 30e89ec79f0..78e64a8a1d4 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -751,6 +751,107 @@
   return easy_fp_constant_64bit_scalar (op, GET_MODE_INNER (mode));
 })
 
+;; Return 1 if the operand is a constant that can be loaded with the XXSPLTIW
+;; instruction that loads up a 32-bit immediate and splats it into the vector.
+
+(define_predicate "easy_vector_constant_splat_word"
+  (match_code "const_vector")
+{
+  HOST_WIDE_INT value;
+
+  if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTIW)
+    return false;
+
+  if (!CONST_VECTOR_P (op))
+    return false;	/* Not a vector constant.  */
+
+  rtx element0 = CONST_VECTOR_ELT (op, 0);
+
+  switch (mode)
+    {
+      /* V4SImode constant vectors whose elements are all the same can be
+	 used with XXSPLTIW.  */
+    case V4SImode:
+      if (!CONST_VECTOR_DUPLICATE_P (op))
+	return false;
+
+      /* Don't return true if we can use the shorter vspltisw instruction.  */
+      value = INTVAL (element0);
+      return (!EASY_VECTOR_15 (value));
+
+      /* V4SFmode constant vectors whose elements are all the same
+	 can be used with XXSPLTIW.  */
+    case V4SFmode:
+      if (!CONST_VECTOR_DUPLICATE_P (op))
+	return false;
+
+      /* Don't return true for 0.0f, since that can be created with
+	 xxspltib or xxlxor.  */
+      return (element0 != CONST0_RTX (SFmode));
+
+      /* V8HImode constant vectors whose elements are all the same, or whose
+	 even/odd elements repeat as a pair, can be used with XXSPLTIW.  */
+    case V8HImode:
+      if (CONST_VECTOR_DUPLICATE_P (op))
+	{
+	  /* Don't return true if we can use the shorter vspltish instruction.  */
+	  value = INTVAL (element0);
+	  if (EASY_VECTOR_15 (value))
+	    return false;
+
+	  return true;
+	}
+
+      else
+	{
+	  /* Check if all even elements are the same and all odd elements are
+	     the same.  */
+	  rtx element1 = CONST_VECTOR_ELT (op, 1);
+
+	  if (!CONST_INT_P (element1))
+	    return false;
+
+	  for (size_t i = 2; i < GET_MODE_NUNITS (V8HImode); i += 2)
+	    if (!rtx_equal_p (element0, CONST_VECTOR_ELT (op, i))
+		|| !rtx_equal_p (element1, CONST_VECTOR_ELT (op, i + 1)))
+	      return false;
+
+	  return true;
+	}
+
+      /* V16QI constant vectors that have the first four elements identical to
+	 the next set of 4 elements, and so forth can generate XXSPLTIW.  */
+    case V16QImode:
+	{
+	  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
+	  if (xxspltib_constant_nosplit (op, mode))
+	    return false;
+
+	  rtx element1 = CONST_VECTOR_ELT (op, 1);
+	  rtx element2 = CONST_VECTOR_ELT (op, 2);
+	  rtx element3 = CONST_VECTOR_ELT (op, 3);
+
+	  if (!CONST_INT_P (element0) || !CONST_INT_P (element1)
+	      || !CONST_INT_P (element2) || !CONST_INT_P (element3))
+	    return false;
+
+	  for (size_t i = 4; i < GET_MODE_NUNITS (V16QImode); i += 4)
+	    if (!rtx_equal_p (element0, CONST_VECTOR_ELT (op, i))
+		|| !rtx_equal_p (element1, CONST_VECTOR_ELT (op, i + 1))
+		|| !rtx_equal_p (element2, CONST_VECTOR_ELT (op, i + 2))
+		|| !rtx_equal_p (element3, CONST_VECTOR_ELT (op, i + 3)))
+	      return false;
+
+	  return true;
+	}
+
+    default:
+      break;
+    }
+
+  return false;
+})
+
 ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
 ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
 
@@ -871,6 +972,9 @@
       if (easy_vector_constant_64bit_element (op, mode))
 	return true;
 
+      if (easy_vector_constant_splat_word (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index a21fa08b367..540c401e7ad 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern long xxspltidp_constant_immediate (rtx, machine_mode);
+extern long xxspltiw_constant_immediate (rtx, machine_mode);
 extern int lxvkq_constant_immediate (rtx, machine_mode);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index a663a82f6b9..9464052a03d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6751,6 +6751,82 @@ xxspltidp_constant_immediate (rtx op, machine_mode mode)
   return ret;
 }
 
+/* Return the immediate value used in the XXSPLTIW instruction.  */
+long
+xxspltiw_constant_immediate (rtx op, machine_mode mode)
+{
+  long ret;
+
+  gcc_assert (easy_vector_constant_splat_word (op, mode));
+
+  switch (mode)
+    {
+    default:
+      gcc_unreachable ();
+
+      /* V4SImode constant vectors whose elements are all the same can be
+	 used with XXSPLTIW; element 0 is used as the immediate.  */
+    case E_V4SImode:
+      gcc_assert (CONST_VECTOR_DUPLICATE_P (op));
+      ret = INTVAL (CONST_VECTOR_ELT (op, 0));
+      break;
+
+      /* V4SFmode constant vectors whose elements are all the same
+	 can be used with XXSPLTIW.  */
+    case E_V4SFmode:
+      gcc_assert (CONST_VECTOR_DUPLICATE_P (op));
+      ret = rs6000_const_f32_to_i32 (CONST_VECTOR_ELT (op, 0));
+      break;
+
+      /* V8HImode constant vectors with all of the even elements the same and
+	 all of the odd elements the same can use XXSPLTIW.  */
+    case E_V8HImode:
+      {
+	if (!rtx_equal_p (CONST_VECTOR_ELT (op, 0), CONST_VECTOR_ELT (op, 2))
+	    || !rtx_equal_p (CONST_VECTOR_ELT (op, 1), CONST_VECTOR_ELT (op, 3)))
+	  gcc_unreachable ();
+
+	long value0 = INTVAL (CONST_VECTOR_ELT (op, 0)) & 0xffff;
+	long value1 = INTVAL (CONST_VECTOR_ELT (op, 1)) & 0xffff;
+
+	if (!BYTES_BIG_ENDIAN)
+	  std::swap (value0, value1);
+
+	ret = (value0 << 16) | value1;
+      }
+      break;
+
+      /* V16QI constant vectors that have the first four elements identical to
+	 the next set of 4 elements, and so forth can generate XXSPLTIW.  */
+    case E_V16QImode:
+      {
+	rtx op0 = CONST_VECTOR_ELT (op, 0);
+	rtx op1 = CONST_VECTOR_ELT (op, 1);
+	rtx op2 = CONST_VECTOR_ELT (op, 2);
+	rtx op3 = CONST_VECTOR_ELT (op, 3);
+
+	for (size_t i = 4; i < GET_MODE_NUNITS (V16QImode); i += 4)
+	  if (!rtx_equal_p (op0, CONST_VECTOR_ELT (op, i))
+	      || !rtx_equal_p (op1, CONST_VECTOR_ELT (op, i + 1))
+	      || !rtx_equal_p (op2, CONST_VECTOR_ELT (op, i + 2))
+	      || !rtx_equal_p (op3, CONST_VECTOR_ELT (op, i + 3)))
+	    gcc_unreachable ();
+
+	long value0 = INTVAL (op0) & 0xff;
+	long value1 = INTVAL (op1) & 0xff;
+	long value2 = INTVAL (op2) & 0xff;
+	long value3 = INTVAL (op3) & 0xff;
+
+	ret = ((BYTES_BIG_ENDIAN)
+	       ? ((value0 << 24) | (value1 << 16) | (value2 << 8) | value3)
+	       : ((value3 << 24) | (value2 << 16) | (value1 << 8) | value0));
+      }
+      break;
+    }
+
+  return ret;
+}
+
 /* Return the constant that will go in the LXVKQ instruction.  */
 
 /* LXVKQ immediates.  */
@@ -6892,6 +6968,12 @@ output_vec_const_move (rtx *operands)
 	  return "xxspltidp %x0,%2";
 	}
 
+      if (easy_vector_constant_splat_word (vec, mode))
+	{
+	  operands[2] = GEN_INT (xxspltiw_constant_immediate (vec, mode));
+	  return "xxspltiw %x0,%2";
+	}
+
       if (easy_fp_constant_ieee128 (vec, mode))
 	{
 	  operands[2] = GEN_INT (lxvkq_constant_immediate (vec, mode));
@@ -26666,6 +26748,12 @@ prefixed_xxsplti_p (rtx_insn *insn)
     case E_V2DFmode:
       return easy_vector_constant_64bit_element (src, mode);
 
+    case E_V16QImode:
+    case E_V8HImode:
+    case E_V4SImode:
+    case E_V4SFmode:
+      return easy_vector_constant_splat_word (src, mode);
+
     default:
       break;
     }
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 970988179e9..817039d0321 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,11 @@ mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
 Generate (do not generate) XXSPLTIDP instructions.
 
+;; Do not enable at this time.
+mxxspltiw
+Target Undocumented Var(TARGET_XXSPLTIW) Init(0) Save
+Generate (do not generate) XXSPLTIW instructions.
+
 mlxvkq
 Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
 Generate (do not generate) LXVKQ instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d7e58654ded..712e5df0c02 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1191,19 +1191,19 @@
 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
-;;              XXSPLTIDP  LXVKQ
+;;              XXSPLTIDP  XXSPLTIW   LXVKQ
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
 ;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        r,         we,        ?wQ,
-                wa,        wa,
+                wa,        wa,        wa,
                 ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
                 ?wa,       v,         <??r>,     wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        we,        r,         r,
-                eV,        eQ,
+                eV,        eW,        eQ,
                 wQ,        Y,         r,         r,         wE,        jwM,
                 ?jwM,      W,         <nW>,      v,         wZ"))]
 
@@ -1215,44 +1215,44 @@
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
-                vecperm,   vecperm,
+                vecperm,   vecperm,   vecperm,
                 store,     load,      store,     *,         vecsimple, vecsimple,
                 vecsimple, *,         *,         vecstore,  vecload")
    (set_attr "num_insns"
                "*,         *,         *,         2,         *,         2,
-                *,         *,
+                *,         *,         *,
                 2,         2,         2,         2,         *,         *,
                 *,         5,         2,         *,         *")
    (set_attr "max_prefixed_insns"
                "*,         *,         *,         *,         *,         2,
-                *,         *,
+                *,         *,         *,
                 2,         2,         2,         2,         *,         *,
                 *,         *,         *,         *,         *")
    (set_attr "length"
                "*,         *,         *,         8,         *,         8,
-                *,         *,
+                *,         *,         *,
                 8,         8,         8,         8,         *,         *,
                 *,         20,        8,         *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
-                p10,       p10,
+                p10,       p10,       p10,
                 *,         *,         *,         *,         p9v,       *,
                 <VSisa>,   *,         *,         *,         *")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
-;;              XXSPLTIDP  LXVKQ
+;;              XXSPLTIDP  XXSPLTIW   LXVKQ
 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
-                wa,        wa,
+                wa,        wa,        wa,
                 wa,        v,         ?wa,       v,         <??r>,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        Y,         r,         r,
-                eV,        eQ,
+                eV,        eW,        eQ,
                 wE,        jwM,       ?jwM,      W,         <nW>,
                 v,         wZ"))]
 
@@ -1264,17 +1264,17 @@
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
-                vecperm,   vecperm,
+                vecperm,   vecperm,   vecperm,
                 vecsimple, vecsimple, vecsimple, *,         *,
                 vecstore,  vecload")
    (set_attr "length"
                "*,         *,         *,         16,        16,        16,
-                *,         *,
+                *,         *,         *,
                 *,         *,         *,         20,        16,
                 *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
-                p10,       p10,
+                p10,       p10,       p10,
                 p9v,       *,         <VSisa>,   *,         *,
                 *,         *")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
new file mode 100644
index 00000000000..2707d86e6fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V16QI vector constants where the
+   first 4 elements are the same as the next 4 elements, etc.  */
+
+vector unsigned char
+v16qi_const_1 (void)
+{
+  return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1,
+				  1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB.  */
+}
+
+vector unsigned char
+v16qi_const_2 (void)
+{
+  return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4,
+				  1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}                   } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}                    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
new file mode 100644
index 00000000000..05d4ee3f5cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SF vector constants.  */
+
+vector float
+v4sf_const_1 (void)
+{
+  return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_nan (void)
+{
+  return (vector float) { __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf (""),
+			  __builtin_nanf ("") };	/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_inf (void)
+{
+  return (vector float) { __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff (),
+			  __builtin_inff () };		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_const_m0 (void)
+{
+  return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f };	/* XXSPLTIB/VSLW.  */
+}
+
+vector float
+v4sf_splats_1 (void)
+{
+  return vec_splats (1.0f);				/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_nan (void)
+{
+  return vec_splats (__builtin_nanf (""));		/* XXSPLTIW.  */
+}
+
+vector float
+v4sf_splats_inf (void)
+{
+  return vec_splats (__builtin_inff ());		/* XXSPLTIW.  */
+}
+
+vector float
+v8hi_splats_m0 (void)
+{
+  return vec_splats (-0.0f);				/* XXSPLTIB/VSLW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M}     2 } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}      } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}       } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
new file mode 100644
index 00000000000..da909e948b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VEXTSB2W) is not done.  */
+
+vector int
+v4si_const_1 (void)
+{
+  return (vector int) { 1, 1, 1, 1 };			/* VSPLTISW.  */
+}
+
+vector int
+v4si_const_126 (void)
+{
+  return (vector int) { 126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector int
+v4si_const_1023 (void)
+{
+  return (vector int) { 1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector int
+v4si_splats_1 (void)
+{
+  return vec_splats (1);				/* VSPLTISW.  */
+}
+
+vector int
+v4si_splats_126 (void)
+{
+  return vec_splats (126);				/* XXSPLTIW.  */
+}
+
+vector int
+v8hi_splats_1023 (void)
+{
+  return vec_splats (1023);				/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mvspltisw\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvextsb2w\M}    } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
new file mode 100644
index 00000000000..290e05d4a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V8HI vector constants.  We make sure
+   the power9 support (XXSPLTIB/VUPKLSB) is not done.  */
+
+vector short
+v8hi_const_1 (void)
+{
+  return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 };	/* VSPLTISH.  */
+}
+
+vector short
+v8hi_const_126 (void)
+{
+  return (vector short) { 126, 126, 126, 126,
+			  126, 126, 126, 126 };		/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_const_1023 (void)
+{
+  return (vector short) { 1023, 1023, 1023, 1023,
+			  1023, 1023, 1023, 1023 };	/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1 (void)
+{
+  return vec_splats ((short)1);				/* VSPLTISH.  */
+}
+
+vector short
+v8hi_splats_126 (void)
+{
+  return vec_splats ((short)126);			/* XXSPLTIW.  */
+}
+
+vector short
+v8hi_splats_1023 (void)
+{
+  return vec_splats ((short)1023);			/* XXSPLTIW.  */
+}
+
+/* Test that we can optimize V8HI where all of the even elements are the same
+   and all of the odd elements are the same.  */
+vector short
+v8hi_const_1023_1000 (void)
+{
+  return (vector short) { 1023, 1000, 1023, 1000,
+			  1023, 1000, 1023, 1000 };	/* XXSPLTIW.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}  5 } } */
+/* { dg-final { scan-assembler-times {\mvspltish\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxxspltib\M}    } } */
+/* { dg-final { scan-assembler-not   {\mvupklsb\M}     } } */
+/* { dg-final { scan-assembler-not   {\mlxvx?\M}       } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}        } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-09-17  5:27 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-16 20:29 [gcc(refs/users/meissner/heads/work069)] Generate XXSPLTIW on power10 Michael Meissner
2021-09-17  5:27 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).