public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work042)] Use VEC_DUPLICATE for XXSPLTIW.
@ 2021-03-19 17:39 Michael Meissner
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2021-03-19 17:39 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e19a22cc6e14eeed2f7e8274dcaaec12e42bf452

commit e19a22cc6e14eeed2f7e8274dcaaec12e42bf452
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Mar 19 13:39:36 2021 -0400

    Use VEC_DUPLICATE for XXSPLTIW.
    
    This code enables using VEC_DUPLICATE if we have the ISA 3.1 XXSPLTIW
    instruction for V4SI constants.  The __builtin_vec_xxspltiw function uses
    this instead of an UNSPEC.
    
    gcc/
    2021-03-19  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/altivec.md (xxspltiw_v4si): Rewrite to use
            vec_duplicate.
            * config/rs6000/predicates.md (easy_vector_constant): Add support
            for constants we can create with XXSPLTIW.
            * config/rs6000/rs6000-protos.h (xxspltiw_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxspltiw_constant_p): New function.
            (xxspltib_constant_p): If we can generate XXSPLTIW don't return
            true for generating XXSPLTIB and extend operations.
            (output_vec_const_move): Add support for generating XXSPLTIW for
            V4SI moves.
            (rs6000_expand_vector_init): Add support for generating XXSPLTIW
            for V4SI moves.

Diff:
---
 gcc/config/rs6000/altivec.md      | 17 +++++----
 gcc/config/rs6000/predicates.md   |  4 +++
 gcc/config/rs6000/rs6000-protos.h |  1 +
 gcc/config/rs6000/rs6000.c        | 76 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index e2e17fe90ea..ec071bbd0a4 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -820,15 +820,20 @@
   "vs<SLDB_lr>dbi %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+;; Generate VSPLTISW, XXSPLTIB, or XXSPLTIW to load up V4SI constants.
 (define_insn "xxspltiw_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=wa")
-	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+       (vec_duplicate:V4SI
+        (match_operand:SI 1 "s32bit_cint_operand" "O,wM,wB,n")))]
  "TARGET_POWER10"
- "xxspltiw %x0,%1"
+ "@
+  xxspltib %x0,0
+  xxspltib %x0,255
+  vspltisw %0,%1
+  xxspltiw %x0,%1"
  [(set_attr "type" "vecperm")
-  (set_attr "prefixed" "yes")
-  (set_attr "prefixed_prepend_p" "no")])
+  (set_attr "prefixed" "*,*,*,yes")
+  (set_attr "prefixed_prepend_p" "*,*,*,no")])
 
 (define_expand "xxspltiw_v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=wa")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 859af75dfbd..0c5d7a096f3 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -649,10 +649,14 @@
     {
       int value = 256;
       int num_insns = -1;
+      rtx constant;
 
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (TARGET_POWER10 && xxspltiw_constant_p (op, mode, &constant))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 52436fc1637..46a7d7f7de9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 
 extern bool easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool xxspltiw_constant_p (rtx, machine_mode, rtx *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index f3ee3a7ea22..63525e764cf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6320,6 +6320,51 @@ gen_easy_altivec_constant (rtx op)
   gcc_unreachable ();
 }
 
+/* Return true if OP is of the given MODE and can be generated with the ISA 3.1
+   XXSPLTIW instruction.  Only V4SImode is currently accepted.  Return the
+   duplicated element via the pointer CONSTANT_PTR.  */
+
+bool
+xxspltiw_constant_p (rtx op, machine_mode mode, rtx *constant_ptr)
+{
+  *constant_ptr = NULL_RTX;
+
+  if (!TARGET_POWER10)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  else if (mode != GET_MODE (op))
+    return false;
+
+  if (mode != V4SImode)
+    return false;
+
+  rtx element;
+
+  /* Handle (vec_duplicate <constant>).  */
+  if (GET_CODE (op) == VEC_DUPLICATE)
+    element = XEXP (op, 0);
+
+  /* Handle (const_vector [...]).  */
+  else if (GET_CODE (op) == CONST_VECTOR)
+    {
+      size_t nunits = GET_MODE_NUNITS (mode);
+      element = CONST_VECTOR_ELT (op, 0);
+
+      for (size_t i = 1; i < nunits; i++)
+	if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+	  return false;
+    }
+
+  else
+    return false;
+
+  *constant_ptr = element;
+  return true;
+}
+
 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
    instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
 
@@ -6352,6 +6397,12 @@ xxspltib_constant_p (rtx op,
   else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
     return false;
 
+  /* If we can handle the constant directly with XXSPLTIW, don't bother using
+     XXSPLTIB and vector extend.  */
+  rtx constant;
+  if (xxspltiw_constant_p (op, mode, &constant))
+    return false;
+
   /* Handle (vec_duplicate <constant>).  */
   if (GET_CODE (op) == VEC_DUPLICATE)
     {
@@ -6446,6 +6497,7 @@ output_vec_const_move (rtx *operands)
   int shift;
   machine_mode mode;
   rtx dest, vec;
+  rtx element;
 
   dest = operands[0];
   vec = operands[1];
@@ -6484,6 +6536,22 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      /* See if we can generate a XXSPLTIW directly.  */
+      if (TARGET_POWER10 && xxspltiw_constant_p (vec, mode, &element))
+	{
+	  if (CONST_INT_P (element))
+	    operands[2] = element;
+	  else
+	    gcc_unreachable ();
+
+	  HOST_WIDE_INT value = INTVAL (operands[2]);
+	  if (IN_RANGE (value, -16, 15) && dest_vmx_p)
+	    return "vspltisw %0,%2";
+
+	  else
+	    return "xxspltiw %x0,%2";
+	}
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -6565,6 +6633,14 @@ rs6000_expand_vector_init (rtx target, rtx vals)
 
   if (n_var == 0)
     {
+      /* Generate XXSPLTIW if we can.  */
+      if (TARGET_POWER10 && all_same && mode == V4SImode)
+	{
+	  rtx dup = gen_rtx_VEC_DUPLICATE (mode, XVECEXP (vals, 0, 0));
+	  emit_insn (gen_rtx_SET (target, dup));							 
+	  return;
+	}
+
       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
       if ((int_vector_p || TARGET_VSX) && all_const_zero)


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/meissner/heads/work042)] Use VEC_DUPLICATE for XXSPLTIW.
@ 2021-03-19 16:25 Michael Meissner
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2021-03-19 16:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e90c5ba12e124384215ecec201d0487c0cabf8b7

commit e90c5ba12e124384215ecec201d0487c0cabf8b7
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Mar 19 12:24:54 2021 -0400

    Use VEC_DUPLICATE for XXSPLTIW.
    
    This code enables using VEC_DUPLICATE if we have the ISA 3.1 XXSPLTIW
    instruction for V4SI constants.  The __builtin_vec_xxspltiw function uses
    this instead of an UNSPEC.
    
    gcc/
    2021-03-19  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/altivec.md (xxspltiw_v4si): Rewrite to use
            vec_duplicate.
            * config/rs6000/predicates.md (easy_vector_constant): Add support
            for constants we can create with XXSPLTIW.
            * config/rs6000/rs6000-protos.h (xxspltiw_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxspltiw_constant_p): New function.
            (xxspltib_constant_p): If we can generate XXSPLTIW don't return
            true for generating XXSPLTIB and extend operations.
            (output_vec_const_move): Add support for generating XXSPLTIW for
            V4SI moves.
            (rs6000_expand_vector_init): Add support for generating XXSPLTIW
            for V4SI moves.

Diff:
---
 gcc/config/rs6000/altivec.md      | 17 +++++----
 gcc/config/rs6000/predicates.md   |  4 +++
 gcc/config/rs6000/rs6000-protos.h |  1 +
 gcc/config/rs6000/rs6000.c        | 76 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index e2e17fe90ea..ec071bbd0a4 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -820,15 +820,20 @@
   "vs<SLDB_lr>dbi %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+;; Generate VSPLTISW, XXSPLTIB, or XXSPLTIW to load up V4SI constants.
 (define_insn "xxspltiw_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=wa")
-	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+       (vec_duplicate:V4SI
+        (match_operand:SI 1 "s32bit_cint_operand" "O,wM,wB,n")))]
  "TARGET_POWER10"
- "xxspltiw %x0,%1"
+ "@
+  xxspltib %x0,0
+  xxspltib %x0,255
+  vspltisw %0,%1
+  xxspltiw %x0,%1"
  [(set_attr "type" "vecperm")
-  (set_attr "prefixed" "yes")
-  (set_attr "prefixed_prepend_p" "no")])
+  (set_attr "prefixed" "*,*,*,yes")
+  (set_attr "prefixed_prepend_p" "*,*,*,no")])
 
 (define_expand "xxspltiw_v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=wa")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 859af75dfbd..0c5d7a096f3 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -649,10 +649,14 @@
     {
       int value = 256;
       int num_insns = -1;
+      rtx constant;
 
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (TARGET_POWER10 && xxspltiw_constant_p (op, mode, &constant))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 52436fc1637..46a7d7f7de9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 
 extern bool easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool xxspltiw_constant_p (rtx, machine_mode, rtx *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index f3ee3a7ea22..63525e764cf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6320,6 +6320,51 @@ gen_easy_altivec_constant (rtx op)
   gcc_unreachable ();
 }
 
+/* Return true if OP is of the given MODE and can be generated with the ISA 3.1
+   XXSPLTIW instruction.  Only V4SImode is currently accepted.  Return the
+   duplicated element via the pointer CONSTANT_PTR.  */
+
+bool
+xxspltiw_constant_p (rtx op, machine_mode mode, rtx *constant_ptr)
+{
+  *constant_ptr = NULL_RTX;
+
+  if (!TARGET_POWER10)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  else if (mode != GET_MODE (op))
+    return false;
+
+  if (mode != V4SImode)
+    return false;
+
+  rtx element;
+
+  /* Handle (vec_duplicate <constant>).  */
+  if (GET_CODE (op) == VEC_DUPLICATE)
+    element = XEXP (op, 0);
+
+  /* Handle (const_vector [...]).  */
+  else if (GET_CODE (op) == CONST_VECTOR)
+    {
+      size_t nunits = GET_MODE_NUNITS (mode);
+      element = CONST_VECTOR_ELT (op, 0);
+
+      for (size_t i = 1; i < nunits; i++)
+	if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+	  return false;
+    }
+
+  else
+    return false;
+
+  *constant_ptr = element;
+  return true;
+}
+
 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
    instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
 
@@ -6352,6 +6397,12 @@ xxspltib_constant_p (rtx op,
   else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
     return false;
 
+  /* If we can handle the constant directly with XXSPLTIW, don't bother using
+     XXSPLTIB and vector extend.  */
+  rtx constant;
+  if (xxspltiw_constant_p (op, mode, &constant))
+    return false;
+
   /* Handle (vec_duplicate <constant>).  */
   if (GET_CODE (op) == VEC_DUPLICATE)
     {
@@ -6446,6 +6497,7 @@ output_vec_const_move (rtx *operands)
   int shift;
   machine_mode mode;
   rtx dest, vec;
+  rtx element;
 
   dest = operands[0];
   vec = operands[1];
@@ -6484,6 +6536,22 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      /* See if we can generate a XXSPLTIW directly.  */
+      if (TARGET_POWER10 && xxspltiw_constant_p (vec, mode, &element))
+	{
+	  if (CONST_INT_P (element))
+	    operands[2] = element;
+	  else
+	    gcc_unreachable ();
+
+	  HOST_WIDE_INT value = INTVAL (operands[2]);
+	  if (IN_RANGE (value, -16, 15) && dest_vmx_p)
+	    return "vspltisw %0,%2";
+
+	  else
+	    return "xxspltiw %x0,%2";
+	}
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -6565,6 +6633,14 @@ rs6000_expand_vector_init (rtx target, rtx vals)
 
   if (n_var == 0)
     {
+      /* Generate XXSPLTIW if we can.  */
+      if (TARGET_POWER10 && all_same && mode == V4SImode)
+	{
+	  rtx dup = gen_rtx_VEC_DUPLICATE (mode, XVECEXP (vals, 0, 0));
+	  emit_insn (gen_rtx_SET (target, dup));							 
+	  return;
+	}
+
       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
       if ((int_vector_p || TARGET_VSX) && all_const_zero)


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/meissner/heads/work042)] Use VEC_DUPLICATE for XXSPLTIW.
@ 2021-03-19 16:14 Michael Meissner
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2021-03-19 16:14 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:80a1cba0b88a49772c6a22f8d2e0be6b9b0ba822

commit 80a1cba0b88a49772c6a22f8d2e0be6b9b0ba822
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Mar 19 12:14:07 2021 -0400

    Use VEC_DUPLICATE for XXSPLTIW.
    
    This code enables using VEC_DUPLICATE if we have the ISA 3.1 XXSPLTIW
    instruction for V4SI constants.  The __builtin_vec_xxspltiw function uses
    this instead of an UNSPEC.
    
    gcc/
    2021-03-19  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/altivec.md (xxspltiw_v4si): Rewrite to use
            vec_duplicate.
            * config/rs6000/predicates.md (easy_vector_constant): Add support
            for constants we can create with XXSPLTIW.
            * config/rs6000/rs6000-protos.h (xxspltiw_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxspltiw_constant_p): New function.
            (xxspltib_constant_p): If we can generate XXSPLTIW don't return
            true for generating XXSPLTIB and extend operations.
            (output_vec_const_move): Add support for generating XXSPLTIW for
            V4SI moves.
            (rs6000_expand_vector_init): Add support for generating XXSPLTIW
            for V4SI moves.

Diff:
---
 gcc/config/rs6000/altivec.md      | 17 ++++++---
 gcc/config/rs6000/predicates.md   |  4 ++
 gcc/config/rs6000/rs6000-protos.h |  1 +
 gcc/config/rs6000/rs6000.c        | 78 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index e2e17fe90ea..ec071bbd0a4 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -820,15 +820,20 @@
   "vs<SLDB_lr>dbi %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+;; Generate VSPLTISW, XXSPLTIB, or XXSPLTIW to load up V4SI constants.
 (define_insn "xxspltiw_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=wa")
-	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
-		     UNSPEC_XXSPLTIW))]
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+       (vec_duplicate:V4SI
+        (match_operand:SI 1 "s32bit_cint_operand" "O,wM,wB,n")))]
  "TARGET_POWER10"
- "xxspltiw %x0,%1"
+ "@
+  xxspltib %x0,0
+  xxspltib %x0,255
+  vspltisw %0,%1
+  xxspltiw %x0,%1"
  [(set_attr "type" "vecperm")
-  (set_attr "prefixed" "yes")
-  (set_attr "prefixed_prepend_p" "no")])
+  (set_attr "prefixed" "*,*,*,yes")
+  (set_attr "prefixed_prepend_p" "*,*,*,no")])
 
 (define_expand "xxspltiw_v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=wa")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 859af75dfbd..0c5d7a096f3 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -649,10 +649,14 @@
     {
       int value = 256;
       int num_insns = -1;
+      rtx constant;
 
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      if (TARGET_POWER10 && xxspltiw_constant_p (op, mode, &constant))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 52436fc1637..46a7d7f7de9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 
 extern bool easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool xxspltiw_constant_p (rtx, machine_mode, rtx *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index f3ee3a7ea22..3600af26133 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6320,6 +6320,51 @@ gen_easy_altivec_constant (rtx op)
   gcc_unreachable ();
 }
 
+/* Return true if OP is of the given MODE and can be generated with the ISA 3.1
+   XXSPLTIW instruction.  Only V4SImode is currently accepted.  Return the
+   duplicated element via the pointer CONSTANT_PTR.  */
+
+bool
+xxspltiw_constant_p (rtx op, machine_mode mode, rtx *constant_ptr)
+{
+  *constant_ptr = NULL_RTX;
+
+  if (!TARGET_POWER10)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  else if (mode != GET_MODE (op))
+    return false;
+
+  if (mode != V4SImode)
+    return false;
+
+  rtx element;
+
+  /* Handle (vec_duplicate <constant>).  */
+  if (GET_CODE (op) == VEC_DUPLICATE)
+    element = XEXP (op, 0);
+
+  /* Handle (const_vector [...]).  */
+  else if (GET_CODE (op) == CONST_VECTOR)
+    {
+      size_t nunits = GET_MODE_NUNITS (mode);
+      element = CONST_VECTOR_ELT (op, 0);
+
+      for (size_t i = 1; i < nunits; i++)
+	if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+	  return false;
+    }
+
+  else
+    return false;
+
+  *constant_ptr = element;
+  return true;
+}
+
 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
    instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
 
@@ -6352,6 +6397,12 @@ xxspltib_constant_p (rtx op,
   else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
     return false;
 
+  /* If we can handle the constant directly with XXSPLTIW, don't bother using
+     XXSPLTIB and vector extend.  */
+  rtx constant;
+  if (xxspltiw_constant_p (op, mode, &constant))
+    return false;
+
   /* Handle (vec_duplicate <constant>).  */
   if (GET_CODE (op) == VEC_DUPLICATE)
     {
@@ -6446,6 +6497,7 @@ output_vec_const_move (rtx *operands)
   int shift;
   machine_mode mode;
   rtx dest, vec;
+  rtx element;
 
   dest = operands[0];
   vec = operands[1];
@@ -6484,6 +6536,24 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      /* See if we can generate a XXSPLTIW directly.  */
+      if (TARGET_POWER10 && xxspltiw_constant_p (vec, mode, &element))
+	{
+	  int r = reg_or_subregno (dest);
+
+	  if (CONST_INT_P (element))
+	    operands[2] = element;
+	  else
+	    gcc_unreachable ();
+
+	  HOST_WIDE_INT value = INTVAL (operands[2]);
+	  if (IN_RANGE (value, -16, 15) && ALTIVEC_REGNO_P (r))
+	    return "vspltisw %0,%2";
+
+	  else
+	    return "xxspltiw %x0,%2";
+	}
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -6565,6 +6635,14 @@ rs6000_expand_vector_init (rtx target, rtx vals)
 
   if (n_var == 0)
     {
+      /* Generate XXSPLTIW if we can.  */
+      if (TARGET_POWER10 && all_same && mode == V4SImode)
+	{
+	  rtx dup = gen_rtx_VEC_DUPLICATE (mode, XVECEXP (vals, 0, 0));
+	  emit_insn (gen_rtx_SET (target, dup));							 
+	  return;
+	}
+
       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
       if ((int_vector_p || TARGET_VSX) && all_const_zero)


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-03-19 17:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-19 17:39 [gcc(refs/users/meissner/heads/work042)] Use VEC_DUPLICATE for XXSPLTIW Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2021-03-19 16:25 Michael Meissner
2021-03-19 16:14 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).