public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work045)] Generate XXSPLTIW for V8HI, V4SI and V4SF constants.
@ 2021-04-06 23:46 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2021-04-06 23:46 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:ba1d45128f78ef20730e6ddb75003b1225e541e6
commit ba1d45128f78ef20730e6ddb75003b1225e541e6
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Apr 6 19:45:40 2021 -0400
Generate XXSPLTIW for V8HI, V4SI and V4SF constants.
This patch generates the XXSPLTIW instruction on ISA 3.1 (power10) for
vector short, vector int, and vector float constants.
gcc/
2021-04-06 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/predicates.md (xxspltiw_operand): New predicate.
(easy_vector_constant): If we can generate XXSPLTIW, return true.
* config/rs6000/rs6000-protos.h (xxspltiw_constant_p): New
declaration.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
support for -mxxspltiw.
(xxspltib_constant_p): If we can use XXSPLTIW, don't return true
for doing XXSPLTIB and an extend.
(xxspltiw_constant_p): New function.
(xxspltidp_constant_p): Likewise.
(output_vec_const_move): Add support for generating XXSPLTIW.
(rs6000_opt_masks): Add -mxxspltiw.
* config/rs6000/rs6000.opt (-mxxspltiw): New debug switch.
* config/rs6000/vsx.md (XXSPLTIW): New iterator.
(*xxspltiw<mode>): New insns to generate XXSPLTIW.
Diff:
---
gcc/config/rs6000/predicates.md | 22 ++++++
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000.c | 144 ++++++++++++++++++++++++++++++++++++++
gcc/config/rs6000/rs6000.opt | 4 ++
gcc/config/rs6000/vsx.md | 19 +++++
5 files changed, 190 insertions(+)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e21bc745f72..dc23f62a3af 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -640,6 +640,25 @@
return num_insns == 1;
})
+;; Return 1 if the operand is a VEC_DUPLICATE or CONST_VECTOR and can be loaded
+;; into a vector using the ISA 3.1 XXSPLTIW instruction. Do not return 1 if the
+;; value can be loaded with a smaller XXSPLTIB or VSPLTISW instruction.
+(define_predicate "xxspltiw_operand"
+ (match_code "vec_duplicate,const_vector")
+{
+ HOST_WIDE_INT value = 0;
+
+ if (!xxspltiw_constant_p (op, mode, &value))
+ return false;
+
+ /* xxspltiw_constant_p returns V8HI as (element | (element << 16)). Undo
+ this by sign-extending the low 16 bits, so that the check below sees the
+ original element value and can tell whether it is in the range -16..15. */
+ if (mode == V8HImode)
+ value = ((value & 0xffff) ^ 0x8000) - 0x8000;
+
+ return !EASY_VECTOR_15 (value);
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
@@ -653,6 +672,9 @@
if (zero_constant (op, mode) || all_ones_constant (op, mode))
return true;
+ if (xxspltiw_operand (op, mode))
+ return true;
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 8ac30905013..eff72af8814 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
extern bool easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool xxspltiw_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index dc38c093c53..b02ab4b2a3b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4476,6 +4476,10 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_MMA;
}
+ if (TARGET_POWER10 && TARGET_VSX
+ && (rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
+ rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
+
if (!TARGET_PCREL && TARGET_PCREL_OPT)
rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
@@ -6460,6 +6464,12 @@ xxspltib_constant_p (rtx op,
else if (IN_RANGE (value, -1, 0))
*num_insns_ptr = 1;
+ /* If XXSPLTIW is available, don't return true if we can use that
+ instruction instead of doing 2 instructions. */
+ else if (TARGET_XXSPLTIW
+ && (mode == V4SImode || mode == V8HImode))
+ return false;
+
else
*num_insns_ptr = 2;
@@ -6467,6 +6477,117 @@ xxspltib_constant_p (rtx op,
return true;
}
+/* Return true if OP is of the given MODE and can be synthesized with the ISA
+   3.1 XXSPLTIW instruction, possibly with a sign extension.
+
+   OP is expected to be a VEC_DUPLICATE, or a CONST_VECTOR whose elements are
+   all equal.  MODE must be V8HImode, V4SImode or V4SFmode; if MODE is
+   VOIDmode, the mode of OP is used instead.
+
+   Return the constant that is being splatted via CONSTANT_PTR.  For V8HImode
+   this is (element | (element << 16)); for a CONST_DOUBLE element it is the
+   value returned by rs6000_const_f32_to_i32.  *CONSTANT_PTR is set to 0
+   whenever false is returned.  */
+
+bool
+xxspltiw_constant_p (rtx op,
+                     machine_mode mode,
+                     HOST_WIDE_INT *constant_ptr)
+{
+  *constant_ptr = 0;
+
+  if (!TARGET_XXSPLTIW)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (mode != V8HImode && mode != V4SImode && mode != V4SFmode)
+    return false;
+
+  rtx element = op;
+  if (GET_CODE (op) == VEC_DUPLICATE)
+    /* The scalar being duplicated is operand 0 of the VEC_DUPLICATE.  Testing
+       OP itself would never match CONST_INT or CONST_DOUBLE below.  */
+    element = XEXP (op, 0);
+
+  else if (GET_CODE (op) == CONST_VECTOR)
+    {
+      /* Every element must be identical to element 0 to be a splat.  */
+      size_t nunits = GET_MODE_NUNITS (mode);
+      element = CONST_VECTOR_ELT (op, 0);
+
+      for (size_t i = 1; i < nunits; i++)
+        if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+          return false;
+    }
+
+  HOST_WIDE_INT value;
+  if (CONST_INT_P (element))
+    {
+      value = INTVAL (element);
+      if (!SIGNED_INTEGER_NBIT_P (value, 32))
+        return false;
+
+      if (mode == V8HImode)
+        {
+          /* Replicate the 16-bit element into both halves of the 32-bit
+             word that XXSPLTIW splats.  */
+          value &= 0xffff;
+          value |= value << 16;
+        }
+    }
+
+  else if (CONST_DOUBLE_P (element))
+    value = rs6000_const_f32_to_i32 (element);
+
+  else
+    return false;
+
+  *constant_ptr = value;
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTIDP instruction.
+
+   MODE must be V2DFmode (OP is then a VEC_DUPLICATE, or a CONST_VECTOR whose
+   two elements are equal), SFmode or DFmode (OP itself is the constant).
+
+   Return the constant that is being splatted via CONSTANT_PTR to use in the
+   XXSPLTIDP instruction.  It is the value REAL_VALUE_TO_TARGET_SINGLE produces
+   for the constant.  *CONSTANT_PTR is set to 0 whenever false is returned.  */
+
+bool
+xxspltidp_constant_p (rtx op,
+                      machine_mode mode,
+                      long *constant_ptr)
+{
+  *constant_ptr = 0;
+
+  rtx element = op;
+  if (mode == V2DFmode)
+    {
+      /* Handle VEC_DUPLICATE and CONST_VECTOR.  */
+      if (GET_CODE (op) == VEC_DUPLICATE)
+        element = XEXP (op, 0);
+
+      else if (GET_CODE (op) == CONST_VECTOR)
+        {
+          element = CONST_VECTOR_ELT (op, 0);
+          if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
+            return false;
+        }
+
+      else
+        return false;
+    }
+
+  else if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (!CONST_DOUBLE_P (element))
+    return false;
+
+  /* Reject constants for which exact_real_truncate to SFmode fails.  */
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (element);
+  if (!exact_real_truncate (SFmode, rv))
+    return false;
+
+  long value;
+  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
+
+  /* Test for SFmode denormal (exponent is 0, mantissa field is non-zero).  */
+  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
+    return false;
+
+  *constant_ptr = value;
+  return true;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6511,6 +6632,28 @@ output_vec_const_move (rtx *operands)
gcc_unreachable ();
}
+ HOST_WIDE_INT xxspltiw_value = 0;
+ if (xxspltiw_constant_p (vec, mode, &xxspltiw_value))
+ {
+ /* Generate the smaller VSPLTIS{H,W} if we can. */
+ if (dest_vmx_p && mode == V8HImode)
+ {
+ long hi_value = ((xxspltiw_value & 0xffff) ^ 0x8000) - 0x8000;
+ if (IN_RANGE (hi_value, -16, 15))
+ {
+ operands[2] = GEN_INT (hi_value);
+ return "vspltish %0,%2";
+ }
+ }
+
+ operands[2] = GEN_INT (xxspltiw_value);
+ if (dest_vmx_p && mode == V4SImode
+ && IN_RANGE (xxspltiw_value, -16, 15))
+ return "vspltisw %0,%2";
+
+ return "xxspltiw %x0,%2";
+ }
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -24008,6 +24151,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "string", 0, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "xxspltiw", OPTION_MASK_XXSPLTIW, false, true },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
{ "aix64", OPTION_MASK_64BIT, false, false },
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0dbdf753673..9c0ecf75a6d 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -619,3 +619,7 @@ Generate (do not generate) MMA instructions.
mrelative-jumptables
Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
+
+mxxspltiw
+Target Undocumented Mask(XXSPLTIW) Var(rs6000_isa_flags)
+Generate (do not generate) the XXSPLTIW instruction.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bd800c4ce0c..972f08bba1f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -209,6 +209,9 @@
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
+;; Iterator for loading constants with xxspltiw
+(define_mode_iterator XXSPLTIW [V8HI V4SI V4SF])
+
;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
@@ -1159,6 +1162,22 @@
[(set_attr "type" "vecperm")
(set_attr "length" "8")])
+;; XXSPLTIW support. Load a constant accepted by xxspltiw_operand into a
+;; register matching vsx_register_operand. The immediate is recomputed with
+;; xxspltiw_constant_p when the insn is output and is emitted as operand 2.
+(define_insn "*xxspltiw<mode>"
+ [(set (match_operand:XXSPLTIW 0 "vsx_register_operand" "=wa")
+ (match_operand:XXSPLTIW 1 "xxspltiw_operand"))]
+ "TARGET_XXSPLTIW"
+{
+ HOST_WIDE_INT value = 0;
+
+ if (!xxspltiw_constant_p (operands[1], <MODE>mode, &value))
+ gcc_unreachable ();
+
+ operands[2] = GEN_INT (value);
+ return "xxspltiw %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")])
;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-04-06 23:46 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-06 23:46 [gcc(refs/users/meissner/heads/work045)] Generate XXSPLTIW for V8HI, V4SI and V4SF constants Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).