public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work049)] Generate XXSPLTI32DX for some float constants.
@ 2021-04-21 22:59 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2021-04-21 22:59 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:1c4ed2f5afe847bbee385e86351123fcc283d03a
commit 1c4ed2f5afe847bbee385e86351123fcc283d03a
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Apr 21 18:58:59 2021 -0400
Generate XXSPLTI32DX for some float constants.
This patch generates XXSPLTI32DX for SF/DF floating point constants that
cannot be generated with the XXSPLTIDP instruction. In addition, it adds
support for using XXSPLTI32DX to load up V2DF constants, where both constants
are the same.
gcc/
2021-04-21 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/constraints.md (eD): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): If the constant
can be loaded with XXSPLTI32DX, it is easy.
(xxsplti32dx_operand): New predicate.
(easy_vector_constant): If the constant can be loaded with
XXSPLTI32DX, it is easy.
* config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
declaration.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
support for -mxxsplti32dx.
(xxsplti32dx_constant_p): New function.
(output_vec_const_move): If the operand can be loaded with
XXSPLTI32DX, split it.
(rs6000_opt_masks): Add -mxxsplti32dx.
* config/rs6000/rs6000.md (movsf_hardfloat): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat32, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat64, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
* config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
* config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
(XXSPLTI32DX): New mode iterator.
(xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
(xxsplti32dx_<mode>_first): New insn.
(xxsplti32dx_<mode>_second): New insn.
Diff:
---
gcc/config/rs6000/constraints.md | 6 +++
gcc/config/rs6000/predicates.md | 22 +++++++++++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 80 ++++++++++++++++++++++++++++++++++++++-
gcc/config/rs6000/rs6000.md | 67 ++++++++++++++++++++++----------
gcc/config/rs6000/rs6000.opt | 4 ++
gcc/config/rs6000/vsx.md | 75 ++++++++++++++++++++++++++++++++++++
7 files changed, 236 insertions(+), 20 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
(and (match_code "const_int")
(match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+ "A vector constant that can be loaded with XXSPLTI32DX instructions."
+ (match_operand 0 "xxsplti32dx_operand"))
+
;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
(define_constraint "eF"
"A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 8c461ba2b76..7e99603c65d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
if (xxspltidp_operand (op, mode))
return 1;
+ /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+ be loaded with a pair of those instructions. */
+ if (xxsplti32dx_operand (op, mode))
+ return 1;
+
/* Otherwise consider floating point constants hard, so that the
constant gets pushed to memory during the early RTL phases. This
has the advantage that double precision constants that can be
@@ -684,6 +689,20 @@
return xxspltidp_constant_p (op, mode, &value);
})
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair of ISA 3.1 XXSPLTI32DX instructions. Do not return true if
+;; the value is 0.0 or it can be loaded with XXSPLTIDP, since that is easy to
+;; generate without using XXSPLTI32DX.
+(define_predicate "xxsplti32dx_operand"
+ (match_code "const_double,const_int,const_vector,vec_duplicate")
+{
+ if (op == CONST0_RTX (mode))
+ return false;
+
+ HOST_WIDE_INT high = 0, low = 0;
+ return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
@@ -703,6 +722,9 @@
if (xxspltidp_operand (op, mode))
return true;
+ if (xxsplti32dx_operand (op, mode))
+ return true;
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index e87a51f42de..27fa17aeed9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
extern bool easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+ HOST_WIDE_INT *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d32437474f7..79ede7413a9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4481,6 +4481,9 @@ rs6000_option_override_internal (bool global_init_p)
if (TARGET_POWER10 && TARGET_VSX)
{
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTI32DX) == 0)
+ rs6000_isa_flags |= OPTION_MASK_XXSPLTI32DX;
+
if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
@@ -4488,7 +4491,9 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIDP;
}
else
- rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW | OPTION_MASK_XXSPLTIDP);
+ rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW
+ | OPTION_MASK_XXSPLTIDP
+ | OPTION_MASK_XXSPLTI32DX);
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
@@ -6549,6 +6554,75 @@ xxspltidp_constant_p (rtx op,
return true;
}
+/* Return true if OP is of the given MODE and can be synthesized with a pair
+   of ISA 3.1 XXSPLTI32DX instructions.  Return false if OP is zero or if it
+   can be loaded with a single XXSPLTIDP instruction.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  Both are set to 0 when false is returned.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+                        machine_mode mode,
+                        HOST_WIDE_INT *high_ptr,
+                        HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  rtx element = op;
+  machine_mode element_mode = mode;
+  if (mode == V2DFmode)
+    {
+      /* Handle VEC_DUPLICATE and CONST_VECTOR.  */
+      if (GET_CODE (op) == VEC_DUPLICATE)
+        element = XEXP (op, 0);
+
+      else if (GET_CODE (op) == CONST_VECTOR)
+        {
+          element = CONST_VECTOR_ELT (op, 0);
+          if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
+            return false;
+        }
+
+      else
+        return false;
+
+      element_mode = GET_MODE_INNER (mode);
+    }
+
+  /* Handle floating point constants.  */
+  if (element_mode == SFmode || element_mode == DFmode)
+    {
+      HOST_WIDE_INT xxspltidp_value = 0;
+
+      if (!CONST_DOUBLE_P (element))
+        return false;
+      /* ELEMENT is a scalar here, so test it in ELEMENT_MODE, not MODE.  */
+      if (xxspltidp_constant_p (element, element_mode, &xxspltidp_value))
+        return false;
+
+      long high_low[2];
+      const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (element);
+      REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+      *high_ptr = high_low[0];
+      *low_ptr = high_low[1];
+      return true;
+    }
+
+  else
+    return false;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6597,6 +6671,9 @@ output_vec_const_move (rtx *operands)
|| xxspltidp_operand (vec, mode))
return "#";
+ if (xxsplti32dx_operand (vec, mode))
+ return "#";
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -24124,6 +24201,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "string", 0, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "xxsplti32dx", OPTION_MASK_XXSPLTI32DX, false, true },
{ "xxspltiw", OPTION_MASK_XXSPLTIW, false, true },
{ "xxspltidp", OPTION_MASK_XXSPLTIDP, false, true },
#ifdef OPTION_MASK_64BIT
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3d4dc820bdd..e1b27c9586c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7612,17 +7612,17 @@
;;
;; LWZ LFS LXSSP LXSSPX STFS STXSSP
;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP
-;; MR MT<x> MF<x> NOP XXSPLTIDP
+;; MR MT<x> MF<x> NOP XXSPLTIDP XXSPLTI32DX
(define_insn "movsf_hardfloat"
[(set (match_operand:SF 0 "nonimmediate_operand"
"=!r, f, v, wa, m, wY,
Z, m, wa, !r, f, wa,
- !r, *c*l, !r, *h, wa")
+ !r, *c*l, !r, *h, wa, wa")
(match_operand:SF 1 "input_operand"
"m, m, wY, Z, f, v,
wa, r, j, j, f, wa,
- r, r, *h, 0, eF"))]
+ r, r, *h, 0, eF, eD"))]
"(register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& TARGET_HARD_FLOAT
@@ -7645,19 +7645,28 @@
mt%0 %1
mf%1 %0
nop
+ #
#"
[(set_attr "type"
"load, fpload, fpload, fpload, fpstore, fpstore,
fpstore, store, veclogical, integer, fpsimple, fpsimple,
- *, mtjmpr, mfjmpr, *, vecperm")
+ *, mtjmpr, mfjmpr, *, vecperm, vecperm")
(set_attr "isa"
"*, *, p9v, p8v, *, p9v,
p8v, *, *, *, *, *,
- *, *, *, *, p10")
+ *, *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *, *,
*, *, *, *, *, *,
- *, *, *, *, yes")])
+ *, *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")])
;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ
;; FMR MR MT%0 MF%1 NOP
@@ -7917,18 +7926,18 @@
;; STFD LFD FMR LXSD STXSD
;; LXSD STXSD XXLOR XXLXOR GPR<-0
-;; LWZ STW MR XXSPLTIDP
+;; LWZ STW MR XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat32"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
- Y, r, !r, wa")
+ Y, r, !r, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
- r, Y, r, eF"))]
+ r, Y, r, eF, eD"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -7946,24 +7955,33 @@
#
#
#
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, two,
- store, load, two, vecperm")
+ store, load, two, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "length"
"*, *, *, *, *,
*, *, *, *, 8,
- 8, 8, 8, *")
+ 8, 8, 8, *, *")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
- *, *, *, p10")
+ *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")])
;; STW LWZ MR G-const H-const F-const
@@ -7990,19 +8008,19 @@
;; STFD LFD FMR LXSD STXSD
;; LXSDX STXSDX XXLOR XXLXOR LI 0
;; STD LD MR MT{CTR,LR} MF{CTR,LR}
-;; NOP MFVSRD MTVSRD XXSPLTIDP
+;; NOP MFVSRD MTVSRD XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
YZ, r, !r, *c*l, !r,
- *h, r, <f64_dm>, wa")
+ *h, r, <f64_dm>, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
r, YZ, r, r, *h,
- 0, <f64_dm>, r, eF"))]
+ 0, <f64_dm>, r, eF, eD"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8025,23 +8043,34 @@
nop
mfvsrd %0,%x1
mtvsrd %x0,%1
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, integer,
store, load, *, mtjmpr, mfjmpr,
- *, mfvsr, mtvsr, vecperm")
+ *, mfvsr, mtvsr, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
*, *, *, *, *,
- *, p8v, p8v, p10")
+ *, p8v, p8v, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *")])
;; STD LD MR MT<SPR> MF<SPR> G-const
;; H-const F-const Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6620cdb7716..bd269369ca0 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -627,3 +627,7 @@ Generate (do not generate) the XXSPLTIW instruction.
mxxspltidp
Target Undocumented Mask(XXSPLTIDP) Var(rs6000_isa_flags)
Generate (do not generate) the XXSPLTIDP instruction.
+
+mxxsplti32dx
+Target Undocumented Mask(XXSPLTI32DX) Var(rs6000_isa_flags)
+Generate (do not generate) the XXSPLTI32DX instruction.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 44cd380bfa7..22b12fd238e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -386,6 +386,7 @@
UNSPEC_VDIVEU
UNSPEC_XXSPLTIDP
UNSPEC_XXSPLTI32DX
+ UNSPEC_XXSPLTI32DX_CONST
UNSPEC_XXPERMX
UNSPEC_XXEVAL
UNSPEC_XXBLEND
@@ -6346,6 +6347,80 @@
DONE;
})
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+ "TARGET_XXSPLTI32DX"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 2)
+ (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+ (set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 0)
+ (match_dup 4)
+ (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+ HOST_WIDE_INT high = 0, low = 0;
+
+ if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+ gcc_unreachable ();
+
+ if (!BYTES_BIG_ENDIAN)
+ std::swap (high, low);
+
+ /* If the low bits are 0 or all 1s, initialize that word first. This way we
+ can use a smaller XXSPLTIB instruction instead of the first XXSPLTI32DX. */
+ if (low == 0 || low == -1)
+ {
+ operands[2] = const1_rtx;
+ operands[3] = GEN_INT (low);
+ operands[4] = const0_rtx;
+ operands[5] = GEN_INT (high);
+ }
+ else
+ {
+ operands[2] = const0_rtx;
+ operands[3] = GEN_INT (high);
+ operands[4] = const1_rtx;
+ operands[5] = GEN_INT (low);
+ }
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")
+ (set_attr "num_insns" "2")
+ (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+ (unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+ (match_operand 2 "const_int_operand" "O,wM,n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "@
+ xxspltib %x0,0
+ xxspltib %x0,255
+ xxsplti32dx %x0,%1,%2"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+ (match_operand 2 "u1bit_cint_operand" "n")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "xxsplti32dx %x0,%2,%3"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")])
+
;; XXSPLTI32DX built-in function support.
(define_expand "xxsplti32dx_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=wa")
^ permalink raw reply [flat|nested] 4+ messages in thread
* [gcc(refs/users/meissner/heads/work049)] Generate XXSPLTI32DX for some float constants.
@ 2021-04-22 3:05 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2021-04-22 3:05 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:36e5987d78dca4bb656b43e0ad780224bef6c06f
commit 36e5987d78dca4bb656b43e0ad780224bef6c06f
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Apr 21 23:04:41 2021 -0400
Generate XXSPLTI32DX for some float constants.
This patch generates XXSPLTI32DX for SF/DF floating point constants that
cannot be generated with the XXSPLTIDP instruction. In addition, it adds
support for using XXSPLTI32DX to load up V2DF constants, where both constants
are the same.
gcc/
2021-04-21 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/constraints.md (eD): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): If the constant
can be loaded with XXSPLTI32DX, it is easy.
(xxsplti32dx_operand): New predicate.
(easy_vector_constant): If the constant can be loaded with
XXSPLTI32DX, it is easy.
* config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
declaration.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
support for -mxxsplti32dx.
(const_vector_element_all_same): New helper function.
(xxspltidp_constant_p): Move CONST_VECTOR and VEC_DUPLICATE code
into const_vector_element_all_same and call it.
(xxsplti32dx_constant_float_p): New helper function.
(xxsplti32dx_constant_p): New function.
(output_vec_const_move): If the operand can be loaded with
XXSPLTI32DX, split it.
(rs6000_opt_masks): Add -mxxsplti32dx.
* config/rs6000/rs6000.md (movsf_hardfloat): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat32, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat64, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
* config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
* config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
(XXSPLTI32DX): New mode iterator.
(xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
(xxsplti32dx_<mode>_first): New insn.
(xxsplti32dx_<mode>_second): New insn.
Diff:
---
gcc/config/rs6000/constraints.md | 6 ++
gcc/config/rs6000/predicates.md | 18 ++++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 171 ++++++++++++++++++++++++++++++++++----
gcc/config/rs6000/rs6000.md | 67 ++++++++++-----
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 72 ++++++++++++++++
7 files changed, 307 insertions(+), 33 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
(and (match_code "const_int")
(match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+ "A vector constant that can be loaded with XXSPLTI32DX instructions."
+ (match_operand 0 "xxsplti32dx_operand"))
+
;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
(define_constraint "eF"
"A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 8c461ba2b76..fc30b69018d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
if (xxspltidp_operand (op, mode))
return 1;
+ /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+ be loaded with a pair of those instructions. */
+ if (xxsplti32dx_operand (op, mode))
+ return 1;
+
/* Otherwise consider floating point constants hard, so that the
constant gets pushed to memory during the early RTL phases. This
has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
return xxspltidp_constant_p (op, mode, &value);
})
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair of ISA 3.1 XXSPLTI32DX instructions. Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+ (match_code "const_double,const_vector,vec_duplicate")
+{
+ HOST_WIDE_INT high = 0, low = 0;
+ return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
if (xxspltidp_operand (op, mode))
return true;
+ if (xxsplti32dx_operand (op, mode))
+ return true;
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index e87a51f42de..27fa17aeed9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
extern bool easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+ HOST_WIDE_INT *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d32437474f7..d931388d778 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4481,6 +4481,9 @@ rs6000_option_override_internal (bool global_init_p)
if (TARGET_POWER10 && TARGET_VSX)
{
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTI32DX) == 0)
+ rs6000_isa_flags |= OPTION_MASK_XXSPLTI32DX;
+
if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
@@ -4488,7 +4491,9 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIDP;
}
else
- rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW | OPTION_MASK_XXSPLTIDP);
+ rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW
+ | OPTION_MASK_XXSPLTIDP
+ | OPTION_MASK_XXSPLTI32DX);
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
@@ -6480,6 +6485,37 @@ xxspltib_constant_p (rtx op,
return true;
}
+/* Return the element of a constant vector whose elements are all the same.  In
+   addition if VEC_DUPLICATE is used, return the element being duplicated (only
+   CONST_INT and CONST_DOUBLE elements).  Otherwise, return NULL_RTX.  */
+
+static rtx
+const_vector_element_all_same (rtx op)
+{
+  if (GET_CODE (op) == VEC_DUPLICATE)
+    {
+      rtx element = XEXP (op, 0);
+      return (CONST_INT_P (element) || CONST_DOUBLE_P (element)
+              ? element
+              : NULL_RTX);
+    }
+
+  else if (GET_CODE (op) == CONST_VECTOR)
+    {
+      machine_mode mode = GET_MODE (op);
+      size_t n_elts = GET_MODE_NUNITS (mode);
+      rtx element = CONST_VECTOR_ELT (op, 0);
+      /* Compare every remaining element against element 0.  */
+      for (size_t i = 1; i < n_elts; i++)
+        if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+          return NULL_RTX;
+
+      return element;
+    }
+
+  return NULL_RTX;
+}
+
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
XXSPLTIDP instruction.
@@ -6502,19 +6538,9 @@ xxspltidp_constant_p (rtx op,
rtx element = op;
if (mode == V2DFmode)
{
- /* Handle VEC_DUPLICATE and CONST_VECTOR. */
- if (GET_CODE (op) == VEC_DUPLICATE)
- element = XEXP (op, 0);
-
- else if (GET_CODE (op) == CONST_VECTOR)
- {
- element = CONST_VECTOR_ELT (op, 0);
- if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
- return false;
- }
-
- else
- return false;
+ element = const_vector_element_all_same (op);
+ if (!element)
+ return false;
mode = DFmode;
}
@@ -6549,6 +6575,119 @@ xxspltidp_constant_p (rtx op,
return true;
}
+/* Return true if the SFmode or DFmode floating point constant OP should be
+   loaded with a pair of XXSPLTI32DX instructions.  Return false for constants
+   that have a simpler sequence: 0.0 (XXSPLTIB) and anything that XXSPLTIDP can
+   load (constants that fit as SFmode constants).
+
+   On success, the two 32-bit constants for the two XXSPLTI32DX instructions
+   are stored through HIGH_PTR and LOW_PTR.  On failure, neither is
+   written.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+                              machine_mode mode,
+                              HOST_WIDE_INT *high_ptr,
+                              HOST_WIDE_INT *low_ptr)
+{
+  /* Only SFmode and DFmode CONST_DOUBLEs are handled here.  */
+  if (!CONST_DOUBLE_P (op)
+      || (mode != SFmode && mode != DFmode))
+    return false;
+
+  /* Reject the constants that have a simpler sequence: 0.0 and anything that
+     XXSPLTIDP can load.  */
+  HOST_WIDE_INT unused_value = 0;
+  if (op == CONST0_RTX (mode)
+      || xxspltidp_constant_p (op, mode, &unused_value))
+    return false;
+
+  /* Fetch the double precision image of OP as a pair of words.  */
+  long words[2];
+  REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), words);
+
+  /* WORDS is laid out in memory order, which XXSPLTI32DX does not want: on
+     little endian targets the two words have to be exchanged, so pick the
+     index of each half up front instead of swapping in place.  */
+  const int high_index = BYTES_BIG_ENDIAN ? 0 : 1;
+  const int low_index = 1 - high_index;
+
+  *high_ptr = words[high_index];
+  *low_ptr = words[low_index];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instructions.  Return false if the constant is 0 or can be
+   synthesized with XXSPLTIDP, or with XXSPLTIB and VEXTSB2D.
+
+   We handle the following types of constants:
+
+   1) vector double constants where each element is the same and you can't
+      load the constant with XXSPLTIDP;
+
+   2) vector long long constants where each element is the same CONST_INT;
+
+   3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  Both are set to 0 on entry.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+                        machine_mode mode,
+                        HOST_WIDE_INT *high_ptr,
+                        HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+        rtx ele = const_vector_element_all_same (op);
+        if (!ele)
+          return false;
+
+        return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+        rtx ele = const_vector_element_all_same (op);
+        if (!ele || !CONST_INT_P (ele))  /* INTVAL needs a CONST_INT.  */
+          return false;
+
+        /* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+        HOST_WIDE_INT value = INTVAL (ele);
+        if (IN_RANGE (value, -128, 127))
+          return false;
+
+        *high_ptr = value >> 32;
+        *low_ptr = value & 0xffffffff;
+        return true;
+      }
+    }
+
+  return false;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6597,6 +6736,9 @@ output_vec_const_move (rtx *operands)
|| xxspltidp_operand (vec, mode))
return "#";
+ if (xxsplti32dx_operand (vec, mode))
+ return "#";
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -24124,6 +24266,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "string", 0, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "xxsplti32dx", OPTION_MASK_XXSPLTI32DX, false, true },
{ "xxspltiw", OPTION_MASK_XXSPLTIW, false, true },
{ "xxspltidp", OPTION_MASK_XXSPLTIDP, false, true },
#ifdef OPTION_MASK_64BIT
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3d4dc820bdd..e1b27c9586c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7612,17 +7612,17 @@
;;
;; LWZ LFS LXSSP LXSSPX STFS STXSSP
;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP
-;; MR MT<x> MF<x> NOP XXSPLTIDP
+;; MR MT<x> MF<x> NOP XXSPLTIDP XXSPLTI32DX
(define_insn "movsf_hardfloat"
[(set (match_operand:SF 0 "nonimmediate_operand"
"=!r, f, v, wa, m, wY,
Z, m, wa, !r, f, wa,
- !r, *c*l, !r, *h, wa")
+ !r, *c*l, !r, *h, wa, wa")
(match_operand:SF 1 "input_operand"
"m, m, wY, Z, f, v,
wa, r, j, j, f, wa,
- r, r, *h, 0, eF"))]
+ r, r, *h, 0, eF, eD"))]
"(register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& TARGET_HARD_FLOAT
@@ -7645,19 +7645,28 @@
mt%0 %1
mf%1 %0
nop
+ #
#"
[(set_attr "type"
"load, fpload, fpload, fpload, fpstore, fpstore,
fpstore, store, veclogical, integer, fpsimple, fpsimple,
- *, mtjmpr, mfjmpr, *, vecperm")
+ *, mtjmpr, mfjmpr, *, vecperm, vecperm")
(set_attr "isa"
"*, *, p9v, p8v, *, p9v,
p8v, *, *, *, *, *,
- *, *, *, *, p10")
+ *, *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *, *,
*, *, *, *, *, *,
- *, *, *, *, yes")])
+ *, *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")])
;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ
;; FMR MR MT%0 MF%1 NOP
@@ -7917,18 +7926,18 @@
;; STFD LFD FMR LXSD STXSD
;; LXSD STXSD XXLOR XXLXOR GPR<-0
-;; LWZ STW MR XXSPLTIDP
+;; LWZ STW MR XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat32"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
- Y, r, !r, wa")
+ Y, r, !r, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
- r, Y, r, eF"))]
+ r, Y, r, eF, eD"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -7946,24 +7955,33 @@
#
#
#
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, two,
- store, load, two, vecperm")
+ store, load, two, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "length"
"*, *, *, *, *,
*, *, *, *, 8,
- 8, 8, 8, *")
+ 8, 8, 8, *, *")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
- *, *, *, p10")
+ *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")])
;; STW LWZ MR G-const H-const F-const
@@ -7990,19 +8008,19 @@
;; STFD LFD FMR LXSD STXSD
;; LXSDX STXSDX XXLOR XXLXOR LI 0
;; STD LD MR MT{CTR,LR} MF{CTR,LR}
-;; NOP MFVSRD MTVSRD XXSPLTIDP
+;; NOP MFVSRD MTVSRD XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
YZ, r, !r, *c*l, !r,
- *h, r, <f64_dm>, wa")
+ *h, r, <f64_dm>, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
r, YZ, r, r, *h,
- 0, <f64_dm>, r, eF"))]
+ 0, <f64_dm>, r, eF, eD"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8025,23 +8043,34 @@
nop
mfvsrd %0,%x1
mtvsrd %x0,%1
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, integer,
store, load, *, mtjmpr, mfjmpr,
- *, mfvsr, mtvsr, vecperm")
+ *, mfvsr, mtvsr, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
*, *, *, *, *,
- *, p8v, p8v, p10")
+ *, p8v, p8v, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *")])
;; STD LD MR MT<SPR> MF<SPR> G-const
;; H-const F-const Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6620cdb7716..bd269369ca0 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -627,3 +627,7 @@ Generate (do not generate) the XXSPLTIW instruction.
mxxspltidp
Target Undocumented Mask(XXSPLTIDP) Var(rs6000_isa_flags)
Generate (do not generate) the XXSPLTIDP instruction.
+
+mxxsplti32dx
+Target Undocumented Mask(XXSPLTI32DX) Var(rs6000_isa_flags)
+Generate (do not generate) the XXSPLTI32DX instruction.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 44cd380bfa7..0efe77489b6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -386,6 +386,7 @@
UNSPEC_VDIVEU
UNSPEC_XXSPLTIDP
UNSPEC_XXSPLTI32DX
+ UNSPEC_XXSPLTI32DX_CONST
UNSPEC_XXPERMX
UNSPEC_XXEVAL
UNSPEC_XXBLEND
@@ -6346,6 +6347,77 @@
DONE;
})
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+ "TARGET_XXSPLTI32DX"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 2)
+ (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+ (set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 0)
+ (match_dup 4)
+ (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+ HOST_WIDE_INT high = 0, low = 0;
+
+ if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+ gcc_unreachable ();
+
+ /* If the low bits are 0 or all 1s, initialize that word first. This way we
+ can use a smaller XXSPLTIB instruction instead of the first XXSPLTI32DX. */
+ if (low == 0 || low == -1)
+ {
+ operands[2] = const1_rtx;
+ operands[3] = GEN_INT (low);
+ operands[4] = const0_rtx;
+ operands[5] = GEN_INT (high);
+ }
+ else
+ {
+ operands[2] = const0_rtx;
+ operands[3] = GEN_INT (high);
+ operands[4] = const1_rtx;
+ operands[5] = GEN_INT (low);
+ }
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")
+ (set_attr "num_insns" "2")
+ (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+ (unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+ (match_operand 2 "const_int_operand" "O,wM,n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "@
+ xxspltib %x0,0
+ xxspltib %x0,255
+ xxsplti32dx %x0,%1,%2"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+ (match_operand 2 "u1bit_cint_operand" "n")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "xxsplti32dx %x0,%2,%3"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")])
+
;; XXSPLTI32DX built-in function support.
(define_expand "xxsplti32dx_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=wa")
^ permalink raw reply [flat|nested] 4+ messages in thread
* [gcc(refs/users/meissner/heads/work049)] Generate XXSPLTI32DX for some float constants.
@ 2021-04-22 1:28 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2021-04-22 1:28 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:a04a707d8405d0c619aad077b20790d15ef85d61
commit a04a707d8405d0c619aad077b20790d15ef85d61
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Apr 21 21:28:04 2021 -0400
Generate XXSPLTI32DX for some float constants.
This patch generates XXSPLTI32DX for SF/DF floating point constants that
cannot be generated with the XXSPLTIDP instruction. In addition, it adds
support for using XXSPLTI32DX to load up V2DF constants, where both constants
are the same.
gcc/
2021-04-21 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/constraints.md (eD): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): If the constant
can be loaded with XXSPLTI32DX, it is easy.
(xxsplti32dx_operand): New predicate.
(easy_vector_constant): If the constant can be loaded with
XXSPLTI32DX, it is easy.
* config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
declaration.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
support for -mxxsplti32dx.
(const_vector_element_all_same): New helper function.
(xxspltidp_constant_p): Move CONST_VECTOR and VEC_DUPLICATE code
into const_vector_element_all_same and call it.
(xxsplti32dx_constant_float_p): New helper function.
(xxsplti32dx_constant_p): New function.
(output_vec_const_move): If the operand can be loaded with
XXSPLTI32DX, split it.
(rs6000_opt_masks): Add -mxxsplti32dx.
* config/rs6000/rs6000.md (movsf_hardfloat): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat32, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat64, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
* config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
* config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
(XXSPLTI32DX): New mode iterator.
(xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
(xxsplti32dx_<mode>_first): New insn.
(xxsplti32dx_<mode>_second): New insn.
Diff:
---
gcc/config/rs6000/constraints.md | 6 ++
gcc/config/rs6000/predicates.md | 18 +++++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 166 ++++++++++++++++++++++++++++++++++----
gcc/config/rs6000/rs6000.md | 67 ++++++++++-----
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 72 +++++++++++++++++
7 files changed, 302 insertions(+), 33 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
(and (match_code "const_int")
(match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+ "A vector constant that can be loaded with XXSPLTI32DX instructions."
+ (match_operand 0 "xxsplti32dx_operand"))
+
;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
(define_constraint "eF"
"A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 8c461ba2b76..fc30b69018d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
if (xxspltidp_operand (op, mode))
return 1;
+ /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+ be loaded with a pair of those instructions. */
+ if (xxsplti32dx_operand (op, mode))
+ return 1;
+
/* Otherwise consider floating point constants hard, so that the
constant gets pushed to memory during the early RTL phases. This
has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
return xxspltidp_constant_p (op, mode, &value);
})
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair of ISA 3.1 XXSPLTI32DX instructions. Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+ (match_code "const_double,const_vector,vec_duplicate")
+{
+ HOST_WIDE_INT high = 0, low = 0;
+ return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
if (xxspltidp_operand (op, mode))
return true;
+ if (xxsplti32dx_operand (op, mode))
+ return true;
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index e87a51f42de..27fa17aeed9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
extern bool easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+ HOST_WIDE_INT *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d32437474f7..967383dc2ad 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4481,6 +4481,9 @@ rs6000_option_override_internal (bool global_init_p)
if (TARGET_POWER10 && TARGET_VSX)
{
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTI32DX) == 0)
+ rs6000_isa_flags |= OPTION_MASK_XXSPLTI32DX;
+
if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
@@ -4488,7 +4491,9 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIDP;
}
else
- rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW | OPTION_MASK_XXSPLTIDP);
+ rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW
+ | OPTION_MASK_XXSPLTIDP
+ | OPTION_MASK_XXSPLTI32DX);
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
@@ -6480,6 +6485,37 @@ xxspltib_constant_p (rtx op,
return true;
}
+/* If OP is a CONST_VECTOR whose elements are all equal, return that element.
+ If OP is a VEC_DUPLICATE of a CONST_INT or CONST_DOUBLE, return the element
+ being duplicated. In every other case, return NULL_RTX. */
+
+static rtx
+const_vector_element_all_same (rtx op)
+{
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ {
+ rtx element = XEXP (op, 0);
+ return (CONST_INT_P (element) || CONST_DOUBLE_P (element)
+ ? element
+ : NULL_RTX);
+ }
+
+ else if (GET_CODE (op) == CONST_VECTOR)
+ {
+ machine_mode mode = GET_MODE (op);
+ size_t n_elts = GET_MODE_NUNITS (mode);
+ rtx element = CONST_VECTOR_ELT (op, 0);
+ /* Compare every remaining element (index I) against element 0. */
+ for (size_t i = 1; i < n_elts; i++)
+ if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i)))
+ return NULL_RTX;
+
+ return element;
+ }
+
+ return NULL_RTX;
+}
+
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
XXSPLTIDP instruction.
@@ -6502,19 +6538,9 @@ xxspltidp_constant_p (rtx op,
rtx element = op;
if (mode == V2DFmode)
{
- /* Handle VEC_DUPLICATE and CONST_VECTOR. */
- if (GET_CODE (op) == VEC_DUPLICATE)
- element = XEXP (op, 0);
-
- else if (GET_CODE (op) == CONST_VECTOR)
- {
- element = CONST_VECTOR_ELT (op, 0);
- if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
- return false;
- }
-
- else
- return false;
+ element = const_vector_element_all_same (op);
+ if (!element)
+ return false;
mode = DFmode;
}
@@ -6549,6 +6575,114 @@ xxspltidp_constant_p (rtx op,
return true;
}
+/* Return true if OP is a floating point constant that can be loaded with the
+ XXSPLTI32DX instruction. If the constant can be loaded with the simpler
+ XXSPLTIDP instruction, return false.
+
+ Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+ via HIGH_PTR and LOW_PTR. */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+ machine_mode mode,
+ HOST_WIDE_INT *high_ptr,
+ HOST_WIDE_INT *low_ptr)
+{
+ HOST_WIDE_INT xxspltidp_value = 0;
+
+ if (!CONST_DOUBLE_P (op))
+ return false;
+
+ if (mode != SFmode && mode != DFmode)
+ return false;
+
+ if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+ return false;
+
+ long high_low[2];
+ const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+ REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+ /* The double precision value is laid out in memory order. We need to undo
+ this for XXSPLTI32DX. */
+ if (!BYTES_BIG_ENDIAN)
+ std::swap (high_low[0], high_low[1]);
+
+ *high_ptr = high_low[0];
+ *low_ptr = high_low[1];
+ return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+ XXSPLTI32DX instruction. If the instruction can be synthesized with
+ XXSPLTIDP or is 0/-1, return false.
+
+ We handle the following types of constants:
+
+ 1) vector double constants where each element is the same and you can't
+ load the constant with XXSPLTIDP;
+
+ 2) vector long long constants where each element is the same;
+
+ 3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+ Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+ via HIGH_PTR and LOW_PTR. */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+ machine_mode mode,
+ HOST_WIDE_INT *high_ptr,
+ HOST_WIDE_INT *low_ptr)
+{
+ rtx element;
+ HOST_WIDE_INT value;
+
+ *high_ptr = *low_ptr = 0;
+
+ if (!TARGET_XXSPLTI32DX)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ if (op == CONST0_RTX (mode))
+ return false;
+
+ switch (mode)
+ {
+ default:
+ break;
+
+ case E_V2DFmode:
+ element = const_vector_element_all_same (op);
+ if (!element)
+ return false;
+
+ return xxsplti32dx_constant_float_p (element, DFmode, high_ptr, low_ptr);
+
+ case E_SFmode:
+ case E_DFmode:
+ return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+ case E_V2DImode:
+ element = const_vector_element_all_same (op);
+ if (!element)
+ return false;
+
+ value = INTVAL (element);
+ /* If we can generate XXSPLTIB and VEXTSB2D, don't return true. */
+ if (IN_RANGE (value, -128, 127))
+ return false;
+
+ *high_ptr = value >> 32;
+ *low_ptr = value & 0xffffffff;
+ return true;
+ }
+
+ return false;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6597,6 +6731,9 @@ output_vec_const_move (rtx *operands)
|| xxspltidp_operand (vec, mode))
return "#";
+ if (xxsplti32dx_operand (vec, mode))
+ return "#";
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -24124,6 +24261,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "string", 0, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "xxsplti32dx", OPTION_MASK_XXSPLTI32DX, false, true },
{ "xxspltiw", OPTION_MASK_XXSPLTIW, false, true },
{ "xxspltidp", OPTION_MASK_XXSPLTIDP, false, true },
#ifdef OPTION_MASK_64BIT
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3d4dc820bdd..e1b27c9586c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7612,17 +7612,17 @@
;;
;; LWZ LFS LXSSP LXSSPX STFS STXSSP
;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP
-;; MR MT<x> MF<x> NOP XXSPLTIDP
+;; MR MT<x> MF<x> NOP XXSPLTIDP XXSPLTI32DX
(define_insn "movsf_hardfloat"
[(set (match_operand:SF 0 "nonimmediate_operand"
"=!r, f, v, wa, m, wY,
Z, m, wa, !r, f, wa,
- !r, *c*l, !r, *h, wa")
+ !r, *c*l, !r, *h, wa, wa")
(match_operand:SF 1 "input_operand"
"m, m, wY, Z, f, v,
wa, r, j, j, f, wa,
- r, r, *h, 0, eF"))]
+ r, r, *h, 0, eF, eD"))]
"(register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& TARGET_HARD_FLOAT
@@ -7645,19 +7645,28 @@
mt%0 %1
mf%1 %0
nop
+ #
#"
[(set_attr "type"
"load, fpload, fpload, fpload, fpstore, fpstore,
fpstore, store, veclogical, integer, fpsimple, fpsimple,
- *, mtjmpr, mfjmpr, *, vecperm")
+ *, mtjmpr, mfjmpr, *, vecperm, vecperm")
(set_attr "isa"
"*, *, p9v, p8v, *, p9v,
p8v, *, *, *, *, *,
- *, *, *, *, p10")
+ *, *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *, *,
*, *, *, *, *, *,
- *, *, *, *, yes")])
+ *, *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")])
;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ
;; FMR MR MT%0 MF%1 NOP
@@ -7917,18 +7926,18 @@
;; STFD LFD FMR LXSD STXSD
;; LXSD STXSD XXLOR XXLXOR GPR<-0
-;; LWZ STW MR XXSPLTIDP
+;; LWZ STW MR XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat32"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
- Y, r, !r, wa")
+ Y, r, !r, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
- r, Y, r, eF"))]
+ r, Y, r, eF, eD"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -7946,24 +7955,33 @@
#
#
#
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, two,
- store, load, two, vecperm")
+ store, load, two, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "length"
"*, *, *, *, *,
*, *, *, *, 8,
- 8, 8, 8, *")
+ 8, 8, 8, *, *")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
- *, *, *, p10")
+ *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")])
;; STW LWZ MR G-const H-const F-const
@@ -7990,19 +8008,19 @@
;; STFD LFD FMR LXSD STXSD
;; LXSDX STXSDX XXLOR XXLXOR LI 0
;; STD LD MR MT{CTR,LR} MF{CTR,LR}
-;; NOP MFVSRD MTVSRD XXSPLTIDP
+;; NOP MFVSRD MTVSRD XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
YZ, r, !r, *c*l, !r,
- *h, r, <f64_dm>, wa")
+ *h, r, <f64_dm>, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
r, YZ, r, r, *h,
- 0, <f64_dm>, r, eF"))]
+ 0, <f64_dm>, r, eF, eD"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8025,23 +8043,34 @@
nop
mfvsrd %0,%x1
mtvsrd %x0,%1
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, integer,
store, load, *, mtjmpr, mfjmpr,
- *, mfvsr, mtvsr, vecperm")
+ *, mfvsr, mtvsr, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
*, *, *, *, *,
- *, p8v, p8v, p10")
+ *, p8v, p8v, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *")])
;; STD LD MR MT<SPR> MF<SPR> G-const
;; H-const F-const Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6620cdb7716..bd269369ca0 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -627,3 +627,7 @@ Generate (do not generate) the XXSPLTIW instruction.
mxxspltidp
Target Undocumented Mask(XXSPLTIDP) Var(rs6000_isa_flags)
Generate (do not generate) the XXSPLTIDP instruction.
+
+mxxsplti32dx
+Target Undocumented Mask(XXSPLTI32DX) Var(rs6000_isa_flags)
+Generate (do not generate) the XXSPLTI32DX instruction.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 44cd380bfa7..0efe77489b6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -386,6 +386,7 @@
UNSPEC_VDIVEU
UNSPEC_XXSPLTIDP
UNSPEC_XXSPLTI32DX
+ UNSPEC_XXSPLTI32DX_CONST
UNSPEC_XXPERMX
UNSPEC_XXEVAL
UNSPEC_XXBLEND
@@ -6346,6 +6347,77 @@
DONE;
})
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+ "TARGET_XXSPLTI32DX"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 2)
+ (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+ (set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 0)
+ (match_dup 4)
+ (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+ HOST_WIDE_INT high = 0, low = 0;
+
+ if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+ gcc_unreachable ();
+
+ /* If the low bits are 0 or all 1s, initialize that word first. This way we
+ can use a smaller XXSPLTIB instruction instead of the first XXSPLTI32DX. */
+ if (low == 0 || low == -1)
+ {
+ operands[2] = const1_rtx;
+ operands[3] = GEN_INT (low);
+ operands[4] = const0_rtx;
+ operands[5] = GEN_INT (high);
+ }
+ else
+ {
+ operands[2] = const0_rtx;
+ operands[3] = GEN_INT (high);
+ operands[4] = const1_rtx;
+ operands[5] = GEN_INT (low);
+ }
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")
+ (set_attr "num_insns" "2")
+ (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+ (unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+ (match_operand 2 "const_int_operand" "O,wM,n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "@
+ xxspltib %x0,0
+ xxspltib %x0,255
+ xxsplti32dx %x0,%1,%2"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+ (match_operand 2 "u1bit_cint_operand" "n")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "xxsplti32dx %x0,%2,%3"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")])
+
;; XXSPLTI32DX built-in function support.
(define_expand "xxsplti32dx_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=wa")
^ permalink raw reply [flat|nested] 4+ messages in thread
* [gcc(refs/users/meissner/heads/work049)] Generate XXSPLTI32DX for some float constants.
@ 2021-04-21 23:07 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2021-04-21 23:07 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:19e3fe1306d83fc988008b95ca77361960baee93
commit 19e3fe1306d83fc988008b95ca77361960baee93
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Apr 21 19:06:54 2021 -0400
Generate XXSPLTI32DX for some float constants.
This patch generates XXSPLTI32DX for SF/DF floating point constants that
cannot be generated with the XXSPLTIDP instruction. In addition, it adds
support for using XXSPLTI32DX to load up V2DF constants, where both constants
are the same.
gcc/
2021-04-21 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/constraints.md (eD): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): If the constant
can be loaded with XXSPLTI32DX, it is easy.
(xxsplti32dx_operand): New predicate.
(easy_vector_constant): If the constant can be loaded with
XXSPLTI32DX, it is easy.
* config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
declaration.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
support for -mxxsplti32dx.
(xxsplti32dx_constant_p): New function.
(output_vec_const_move): If the operand can be loaded with
XXSPLTI32DX, split it.
(rs6000_opt_masks): Add -mxxsplti32dx.
* config/rs6000/rs6000.md (movsf_hardfloat): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat32, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
(mov<mode>_hardfloat64, FMOVE64 iterator): Add support for
constants loaded with XXSPLTI32DX.
* config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
* config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
(XXSPLTI32DX): New mode iterator.
(xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
(xxsplti32dx_<mode>_first): New insn.
(xxsplti32dx_<mode>_second): New insn.
Diff:
---
gcc/config/rs6000/constraints.md | 6 +++
gcc/config/rs6000/predicates.md | 22 +++++++++++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 80 ++++++++++++++++++++++++++++++++++++++-
gcc/config/rs6000/rs6000.md | 67 ++++++++++++++++++++++----------
gcc/config/rs6000/rs6000.opt | 4 ++
gcc/config/rs6000/vsx.md | 75 ++++++++++++++++++++++++++++++++++++
7 files changed, 236 insertions(+), 20 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
(and (match_code "const_int")
(match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+ "A vector constant that can be loaded with XXSPLTI32DX instructions."
+ (match_operand 0 "xxsplti32dx_operand"))
+
;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
(define_constraint "eF"
"A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 8c461ba2b76..01c0fd62c0a 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
if (xxspltidp_operand (op, mode))
return 1;
+ /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+ be loaded with a pair of those instructions. */
+ if (xxsplti32dx_operand (op, mode))
+ return 1;
+
/* Otherwise consider floating point constants hard, so that the
constant gets pushed to memory during the early RTL phases. This
has the advantage that double precision constants that can be
@@ -684,6 +689,20 @@
return xxspltidp_constant_p (op, mode, &value);
})
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair of ISA 3.1 XXSPLTI32DX instructions. Do not return true if
+;; the value is 0.0 or it can be loaded with XXSPLTIDP, since that is easy to
+;; generate without using XXSPLTI32DX.
+(define_predicate "xxsplti32dx_operand"
+ (match_code "const_double,const_vector,vec_duplicate")
+{
+ if (op == CONST0_RTX (mode))
+ return false;
+
+ HOST_WIDE_INT high = 0, low = 0;
+ return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
@@ -703,6 +722,9 @@
if (xxspltidp_operand (op, mode))
return true;
+ if (xxsplti32dx_operand (op, mode))
+ return true;
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index e87a51f42de..27fa17aeed9 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
extern bool easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+ HOST_WIDE_INT *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d32437474f7..79ede7413a9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4481,6 +4481,9 @@ rs6000_option_override_internal (bool global_init_p)
if (TARGET_POWER10 && TARGET_VSX)
{
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTI32DX) == 0)
+ rs6000_isa_flags |= OPTION_MASK_XXSPLTI32DX;
+
if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIW;
@@ -4488,7 +4491,9 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIDP;
}
else
- rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW | OPTION_MASK_XXSPLTIDP);
+ rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW
+ | OPTION_MASK_XXSPLTIDP
+ | OPTION_MASK_XXSPLTI32DX);
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
@@ -6549,6 +6554,75 @@ xxspltidp_constant_p (rtx op,
return true;
}
+/* Return true if OP is of the given MODE and can be synthesized with a pair
+ of ISA 3.1 XXSPLTI32DX instructions. Return false if OP is zero or if it
+ can be synthesized with the single XXSPLTIDP instruction.
+
+ Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+ via HIGH_PTR and LOW_PTR. */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+ machine_mode mode,
+ HOST_WIDE_INT *high_ptr,
+ HOST_WIDE_INT *low_ptr)
+{
+ *high_ptr = *low_ptr = 0;
+
+ if (!TARGET_XXSPLTI32DX)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ if (op == CONST0_RTX (mode))
+ return false;
+
+ rtx element = op;
+ machine_mode element_mode = mode;
+ if (mode == V2DFmode)
+ {
+ /* Handle VEC_DUPLICATE and CONST_VECTOR. */
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ element = XEXP (op, 0);
+
+ else if (GET_CODE (op) == CONST_VECTOR)
+ {
+ element = CONST_VECTOR_ELT (op, 0);
+ if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
+ return false;
+ }
+
+ else
+ return false;
+
+ element_mode = GET_MODE_INNER (mode);
+ }
+
+ /* Handle floating point constants. */
+ if (element_mode == SFmode || element_mode == DFmode)
+ {
+ HOST_WIDE_INT xxspltidp_value = 0;
+
+ if (!CONST_DOUBLE_P (element))
+ return false;
+ /* Test the scalar ELEMENT in its own mode, not the vector MODE. */
+ if (xxspltidp_constant_p (element, element_mode, &xxspltidp_value))
+ return false;
+
+ long high_low[2];
+ const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (element);
+ REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+ /* The words are in target memory order; pick them by endianness. */
+ *high_ptr = high_low[BYTES_BIG_ENDIAN ? 0 : 1];
+ *low_ptr = high_low[BYTES_BIG_ENDIAN ? 1 : 0];
+ return true;
+ }
+
+ else
+ return false;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6597,6 +6671,9 @@ output_vec_const_move (rtx *operands)
|| xxspltidp_operand (vec, mode))
return "#";
+ if (xxsplti32dx_operand (vec, mode))
+ return "#";
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -24124,6 +24201,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "string", 0, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "xxsplti32dx", OPTION_MASK_XXSPLTI32DX, false, true },
{ "xxspltiw", OPTION_MASK_XXSPLTIW, false, true },
{ "xxspltidp", OPTION_MASK_XXSPLTIDP, false, true },
#ifdef OPTION_MASK_64BIT
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3d4dc820bdd..e1b27c9586c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7612,17 +7612,17 @@
;;
;; LWZ LFS LXSSP LXSSPX STFS STXSSP
;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP
-;; MR MT<x> MF<x> NOP XXSPLTIDP
+;; MR MT<x> MF<x> NOP XXSPLTIDP XXSPLTI32DX
(define_insn "movsf_hardfloat"
[(set (match_operand:SF 0 "nonimmediate_operand"
"=!r, f, v, wa, m, wY,
Z, m, wa, !r, f, wa,
- !r, *c*l, !r, *h, wa")
+ !r, *c*l, !r, *h, wa, wa")
(match_operand:SF 1 "input_operand"
"m, m, wY, Z, f, v,
wa, r, j, j, f, wa,
- r, r, *h, 0, eF"))]
+ r, r, *h, 0, eF, eD"))]
"(register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& TARGET_HARD_FLOAT
@@ -7645,19 +7645,28 @@
mt%0 %1
mf%1 %0
nop
+ #
#"
[(set_attr "type"
"load, fpload, fpload, fpload, fpstore, fpstore,
fpstore, store, veclogical, integer, fpsimple, fpsimple,
- *, mtjmpr, mfjmpr, *, vecperm")
+ *, mtjmpr, mfjmpr, *, vecperm, vecperm")
(set_attr "isa"
"*, *, p9v, p8v, *, p9v,
p8v, *, *, *, *, *,
- *, *, *, *, p10")
+ *, *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *, *,
*, *, *, *, *, *,
- *, *, *, *, yes")])
+ *, *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, 2")])
;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ
;; FMR MR MT%0 MF%1 NOP
@@ -7917,18 +7926,18 @@
;; STFD LFD FMR LXSD STXSD
;; LXSD STXSD XXLOR XXLXOR GPR<-0
-;; LWZ STW MR XXSPLTIDP
+;; LWZ STW MR XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat32"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
- Y, r, !r, wa")
+ Y, r, !r, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
- r, Y, r, eF"))]
+ r, Y, r, eF, eD"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -7946,24 +7955,33 @@
#
#
#
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, two,
- store, load, two, vecperm")
+ store, load, two, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "length"
"*, *, *, *, *,
*, *, *, *, 8,
- 8, 8, 8, *")
+ 8, 8, 8, *, *")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
- *, *, *, p10")
+ *, *, *, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")])
;; STW LWZ MR G-const H-const F-const
@@ -7990,19 +8008,19 @@
;; STFD LFD FMR LXSD STXSD
;; LXSDX STXSDX XXLOR XXLXOR LI 0
;; STD LD MR MT{CTR,LR} MF{CTR,LR}
-;; NOP MFVSRD MTVSRD XXSPLTIDP
+;; NOP MFVSRD MTVSRD XXSPLTIDP XXSPLTI32DX
(define_insn "*mov<mode>_hardfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
"=m, d, d, <f64_p9>, wY,
<f64_av>, Z, <f64_vsx>, <f64_vsx>, !r,
YZ, r, !r, *c*l, !r,
- *h, r, <f64_dm>, wa")
+ *h, r, <f64_dm>, wa, wa")
(match_operand:FMOVE64 1 "input_operand"
"d, m, d, wY, <f64_p9>,
Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>,
r, YZ, r, r, *h,
- 0, <f64_dm>, r, eF"))]
+ 0, <f64_dm>, r, eF, eD"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8025,23 +8043,34 @@
nop
mfvsrd %0,%x1
mtvsrd %x0,%1
+ #
#"
[(set_attr "type"
"fpstore, fpload, fpsimple, fpload, fpstore,
fpload, fpstore, veclogical, veclogical, integer,
store, load, *, mtjmpr, mfjmpr,
- *, mfvsr, mtvsr, vecperm")
+ *, mfvsr, mtvsr, vecperm, vecperm")
(set_attr "size" "64")
(set_attr "isa"
"*, *, *, p9v, p9v,
p7v, p7v, *, *, *,
*, *, *, *, *,
- *, p8v, p8v, p10")
+ *, p8v, p8v, p10, p10")
(set_attr "prefixed"
"*, *, *, *, *,
*, *, *, *, *,
*, *, *, *, *,
- *, *, *, yes")])
+ *, *, *, yes, yes")
+ (set_attr "max_prefixed_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")
+ (set_attr "num_insns"
+ "*, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, 2")])
;; STD LD MR MT<SPR> MF<SPR> G-const
;; H-const F-const Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6620cdb7716..bd269369ca0 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -627,3 +627,7 @@ Generate (do not generate) the XXSPLTIW instruction.
mxxspltidp
Target Undocumented Mask(XXSPLTIDP) Var(rs6000_isa_flags)
Generate (do not generate) the XXSPLTIDP instruction.
+
+mxxsplti32dx
+Target Undocumented Mask(XXSPLTI32DX) Var(rs6000_isa_flags)
+Generate (do not generate) the XXSPLTI32DX instruction.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 44cd380bfa7..22b12fd238e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -386,6 +386,7 @@
UNSPEC_VDIVEU
UNSPEC_XXSPLTIDP
UNSPEC_XXSPLTI32DX
+ UNSPEC_XXSPLTI32DX_CONST
UNSPEC_XXPERMX
UNSPEC_XXEVAL
UNSPEC_XXBLEND
@@ -6346,6 +6347,80 @@
DONE;
})
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+ "TARGET_XXSPLTI32DX"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 2)
+ (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+ (set (match_dup 0)
+ (unspec:XXSPLTI32DX [(match_dup 0)
+ (match_dup 4)
+ (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+ HOST_WIDE_INT high = 0, low = 0;
+
+ if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+ gcc_unreachable ();
+
+ if (!BYTES_BIG_ENDIAN)
+ std::swap (high, low);
+
+ /* If the low bits are 0 or all 1s, initialize that word first. This way we
+ can use a smaller XXSPLTIB instruction instead of the first XXSPLTI32DX. */
+ if (low == 0 || low == -1)
+ {
+ operands[2] = const1_rtx;
+ operands[3] = GEN_INT (low);
+ operands[4] = const0_rtx;
+ operands[5] = GEN_INT (high);
+ }
+ else
+ {
+ operands[2] = const0_rtx;
+ operands[3] = GEN_INT (high);
+ operands[4] = const1_rtx;
+ operands[5] = GEN_INT (low);
+ }
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")
+ (set_attr "num_insns" "2")
+ (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+ (unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+ (match_operand 2 "const_int_operand" "O,wM,n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "@
+ xxspltib %x0,0
+ xxspltib %x0,255
+ xxsplti32dx %x0,%1,%2"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+ [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+ (unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+ (match_operand 2 "u1bit_cint_operand" "n")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_XXSPLTI32DX_CONST))]
+ "TARGET_XXSPLTI32DX"
+ "xxsplti32dx %x0,%2,%3"
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "yes")])
+
;; XXSPLTI32DX built-in function support.
(define_expand "xxsplti32dx_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=wa")
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-04-22 3:05 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-21 22:59 [gcc(refs/users/meissner/heads/work049)] Generate XXSPLTI32DX for some float constants Michael Meissner
2021-04-21 23:07 Michael Meissner
2021-04-22 1:28 Michael Meissner
2021-04-22 3:05 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).