From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id D52E43858C3B; Wed, 15 Sep 2021 06:40:29 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D52E43858C3B Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work068)] Generate XXSPLTIDP on power10. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work068 X-Git-Oldrev: dbb87a42b0c4f67782e8dd30dc68ed669bf2d369 X-Git-Newrev: 422e4d158895875d2049f11262643dec9ce292c6 Message-Id: <20210915064029.D52E43858C3B@sourceware.org> Date: Wed, 15 Sep 2021 06:40:29 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Sep 2021 06:40:29 -0000 https://gcc.gnu.org/g:422e4d158895875d2049f11262643dec9ce292c6 commit 422e4d158895875d2049f11262643dec9ce292c6 Author: Michael Meissner Date: Wed Sep 15 02:40:08 2021 -0400 Generate XXSPLTIDP on power10. This patch implements XXSPLTIDP support for SF, DF, and DI scalar constants and V2DF and V2DI vector constants. The XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector of two DFmode constants. The immediate is in SFmode format, so only constants that fit as SFmode values can be loaded with XXSPLTIDP. I added a new constraint (eF) to match constants that can be loaded with the XXSPLTIDP instruction. I have added a temporary switch (-mxxspltidp) to control whether or not the XXSPLTIDP instruction is generated. I added 5 new tests to test loading up SF/DF/DI scalar and V2DI/V2DF vector constants. 2021-09-15 Michael Meissner gcc/ * config/rs6000/constraints.md (eF): New constraint. 
* config/rs6000/predicates.md (easy_fp_constant): If we can load the scalar constant with XXSPLTIDP, the floating point constant is easy. (easy_fp_constant_sfmode): New predicate. (easy_vector_constant): If we can generate XXSPLTIDP, mark the vector constant as easy. * config/rs6000/rs6000-protos.h (xxspltidp_constant_immediate): New declaration. (prefixed_xxsplti_p): Likewise. * config/rs6000/rs6000.c (xxspltidp_constant_immediate): New function. (output_vec_const_move): Add support for XXSPLTIDP. (prefixed_xxsplti_p): New function. * config/rs6000/rs6000.md (prefixed attribute): Add support for permute prefixed instructions. (movsf_hardfloat): Add XXSPLTIDP support. (mov<mode>_hardfloat32, FMOVE64 iterator): Likewise. (mov<mode>_hardfloat64, FMOVE64 iterator): Likewise. (movdi_internal32): Likewise. (movdi_internal64): Likewise. * config/rs6000/rs6000.opt (-mxxspltidp): New switch. * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add XXSPLTIDP support. (vsx_mov<mode>_32bit): Likewise. (XXSPLTIDP): New mode iterator. (xxspltidp_<mode>_inst): Replace xxspltidp_v2df_inst with an iterated form that also does SFmode, DFmode, DImode, and V2DImode. (xxspltidp_<mode>_internal): New insn and splits. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document eF. gcc/testsuite/ * gcc.target/powerpc/vec-splat-constant-df.c: New test. * gcc.target/powerpc/vec-splat-constant-sf.c: New test. * gcc.target/powerpc/vec-splat-constant-v2df.c: New test. 
Diff: --- gcc/config/rs6000/constraints.md | 5 + gcc/config/rs6000/predicates.md | 110 +++++++++++++++++++++ gcc/config/rs6000/rs6000-protos.h | 2 + gcc/config/rs6000/rs6000.c | 94 ++++++++++++++++++ gcc/config/rs6000/rs6000.md | 61 ++++++++---- gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/vsx.md | 43 +++++++- gcc/doc/md.texi | 3 + .../gcc.target/powerpc/vec-splat-constant-df.c | 60 +++++++++++ .../gcc.target/powerpc/vec-splat-constant-sf.c | 60 +++++++++++ .../gcc.target/powerpc/vec-splat-constant-v2df.c | 64 ++++++++++++ 11 files changed, 483 insertions(+), 23 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index c8cff1a3038..1f4f2f6ad01 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -208,6 +208,11 @@ (and (match_code "const_int") (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000"))) +;; DI/SF/DF/V2DI/V2DF scalar or vector constant that can be loaded with XXSPLTIDP +(define_constraint "eF" + "A constant that can be loaded with the XXSPLTIDP instruction." + (match_operand 0 "easy_fp_constant_sfmode")) + ;; 34-bit signed integer constant (define_constraint "eI" "A signed 34-bit integer constant if prefixed instructions are supported." diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 956e42bc514..105629c0344 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -601,6 +601,11 @@ if (TARGET_VSX && op == CONST0_RTX (mode)) return 1; + /* If we have the ISA 3.1 XXSPLTIDP instruction, see if the constant can + be loaded with that instruction. */ + if (easy_fp_constant_sfmode (op, mode)) + return 1; + /* Otherwise consider floating point constants hard, so that the constant gets pushed to memory during the early RTL phases. 
This has the advantage that double precision constants that can be @@ -609,6 +614,108 @@ return 0; }) +;; Return 1 if the operand is a constant that can be loaded via the XXSPLTIDP +;; instruction, which takes a SFmode value and produces a V2DFmode result. +;; This predicate matches also DImode constants that can be expressed as DFmode +;; values and vector constants produced either with CONST_VECTOR or +;; VEC_DUPLICATE. +(define_predicate "easy_fp_constant_sfmode" + (match_code "const_int,const_double,const_vector,vec_duplicate") +{ + /* Can we do the XXSPLTIDP instruction? */ + if (!TARGET_XXSPLTIDP || !TARGET_PREFIXED || !TARGET_VSX) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + /* Handle vector constants and duplication. */ + rtx element = op; + if (mode == V2DFmode || mode == V2DImode) + { + if (CONST_VECTOR_P (op)) + { + element = CONST_VECTOR_ELT (op, 0); + if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1))) + return false; + + mode = GET_MODE_INNER (mode); + } + + else if (GET_CODE (op) == VEC_DUPLICATE) + { + element = XEXP (op, 0); + mode = GET_MODE (element); + } + + else + return false; + } + + /* Don't return true for 0.0 or 0 since that is easy to create without + XXSPLTIDP. */ + if (element == CONST0_RTX (mode)) + return false; + + /* Handle DImode/V2DImode by creating a DF value from it. */ + const REAL_VALUE_TYPE *rv; + REAL_VALUE_TYPE rv_type; + + if (CONST_INT_P (element)) + { + HOST_WIDE_INT df_value = INTVAL (element); + long df_words[2]; + + /* Stay away from values that are DFmode NaNs or subnormal values. + The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for + the exponent, and 52 bits for the mantissa. NaN values have the + exponent set to all 1 bits. Subnormal numbers have the exponent + all 0 bits, and the mantissa non-zero. If the value is subnormal, + then the hidden bit in the mantissa is not set. 
*/ + + int exponent = (df_value >> 52) & 0x7ff; + HOST_WIDE_INT mantissa = df_value & HOST_WIDE_INT_C (0x1fffffffffffff); + if (exponent == 0 && mantissa != 0) /* subnormal. */ + return false; + + if (exponent == 0x7ff) /* NaN or infinity. */ + return false; + + df_words[0] = (df_value >> 32) & 0xffffffff; + df_words[1] = df_value & 0xffffffff; + + /* real_from_target takes the target words in little endian order. */ + if (BYTES_BIG_ENDIAN) + std::swap (df_words[0], df_words[1]); + + real_from_target (&rv_type, df_words, DFmode); + rv = &rv_type; + } + + /* Handle SFmode/DFmode constants. */ + else if (CONST_DOUBLE_P (element) && (mode == SFmode || mode == DFmode)) + rv = CONST_DOUBLE_REAL_VALUE (element); + + else + return false; + + /* Validate that the number can be stored as a SFmode value. */ + if (!exact_real_truncate (SFmode, rv)) + return false; + + /* Validate that the number is not a SFmode subnormal value (exponent is 0, + mantissa field is non-zero) which is undefined for the XXSPLTIDP + instruction. */ + long sf_value; + real_to_target (&sf_value, rv, SFmode); + + if (((sf_value & 0x7F800000) == 0) && ((sf_value & 0x7FFFFF) != 0)) + return false; + + return true; +}) + + ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction. 
@@ -653,6 +760,9 @@ if (zero_constant (op, mode) || all_ones_constant (op, mode)) return true; + if (easy_fp_constant_sfmode (op, mode)) + return true; + if (TARGET_P9_VECTOR && xxspltib_constant_p (op, mode, &num_insns, &value)) return true; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 14f6b313105..2b765cf9e7c 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); +extern HOST_WIDE_INT xxspltidp_constant_immediate (rtx, machine_mode); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); @@ -198,6 +199,7 @@ enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode); extern bool prefixed_load_p (rtx_insn *); extern bool prefixed_store_p (rtx_insn *); extern bool prefixed_paddi_p (rtx_insn *); +extern bool prefixed_xxsplti_p (rtx_insn *); extern void rs6000_asm_output_opcode (FILE *); extern void output_pcrel_opt_reloc (rtx); extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 030d41bb5e4..a325e753a14 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6590,6 +6590,60 @@ xxspltib_constant_p (rtx op, return true; } +/* Return the immediate value used in the XXSPLTIDP instruction. */ + +HOST_WIDE_INT +xxspltidp_constant_immediate (rtx op, machine_mode mode) +{ + long ret; + + gcc_assert (easy_fp_constant_sfmode (op, mode)); + + /* Handle vectors. 
*/ + if (CONST_VECTOR_P (op)) + { + op = CONST_VECTOR_ELT (op, 0); + mode = GET_MODE_INNER (mode); + } + + else if (GET_CODE (op) == VEC_DUPLICATE) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + /* Handle DImode/V2DImode by creating a DF value from it and then converting + the DFmode value to SFmode. */ + if (CONST_INT_P (op)) + { + HOST_WIDE_INT df_value = INTVAL (op); + long df_words[2]; + + df_words[0] = (df_value >> 32) & 0xffffffff; + df_words[1] = df_value & 0xffffffff; + + /* real_from_target takes input in little-endian fashion. */ + if (BYTES_BIG_ENDIAN) + std::swap (df_words[0], df_words[1]); + + REAL_VALUE_TYPE r; + real_from_target (&r, &df_words[0], DFmode); + real_to_target (&ret, &r, SFmode); + } + + /* For floating point constants, convert to SFmode. */ + else if (CONST_DOUBLE_P (op) && (mode == SFmode || mode == DFmode)) + { + const REAL_VALUE_TYPE *rv = CONST_DOUBLE_REAL_VALUE (op); + real_to_target (&ret, rv, SFmode); + } + + else + gcc_unreachable (); + + return ret; +} + const char * output_vec_const_move (rtx *operands) { @@ -6634,6 +6688,13 @@ output_vec_const_move (rtx *operands) gcc_unreachable (); } + if (easy_fp_constant_sfmode (vec, mode)) + { + long xxspltidp_value = xxspltidp_constant_immediate (vec, mode); + operands[2] = GEN_INT (xxspltidp_value); + return "xxspltidp %x0,%2"; + } + if (TARGET_P9_VECTOR && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) { @@ -26377,6 +26438,39 @@ prefixed_paddi_p (rtx_insn *insn) return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); } +/* Whether a permute type instruction is a prefixed XXSPLTI* instruction. + This is called from the prefixed attribute processing. 
*/ + +bool +prefixed_xxsplti_p (rtx_insn *insn) +{ + rtx set = single_set (insn); + if (!set) + return false; + + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + machine_mode mode = GET_MODE (dest); + + if (!REG_P (dest) && !SUBREG_P (dest)) + return false; + + switch (mode) + { + case DImode: + case DFmode: + case SFmode: + case V2DImode: + case V2DFmode: + return easy_fp_constant_sfmode (src, mode); + + default: + break; + } + + return false; +} + /* Whether the next instruction needs a 'p' prefix issued before the instruction is printed out. */ static bool prepend_p_to_next_insn; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6bec2bddbde..ac7ff217e00 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -314,6 +314,11 @@ (eq_attr "type" "integer,add") (if_then_else (match_test "prefixed_paddi_p (insn)") + (const_string "yes") + (const_string "no")) + + (eq_attr "type" "vecperm") + (if_then_else (match_test "prefixed_xxsplti_p (insn)") (const_string "yes") (const_string "no"))] @@ -7759,17 +7764,17 @@ ;; ;; LWZ LFS LXSSP LXSSPX STFS STXSSP ;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP -;; MR MT MF NOP +;; MR MT MF NOP XXSPLTIDP (define_insn "movsf_hardfloat" [(set (match_operand:SF 0 "nonimmediate_operand" "=!r, f, v, wa, m, wY, Z, m, wa, !r, f, wa, - !r, *c*l, !r, *h") + !r, *c*l, !r, *h, wa") (match_operand:SF 1 "input_operand" "m, m, wY, Z, f, v, wa, r, j, j, f, wa, - r, r, *h, 0"))] + r, r, *h, 0, eF"))] "(register_operand (operands[0], SFmode) || register_operand (operands[1], SFmode)) && TARGET_HARD_FLOAT @@ -7791,15 +7796,16 @@ mr %0,%1 mt%0 %1 mf%1 %0 - nop" + nop + #" [(set_attr "type" "load, fpload, fpload, fpload, fpstore, fpstore, fpstore, store, veclogical, integer, fpsimple, fpsimple, - *, mtjmpr, mfjmpr, *") + *, mtjmpr, mfjmpr, *, vecperm") (set_attr "isa" "*, *, p9v, p8v, *, p9v, p8v, *, *, *, *, *, - *, *, *, *")]) + *, *, *, *, p10")]) ;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ ;; FMR MR MT%0 
MF%1 NOP @@ -8059,18 +8065,18 @@ ;; STFD LFD FMR LXSD STXSD ;; LXSD STXSD XXLOR XXLXOR GPR<-0 -;; LWZ STW MR +;; LWZ STW MR XXSPLTIDP (define_insn "*mov_hardfloat32" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m, d, d, , wY, , Z, , , !r, - Y, r, !r") + Y, r, !r, wa") (match_operand:FMOVE64 1 "input_operand" "d, m, d, wY, , Z, , , , , - r, Y, r"))] + r, Y, r, eF"))] "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -8087,20 +8093,21 @@ # # # + # #" [(set_attr "type" "fpstore, fpload, fpsimple, fpload, fpstore, fpload, fpstore, veclogical, veclogical, two, - store, load, two") + store, load, two, vecperm") (set_attr "size" "64") (set_attr "length" "*, *, *, *, *, *, *, *, *, 8, - 8, 8, 8") + 8, 8, 8, *") (set_attr "isa" "*, *, *, p9v, p9v, p7v, p7v, *, *, *, - *, *, *")]) + *, *, *, p10")]) ;; STW LWZ MR G-const H-const F-const @@ -8127,19 +8134,19 @@ ;; STFD LFD FMR LXSD STXSD ;; LXSDX STXSDX XXLOR XXLXOR LI 0 ;; STD LD MR MT{CTR,LR} MF{CTR,LR} -;; NOP MFVSRD MTVSRD +;; NOP MFVSRD MTVSRD XXSPLTIDP (define_insn "*mov_hardfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m, d, d, , wY, , Z, , , !r, YZ, r, !r, *c*l, !r, - *h, r, ") + *h, r, , wa") (match_operand:FMOVE64 1 "input_operand" "d, m, d, wY, , Z, , , , , r, YZ, r, r, *h, - 0, , r"))] + 0, , r, eF"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -8161,18 +8168,19 @@ mf%1 %0 nop mfvsrd %0,%x1 - mtvsrd %x0,%1" + mtvsrd %x0,%1 + #" [(set_attr "type" "fpstore, fpload, fpsimple, fpload, fpstore, fpload, fpstore, veclogical, veclogical, integer, store, load, *, mtjmpr, mfjmpr, - *, mfvsr, mtvsr") + *, mfvsr, mtvsr, vecperm") (set_attr "size" "64") (set_attr "isa" "*, *, *, p9v, p9v, p7v, p7v, *, *, *, *, *, *, *, *, - *, p8v, p8v")]) + *, p8v, p8v, p10")]) ;; STD LD MR MT MF G-const ;; H-const F-const Special @@ -8206,6 +8214,7 @@ 
(set_attr "length" "*, *, *, *, *, 8, 12, 16, *")]) + (define_expand "mov" [(set (match_operand:FMOVE128 0 "general_operand") @@ -9220,18 +9229,21 @@ ;; a gpr into a fpr instead of reloading an invalid 'Y' address ;; GPR store GPR load GPR move FPR store FPR load FPR move +;; XXSPLTIDP ;; GPR const AVX store AVX store AVX load AVX load VSX move ;; P9 0 P9 -1 AVX 0/-1 VSX 0 VSX -1 P9 const -;; AVX const +;; AVX const (define_insn "*movdi_internal32" [(set (match_operand:DI 0 "nonimmediate_operand" "=Y, r, r, m, ^d, ^d, + ^wa, r, wY, Z, ^v, $v, ^wa, wa, wa, v, wa, *i, v, v") (match_operand:DI 1 "input_operand" "r, Y, r, ^d, m, ^d, + eF, IJKnF, ^v, $v, wY, Z, ^wa, Oj, wM, OjwM, Oj, wM, wS, wB"))] @@ -9246,6 +9258,7 @@ lfd%U1%X1 %0,%1 fmr %0,%1 # + # stxsd %1,%0 stxsdx %x1,%y0 lxsd %0,%1 @@ -9260,17 +9273,20 @@ #" [(set_attr "type" "store, load, *, fpstore, fpload, fpsimple, + vecperm, *, fpstore, fpstore, fpload, fpload, veclogical, vecsimple, vecsimple, vecsimple, veclogical,veclogical,vecsimple, vecsimple") (set_attr "size" "64") (set_attr "length" "8, 8, 8, *, *, *, + *, 16, *, *, *, *, *, *, *, *, *, *, 8, *") (set_attr "isa" "*, *, *, *, *, *, + p10, *, p9v, p7v, p9v, p7v, *, p9v, p9v, p7v, *, *, p7v, p7v")]) @@ -9306,6 +9322,7 @@ }) ;; GPR store GPR load GPR move +;; XXSPLTIDP ;; GPR li GPR lis GPR pli GPR # ;; FPR store FPR load FPR move ;; AVX store AVX store AVX load AVX load VSX move @@ -9316,6 +9333,7 @@ (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" "=YZ, r, r, + $wa, r, r, r, r, m, ^d, ^d, wY, Z, $v, $v, ^wa, @@ -9325,6 +9343,7 @@ ?r, ?wa") (match_operand:DI 1 "input_operand" "r, YZ, r, + eF, I, L, eI, nF, ^d, m, ^d, ^v, $v, wY, Z, ^wa, @@ -9339,6 +9358,7 @@ std%U0%X0 %1,%0 ld%U1%X1 %0,%1 mr %0,%1 + # li %0,%1 lis %0,%v1 li %0,%1 @@ -9365,6 +9385,7 @@ mtvsrd %x0,%1" [(set_attr "type" "store, load, *, + vecperm, *, *, *, *, fpstore, fpload, fpsimple, fpstore, fpstore, fpload, fpload, veclogical, @@ -9375,6 +9396,7 @@ 
(set_attr "size" "64") (set_attr "length" "*, *, *, + *, *, *, *, 20, *, *, *, *, *, *, *, *, @@ -9384,6 +9406,7 @@ *, *") (set_attr "isa" "*, *, *, + p10, *, *, p10, *, *, *, *, p9v, p7v, p9v, p7v, *, diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 3753de19557..3e096988a73 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -643,3 +643,7 @@ Enable instructions that guard against return-oriented programming attacks. mprivileged Target Var(rs6000_privileged) Init(0) Generate code that will run in privileged state. + +mxxspltidp +Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save +Generate (do not generate) XXSPLTIDP instructions. diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index bf033e31c1c..3172c4cbb69 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1191,16 +1191,19 @@ ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move. ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) +;; XXSPLTIDP ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) (define_insn "vsx_mov_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, r, we, ?wQ, + wa, ?&r, ??r, ??Y, , wa, v, ?wa, v, , wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, we, r, r, + eF, wQ, Y, r, r, wE, jwM, ?jwM, W, , v, wZ"))] @@ -1212,36 +1215,44 @@ } [(set_attr "type" "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, + vecperm, store, load, store, *, vecsimple, vecsimple, vecsimple, *, *, vecstore, vecload") (set_attr "num_insns" "*, *, *, 2, *, 2, + *, 2, 2, 2, 2, *, *, *, 5, 2, *, *") (set_attr "max_prefixed_insns" "*, *, *, *, *, 2, + *, 2, 2, 2, 2, *, *, *, *, *, *, *") (set_attr "length" "*, *, *, 8, *, 8, + *, 8, 8, 8, 8, *, *, *, 20, 8, *, *") (set_attr "isa" ", , , *, *, *, + p10, *, *, *, *, p9v, *, , *, *, *, *")]) ;; VSX store VSX load VSX move GPR load GPR store GPR move +;; 
XXSPLTIDP ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const ;; LVX (VMX) STVX (VMX) (define_insn "*vsx_mov_32bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, ??r, ??Y, , + wa, wa, v, ?wa, v, , wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, Y, r, r, + eF, wE, jwM, ?jwM, W, , v, wZ"))] @@ -1253,14 +1264,17 @@ } [(set_attr "type" "vecstore, vecload, vecsimple, load, store, *, + vecperm, vecsimple, vecsimple, vecsimple, *, *, vecstore, vecload") (set_attr "length" "*, *, *, 16, 16, 16, + *, *, *, *, 20, 16, *, *") (set_attr "isa" ", , , *, *, *, + p10, p9v, *, , *, *, *, *")]) @@ -6449,15 +6463,36 @@ DONE; }) -(define_insn "xxspltidp_v2df_inst" - [(set (match_operand:V2DF 0 "register_operand" "=wa") - (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")] - UNSPEC_XXSPLTIDP))] +(define_mode_iterator XXSPLTIDP [DI SF DF V2DF V2DI]) + +(define_insn "xxspltidp__inst" + [(set (match_operand:XXSPLTIDP 0 "register_operand" "=wa") + (unspec:XXSPLTIDP [(match_operand:SI 1 "c32bit_cint_operand" "n")] + UNSPEC_XXSPLTIDP))] "TARGET_POWER10" "xxspltidp %x0,%1" [(set_attr "type" "vecperm") (set_attr "prefixed" "yes")]) +;; Generate the XXSPLTIDP instruction to support SFmode, DFmode, and DImode +;; scalar constants and V2DF and V2DI vector constants where both elements are +;; the same. The constant has to be expressible as a SFmode constant that is +;; not a SFmode denormal value. 
+(define_insn_and_split "*xxspltidp_<mode>_internal" + [(set (match_operand:XXSPLTIDP 0 "vsx_register_operand" "=wa") + (match_operand:XXSPLTIDP 1 "easy_fp_constant_sfmode" "eF"))] + "TARGET_POWER10" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:XXSPLTIDP [(match_dup 2)] UNSPEC_XXSPLTIDP))] +{ + long immediate = xxspltidp_constant_immediate (operands[1], <MODE>mode); + operands[2] = GEN_INT (immediate); +} + [(set_attr "type" "vecperm") + (set_attr "prefixed" "yes")]) + ;; XXSPLTI32DX built-in function support (define_expand "xxsplti32dx_v4si" [(set (match_operand:V4SI 0 "register_operand" "=wa") diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 2b41cb7fb7b..370a46ed5fc 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3333,6 +3333,9 @@ The integer constant zero. A constant whose negation is a signed 16-bit constant. @end ifset +@item eF +A constant that can be loaded with the XXSPLTIDP instruction. + @item eI A signed 34-bit integer constant if prefixed instructions are supported. diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c new file mode 100644 index 00000000000..8f6e176f9af --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <math.h> + +/* Test generating DFmode constants with the ISA 3.1 (power10) XXSPLTIDP + instruction. */ + +double +scalar_double_0 (void) +{ + return 0.0; /* XXSPLTIB or XXLXOR. */ +} + +double +scalar_double_1 (void) +{ + return 1.0; /* XXSPLTIDP. */ +} + +#ifndef __FAST_MATH__ +double +scalar_double_m0 (void) +{ + return -0.0; /* XXSPLTIDP. */ +} + +double +scalar_double_nan (void) +{ + return __builtin_nan (""); /* XXSPLTIDP. */ +} + +double +scalar_double_inf (void) +{ + return __builtin_inf (); /* XXSPLTIDP. */ +} + +double +scalar_double_m_inf (void) /* XXSPLTIDP. 
*/ +{ + return - __builtin_inf (); +} +#endif + +double +scalar_double_pi (void) +{ + return M_PI; /* PLFD. */ +} + +double +scalar_double_denorm (void) +{ + return 0x1p-149f; /* PLFD. */ +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c new file mode 100644 index 00000000000..72504bdfbbd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <math.h> + +/* Test generating SFmode constants with the ISA 3.1 (power10) XXSPLTIDP + instruction. */ + +float +scalar_float_0 (void) +{ + return 0.0f; /* XXSPLTIB or XXLXOR. */ +} + +float +scalar_float_1 (void) +{ + return 1.0f; /* XXSPLTIDP. */ +} + +#ifndef __FAST_MATH__ +float +scalar_float_m0 (void) +{ + return -0.0f; /* XXSPLTIDP. */ +} + +float +scalar_float_nan (void) +{ + return __builtin_nanf (""); /* XXSPLTIDP. */ +} + +float +scalar_float_inf (void) +{ + return __builtin_inff (); /* XXSPLTIDP. */ +} + +float +scalar_float_m_inf (void) /* XXSPLTIDP. */ +{ + return - __builtin_inff (); +} +#endif + +float +scalar_float_pi (void) +{ + return (float)M_PI; /* XXSPLTIDP. */ +} + +float +scalar_float_denorm (void) +{ + return 0x1p-149f; /* PLFS. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c new file mode 100644 index 00000000000..82ffc86f8aa --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <math.h> + +/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP + instruction. */ + +vector double +v2df_double_0 (void) +{ + return (vector double) { 0.0, 0.0 }; /* XXSPLTIB or XXLXOR. */ +} + +vector double +v2df_double_1 (void) +{ + return (vector double) { 1.0, 1.0 }; /* XXSPLTIDP. */ +} + +#ifndef __FAST_MATH__ +vector double +v2df_double_m0 (void) +{ + return (vector double) { -0.0, -0.0 }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_nan (void) +{ + return (vector double) { __builtin_nan (""), + __builtin_nan ("") }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_inf (void) +{ + return (vector double) { __builtin_inf (), + __builtin_inf () }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_m_inf (void) +{ + return (vector double) { - __builtin_inf (), + - __builtin_inf () }; /* XXSPLTIDP. */ +} +#endif + +vector double +v2df_double_pi (void) +{ + return (vector double) { M_PI, M_PI }; /* PLVX. */ +} + +vector double +v2df_double_denorm (void) +{ + return (vector double) { (double)0x1p-149f, + (double)0x1p-149f }; /* PLVX. */ +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */