From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id AE332394D826; Thu, 15 Apr 2021 17:48:48 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AE332394D826 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work048)] Add XXSPLTIW support. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work048 X-Git-Oldrev: 0105079f182ea12d8217ac498ec1408d8be786a5 X-Git-Newrev: ad6b0b39b3ffc5f9b21888dcc427a12fafa468c2 Message-Id: <20210415174848.AE332394D826@sourceware.org> Date: Thu, 15 Apr 2021 17:48:48 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 15 Apr 2021 17:48:48 -0000 https://gcc.gnu.org/g:ad6b0b39b3ffc5f9b21888dcc427a12fafa468c2 commit ad6b0b39b3ffc5f9b21888dcc427a12fafa468c2 Author: Michael Meissner Date: Thu Apr 15 13:48:29 2021 -0400 Add XXSPLTIW support. This patch adds support to automatically generate the ISA 3.1 XXSPLTIW instruction for V8HImode, V4SImode, and V4SFmode vectors. It does this by adding support for vector constants that can be used, and adding a VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction. I rewrote the XXSPLTW built-in functions to use VEC_DUPLICATE instead of UNSPEC. Because the XXSPLTIW instruction can set any VSX register, I moved the insns from altivec.md to vsx.md. gcc/ 2021-04-15 Michael Meissner * config/rs6000/altivec.md (UNSPEC_XXSPLTIW): Delete. (xxspltiw_v4si): Move to vsx.md and rewrite. (xxspltiw_v4sf): Move to vsx.md and rewrite. (xxspltiw_v4sf_inst): Delete. * config/rs6000/predicates.md (xxspltiw_operand): New predicate. (easy_vector_constant): If we can use XXSPLTIW, the vector constant is easy. 
* config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add -mxxspltiw support. (POWERPC_MASKS): Add -mxxspltiw support. * config/rs6000/rs6000.c (rs6000_option_override_internal): Add -mxxspltiw support. (xxspltib_constant_p): If we can generate XXSPLTIW, don't generate a XXSPLTIB and an extend instruction. (output_vec_const_move): Add support for XXSPLTIW vector constants. (rs6000_opt_masks): Add -mxxspltiw. * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch. * config/rs6000/vsx.md (xxspltiw_v8hi): New insn. (xxspltiw_v4si): Move from altivec.md and reimplement to use VEC_DUPLICATE. (xxspltiw_v4sf): Move from altivec.md and reimplement to use VEC_DUPLICATE. (XXSPLTIW): New mode iterator. (XXSPLTIW splitter): New insn splitter for XXSPLTIW. Diff: --- gcc/config/rs6000/altivec.md | 30 ----------------- gcc/config/rs6000/predicates.md | 29 +++++++++++++++++ gcc/config/rs6000/rs6000-cpus.def | 7 ++-- gcc/config/rs6000/rs6000.c | 18 +++++++++-- gcc/config/rs6000/rs6000.opt | 4 +++ gcc/config/rs6000/vsx.md | 68 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 36 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1351dafbc41..708296cb14d 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -176,7 +176,6 @@ UNSPEC_VSTRIL UNSPEC_SLDB UNSPEC_SRDB - UNSPEC_XXSPLTIW UNSPEC_XXSPLTID UNSPEC_XXSPLTI32DX UNSPEC_XXBLEND @@ -820,35 +819,6 @@ "vsdbi %0,%1,%2,%3" [(set_attr "type" "vecsimple")]) -(define_insn "xxspltiw_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=wa") - (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")] - UNSPEC_XXSPLTIW))] - "TARGET_POWER10" - "xxspltiw %x0,%1" - [(set_attr "type" "vecsimple") - (set_attr "prefixed" "yes")]) - -(define_expand "xxspltiw_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=wa") - (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")] - UNSPEC_XXSPLTIW))] - "TARGET_POWER10" -{ - long long value = 
rs6000_const_f32_to_i32 (operands[1]); - emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value))); - DONE; -}) - -(define_insn "xxspltiw_v4sf_inst" - [(set (match_operand:V4SF 0 "register_operand" "=wa") - (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")] - UNSPEC_XXSPLTIW))] - "TARGET_POWER10" - "xxspltiw %x0,%1" - [(set_attr "type" "vecsimple") - (set_attr "prefixed" "yes")]) - (define_expand "xxspltidp_v2df" [(set (match_operand:V2DF 0 "register_operand" ) (unspec:V2DF [(match_operand:SF 1 "const_double_operand")] diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index e21bc745f72..bf678f429af 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -640,6 +640,32 @@ return num_insns == 1; }) +;; Return 1 if the operand is a CONST_VECTOR that can be loaded with the +;; XXSPLTIW instruction. Do not return 1 if the constant can be generated with +;; XXSPLTIB or VSPLTIS{H,W} +(define_predicate "xxspltiw_operand" + (match_code "const_vector") +{ + if (!TARGET_XXSPLTIW) + return false; + + if (mode != V8HImode && mode != V4SImode && mode != V4SFmode) + return false; + + rtx element = CONST_VECTOR_ELT (op, 0); + for (size_t i = 1; i < GET_MODE_NUNITS (mode); i++) + if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, i))) + return false; + + if (element == CONST0_RTX (GET_MODE_INNER (mode))) + return false; + + if (CONST_INT_P (element) && EASY_VECTOR_15 (INTVAL (element))) + return false; + + return true; +}) + ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a ;; vector register without using memory. 
(define_predicate "easy_vector_constant" @@ -653,6 +679,9 @@ if (zero_constant (op, mode) || all_ones_constant (op, mode)) return true; + if (xxspltiw_operand (op, mode)) + return true; + if (TARGET_P9_VECTOR && xxspltib_constant_p (op, mode, &num_insns, &value)) return true; diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index cbbb42c1b3a..a21a95bc7aa 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -85,7 +85,8 @@ | OTHER_POWER10_MASKS \ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_P10_FUSION_LD_CMPI \ - | OPTION_MASK_P10_FUSION_2LOGICAL) + | OPTION_MASK_P10_FUSION_2LOGICAL \ + | OPTION_MASK_XXSPLTIW) /* Flags that need to be turned off if -mno-power9-vector. */ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ @@ -160,8 +161,8 @@ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ - | OPTION_MASK_VSX) - + | OPTION_MASK_VSX \ + | OPTION_MASK_XXSPLTIW) #endif /* This table occasionally claims that a processor does not support a diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 28dbc507c5e..23f0925674c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4479,6 +4479,12 @@ rs6000_option_override_internal (bool global_init_p) if (!TARGET_PCREL && TARGET_PCREL_OPT) rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT; + if (TARGET_POWER10 && TARGET_VSX + && (rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIW) == 0) + rs6000_isa_flags |= OPTION_MASK_XXSPLTIW; + else if (!TARGET_POWER10 || !TARGET_VSX) + rs6000_isa_flags &= ~OPTION_MASK_XXSPLTIW; + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); @@ -6448,9 +6454,11 @@ xxspltib_constant_p (rtx op, /* See if we could generate vspltisw/vspltish directly instead of xxspltib + sign extend. Special case 0/-1 to allow getting any VSX register instead - of an Altivec register. 
*/ - if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0) - && EASY_VECTOR_15 (value)) + of an Altivec register. Also if we can generate a XXSPLTIW instruction, + don't emit a XXSPLTIB and an extend instruction. */ + if ((mode == V4SImode || mode == V8HImode) + && !IN_RANGE (value, -1, 0) + && (EASY_VECTOR_15 (value) || TARGET_XXSPLTIW)) return false; /* Return # of instructions and the constant byte for XXSPLTIB. */ @@ -6511,6 +6519,9 @@ output_vec_const_move (rtx *operands) gcc_unreachable (); } + if (xxspltiw_operand (vec, mode)) + return "#"; + if (TARGET_P9_VECTOR && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) { @@ -24008,6 +24019,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "string", 0, false, true }, { "update", OPTION_MASK_NO_UPDATE, true , true }, { "vsx", OPTION_MASK_VSX, false, true }, + { "xxspltiw", OPTION_MASK_XXSPLTIW, false, true }, #ifdef OPTION_MASK_64BIT #if TARGET_AIX_OS { "aix64", OPTION_MASK_64BIT, false, false }, diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 0dbdf753673..b01ebd78c7f 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -619,3 +619,7 @@ Generate (do not generate) MMA instructions. mrelative-jumptables Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save + +mxxspltiw +Target Undocumented Mask(XXSPLTIW) Var(rs6000_isa_flags) +Generate (do not generate) XXSPLTIW instructions. 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..9bad4da1e34 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6216,3 +6216,71 @@
   "TARGET_POWER10"
   "vmulld %0,%1,%2"
   [(set_attr "type" "veccomplex")])
+
+;; XXSPLTIW support.  The patterns also emit the non-prefixed XXSPLTIB or
+;; VSPLTIS{H,W} forms for the constants those instructions can load.
+(define_insn "*xxspltiw_v8hi"
+  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa,wa,v,wa")
+        (vec_duplicate:V8HI
+         (match_operand:HI 1 "const_int_operand" "O,wM,wB,n")))]
+  "TARGET_XXSPLTIW"
+{
+  HOST_WIDE_INT uns_value = INTVAL (operands[1]) & 0xffff;
+  HOST_WIDE_INT sign_value = (uns_value ^ 0x8000) - 0x8000;
+
+  if (sign_value == 0)
+    return "xxspltib %x0,0";
+
+  if (sign_value == -1)
+    return "xxspltib %x0,255";
+
+  int r = reg_or_subregno (operands[0]);
+  if (ALTIVEC_REGNO_P (r) && EASY_VECTOR_15 (sign_value))
+    return "vspltish %0,%1";
+  /* Splat the halfword pair as a word; "wa" allows any VSX reg, so use %x0.  */
+  operands[2] = GEN_INT ((uns_value << 16) | uns_value);
+  return "xxspltiw %x0,%2";
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "xxspltiw_v4si"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa,v,wa")
+        (vec_duplicate:V4SI
+         (match_operand:SI 1 "s32bit_cint_operand" "O,wM,wB,n")))]
+  "TARGET_XXSPLTIW"
+  "@
+   xxspltib %x0,0
+   xxspltib %x0,255
+   vspltisw %0,%1
+   xxspltiw %x0,%1"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,*,yes")])
+
+(define_insn "xxspltiw_v4sf"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
+        (vec_duplicate:V4SF
+         (match_operand:SF 1 "const_double_operand" "O,F")))]
+  "TARGET_XXSPLTIW"
+{
+  if (operands[1] == CONST0_RTX (SFmode))
+    return "xxspltib %x0,0";
+
+  operands[2] = GEN_INT (rs6000_const_f32_to_i32 (operands[1]));
+  return "xxspltiw %x0,%2";
+}
+  [(set_attr "type" "vecsimple")
+   (set_attr "prefixed" "*,yes")])
+
+(define_mode_iterator XXSPLTIW [V8HI V4SI V4SF])
+;; Rewrite a move of an xxspltiw_operand constant as VEC_DUPLICATE of element 0.
+(define_split
+  [(set (match_operand:XXSPLTIW 0 "vsx_register_operand")
+        (match_operand:XXSPLTIW 1 "xxspltiw_operand"))]
+  "TARGET_XXSPLTIW"
+  [(set (match_dup 0)
+        (vec_duplicate:<MODE> (match_dup 2)))]
+{
+  operands[2] = CONST_VECTOR_ELT (operands[1], 0);
+})
+