From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 46759 invoked by alias); 23 Jul 2018 10:57:53 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 46673 invoked by uid 89); 23 Jul 2018 10:57:52 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.4 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,KAM_LAZY_DOMAIN_SECURITY autolearn=ham version=3.3.2 spammy=HOW, TYPE, Delete, clobber X-HELO: gcc1-power7.osuosl.org Received: from gcc1-power7.osuosl.org (HELO gcc1-power7.osuosl.org) (140.211.15.137) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 23 Jul 2018 10:57:50 +0000 Received: by gcc1-power7.osuosl.org (Postfix, from userid 10019) id 38B3D12406E4; Mon, 23 Jul 2018 10:57:49 +0000 (UTC) From: Segher Boessenkool To: gcc-patches@gcc.gnu.org Cc: dje.gcc@gmail.com, Segher Boessenkool Subject: [PATCH 2/2] rs6000: Improve vsx_init_v4si Date: Mon, 23 Jul 2018 10:57:00 -0000 Message-Id: <43eb708a2cb8055176e9ae639a132550ef844a54.1532342360.git.segher@kernel.crashing.org> In-Reply-To: <3ec69c5ba2ad0e4f031cabf6e9601c6363d7ef06.1532342359.git.segher@kernel.crashing.org> References: <3ec69c5ba2ad0e4f031cabf6e9601c6363d7ef06.1532342359.git.segher@kernel.crashing.org> X-IsSubscribed: yes X-SW-Source: 2018-07/txt/msg01249.txt.bz2 This changes vsx_init_v4si to be an expander. That way, no special cases are needed anymore for special arguments: the normal RTL passes can deal with it. Tested as usual; committing. Segher 2018-07-23 Segher Boessenkool * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust. * config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete. * config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force the elements into a register. (rs6000_split_v4si_init_di_reg): Delete. (rs6000_split_v4si_init): Delete. * config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT. (vsx_init_v4si): Rewrite as a define_expand. --- gcc/config/rs6000/rs6000-p8swap.c | 1 - gcc/config/rs6000/rs6000-protos.h | 1 - gcc/config/rs6000/rs6000.c | 92 +-------------------------------------- gcc/config/rs6000/vsx.md | 49 ++++++++++++++------- 4 files changed, 34 insertions(+), 109 deletions(-) diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index 071bc0c..f32db38 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -772,7 +772,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VSX_EXTRACT: case UNSPEC_VSX_SET: case UNSPEC_VSX_SLDWI: - case UNSPEC_VSX_VEC_INIT: case UNSPEC_VSX_VSLO: case UNSPEC_VUNPACK_HI_SIGN: case UNSPEC_VUNPACK_HI_SIGN_DIRECT: diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d548d80..9dec245 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -61,7 +61,6 @@ extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); -extern void rs6000_split_v4si_init (rtx []); extern void altivec_expand_vec_perm_le (rtx op[4]); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 62b8ea3..8f65a9f 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6867,11 +6867,7 @@ rs6000_expand_vector_init (rtx target, rtx vals) size_t i; for (i = 0; i < 4; i++) - { - elements[i] = XVECEXP (vals, 0, i); - if (!CONST_INT_P (elements[i]) && !REG_P (elements[i])) - elements[i] = copy_to_mode_reg (SImode, elements[i]); - } + elements[i] = force_reg (SImode, XVECEXP (vals, 0, i)); emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1], elements[2], elements[3])); @@ -7578,92 +7574,6 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, gcc_unreachable (); } -/* Helper function for rs6000_split_v4si_init to build up a DImode value from - two SImode values. */ - -static void -rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp) -{ - const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff); - - if (CONST_INT_P (si1) && CONST_INT_P (si2)) - { - unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32; - unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit; - - emit_move_insn (dest, GEN_INT (const1 | const2)); - return; - } - - /* Put si1 into upper 32-bits of dest. */ - if (CONST_INT_P (si1)) - emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32)); - else - { - /* Generate RLDIC. */ - rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1)); - rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32)); - rtx mask_rtx = GEN_INT (mask_32bit << 32); - rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx); - gcc_assert (!reg_overlap_mentioned_p (dest, si1)); - emit_insn (gen_rtx_SET (dest, and_rtx)); - } - - /* Put si2 into the temporary. */ - gcc_assert (!reg_overlap_mentioned_p (dest, tmp)); - if (CONST_INT_P (si2)) - emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit)); - else - emit_insn (gen_zero_extendsidi2 (tmp, si2)); - - /* Combine the two parts. */ - emit_insn (gen_iordi3 (dest, dest, tmp)); - return; -} - -/* Split a V4SI initialization. */ - -void -rs6000_split_v4si_init (rtx operands[]) -{ - rtx dest = operands[0]; - - /* Destination is a GPR, build up the two DImode parts in place. */ - if (REG_P (dest) || SUBREG_P (dest)) - { - int d_regno = regno_or_subregno (dest); - rtx scalar1 = operands[1]; - rtx scalar2 = operands[2]; - rtx scalar3 = operands[3]; - rtx scalar4 = operands[4]; - rtx tmp1 = operands[5]; - rtx tmp2 = operands[6]; - - /* Even though we only need one temporary (plus the destination, which - has an early clobber constraint, try to use two temporaries, one for - each double word created. That way the 2nd insn scheduling pass can - rearrange things so the two parts are done in parallel. */ - if (BYTES_BIG_ENDIAN) - { - rtx di_lo = gen_rtx_REG (DImode, d_regno); - rtx di_hi = gen_rtx_REG (DImode, d_regno + 1); - rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1); - rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2); - } - else - { - rtx di_lo = gen_rtx_REG (DImode, d_regno + 1); - rtx di_hi = gen_rtx_REG (DImode, d_regno); - rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1); - rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2); - } - return; - } - - else - gcc_unreachable (); -} - /* Return alignment of TYPE. Existing alignment is ALIGN. HOW selects whether the alignment is abi mandated, optional, or both abi and optional alignment. */ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 432aa1e..de2fa78 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -388,7 +388,6 @@ (define_c_enum "unspec" UNSPEC_VSX_VXSIG UNSPEC_VSX_VIEXP UNSPEC_VSX_VTSTDC - UNSPEC_VSX_VEC_INIT UNSPEC_VSX_VSIGNED2 UNSPEC_LXVL @@ -2946,23 +2945,41 @@ (define_insn "vsx_concat_v2sf" } [(set_attr "type" "vecperm")]) -;; V4SImode initialization splitter -(define_insn_and_split "vsx_init_v4si" - [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r") - (unspec:V4SI - [(match_operand:SI 1 "reg_or_cint_operand" "rn") - (match_operand:SI 2 "reg_or_cint_operand" "rn") - (match_operand:SI 3 "reg_or_cint_operand" "rn") - (match_operand:SI 4 "reg_or_cint_operand" "rn")] - UNSPEC_VSX_VEC_INIT)) - (clobber (match_scratch:DI 5 "=&r")) - (clobber (match_scratch:DI 6 "=&r"))] +;; Concatenate 4 SImode elements into a V4SImode reg. +(define_expand "vsx_init_v4si" + [(use (match_operand:V4SI 0 "gpc_reg_operand")) + (use (match_operand:SI 1 "gpc_reg_operand")) + (use (match_operand:SI 2 "gpc_reg_operand")) + (use (match_operand:SI 3 "gpc_reg_operand")) + (use (match_operand:SI 4 "gpc_reg_operand"))] "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" - "#" - "&& reload_completed" - [(const_int 0)] { - rs6000_split_v4si_init (operands); + rtx a = gen_reg_rtx (DImode); + rtx b = gen_reg_rtx (DImode); + rtx c = gen_reg_rtx (DImode); + rtx d = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendsidi2 (a, operands[1])); + emit_insn (gen_zero_extendsidi2 (b, operands[2])); + emit_insn (gen_zero_extendsidi2 (c, operands[3])); + emit_insn (gen_zero_extendsidi2 (d, operands[4])); + if (!BYTES_BIG_ENDIAN) + { + std::swap (a, b); + std::swap (c, d); + } + + rtx aa = gen_reg_rtx (DImode); + rtx ab = gen_reg_rtx (DImode); + rtx cc = gen_reg_rtx (DImode); + rtx cd = gen_reg_rtx (DImode); + emit_insn (gen_ashldi3 (aa, a, GEN_INT (32))); + emit_insn (gen_ashldi3 (cc, c, GEN_INT (32))); + emit_insn (gen_iordi3 (ab, aa, b)); + emit_insn (gen_iordi3 (cd, cc, d)); + + rtx abcd = gen_reg_rtx (V2DImode); + emit_insn (gen_vsx_concat_v2di (abcd, ab, cd)); + emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd)); DONE; }) -- 1.8.3.1