From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 433A9385E010; Tue, 15 Mar 2022 21:15:02 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 433A9385E010 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work081 X-Git-Oldrev: 46f8c79bbeb107bcf7a35eab158360891e291fcc X-Git-Newrev: fad5716a6579578052024626445b6a645a1c8a76 Message-Id: <20220315211502.433A9385E010@sourceware.org> Date: Tue, 15 Mar 2022 21:15:02 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 15 Mar 2022 21:15:02 -0000 https://gcc.gnu.org/g:fad5716a6579578052024626445b6a645a1c8a76 commit fad5716a6579578052024626445b6a645a1c8a76 Author: Michael Meissner Date: Tue Mar 15 17:14:42 2022 -0400 Optimize extendditi2 GPR to VSX register. 2022-03-15 Michael Meissner gcc/ * config/rs6000/vsx.md (extendditi2): Optimize extendditi2 GPR to VSX register. gcc/testsuite/ * gcc.target/powerpc/pr104698-2.c: Update instruction counts. Diff: --- gcc/config/rs6000/vsx.md | 84 ++++++++++++++------------- gcc/testsuite/gcc.target/powerpc/pr104698-2.c | 34 +++++++---- 2 files changed, 68 insertions(+), 50 deletions(-) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 25ef1702f49..2fab6843c5f 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5035,11 +5035,12 @@ ;; We also need the GPR code for power9 so that we can optimize to use the ;; multiply-add instructions. 
(define_insn_and_split "extendditi2" - [(set (match_operand:TI 0 "register_operand" "=r,r,wa,v,v") - (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b, v,Z"))) - (clobber (match_scratch:DI 2 "=&X,X,r, X,X")) + [(set (match_operand:TI 0 "register_operand" "=r, wa, v, r, v") + (sign_extend:TI + (match_operand:DI 1 "input_operand" "r, b, v, m, Z"))) + (clobber (match_scratch:DI 2 "=X, &b, X, X, X")) (clobber (reg:DI CA_REGNO))] - "TARGET_POWERPC64 && TARGET_MADDLD" + "TARGET_POWERPC64 && TARGET_P9_VECTOR" "#" "&& reload_completed" [(pc)] @@ -5047,27 +5048,10 @@ rtx dest = operands[0]; rtx src = operands[1]; int dest_regno = reg_or_subregno (dest); - int src_regno = ((REG_P (src) || SUBREG_P (src)) - ? reg_or_subregno (src) - : -1); - - /* If we are converting from a GPR to a vector register, do the sign - extension in a scratch GPR register, and then do the mtvsrdd. */ - if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno)) - { - rtx tmp = operands[2]; - rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); - emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63))); - if (BYTES_BIG_ENDIAN) - emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src)); - else - emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp)); - DONE; - } /* Handle conversion to GPR registers. Load up the low part and then do a sign extension to the upper part. */ - else if (INT_REGNO_P (dest_regno)) + if (INT_REGNO_P (dest_regno)) { rtx dest_hi = gen_highpart (DImode, dest); rtx dest_lo = gen_lowpart (DImode, dest); @@ -5075,36 +5059,58 @@ emit_move_insn (dest_lo, src); /* In case src is a MEM, we have to use the destination, which is a register, instead of re-using the source. */ - rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo; + rtx src2 = int_reg_operand (src, DImode) ? src : dest_lo; emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63))); DONE; } - /* For conversion to an Altivec register, generate either a splat operation - or a load rightmost double word instruction. 
Both instructions gets the - DImode value into the lower 64 bits, and then do the vextsd2q - instruction. */ - - else if (ALTIVEC_REGNO_P (dest_regno)) + /* For memory, use lxvrdx to load the value into the bottom of the + register and do the sign extension. */ + else if (ALTIVEC_REGNO_P (dest_regno) && MEM_P (src)) { - if (MEM_P (src)) - emit_insn (gen_vsx_lxvrdx (dest, src)); - else + emit_insn (gen_vsx_lxvrdx (dest, src)); + emit_insn (gen_extendditi2_vector (dest, dest)); + DONE; + } + + else + { + int src_regno = reg_or_subregno (src); + + /* If we are converting from a GPR to a vector register, do the + sign extension in a scratch GPR register, and then do the + mtvsrdd. Note, the vsx_concat_v2di function will reverse + the arguments on little endian systems, so we need to build + the insn so the appropriate registers are generated. */ + if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno)) + { + rtx tmp = operands[2]; + rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); + emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63))); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src)); + else + emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp)); + DONE; + } + + /* For conversion to an Altivec register, generate a splat operation + to get the value in the bottom 64-bits. 
*/ + else if (ALTIVEC_REGNO_P (dest_regno) && ALTIVEC_REGNO_P (src_regno)) { rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); emit_insn (gen_vsx_splat_v2di (dest_v2di, src)); + emit_insn (gen_extendditi2_vector (dest, dest)); + DONE; } - emit_insn (gen_extendditi2_vector (dest, dest)); - DONE; + else + gcc_unreachable (); } - - else - gcc_unreachable (); } [(set_attr "length" "8") - (set_attr "type" "shift,load,mtvsr,vecperm,load") - (set_attr "isa" "p9,p9,p9,p10,p10")]) + (set_attr "type" "shift,mtvsr,vecperm,load,vecload") + (set_attr "isa" "*, *, p10, *, p10")]) ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg (define_insn "extendditi2_vector" diff --git a/gcc/testsuite/gcc.target/powerpc/pr104698-2.c b/gcc/testsuite/gcc.target/powerpc/pr104698-2.c index 6966fce2ba9..10132d10e8e 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr104698-2.c +++ b/gcc/testsuite/gcc.target/powerpc/pr104698-2.c @@ -3,31 +3,43 @@ /* { dg-options "-mdejagnu-cpu=power10 -O2" } */ /* PR target/104694 involved GCC generating vextsd2q to convent long long to - __int128_t when the long long value was in the GPR register. This test - verifies that if the result is in the Altivec registers, we still want to - generate vextsd2q. We use __int128_t to indicate that we want the result of - the conversion to be in an Altivec register. */ + __int128_t when the long long value was in the GPR register. + + This test verifies that, when converting a GPR to a vector + register, we generate the sign extension in the GPR register and then do + a mtvsrdd to get it into the vector register. */ void do_div_1 (__int128_t *p, __int128_t *q, long long r) { - *p = *q / r; /* mtvsrdd, vextsd2q, vdivsq. */ + *p = *q / r; /* sradi, mtvsrdd, vdivsq. */ } -/* Test the optimization in vsx.md to use lxvrdx instead of ld and mtvsrdd if - the value is coming from memory. 
*/ +/* Test the optimization in vsx.md to use lxvrdx instead of ld and then do + vextsd2q to sign extend it. */ void do_div_2 (__int128_t *p, __int128_t *q, long long *r) { *p = *q / r[2]; /* lxvrdx, vextsd2q, vdivsq. */ } +/* If the 64-bit integer is in a vector register (after conversion from + double), don't move it back to a GPR register to do the sign extension. + Instead use vextsd2q. */ + +void do_div_3 (__int128_t *p, __int128_t *q, double r) +{ + *p = *q / (long long)r; /* xscvdpsxds, xxpermdi, vextsd2q, vdivsq. */ +} + + /* { dg-final { scan-assembler-not {\mld\M} } } */ /* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */ /* { dg-final { scan-assembler-not {\mmfvsrld\M} } } */ -/* { dg-final { scan-assembler-not {\msradi\M} } } */ -/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */ +/* { dg-final { scan-assembler-times {\msradi\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxv\M} 3 } } */ /* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */ /* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */ -/* { dg-final { scan-assembler-times {\mvdivsq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxv\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvdivsq\M} 3 } } */ /* { dg-final { scan-assembler-times {\mvextsd2q\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */