From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id AC8DB3858CDB; Fri, 24 Mar 2023 03:46:03 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AC8DB3858CDB DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1679629563; bh=Ux4jfUFDvIn5AmCekJO/h/iqxs2gnNGcbuhR/FQam7U=; h=From:To:Subject:Date:From; b=ntoEArr20vBTmRHSwLwyka7YtGLP/4yvvAid98tcE5jJe+apbX7Q0Kjg3ppJ6cjL7 bWpecng1Lqpg0GRk5YQmQF9yMU4Zj3E/p25DrNE9xb2Rnff/WVSQBMKqaKZ9Pf+7u1 uiMNTBp14qYueTRdvOH6s3UpDnLah71U7xrk7vh0= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work115)] Improve 64->128 bit zero extension on PowerPC X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work115 X-Git-Oldrev: c4529eab1c2c3b1cd4f4d9f22fe52aae00997e42 X-Git-Newrev: 12ed4fed8234fcb25d2e77c21cf7671562d59cdf Message-Id: <20230324034603.AC8DB3858CDB@sourceware.org> Date: Fri, 24 Mar 2023 03:46:03 +0000 (GMT) List-Id: https://gcc.gnu.org/g:12ed4fed8234fcb25d2e77c21cf7671562d59cdf commit 12ed4fed8234fcb25d2e77c21cf7671562d59cdf Author: Michael Meissner Date: Thu Mar 23 23:45:18 2023 -0400 Improve 64->128 bit zero extension on PowerPC 2023-03-23 Michael Meissner gcc/ PR target/108958 * config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ PR target/108958 * gcc.target/powerpc/zero-extend-di-ti.c: New test.
Diff:
---
 gcc/config/rs6000/rs6000.md                        | 58 ++++++++++++++++++++
 .../gcc.target/powerpc/zero-extend-di-ti.c         | 62 ++++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d836a8a58b3..c6ab6179306 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -986,6 +986,64 @@
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+;; Zero extend DImode to TImode.  Extensions into GPRs and VSX to VSX
+;; extensions are split after reload; GPR or memory to VSX extensions are a
+;; single mtvsrdd or lxvrdx instruction.  The VSX to VSX split writes 0 to the
+;; scratch register before it reads operand 1, so the scratch must be an
+;; earlyclobber; otherwise it could be allocated to the same register as the
+;; input and the zeroing would destroy the value being extended.
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,wa")
+        (zero_extend:TI
+         (match_operand:DI 1 "reg_or_mem_operand" "r,m,b,Z,wa")))
+   (clobber (match_scratch:DI 2 "=X,X,X,X,&wa"))]
+  "TARGET_POWERPC64 && TARGET_P9_VECTOR"
+  "@
+   #
+   #
+   mtvsrdd %x0,0,%1
+   lxvrdx %x0,%y1
+   #"
+  "&& reload_completed
+   && (int_reg_operand (operands[0], TImode)
+       || (vsx_register_operand (operands[0], TImode)
+           && vsx_register_operand (operands[1], DImode)))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 3) (const_int 0))]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+
+  /* If we are converting a VSX DImode to VSX TImode, we need to move the upper
+     64-bits (DImode) to the lower 64-bits.  We can't just do a xxpermdi
+     instruction to swap the two 64-bit words, because we can't rely on the
+     bottom 64-bits of the VSX register being 0.  Instead we create a 0 and do
+     the xxpermdi operation to combine the two registers.  */
+  if (vsx_register_operand (dest, TImode)
+      && vsx_register_operand (src, DImode))
+    {
+      rtx tmp = operands[2];
+      emit_move_insn (tmp, const0_rtx);
+
+      rtx hi = tmp;
+      rtx lo = src;
+      if (!BYTES_BIG_ENDIAN)
+        std::swap (hi, lo);
+
+      rtx dest_v2di = gen_rtx_REG (V2DImode, reg_or_subregno (dest));
+      emit_insn (gen_vsx_concat_v2di (dest_v2di, hi, lo));
+      DONE;
+    }
+
+  /* Otherwise we are zero extending to a GPR register pair from a GPR register
+     or from memory.  Do the zero extend operation to the lower DI register,
+     and set the upper DI register to 0.  */
+  operands[2] = gen_lowpart (DImode, dest);
+  operands[3] = gen_highpart (DImode, dest);
+}
+  [(set_attr "type" "*,load,vecexts,vecload,vecperm")
+   (set_attr "isa" "*,*,p9v,p10,*")
+   (set_attr "length" "8,8,*,*,8")])
 
 (define_insn "extendqi<mode>2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v")
diff --git a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
new file mode 100644
index 00000000000..9b3b9c4dbd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure the various optimizations and code paths are done for
+   zero extending DImode to TImode on power10.  */
+
+__uint128_t
+gpr_to_gpr (unsigned long long a)
+{
+  /* li 4,0.  */
+  return a;
+}
+
+__uint128_t
+mem_to_gpr (unsigned long long *p)
+{
+  /* ld 3,0(3); li 4,0.  */
+  return *p;
+}
+
+__uint128_t
+vsx_to_gpr (__uint128_t *p, double d)
+{
+  /* fctiduz 1,1; li 4,0;mfvsrd 3,1.  */
+  return (unsigned long long)d;
+}
+
+void
+gpr_to_vsx (__uint128_t *p, unsigned long long a)
+{
+  /* mtvsrdd 0,0,4; stxv 0,0(3).  */
+  __uint128_t b = a;
+  __asm__ (" # %x0" : "+wa" (b));
+  *p = b;
+}
+
+void
+mem_to_vsx (__uint128_t *p, unsigned long long *q)
+{
+  /* lxvrdx 0,0,4; stxv 0,0(3).  */
+  __uint128_t a = *q;
+  __asm__ (" # %x0" : "+wa" (a));
+  *p = a;
+}
+
+void
+vsx_to_vsx (__uint128_t *p, double d)
+{
+  /* fctiduz 1,1; xxspltib 0,0; xxpermdi 0,0,1,0; stxv 0,0(3).  */
+  __uint128_t a = (unsigned long long)d;
+  __asm__ (" # %x0" : "+wa" (a));
+  *p = a;
+}
+
+/* { dg-final { scan-assembler-times {\mli\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mld\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */