From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 00FAF3858CDB; Sat, 29 Apr 2023 04:07:01 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 00FAF3858CDB DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1682741221; bh=XCh65oaVlNzE6SS2cOvjR5Jc4DV1mM1NnRfKSNJcpGM=; h=From:To:Subject:Date:From; b=L1XoTrFu3BpoM/SdLV425dUJa2b8FVN5j+NRCPCejhTmG/UgQKiQumtn5S8DqpYKZ 2lRcrbqSo4Lr0g2kmyd7z1tkcgeBEvSHCBy95b+dO2zRV8016P6RPhF11y2n1lwFOO Rc3ZBzG2LsD8sHddXF64XT4Kw5/4qfsxJ/2pL2MQ= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work119)] Optimize variable element vec_extract to be converted to floating point X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work119 X-Git-Oldrev: 7c7742d904f6d629bba309a7e854d28d3d0d0d30 X-Git-Newrev: f0f4835b6845fd3d207afd124d3defd25428a67d Message-Id: <20230429040701.00FAF3858CDB@sourceware.org> Date: Sat, 29 Apr 2023 04:07:01 +0000 (GMT) List-Id: https://gcc.gnu.org/g:f0f4835b6845fd3d207afd124d3defd25428a67d commit f0f4835b6845fd3d207afd124d3defd25428a67d Author: Michael Meissner Date: Sat Apr 29 00:06:39 2023 -0400 Optimize variable element vec_extract to be converted to floating point This patch optimizes vec_extract with a variable element number of the following types to be converted to floating point by loading the value directly to the vector register, and then doing the conversion instead of loading the value to a GPR and then doing a direct move: vector int vector unsigned int vector unsigned short vector unsigned char 2023-04-28 Michael Meissner gcc/ * config/rs6000/vsx.md (vsx_extract_v4si_var_load_to_): New * insn. * vsx_extract__var_load_to_uns: New insn. gcc/testsuite/ * gcc.target/powerpc/vec-extract-mem-int-6.c: New file. 
* gcc.target/powerpc/vec-extract-mem-int-7.c: New file. Diff: --- gcc/config/rs6000/vsx.md | 52 ++++++++++++++++++++++ .../gcc.target/powerpc/vec-extract-mem-int-6.c | 29 ++++++++++++ .../gcc.target/powerpc/vec-extract-mem-int-7.c | 29 ++++++++++++ 3 files changed, 110 insertions(+) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 559a7034367..d156c9bd90b 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4295,6 +4295,58 @@ } [(set_attr "type" "load")]) +;; Fold extracting a V4SI element with a variable element with either sign or +;; zero extension to SFmode or DFmode into LFIWAX/LFIWZX and FCFID. +(define_insn_and_split "*vsx_extract_v4si_var_load_to_" + [(set (match_operand:SFDF 0 "register_operand" "=wa") + (any_float:SFDF + (unspec:SI + [(match_operand:V4SI 1 "memory_operand" "Q") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_VSX_EXTRACT))) + (clobber (match_scratch:DI 3 "=&b")) + (clobber (match_scratch:DI 4 "=wa"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 4) + (:DI (match_dup 5))) + (set (match_dup 0) + (float:SFDF (match_dup 4)))] +{ + operands[5] = rs6000_adjust_vec_address (operands[0], operands[1], + operands[2], operands[3], + SImode); +} + [(set_attr "type" "fpload") + (set_attr "length" "8")]) + +;; Fold extracting a V8HI/V16QI element with a variable element with zero +;; extension to SFmode or DFmode into LXSIBZX/LXSIHZX and FCFID +(define_insn_and_split "*vsx_extract__var_load_to_uns" + [(set (match_operand:SFDF 0 "register_operand" "=wa") + (unsigned_float:SFDF + (unspec: + [(match_operand:VSX_EXTRACT_I2 1 "memory_operand" "Q") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_VSX_EXTRACT))) + (clobber (match_scratch:DI 3 "=&b")) + (clobber (match_scratch:DI 4 "=v"))] + "VECTOR_MEM_VSX_P (mode) && TARGET_P9_VECTOR" + "#" + "&& reload_completed" + [(set (match_dup 4) + (zero_extend:DI (match_dup 5))) + (set (match_dup 0) 
+ (float:SFDF (match_dup 4)))] +{ + operands[5] = rs6000_adjust_vec_address (operands[0], operands[1], + operands[2], operands[3], + mode); +} + [(set_attr "type" "fpload") + (set_attr "length" "8")]) + ;; ISA 3.1 extract (define_expand "vextractl" [(set (match_operand:V2DI 0 "altivec_register_operand") diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-6.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-6.c new file mode 100644 index 00000000000..e08a3587eb2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-6.c @@ -0,0 +1,29 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mdejagnu-cpu=power8 -O2" } */ + +/* Test to verify that the vec_extract with variable element numbers can load + SImode and convert it to unsigned floating point, by loading the value + directly to a vector register, rather than loading up a GPR and transferring + the result to a vector register. */ + +#include <altivec.h> +#include <stddef.h> + +double +extract_dbl_uns_v4si_n (vector unsigned int *p, size_t n) +{ + return vec_extract (*p, n); /* lfiwzx/lxsiwzx, fcfid/xscvsxddp. */ +} + +float +extract_flt_uns_v4si_element_n_index_4 (vector unsigned int *p, size_t n) +{ + return vec_extract (p[4], n); /* lfiwzx/lxsiwzx, fcfids/xscvsxdsp. 
*/ +} + +/* { dg-final { scan-assembler-times {\mlfiwzx\M|\mlxsiwzx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mfcfid\M|\mxscvsxddp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mfcfids\M|\mxscvsxdsp\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mlw[az]x?\M} } } */ +/* { dg-final { scan-assembler-not {\mmtvsr} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-7.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-7.c new file mode 100644 index 00000000000..ddba763a395 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-7.c @@ -0,0 +1,29 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mdejagnu-cpu=power8 -O2" } */ + +/* Test to verify that the vec_extract with variable element numbers can load + SImode and convert it to signed floating point, by loading the value + directly to a vector register, rather than loading up a GPR and transferring + the result to a vector register. */ + +#include <altivec.h> +#include <stddef.h> + +double +extract_dbl_sign_v4si_n (vector int *p, size_t n) +{ + return vec_extract (*p, n); /* lfiwax/lxsiwax, fcfid/xscvsxddp. */ +} + +float +extract_flt_sign_v4si_element_n_index_4 (vector int *p, size_t n) +{ + return vec_extract (p[4], n); /* lfiwax/lxsiwax, fcfids/xscvsxdsp. */ +} + +/* { dg-final { scan-assembler-times {\mlfiwax\M|\mlxsiwax\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mfcfid\M|\mxscvsxddp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mfcfids\M|\mxscvsxdsp\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mlw[az]x?\M} } } */ +/* { dg-final { scan-assembler-not {\mmtvsr} } } */