From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 1D5003858D33; Wed, 19 Apr 2023 20:53:29 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 1D5003858D33 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1681937609; bh=Xwf7LBKds+AOPrkg9Y3Q5seFLTqN4Zx+c7M9agqONI4=; h=From:To:Subject:Date:From; b=mLjjv5XZgSQoYRCH0s2W8eC+GKLj1od+ptc7BA1QATWgu/xf/b7ebkTMShJld8hWE qgoD0Dde+pejAUZDjakqChOeTg1xzAC1tYWWAkdnVqBtY17HLzlSMt4r9K1fYCWaN7 wVMKzk6qowPD7uVnNYWuQs77d0ZlLwKRPS9DiNIY= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work119)] Fold conversion to float into V4SI vsx_extract from memory. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work119 X-Git-Oldrev: cf36f3bd3617da5e088e10129dfe9deaf679fb2d X-Git-Newrev: c33c0f8ce847479281360086f7f5f4482dae3a45 Message-Id: <20230419205329.1D5003858D33@sourceware.org> Date: Wed, 19 Apr 2023 20:53:29 +0000 (GMT) List-Id: https://gcc.gnu.org/g:c33c0f8ce847479281360086f7f5f4482dae3a45 commit c33c0f8ce847479281360086f7f5f4482dae3a45 Author: Michael Meissner Date: Wed Apr 19 16:52:27 2023 -0400 Fold conversion to float into V4SI vsx_extract from memory. This patch folds conversion to floating point of vsx_extract from memory of V4SI elements where the element number is constant. This code optimizes things so it will load the integer with LFIWAX or LFIWZX directly into a vector register rather than loading it into a GPR and doing a direct move operation. 2023-04-18 Michael Meissner gcc/ * config/rs6000/vsx.md (FL_CONSTRAINT): New mode attribute. (SIGN_ZERO_EXTEND): New code attribute. (vsx_extract_v4si_load_to_): New insn. gcc/testsuite/ * gcc.target/powerpc/vec-extract-mem-int-2.c: New file. * gcc.target/powerpc/vec-extract-mem-int-3.c: New file. 
Diff: --- gcc/config/rs6000/vsx.md | 40 ++++++++++++++++++++++ .../gcc.target/powerpc/vec-extract-mem-int-2.c | 38 ++++++++++++++++++++ .../gcc.target/powerpc/vec-extract-mem-int-3.c | 26 ++++++++++++++ 3 files changed, 104 insertions(+) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 9be15b02af2..9c486a0cc79 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -251,6 +251,17 @@ (TF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (TFmode)")]) +;; Constraint to use for floating point types that have a direct conversion +;; from 64-bit integer to floating point. +(define_mode_attr FL_CONSTRAINT [(SF "wa") + (DF "wa") + (KF "v") + (TF "v")]) + +;; Whether to use SIGN_EXTEND or ZERO_EXTEND for the floating point conversion. +(define_code_attr SIGN_ZERO_EXTEND [(float "SIGN_EXTEND") + (unsigned_float "ZERO_EXTEND")]) + ;; Iterator for the 2 short vector types to do a splat from an integer (define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) @@ -4051,6 +4062,35 @@ (set_attr "length" "4,4,12,8,12") (set_attr "isa" "*,*,*,p9v,p9v")]) +;; Extract a V4SI element from memory with constant element number and convert +;; it to SFmode, DFmode, KFmode, or possibly TFmode using either signed or +;; unsigned conversion. 
+(define_insn_and_split "*vsx_extract_v4si_load_to_" + [(set (match_operand:FL_CONV 0 "register_operand" "=,") + (any_float:FL_CONV + (vec_select:SI + (match_operand:V4SI 1 "memory_operand" "m,m") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")])))) + (clobber (match_scratch:DI 3 "=&b,&b")) + (clobber (match_scratch:DI 4 "=f,v"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_POWERPC64" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 5)) + (set (match_dup 0) + (any_float:FL_CONV (match_dup 4)))] +{ + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (DImode); + + rtx new_mem = rs6000_adjust_vec_address (operands[4], operands[1], operands[2], + operands[3], SImode); + operands[5] = gen_rtx_ (DImode, new_mem); +} + [(set_attr "type" "fpload") + (set_attr "length" "12") + (set_attr "isa" "*,p8v")]) ;; Variable V16QI/V8HI/V4SI extract from a register (define_insn_and_split "vsx_extract__var" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c new file mode 100644 index 00000000000..86077a060a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c @@ -0,0 +1,38 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ + +/* Test to verify that the vec_extract with constant element numbers can load + SImode and convert the value to float, and double by loading the value + directly into a vector register, and not loading up the GPRs first. */ + +#include + +float +extract_float_sign_v4si_0 (vector int *p) +{ + return vec_extract (*p, 0); /* lfiwax or lxsiwax. */ +} + +double +extract_double_sign_v4si_1 (vector int *p) +{ + return vec_extract (*p, 1); /* lfiwax or lxsiwax. */ +} + +double +extract_double_uns_v4si_0 (vector unsigned int *p) +{ + return vec_extract (*p, 0); /* lfiwzx or lxsiwzx. 
*/ +} + +double +extract_double_uns_v4si_3 (vector unsigned int *p) +{ + return vec_extract (*p, 3); /* lfiwzx or lxsiwzx. */ +} + +/* { dg-final { scan-assembler-times {\mlfiwax\M|\mlxsiwax\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlfiwzx\M|\mlxsiwzx\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlw[az]\M} } } */ +/* { dg-final { scan-assembler-not {\mmtvsrw[sz]\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-3.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-3.c new file mode 100644 index 00000000000..9e46caa8277 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-3.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-require-effective-target float128_hw } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */ + +/* Test to verify that the vec_extract with constant element numbers can load + SImode and convert the value to _Float128 by loading the value directly into a + vector register, and not loading up the GPRs first. */ + +#include + +_Float128 +extract_ieee_uns_v4si_0 (vector unsigned int *p) +{ + return vec_extract (*p, 0); /* lxsiwzx. */ +} + +_Float128 +extract_ieee_uns_v4si_3 (vector unsigned int *p) +{ + return vec_extract (*p, 3); /* lxsiwzx. */ +} + +/* { dg-final { scan-assembler-times {\mlfiwzx\M|\mlxsiwzx\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlw[az]\M} } } */ +/* { dg-final { scan-assembler-not {\mmtvsrw[sz]\M} } } */