From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <meissner@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1005)
	id 1D5003858D33; Wed, 19 Apr 2023 20:53:29 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 1D5003858D33
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1681937609;
	bh=Xwf7LBKds+AOPrkg9Y3Q5seFLTqN4Zx+c7M9agqONI4=;
	h=From:To:Subject:Date:From;
	b=mLjjv5XZgSQoYRCH0s2W8eC+GKLj1od+ptc7BA1QATWgu/xf/b7ebkTMShJld8hWE
	 qgoD0Dde+pejAUZDjakqChOeTg1xzAC1tYWWAkdnVqBtY17HLzlSMt4r9K1fYCWaN7
	 wVMKzk6qowPD7uVnNYWuQs77d0ZlLwKRPS9DiNIY=
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work119)] Fold conversion to float
 into V4SI vsx_extract from memory.
X-Act-Checkin: gcc
X-Git-Author: Michael Meissner <meissner@linux.ibm.com>
X-Git-Refname: refs/users/meissner/heads/work119
X-Git-Oldrev: cf36f3bd3617da5e088e10129dfe9deaf679fb2d
X-Git-Newrev: c33c0f8ce847479281360086f7f5f4482dae3a45
Message-Id: <20230419205329.1D5003858D33@sourceware.org>
Date: Wed, 19 Apr 2023 20:53:29 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:c33c0f8ce847479281360086f7f5f4482dae3a45

commit c33c0f8ce847479281360086f7f5f4482dae3a45
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Apr 19 16:52:27 2023 -0400

    Fold conversion to float into V4SI vsx_extract from memory.
    
    This patch folds the conversion to floating point into the vsx_extract of a
    V4SI element from memory when the element number is constant.  The integer is
    then loaded directly into a vector register with LFIWAX or LFIWZX, rather than
    being loaded into a GPR and moved over with a direct move operation.
    
    2023-04-18   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (FL_CONSTRAINT): New mode attribute.
            (SIGN_ZERO_EXTEND): New code attribute.
            (vsx_extract_v4si_load_to_<uns><mode>): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-int-2.c: New file.
            * gcc.target/powerpc/vec-extract-mem-int-3.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 40 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-2.c     | 38 ++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-3.c     | 26 ++++++++++++++
 3 files changed, 104 insertions(+)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 9be15b02af2..9c486a0cc79 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -251,6 +251,17 @@
 			       (TF "TARGET_FLOAT128_HW
 				    && FLOAT128_IEEE_P (TFmode)")])
 
+;; Constraint to use for the floating point types that have a direct
+;; conversion from 64-bit integer to floating point.
+(define_mode_attr FL_CONSTRAINT [(SF "wa")
+				 (DF "wa")
+				 (KF "v")
+				 (TF "v")])
+
+;; Whether to use SIGN_EXTEND or ZERO_EXTEND, depending on the float conversion.
+(define_code_attr SIGN_ZERO_EXTEND [(float          "SIGN_EXTEND")
+				    (unsigned_float "ZERO_EXTEND")])
+
 ;; Iterator for the 2 short vector types to do a splat from an integer
 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 
@@ -4051,6 +4062,35 @@
    (set_attr "length" "4,4,12,8,12")
    (set_attr "isa" "*,*,*,p9v,p9v")])
 
+;; Extract a V4SI element from memory with constant element number and convert
+;; it to SFmode, DFmode, KFmode, or possibly TFmode using either signed or
+;; unsigned conversion.
+(define_insn_and_split "*vsx_extract_v4si_load_to_<uns><mode>"
+  [(set (match_operand:FL_CONV 0 "register_operand" "=<FL_CONSTRAINT>,<FL_CONSTRAINT>")
+	(any_float:FL_CONV
+	 (vec_select:SI
+	  (match_operand:V4SI 1 "memory_operand" "m,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+   (clobber (match_scratch:DI 3 "=&b,&b"))
+   (clobber (match_scratch:DI 4 "=f,v"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_POWERPC64"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+	(match_dup 5))
+   (set (match_dup 0)
+	(any_float:FL_CONV (match_dup 4)))]
+{
+  if (GET_CODE (operands[4]) == SCRATCH)
+    operands[4] = gen_reg_rtx (DImode);
+
+  rtx new_mem = rs6000_adjust_vec_address (operands[4], operands[1], operands[2],
+					   operands[3], SImode);
+  operands[5] = gen_rtx_<SIGN_ZERO_EXTEND> (DImode, new_mem);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "12")
+   (set_attr "isa" "*,p8v")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c
new file mode 100644
index 00000000000..86077a060a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c
@@ -0,0 +1,38 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Test to verify that vec_extract with constant element numbers can load an
+   SImode value and convert it to float or double by loading the value
+   directly into a vector register, and not loading up the GPRs first.  */
+
+#include <altivec.h>
+
+float
+extract_float_sign_v4si_0 (vector int *p)
+{
+  return vec_extract (*p, 0);		/* lfiwax or lxsiwax.  */
+}
+
+double
+extract_double_sign_v4si_1 (vector int *p)
+{
+  return vec_extract (*p, 1);		/* lfiwax or lxsiwax.  */
+}
+
+double
+extract_double_uns_v4si_0 (vector unsigned int *p)
+{
+  return vec_extract (*p, 0);		/* lfiwzx or lxsiwzx.  */
+}
+
+double
+extract_double_uns_v4si_3 (vector unsigned int *p)
+{
+  return vec_extract (*p, 3);		/* lfiwzx or lxsiwzx.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlfiwax\M|\mlxsiwax\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mlfiwzx\M|\mlxsiwzx\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mlw[az]\M}                } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrw[sz]\M}            } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-3.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-3.c
new file mode 100644
index 00000000000..9e46caa8277
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-3.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-require-effective-target float128_hw } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+
+/* Test to verify that vec_extract with constant element numbers can load an
+   SImode value and convert it to _Float128 by loading the value directly into
+   a vector register, and not loading up the GPRs first.  */
+
+#include <altivec.h>
+
+_Float128
+extract_ieee_uns_v4si_0 (vector unsigned int *p)
+{
+  return vec_extract (*p, 0);		/* lxsiwzx.  */
+}
+
+_Float128
+extract_ieee_uns_v4si_3 (vector unsigned int *p)
+{
+  return vec_extract (*p, 3);		/* lxsiwzx.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlfiwzx\M|\mlxsiwzx\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mlw[az]\M}                } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrw[sz]\M}            } } */