From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <meissner@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1005)
	id 55DEF3858D37; Thu, 27 Apr 2023 21:25:22 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 55DEF3858D37
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1682630722;
	bh=DDYn2lvC5OY28pE/07G6JrCpG4OqONH+VN2lIs6Akxc=;
	h=From:To:Subject:Date:From;
	b=ams0hetoAgsdGuUUtes+qaN/WRAXbnuBgkq3a+f5Ph8GUCrTqTNoxsIj3hWRbgXCg
	 N/Ehj2pAmqR0N6lRRYxyMjkL0s8BIzyMFwM6pECHwbY326Ccj2ltEkkmjMID5DYxiV
	 HJHdZwtVXGsEzluyspKgeg6MYb4eybhyQBb71LsQ=
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF
 from memory with constant element numbers.
X-Act-Checkin: gcc
X-Git-Author: Michael Meissner <meissner@linux.ibm.com>
X-Git-Refname: refs/users/meissner/heads/work119
X-Git-Oldrev: dceb9d2a8859af37f420abd2272880952d54a35c
X-Git-Newrev: 2059b7d7a1e1f6244ca73e2ba45e5ab1c44ba159
Message-Id: <20230427212522.55DEF3858D37@sourceware.org>
Date: Thu, 27 Apr 2023 21:25:22 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:2059b7d7a1e1f6244ca73e2ba45e5ab1c44ba159

commit 2059b7d7a1e1f6244ca73e2ba45e5ab1c44ba159
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 27 17:24:59 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch changes the splits so that they can be done before register
    allocation.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner patch to combine loading up a SF element and
    converting it to double.
    
    2023-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * gcc/config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow splitting
            before register allocation.  Fix ISA for loading up SFmode values to
            traditional Altivec registers.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 26 +++++++++++++++++--
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..695b5cbd126 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3565,7 +3566,28 @@
 }
   [(set_attr "type" "fpload,fpload,fpload,load")
    (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+   (set_attr "isa" "*,p8v,p9v,*")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand" "=f,v,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,Z,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v,p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */