[gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
Date: Fri, 28 Apr 2023 22:12:25 +0000 (GMT)	[thread overview]
Message-ID: <20230428221225.8D55F3858CDB@sourceware.org> (raw)

https://gcc.gnu.org/g:51302d4ec98a7f197d350785dfb0ed0fc1ce6dad

commit 51302d4ec98a7f197d350785dfb0ed0fc1ce6dad
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 18:09:23 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading an SF element and
    converting it to double.
    
    This patch expands the alternatives, so that if the element number is 0 or the
    address is offsettable, we don't need a scratch register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Fix ISA for loading
            up SFmode values with x-form addresses.  Drill down on the alternatives
            to prevent allocating a scratch register if we don't need it.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 53 +++++++++++++++++++---
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 ++++++++++++
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..4777c870514 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,12 +3549,22 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
+;; Alternatives:
+;;    1: Load FPR, index 0, normal address, no address change.
+;;    2: Load FPR, index 0-3, offsettable address, element folded into addr.
+;;    3: Load FPR, index 0-3, single register, offset in op[3].
+;;    4: Load VMX, index 0, x-form, power8, no address change.
+;;    5: Load VMX, index 0-3, single register, power8, offset in op[3].
+;;    6: Load VMX, index 0, normal address, power9, no address change.
+;;    7: Load VMX, index 0-3, offsettable address, power9, element in addr.
+;;    8: Load GPR, index 0-3, single register, offset in op[3].
 (define_insn_and_split "*vsx_extract_v4sf_load"
-  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
+  [(set (match_operand:SF 0 "register_operand" "=f,f,f,v,v,v,v,?r")
 	(vec_select:SF
-	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
+	 (match_operand:V4SF 1 "memory_operand" "m,o,Q,Z,Q,m,o,Q")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "O,n,n,O,n,O,n,n")])))
+   (clobber (match_scratch:P 3 "=X,X,&b,X,&b,X,X,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
   "&& reload_completed"
@@ -3563,9 +3573,38 @@
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
 					   operands[3], SFmode);
 }
-  [(set_attr "type" "fpload,fpload,fpload,load")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+  [(set_attr "type" "fpload,fpload,fpload,fpload,fpload,fpload,fpload,load")
+   (set_attr "length" "4,4,8,4,8,4,4,8")
+   (set_attr "isa" "*,*,*,p8v,p8v,p9v,p9v,*")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+;; Alternatives (only 3 and 5 request the base register scratch in op[3]):
+;;    1: Load FPR, index 0, normal address, no address change.
+;;    2: Load FPR, index 0-3, offsettable address, element folded into addr.
+;;    3: Load FPR, index 0-3, single register, offset in op[3].
+;;    4: Load VMX, index 0, x-form, power8, no address change.
+;;    5: Load VMX, index 0-3, single register, power8, offset in op[3].
+;;    6: Load VMX, index 0, normal address, power9, no address change.
+;;    7: Load VMX, index 0-3, offsettable address, power9, element in addr.
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand" "=f,f,f,v,v,v,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,o,Q,Z,Q,m,o")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "O,n,n,O,n,O,n")]))))
+   (clobber (match_scratch:P 3 "=X,X,&b,X,&b,X,X"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "4,4,8,4,8,4,4")
+   (set_attr "isa" "*,*,*,p8v,p8v,p9v,p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Verify that vec_extract with a constant element number can load a float
+   element from memory into a GPR without going through LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* "+r" puts x in a GPR: lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* "+r" puts x in a GPR: lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

next             reply	other threads:[~2023-04-28 22:12 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-28 22:12 Michael Meissner [this message]
  -- strict thread matches above, loose matches on Subject: below --
2023-04-29  2:40 Michael Meissner
2023-04-29  0:02 Michael Meissner
2023-04-28 17:57 Michael Meissner
2023-04-27 21:25 Michael Meissner
2023-04-27 20:32 Michael Meissner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230428221225.8D55F3858CDB@sourceware.org \
    --to=meissner@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).