public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work120)] Optimize variable element vec_extract to be converted to floating point
@ 2023-05-01 22:38 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-05-01 22:38 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9c3aeec2078ca25fc3986912e2aa459c92dacd6e

commit 9c3aeec2078ca25fc3986912e2aa459c92dacd6e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon May 1 18:37:48 2023 -0400

    Optimize variable element vec_extract to be converted to floating point
    
    This patch optimizes vec_extract with a variable element number when the
    result is converted to floating point.  For the following vector types, the
    element is loaded directly into a vector register and then converted, instead
    of being loaded into a GPR and then transferred with a direct move:
    
    vector int
    vector unsigned int
    vector unsigned short
    vector unsigned char
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4si_var_load_to_<uns><mode>): New
            insn.
            (vsx_extract_<VSX_EXTRACT_I2:mode>_var_load_to_uns<SFDF:mode>): New
            insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-int-6.c: New file.
            * gcc.target/powerpc/vec-extract-mem-int-7.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 52 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-{int-6}.c   |  0
 .../gcc.target/powerpc/vec-extract-mem-{int-7}.c   |  0
 3 files changed, 52 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 47e5a9c4709..410183dde93 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4295,6 +4295,58 @@
 }
   [(set_attr "type" "load")])
 
+;; Fold extracting a V4SI element with a variable element number, with either
+;; sign or zero extension to SFmode or DFmode, into LFIWAX/LFIWZX and FCFID.
+(define_insn_and_split "*vsx_extract_v4si_var_load_to_<uns><mode>"
+  [(set (match_operand:SFDF 0 "register_operand" "=wa")
+	(any_float:SFDF
+	 (unspec:SI
+	  [(match_operand:V4SI 1 "memory_operand" "Q")
+	   (match_operand:DI 2 "register_operand" "r")]
+	  UNSPEC_VSX_EXTRACT)))
+   (clobber (match_scratch:DI 3 "=&b"))
+   (clobber (match_scratch:DI 4 "=wa"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+	(<fp_int_extend>:DI (match_dup 5)))
+   (set (match_dup 0)
+	(float:SFDF (match_dup 4)))]
+{
+  operands[5] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   SImode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")])
+
+;; Fold extracting a V8HI/V16QI element with a variable element number, with
+;; zero extension to SFmode or DFmode, into LXSIBZX/LXSIHZX and FCFID.
+(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I2:mode>_var_load_to_uns<SFDF:mode>"
+  [(set (match_operand:SFDF 0 "register_operand" "=wa")
+	(unsigned_float:SFDF
+	 (unspec:<VSX_EXTRACT_I2:VEC_base>
+	  [(match_operand:VSX_EXTRACT_I2 1 "memory_operand" "Q")
+	   (match_operand:DI 2 "register_operand" "r")]
+	  UNSPEC_VSX_EXTRACT)))
+   (clobber (match_scratch:DI 3 "=&b"))
+   (clobber (match_scratch:DI 4 "=v"))]
+  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I2:MODE>mode) && TARGET_P9_VECTOR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+	(zero_extend:DI (match_dup 5)))
+   (set (match_dup 0)
+	(float:SFDF (match_dup 4)))]
+{
+  operands[5] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VSX_EXTRACT_I2:VEC_base>mode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")])
+
 ;; ISA 3.1 extract
 (define_expand "vextractl<mode>"
   [(set (match_operand:V2DI 0 "altivec_register_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-{int-6}.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-{int-6}.c
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-{int-7}.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-{int-7}.c
new file mode 100644
index 00000000000..e69de29bb2d

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work120)] Optimize variable element vec_extract to be converted to floating point
@ 2023-05-02  1:40 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-05-02  1:40 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9dba2ec9b054fd7c61d8d297b298e7400cd3bde1

commit 9dba2ec9b054fd7c61d8d297b298e7400cd3bde1
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon May 1 21:40:17 2023 -0400

    Optimize variable element vec_extract to be converted to floating point
    
    This patch optimizes vec_extract with a variable element number when the
    result is converted to floating point.  For the following vector types, the
    element is loaded directly into a vector register and then converted, instead
    of being loaded into a GPR and then transferred with a direct move:
    
    vector int
    vector unsigned int
    vector unsigned short
    vector unsigned char
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4si_var_load_to_<uns><mode>): New
            insn.
            (vsx_extract_<VSX_EXTRACT_I2:mode>_var_load_to_uns<SFDF:mode>): New
            insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-int-6.c: New file.
            * gcc.target/powerpc/vec-extract-mem-int-7.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 52 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-6.c     | 29 ++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-7.c     | 29 ++++++++++++
 3 files changed, 110 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 47e5a9c4709..410183dde93 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4295,6 +4295,58 @@
 }
   [(set_attr "type" "load")])
 
+;; Fold extracting a V4SI element with a variable element number, with either
+;; sign or zero extension to SFmode or DFmode, into LFIWAX/LFIWZX and FCFID.
+(define_insn_and_split "*vsx_extract_v4si_var_load_to_<uns><mode>"
+  [(set (match_operand:SFDF 0 "register_operand" "=wa")
+	(any_float:SFDF
+	 (unspec:SI
+	  [(match_operand:V4SI 1 "memory_operand" "Q")
+	   (match_operand:DI 2 "register_operand" "r")]
+	  UNSPEC_VSX_EXTRACT)))
+   (clobber (match_scratch:DI 3 "=&b"))
+   (clobber (match_scratch:DI 4 "=wa"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+	(<fp_int_extend>:DI (match_dup 5)))
+   (set (match_dup 0)
+	(float:SFDF (match_dup 4)))]
+{
+  operands[5] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   SImode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")])
+
+;; Fold extracting a V8HI/V16QI element with a variable element number, with
+;; zero extension to SFmode or DFmode, into LXSIBZX/LXSIHZX and FCFID.
+(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I2:mode>_var_load_to_uns<SFDF:mode>"
+  [(set (match_operand:SFDF 0 "register_operand" "=wa")
+	(unsigned_float:SFDF
+	 (unspec:<VSX_EXTRACT_I2:VEC_base>
+	  [(match_operand:VSX_EXTRACT_I2 1 "memory_operand" "Q")
+	   (match_operand:DI 2 "register_operand" "r")]
+	  UNSPEC_VSX_EXTRACT)))
+   (clobber (match_scratch:DI 3 "=&b"))
+   (clobber (match_scratch:DI 4 "=v"))]
+  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I2:MODE>mode) && TARGET_P9_VECTOR"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+	(zero_extend:DI (match_dup 5)))
+   (set (match_dup 0)
+	(float:SFDF (match_dup 4)))]
+{
+  operands[5] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VSX_EXTRACT_I2:VEC_base>mode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")])
+
 ;; ISA 3.1 extract
 (define_expand "vextractl<mode>"
   [(set (match_operand:V2DI 0 "altivec_register_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-6.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-6.c
new file mode 100644
index 00000000000..e08a3587eb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-6.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with variable element numbers can load
+   SImode and convert it to unsigned floating point, by loading the value
+   directly to a vector register, rather than loading up a GPR and transferring
+   the result to a vector register.  */
+
+#include <altivec.h>
+#include <stddef.h>
+
+double
+extract_dbl_uns_v4si_n (vector unsigned int *p, size_t n)
+{
+  return vec_extract (*p, n);	/* lfiwzx/lxsiwzx, fcfid/xscvsxddp.  */
+}
+
+float
+extract_flt_uns_v4si_element_n_index_4 (vector unsigned int *p, size_t n)
+{
+  return vec_extract (p[4], n);	/* lfiwzx/lxsiwzx, fcfids/xscvsxdsp.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlfiwzx\M|\mlxsiwzx\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mfcfid\M|\mxscvsxddp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mfcfids\M|\mxscvsxdsp\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mlw[az]x?\M}               } } */
+/* { dg-final { scan-assembler-not   {\mmtvsr}                    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-7.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-7.c
new file mode 100644
index 00000000000..ddba763a395
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-7.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with variable element numbers can load
+   SImode and convert it to signed floating point, by loading the value
+   directly to a vector register, rather than loading up a GPR and transferring
+   the result to a vector register.  */
+
+#include <altivec.h>
+#include <stddef.h>
+
+double
+extract_dbl_sign_v4si_n (vector int *p, size_t n)
+{
+  return vec_extract (*p, n);	/* lfiwax/lxsiwax, fcfid/xscvsxddp.  */
+}
+
+float
+extract_flt_sign_v4si_element_n_index_4 (vector int *p, size_t n)
+{
+  return vec_extract (p[4], n);	/* lfiwax/lxsiwax, fcfids/xscvsxdsp.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlfiwax\M|\mlxsiwax\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mfcfid\M|\mxscvsxddp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mfcfids\M|\mxscvsxdsp\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mlw[az]x?\M}               } } */
+/* { dg-final { scan-assembler-not   {\mmtvsr}                    } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-05-02  1:40 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-01 22:38 [gcc(refs/users/meissner/heads/work120)] Optimize variable element vec_extract to be converted to floating point Michael Meissner
2023-05-02  1:40 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).