public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work119)] Optimize V8HI/V16QI vec_extract from memory with constant element number
@ 2023-04-18 22:15 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-04-18 22:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:eb3749749c713055dfc79b419cf4541fb6753d31

commit eb3749749c713055dfc79b419cf4541fb6753d31
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Apr 18 18:15:07 2023 -0400

    Optimize V8HI/V16QI vec_extract from memory with constant element number
    
    This patch adds combiner insns to fold in conversion to DImode from vec_extract
    of a V8HI or V16QI variable with constant element number.  With this patch, GCC
    will directly emit LHA, LHZ, or LBZ without needing an instruction to do the
    sign or zero extension.
    
    2023-04-18   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_<mode>_load_to_udi): New insn.
            (vsx_extract_v8hi_load_to_sdi): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-char-1.c: New file.
            * gcc.target/powerpc/vec-extract-mem-short-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 45 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-char-1.c    | 24 ++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-short-1.c   | 37 ++++++++++++++++++
 3 files changed, 106 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 49da544bf28..e3466f3aa74 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3980,6 +3980,51 @@
   [(set_attr "type" "load,load,fpload,fpload")
    (set_attr "length" "4,8,4,8")])
 
+;; Extract a V8HI/V16QI element from memory with constant element number and
+;; convert it to DImode with zero extension.
+(define_insn_and_split "*vsx_extract_<mode>_load_to_udi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,v,v")
+	(zero_extend:DI
+	 (vec_select:<VEC_base>
+	  (match_operand:VSX_EXTRACT_I2 1 "memory_operand" "YZ,m,Z,Q")
+	  (parallel
+	   [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "0,n,0,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b,X,&b"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], <VEC_base>mode);
+}
+  [(set_attr "type" "load,load,fpload,fpload")
+   (set_attr "length" "4,8,4,8")
+   (set_attr "isa" "*,*,p9v,p9v")])
+
+;; Extract a V8HI element from memory with constant element number and
+;; convert it to DImode with sign extension.
+(define_insn_and_split "*vsx_extract_v8hi_load_to_sdi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(sign_extend:DI
+	 (vec_select:HI
+	  (match_operand:V8HI 1 "memory_operand" "YZ,m")
+	  (parallel
+	   [(match_operand:QI 2 "const_0_to_7_operand" "0,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(sign_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], HImode);
+}
+  [(set_attr "type" "load,load")
+   (set_attr "length" "4,8")])
+
 ;; Extract a V4SI element from memory with constant element number and convert
 ;; it to SFmode, DFmode, KFmode, or possibly TFmode using either signed or
 ;; unsigned conversion.
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..e57dd0e8bb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   QImode and fold the zero extension into the load.  */
+
+#include <altivec.h>
+
+unsigned long long
+extract_uns_v16qi_0 (vector unsigned char *p)
+{
+  return vec_extract (*p, 0);		/* lbz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v16qi_1 (vector unsigned char *p)
+{
+  return vec_extract (*p, 1);		/* lbz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlbz\M}   2 } } */
+/* { dg-final { scan-assembler-not   {\mextsb\M}   } } */
+/* { dg-final { scan-assembler-not   {\mrlwinm\M}  } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..a2c482f556e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode and fold the sign/zero extension into the load.  */
+
+#include <altivec.h>
+
+long long
+extract_sign_v8hi_0 (vector short *p)
+{
+  return vec_extract (*p, 0);		/* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_1 (vector short *p)
+{
+  return vec_extract (*p, 1);		/* lha, no extsh.  */
+}
+
+unsigned long long
+extract_uns_v8hi_0 (vector unsigned short *p)
+{
+  return vec_extract (*p, 0);		/* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_1 (vector unsigned short *p)
+{
+  return vec_extract (*p, 1);		/* lhz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlha\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mlhz\M}   2 } } */
+/* { dg-final { scan-assembler-not   {\mextsh\M}   } } */
+/* { dg-final { scan-assembler-not   {\mrlwinm\M}  } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Optimize V8HI/V16QI vec_extract from memory with constant element number
@ 2023-04-18 22:13 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-04-18 22:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:28cd85d9f246a465b8c82cd60ff669f4d084ca20

commit 28cd85d9f246a465b8c82cd60ff669f4d084ca20
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Apr 18 18:12:29 2023 -0400

    Optimize V8HI/V16QI vec_extract from memory with constant element number
    
    This patch adds combiner insns to fold in conversion to DImode from vec_extract
    of a V8HI or V16QI variable with constant element number.  With this patch, GCC
    will directly emit LHA, LHZ, or LBZ without needing an instruction to do the
    sign or zero extension.
    
    2023-04-18   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_<mode>_load_to_udi): New insn.
            (vsx_extract_v8hi_load_to_sdi): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-char-1.c: New file.
            * gcc.target/powerpc/vec-extract-mem-short-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 45 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-short-1.c   | 37 ++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 49da544bf28..e3466f3aa74 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3980,6 +3980,51 @@
   [(set_attr "type" "load,load,fpload,fpload")
    (set_attr "length" "4,8,4,8")])
 
+;; Extract a V8HI/V16QI element from memory with constant element number and
+;; convert it to DImode with zero extension.
+(define_insn_and_split "*vsx_extract_<mode>_load_to_udi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,v,v")
+	(zero_extend:DI
+	 (vec_select:<VEC_base>
+	  (match_operand:VSX_EXTRACT_I2 1 "memory_operand" "YZ,m,Z,Q")
+	  (parallel
+	   [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "0,n,0,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b,X,&b"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], <VEC_base>mode);
+}
+  [(set_attr "type" "load,load,fpload,fpload")
+   (set_attr "length" "4,8,4,8")
+   (set_attr "isa" "*,*,p9v,p9v")])
+
+;; Extract a V8HI element from memory with constant element number and
+;; convert it to DImode with sign extension.
+(define_insn_and_split "*vsx_extract_v8hi_load_to_sdi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(sign_extend:DI
+	 (vec_select:HI
+	  (match_operand:V8HI 1 "memory_operand" "YZ,m")
+	  (parallel
+	   [(match_operand:QI 2 "const_0_to_7_operand" "0,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(sign_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], HImode);
+}
+  [(set_attr "type" "load,load")
+   (set_attr "length" "4,8")])
+
 ;; Extract a V4SI element from memory with constant element number and convert
 ;; it to SFmode, DFmode, KFmode, or possibly TFmode using either signed or
 ;; unsigned conversion.
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..a2c482f556e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode and fold the sign/zero extension into the load.  */
+
+#include <altivec.h>
+
+long long
+extract_sign_v8hi_0 (vector short *p)
+{
+  return vec_extract (*p, 0);		/* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_1 (vector short *p)
+{
+  return vec_extract (*p, 1);		/* lha, no extsh.  */
+}
+
+unsigned long long
+extract_uns_v8hi_0 (vector unsigned short *p)
+{
+  return vec_extract (*p, 0);		/* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_1 (vector unsigned short *p)
+{
+  return vec_extract (*p, 1);		/* lhz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlha\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mlhz\M}   2 } } */
+/* { dg-final { scan-assembler-not   {\mextsh\M}   } } */
+/* { dg-final { scan-assembler-not   {\mrlwinm\M}  } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-04-18 22:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-18 22:15 [gcc(refs/users/meissner/heads/work119)] Optimize V8HI/V16QI vec_extract from memory with constant element number Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2023-04-18 22:13 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).