public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work119)] Optimize V8HI/V16QI vec_extract from memory with constant element number
@ 2023-04-18 22:15 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-04-18 22:15 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:eb3749749c713055dfc79b419cf4541fb6753d31
commit eb3749749c713055dfc79b419cf4541fb6753d31
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Apr 18 18:15:07 2023 -0400
Optimize V8HI/V16QI vec_extract from memory with constant element number
This patch adds combiner insns to fold in conversion to DImode from vec_extract
of a V8HI or V16QI variable with constant element number. With this patch, GCC
will directly emit LHA, LHZ, or LBZ without needing an instruction to do the
sign or zero extension.
2023-04-18 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (vsx_extract_<mode>_load_to_udi): New insn.
(vsx_extract_v8hi_load_to_sdi): New insn.
gcc/testsuite/
* gcc.target/powerpc/vec-extract-mem-char-1.c: New file.
* gcc.target/powerpc/vec-extract-mem-short-1.c: New file.
Diff:
---
gcc/config/rs6000/vsx.md | 45 ++++++++++++++++++++++
.../gcc.target/powerpc/vec-extract-mem-char-1.c | 24 ++++++++++++
.../gcc.target/powerpc/vec-extract-mem-short-1.c | 37 ++++++++++++++++++
3 files changed, 106 insertions(+)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 49da544bf28..e3466f3aa74 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3980,6 +3980,51 @@
[(set_attr "type" "load,load,fpload,fpload")
(set_attr "length" "4,8,4,8")])
+;; Zero-extend to DImode a V8HI/V16QI element loaded from memory at a constant
+;; element number.  Element 0 (constraint O) needs no scratch (operand 3).
+(define_insn_and_split "*vsx_extract_<mode>_load_to_udi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,v,v")
+        (zero_extend:DI
+         (vec_select:<VEC_base>
+          (match_operand:VSX_EXTRACT_I2 1 "memory_operand" "YZ,m,Z,Q")
+          (parallel
+           [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "O,n,O,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b,X,&b"))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+                                           operands[3], <VEC_base>mode);
+}
+  [(set_attr "type" "load,load,fpload,fpload")
+   (set_attr "length" "4,8,4,8")
+   (set_attr "isa" "*,*,p9v,p9v")])
+
+;; Sign-extend to DImode a V8HI element loaded from memory at a constant
+;; element number (GPR only).  Element 0 (constraint O) needs no scratch.
+(define_insn_and_split "*vsx_extract_v8hi_load_to_sdi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (sign_extend:DI
+         (vec_select:HI
+          (match_operand:V8HI 1 "memory_operand" "YZ,m")
+          (parallel
+           [(match_operand:QI 2 "const_0_to_7_operand" "O,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b"))]
+  "VECTOR_MEM_VSX_P (V8HImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (sign_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+                                           operands[3], HImode);
+}
+  [(set_attr "type" "load,load")
+   (set_attr "length" "4,8")])
+
;; Extract a V4SI element from memory with constant element number and convert
;; it to SFmode, DFmode, KFmode, or possibly TFmode using either signed or
;; unsigned conversion.
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..e57dd0e8bb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   QImode and fold the zero extension into the load.  */
+
+#include <altivec.h>
+
+unsigned long long
+extract_uns_v16qi_0 (vector unsigned char *p)
+{
+  return vec_extract (*p, 0);		/* lbz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v16qi_1 (vector unsigned char *p)
+{
+  return vec_extract (*p, 1);		/* lbz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlbz\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mextsb\M} } } */
+/* { dg-final { scan-assembler-not {\mrlwinm\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..a2c482f556e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode and fold the sign/zero extension into the load.  */
+
+#include <altivec.h>
+
+long long
+extract_sign_v8hi_0 (vector short *p)
+{
+  return vec_extract (*p, 0);		/* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_1 (vector short *p)
+{
+  return vec_extract (*p, 1);		/* lha, no extsh.  */
+}
+
+unsigned long long
+extract_uns_v8hi_0 (vector unsigned short *p)
+{
+  return vec_extract (*p, 0);		/* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_1 (vector unsigned short *p)
+{
+  return vec_extract (*p, 1);		/* lhz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlha\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlhz\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mextsh\M} } } */
+/* { dg-final { scan-assembler-not {\mrlwinm\M} } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
* [gcc(refs/users/meissner/heads/work119)] Optimize V8HI/V16QI vec_extract from memory with constant element number
@ 2023-04-18 22:13 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-04-18 22:13 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:28cd85d9f246a465b8c82cd60ff669f4d084ca20
commit 28cd85d9f246a465b8c82cd60ff669f4d084ca20
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Apr 18 18:12:29 2023 -0400
Optimize V8HI/V16QI vec_extract from memory with constant element number
This patch adds combiner insns to fold in conversion to DImode from vec_extract
of a V8HI or V16QI variable with constant element number. With this patch, GCC
will directly emit LHA, LHZ, or LBZ without needing an instruction to do the
sign or zero extension.
2023-04-18 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (vsx_extract_<mode>_load_to_udi): New insn.
(vsx_extract_v8hi_load_to_sdi): New insn.
gcc/testsuite/
* gcc.target/powerpc/vec-extract-mem-char-1.c: New file.
* gcc.target/powerpc/vec-extract-mem-short-1.c: New file.
Diff:
---
gcc/config/rs6000/vsx.md | 45 ++++++++++++++++++++++
.../gcc.target/powerpc/vec-extract-mem-short-1.c | 37 ++++++++++++++++++
2 files changed, 82 insertions(+)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 49da544bf28..e3466f3aa74 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3980,6 +3980,51 @@
[(set_attr "type" "load,load,fpload,fpload")
(set_attr "length" "4,8,4,8")])
+;; Zero-extend to DImode a V8HI/V16QI element loaded from memory at a constant
+;; element number.  Element 0 (constraint O) needs no scratch (operand 3).
+(define_insn_and_split "*vsx_extract_<mode>_load_to_udi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,v,v")
+        (zero_extend:DI
+         (vec_select:<VEC_base>
+          (match_operand:VSX_EXTRACT_I2 1 "memory_operand" "YZ,m,Z,Q")
+          (parallel
+           [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "O,n,O,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b,X,&b"))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+                                           operands[3], <VEC_base>mode);
+}
+  [(set_attr "type" "load,load,fpload,fpload")
+   (set_attr "length" "4,8,4,8")
+   (set_attr "isa" "*,*,p9v,p9v")])
+
+;; Sign-extend to DImode a V8HI element loaded from memory at a constant
+;; element number (GPR only).  Element 0 (constraint O) needs no scratch.
+(define_insn_and_split "*vsx_extract_v8hi_load_to_sdi"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (sign_extend:DI
+         (vec_select:HI
+          (match_operand:V8HI 1 "memory_operand" "YZ,m")
+          (parallel
+           [(match_operand:QI 2 "const_0_to_7_operand" "O,n")]))))
+   (clobber (match_scratch:DI 3 "=X,&b"))]
+  "VECTOR_MEM_VSX_P (V8HImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (sign_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+                                           operands[3], HImode);
+}
+  [(set_attr "type" "load,load")
+   (set_attr "length" "4,8")])
+
;; Extract a V4SI element from memory with constant element number and convert
;; it to SFmode, DFmode, KFmode, or possibly TFmode using either signed or
;; unsigned conversion.
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..a2c482f556e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode and fold the sign/zero extension into the load.  */
+
+#include <altivec.h>
+
+long long
+extract_sign_v8hi_0 (vector short *p)
+{
+  return vec_extract (*p, 0);		/* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_1 (vector short *p)
+{
+  return vec_extract (*p, 1);		/* lha, no extsh.  */
+}
+
+unsigned long long
+extract_uns_v8hi_0 (vector unsigned short *p)
+{
+  return vec_extract (*p, 0);		/* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_1 (vector unsigned short *p)
+{
+  return vec_extract (*p, 1);		/* lhz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlha\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlhz\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mextsh\M} } } */
+/* { dg-final { scan-assembler-not {\mrlwinm\M} } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-04-18 22:15 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-18 22:15 [gcc(refs/users/meissner/heads/work119)] Optimize V8HI/V16QI vec_extract from memory with constant element number Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2023-04-18 22:13 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).