public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work124)] Add alternatives for vec_extract with constant element loading from memory.
@ 2023-07-12 3:08 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-07-12 3:08 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:24ba160aa963e79ce3b6a60b7086d654e5f58f68
commit 24ba160aa963e79ce3b6a60b7086d654e5f58f68
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Jul 11 23:08:13 2023 -0400
Add alternatives for vec_extract with constant element loading from memory.
This patch expands the alternatives for doing vec_extract of V4SI, V8HI, and
V16QI vectors with a constant offset when the vector is in memory. If the
element number is 0 or we are using offsettable addressing for loading up GPR
registers we don't need to allocate a temporary base register. We can fold the
offset from the vec_extract into the normal address.
I also added alternatives to load the values into vector registers. If we load
the value into vector registers, we require X-form addressing.
I added the VSX_EX_ISA mode attribute to distinguish that we can load 32-bit
integers on a power8 system to vector registers, but we need a power9 system to
be able to load 8-bit or 16-bit integers.
In general, loading up small integer values with vec_extract into the vector
registers explicitly is likely not done that much. However, this will be needed
in later patches when we want to combine loading up a small integer value into a
vector register with sign/zero extension. This happens when we want to do a
vec_extract of a small integer value and convert it to floating point.
2023-07-11 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
(vsx_extract_<mode>_load): Add more alternatives for memory options.
Allow the load to load up vector registers if needed.
gcc/testsuite/
* gcc.target/powerpc/vec-extract-mem-char-1.c: New test.
* gcc.target/powerpc/vec-extract-mem-int-1.c: New test.
* gcc.target/powerpc/vec-extract-mem-short-1.c: New test.
Diff:
---
gcc/config/rs6000/vsx.md | 41 ++++++++++++----
| 55 ++++++++++++++++++++++
| 48 +++++++++++++++++++
| 55 ++++++++++++++++++++++
4 files changed, 190 insertions(+), 9 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 799d77cbad8..f13fb3afc8e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
(V8HI "v")
(V4SI "wa")])
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+ (V8HI "p9v")
+ (V4SI "p8v")])
+
;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
@@ -4027,23 +4033,40 @@
}
[(set_attr "type" "mfvsr")])
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number. For vector registers, we require X-form addressing if the offset is
+;; 0, or single register addressing if we need to load the offset into a
+;; temporary register.
+;;
+;; Alternatives:
+;; Reg: Element: Cpu: Addr: need scratch
+;; 1: GPR 0 any normal address no
+;; 2: GPR 1-n any offsettable address no
+;; 3: GPR 1-n any single register yes
+;; 4: vector 0 p8/p9 reg+reg or reg no
+;; 5: vector 1-n p8/p9 single register yes
(define_insn_and_split "*vsx_extract_<mode>_load"
- [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+ [(set (match_operand:<VEC_base> 0 "register_operand"
+ "=r, r, r, wa, wa")
(vec_select:<VEC_base>
- (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
- (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
- (clobber (match_scratch:DI 3 "=&b"))]
+ (match_operand:VSX_EXTRACT_I 1 "memory_operand"
+ "m, o, m, Z, Q")
+ (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>"
+ "O, n, n, O, n")])))
+ (clobber (match_scratch:DI 3
+ "=X, X, &b, X, &b"))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 4))]
{
- operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
- operands[3], <VEC_base>mode);
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+ operands[2], operands[3],
+ <VEC_base>mode);
}
- [(set_attr "type" "load")
- (set_attr "length" "8")])
+ [(set_attr "type" "load, load, load, fpload, fpload")
+ (set_attr "length" "*, *, 8, *, 8")
+ (set_attr "isa" "*, *, *, <VSX_EX_ISA>, <VSX_EX_ISA>")])
;; Variable V16QI/V8HI/V4SI extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
--git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..e4f87f18a35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+ QImode into a vector register and store it without using a direct move
+ operation. */
+
+#include <altivec.h>
+
+void
+extract_uns_v16qi_0 (vector unsigned char *p, unsigned char *q)
+{
+ unsigned char u = vec_extract (*p, 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v16qi_1 (vector unsigned char *p, unsigned char *q)
+{
+ unsigned char u = vec_extract (*p, 1);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v16qi_element_0_index_4 (vector unsigned char *p,
+ unsigned char *q)
+{
+ unsigned char u = vec_extract (p[4], 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v16qi_element_3_index_4 (vector unsigned char *p,
+ unsigned char *q)
+{
+ unsigned char u = vec_extract (p[4], 3); /* Element 3, matching the name. */
+ __asm__ (" # %x0" : "+wa" (u)); /* "+wa" asks for U in a VSX register. */
+
+ *q = u;
+ return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsibzx\M} 4 } } */
+/* { dg-final { scan-assembler-not {\mlbzx?\M} } } */
+/* { dg-final { scan-assembler-not {\mmtvsrwz\M} } } */
--git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
new file mode 100644
index 00000000000..5351b009090
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+ SImode and fold zero extension into the load. */
+
+#include <altivec.h>
+
+void
+extract_uns_v4si_0 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (*p, 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v4si_1 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (*p, 1);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v4si_element_0_index_4 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (p[4], 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v4si_element_3_index_4 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (p[4], 3); /* Element 3, matching the name. */
+ __asm__ (" # %x0" : "+wa" (u)); /* "+wa" asks for U in a VSX register. */
+
+ *q = u;
+ return;
+}
--git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..13e881f1e22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+ HImode into a vector register and store it without using a direct move
+ operation. */
+
+#include <altivec.h>
+
+void
+extract_uns_v8hi_0 (vector unsigned short *p, unsigned short *q)
+{
+ unsigned short u = vec_extract (*p, 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v8hi_1 (vector unsigned short *p, unsigned short *q)
+{
+ unsigned short u = vec_extract (*p, 1);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v8hi_element_0_index_4 (vector unsigned short *p,
+ unsigned short *q)
+{
+ unsigned short u = vec_extract (p[4], 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v8hi_element_3_index_4 (vector unsigned short *p,
+ unsigned short *q)
+{
+ unsigned short u = vec_extract (p[4], 3); /* Element 3, matching the name. */
+ __asm__ (" # %x0" : "+wa" (u)); /* "+wa" asks for U in a VSX register. */
+
+ *q = u;
+ return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsihzx\M} 4 } } */
+/* { dg-final { scan-assembler-not {\mlhzx?\M} } } */
+/* { dg-final { scan-assembler-not {\mmtvsrwz\M} } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
* [gcc(refs/users/meissner/heads/work124)] Add alternatives for vec_extract with constant element loading from memory.
@ 2023-07-12 2:29 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-07-12 2:29 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:a546c7fedf4af308adb26083571882d60c078ec0
commit a546c7fedf4af308adb26083571882d60c078ec0
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Jul 11 22:28:56 2023 -0400
Add alternatives for vec_extract with constant element loading from memory.
This patch expands the alternatives for doing vec_extract of V4SI, V8HI, and
V16QI vectors with a constant offset when the vector is in memory. If the
element number is 0 or we are using offsettable addressing for loading up GPR
registers we don't need to allocate a temporary base register. We can fold the
offset from the vec_extract into the normal address.
I also added alternatives to load the values into vector registers. If we load
the value into vector registers, we require X-form addressing.
In general, loading up small integer values with vec_extract into the vector
registers explicitly is likely not done that much. However, this will be needed
in later patches when we want to combine loading up a small integer value into a
vector register with sign/zero extension. This happens when we want to do a
vec_extract of a small integer value and convert it to floating point.
2023-07-11 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
(vsx_extract_<mode>_load): Add more alternatives for memory options.
Allow the load to load up vector registers if needed.
gcc/testsuite/
* gcc.target/powerpc/vec-extract-mem-char-1.c: New test.
* gcc.target/powerpc/vec-extract-mem-int-1.c: New test.
* gcc.target/powerpc/vec-extract-mem-short-1.c: New test.
Diff:
---
gcc/config/rs6000/vsx.md | 38 +++++++++++----
| 55 ++++++++++++++++++++++
| 48 +++++++++++++++++++
| 55 ++++++++++++++++++++++
4 files changed, 187 insertions(+), 9 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 799d77cbad8..710d68fc0b9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
(V8HI "v")
(V4SI "wa")])
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+ (V8HI "p9v")
+ (V4SI "p8v")])
+
;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
@@ -4027,23 +4033,37 @@
}
[(set_attr "type" "mfvsr")])
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number. For vector registers, we require X-form addressing.
+;; Alternatives:
+;; Reg: Element: Cpu: Addr: need scratch
+;; 1: GPR 0 any normal address no
+;; 2: GPR 1-n any offsettable address no
+;; 3: GPR 1-n any single register yes
+;; 4: wa/v 0 p8/p9 reg+reg or reg no
+;; 5: wa/v 1-n p8/p9 single register yes
(define_insn_and_split "*vsx_extract_<mode>_load"
- [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+ [(set (match_operand:<VEC_base> 0 "register_operand"
+ "=r, r, r, wa, wa")
(vec_select:<VEC_base>
- (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
- (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
- (clobber (match_scratch:DI 3 "=&b"))]
+ (match_operand:VSX_EXTRACT_I 1 "memory_operand"
+ "m, o, m, Z, Q")
+ (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>"
+ "O, n, n, O, n")])))
+ (clobber (match_scratch:DI 3
+ "=X, X, &b, X, &b"))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 4))]
{
- operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
- operands[3], <VEC_base>mode);
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+ operands[2], operands[3],
+ <VEC_base>mode);
}
- [(set_attr "type" "load")
- (set_attr "length" "8")])
+ [(set_attr "type" "load, load, load, fpload, fpload")
+ (set_attr "length" "*, *, 8, *, 8")
+ (set_attr "isa" "*, *, *, <VSX_EX_ISA>, <VSX_EX_ISA>")])
;; Variable V16QI/V8HI/V4SI extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
--git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..e4f87f18a35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+ QImode into a vector register and store it without using a direct move
+ operation. */
+
+#include <altivec.h>
+
+void
+extract_uns_v16qi_0 (vector unsigned char *p, unsigned char *q)
+{
+ unsigned char u = vec_extract (*p, 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v16qi_1 (vector unsigned char *p, unsigned char *q)
+{
+ unsigned char u = vec_extract (*p, 1);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v16qi_element_0_index_4 (vector unsigned char *p,
+ unsigned char *q)
+{
+ unsigned char u = vec_extract (p[4], 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v16qi_element_3_index_4 (vector unsigned char *p,
+ unsigned char *q)
+{
+ unsigned char u = vec_extract (p[4], 3); /* Element 3, matching the name. */
+ __asm__ (" # %x0" : "+wa" (u)); /* "+wa" asks for U in a VSX register. */
+
+ *q = u;
+ return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsibzx\M} 4 } } */
+/* { dg-final { scan-assembler-not {\mlbzx?\M} } } */
+/* { dg-final { scan-assembler-not {\mmtvsrwz\M} } } */
--git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
new file mode 100644
index 00000000000..5351b009090
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+ SImode and fold zero extension into the load. */
+
+#include <altivec.h>
+
+void
+extract_uns_v4si_0 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (*p, 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v4si_1 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (*p, 1);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v4si_element_0_index_4 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (p[4], 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v4si_element_3_index_4 (vector unsigned int *p, unsigned int *q)
+{
+ unsigned int u = vec_extract (p[4], 3); /* Element 3, matching the name. */
+ __asm__ (" # %x0" : "+wa" (u)); /* "+wa" asks for U in a VSX register. */
+
+ *q = u;
+ return;
+}
--git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..13e881f1e22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+ HImode into a vector register and store it without using a direct move
+ operation. */
+
+#include <altivec.h>
+
+void
+extract_uns_v8hi_0 (vector unsigned short *p, unsigned short *q)
+{
+ unsigned short u = vec_extract (*p, 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v8hi_1 (vector unsigned short *p, unsigned short *q)
+{
+ unsigned short u = vec_extract (*p, 1);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v8hi_element_0_index_4 (vector unsigned short *p,
+ unsigned short *q)
+{
+ unsigned short u = vec_extract (p[4], 0);
+ __asm__ (" # %x0" : "+wa" (u));
+
+ *q = u;
+ return;
+}
+
+void
+extract_uns_v8hi_element_3_index_4 (vector unsigned short *p,
+ unsigned short *q)
+{
+ unsigned short u = vec_extract (p[4], 3); /* Element 3, matching the name. */
+ __asm__ (" # %x0" : "+wa" (u)); /* "+wa" asks for U in a VSX register. */
+
+ *q = u;
+ return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsihzx\M} 4 } } */
+/* { dg-final { scan-assembler-not {\mlhzx?\M} } } */
+/* { dg-final { scan-assembler-not {\mmtvsrwz\M} } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-07-12 3:08 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-12 3:08 [gcc(refs/users/meissner/heads/work124)] Add alternatives for vec_extract with constant element loading from memory Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2023-07-12 2:29 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).