public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work124)] Add alternatives for vec_extract with constant element loading from memory.
@ 2023-07-12  2:29 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-07-12  2:29 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a546c7fedf4af308adb26083571882d60c078ec0

commit a546c7fedf4af308adb26083571882d60c078ec0
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Jul 11 22:28:56 2023 -0400

    Add alternatives for vec_extract with constant element loading from memory.
    
    This patch expands the alternatives for doing vec_extract of V4SI, V8HI, and
    V16QI vectors with a constant offset when the vector is in memory.  If the
    element number is 0 or we are using offsettable addressing for loading up GPR
    registers we don't need to allocate a temporary base register.  We can fold the
    offset from the vec_extract into the normal address.
    
    I also added alternatives to load the values into vector registers.  If we load
    the value into vector registers, we require X-form addressing.
    
    In general, loading up small integer values with vec_extract into the vector
    registers explicitly is likely not done that much.  However, this will be needed
    in later patches when we want to combine loading up a small integer value into a
    vector register with sign/zero extension.  This happens when we want to do a
    vec_extract of a small integer value and convert it to floating point.
    
    2023-07-11   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Add more alternatives for memory options.
            Allow the load to load up vector registers if needed.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-char-1.c: New test.
            * gcc.target/powerpc/vec-extract-mem-int-1.c: New test.
            * gcc.target/powerpc/vec-extract-mem-short-1.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 38 +++++++++++----
 .../gcc.target/powerpc/vec-extract-mem-char-1.c    | 55 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-1.c     | 48 +++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-short-1.c   | 55 ++++++++++++++++++++++
 4 files changed, 187 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 799d77cbad8..710d68fc0b9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -4027,23 +4033,37 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing.
+;; Alternatives:
+;;       Reg:  Element:  Cpu:   Addr:                 need scratch
+;;    1: GPR   0         any    normal address        no
+;;    2: GPR   1-3       any    offsettable address   no
+;;    3: GPR   1-3       any    single register       yes
+;;    4: wa/v  0         p8/p9  reg+reg or reg        no
+;;    5: wa/v  1-3       p8/p9  single register       yes
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand"
+			"=r,  r,  r, wa, wa")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand"
+			"m,   o,  m,  Z,  Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>"
+			"O,   n,  n,  O,  n")])))
+   (clobber (match_scratch:DI 3
+			"=X,  X, &b,  X, &b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type"   "load, load, load, fpload,       fpload")
+   (set_attr "length" "*,    *,    8,    *,            8")
+   (set_attr "isa"    "*,    *,    *,    <VSX_EX_ISA>, <VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..e4f87f18a35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   QImode into a vector register and store it without using a direct move
+   operation.  */
+
+#include <altivec.h>
+
+void
+extract_uns_v16qi_0 (vector unsigned char *p, unsigned char *q)
+{
+  unsigned char u = vec_extract (*p, 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v16qi_1 (vector unsigned char *p, unsigned char *q)
+{
+  unsigned char u = vec_extract (*p, 1);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v16qi_element_0_index_4 (vector unsigned char *p,
+				    unsigned char *q)
+{
+  unsigned char u = vec_extract (p[4], 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v16qi_element_3_index_4 (vector unsigned char *p,
+				    unsigned char *q)
+{
+  unsigned char u = vec_extract (p[4], 3);	/* Element 3 of p[4].  */
+  __asm__ (" # %x0" : "+wa" (u));		/* Force U into a VSX reg.  */
+
+  *q = u;
+  return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsibzx\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mlbzx?\M}      } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwz\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
new file mode 100644
index 00000000000..5351b009090
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   SImode and fold zero extension into the load.  */
+
+#include <altivec.h>
+
+void
+extract_uns_v4si_0 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (*p, 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v4si_1 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (*p, 1);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v4si_element_0_index_4 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (p[4], 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v4si_element_3_index_4 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (p[4], 3);	/* Element 3 of p[4].  */
+  __asm__ (" # %x0" : "+wa" (u));		/* Force U into a VSX reg.  */
+
+  *q = u;
+  return;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..13e881f1e22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode into a vector register and store it without using a direct move
+   operation.  */
+
+#include <altivec.h>
+
+void
+extract_uns_v8hi_0 (vector unsigned short *p, unsigned short *q)
+{
+  unsigned short u = vec_extract (*p, 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v8hi_1 (vector unsigned short *p, unsigned short *q)
+{
+  unsigned short u = vec_extract (*p, 1);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v8hi_element_0_index_4 (vector unsigned short *p,
+				    unsigned short *q)
+{
+  unsigned short u = vec_extract (p[4], 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v8hi_element_3_index_4 (vector unsigned short *p,
+				    unsigned short *q)
+{
+  unsigned short u = vec_extract (p[4], 3);	/* Element 3 of p[4].  */
+  __asm__ (" # %x0" : "+wa" (u));		/* Force U into a VSX reg.  */
+
+  *q = u;
+  return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsihzx\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mlhzx?\M}      } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwz\M}    } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work124)] Add alternatives for vec_extract with constant element loading from memory.
@ 2023-07-12  3:08 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-07-12  3:08 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:24ba160aa963e79ce3b6a60b7086d654e5f58f68

commit 24ba160aa963e79ce3b6a60b7086d654e5f58f68
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Jul 11 23:08:13 2023 -0400

    Add alternatives for vec_extract with constant element loading from memory.
    
    This patch expands the alternatives for doing vec_extract of V4SI, V8HI, and
    V16QI vectors with a constant offset when the vector is in memory.  If the
    element number is 0 or we are using offsettable addressing for loading up GPR
    registers we don't need to allocate a temporary base register.  We can fold the
    offset from the vec_extract into the normal address.
    
    I also added alternatives to load the values into vector registers.  If we load
    the value into vector registers, we require X-form addressing.
    
    I added the VSX_EX_ISA mode attribute to distinguish that we can load 32-bit
    integers on a power8 system to vector registers, but we need a power9 system to
    be able to load 8-bit or 16-bit integers.
    
    In general, loading up small integer values with vec_extract into the vector
    registers explicitly is likely not done that much.  However, this will be needed
    in later patches when we want to combine loading up a small integer value into a
    vector register with sign/zero extension.  This happens when we want to do a
    vec_extract of a small integer value and convert it to floating point.
    
    2023-07-11   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Add more alternatives for memory options.
            Allow the load to load up vector registers if needed.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-char-1.c: New test.
            * gcc.target/powerpc/vec-extract-mem-int-1.c: New test.
            * gcc.target/powerpc/vec-extract-mem-short-1.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 41 ++++++++++++----
 .../gcc.target/powerpc/vec-extract-mem-char-1.c    | 55 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-1.c     | 48 +++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-short-1.c   | 55 ++++++++++++++++++++++
 4 files changed, 190 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 799d77cbad8..f13fb3afc8e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -4027,23 +4033,40 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing if the offset is
+;; 0, or single register addressing if we need to load the offset into a
+;; temporary register.
+;;
+;; Alternatives:
+;;       Reg:   Element:  Cpu:   Addr:                 need scratch
+;;    1: GPR    0         any    normal address        no
+;;    2: GPR    1-n       any    offsettable address   no
+;;    3: GPR    1-n       any    single register       yes
+;;    4: vector 0         p8/p9  reg+reg or reg        no
+;;    5: vector 1-n       p8/p9  single register       yes
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand"
+			"=r,  r,  r, wa, wa")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand"
+			"m,   o,  m,  Z,  Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>"
+			"O,   n,  n,  O,  n")])))
+   (clobber (match_scratch:DI 3
+			"=X,  X, &b,  X, &b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type"   "load, load, load, fpload,       fpload")
+   (set_attr "length" "*,    *,    8,    *,            8")
+   (set_attr "isa"    "*,    *,    *,    <VSX_EX_ISA>, <VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..e4f87f18a35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   QImode into a vector register and store it without using a direct move
+   operation.  */
+
+#include <altivec.h>
+
+void
+extract_uns_v16qi_0 (vector unsigned char *p, unsigned char *q)
+{
+  unsigned char u = vec_extract (*p, 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v16qi_1 (vector unsigned char *p, unsigned char *q)
+{
+  unsigned char u = vec_extract (*p, 1);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v16qi_element_0_index_4 (vector unsigned char *p,
+				    unsigned char *q)
+{
+  unsigned char u = vec_extract (p[4], 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v16qi_element_3_index_4 (vector unsigned char *p,
+				    unsigned char *q)
+{
+  unsigned char u = vec_extract (p[4], 3);	/* Element 3 of p[4].  */
+  __asm__ (" # %x0" : "+wa" (u));		/* Force U into a VSX reg.  */
+
+  *q = u;
+  return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsibzx\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mlbzx?\M}      } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwz\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
new file mode 100644
index 00000000000..5351b009090
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   SImode and fold zero extension into the load.  */
+
+#include <altivec.h>
+
+void
+extract_uns_v4si_0 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (*p, 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v4si_1 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (*p, 1);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v4si_element_0_index_4 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (p[4], 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v4si_element_3_index_4 (vector unsigned int *p, unsigned int *q)
+{
+  unsigned int u = vec_extract (p[4], 3);	/* Element 3 of p[4].  */
+  __asm__ (" # %x0" : "+wa" (u));		/* Force U into a VSX reg.  */
+
+  *q = u;
+  return;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..13e881f1e22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode into a vector register and store it without using a direct move
+   operation.  */
+
+#include <altivec.h>
+
+void
+extract_uns_v8hi_0 (vector unsigned short *p, unsigned short *q)
+{
+  unsigned short u = vec_extract (*p, 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v8hi_1 (vector unsigned short *p, unsigned short *q)
+{
+  unsigned short u = vec_extract (*p, 1);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v8hi_element_0_index_4 (vector unsigned short *p,
+				    unsigned short *q)
+{
+  unsigned short u = vec_extract (p[4], 0);
+  __asm__ (" # %x0" : "+wa" (u));
+
+  *q = u;
+  return;
+}
+
+void
+extract_uns_v8hi_element_3_index_4 (vector unsigned short *p,
+				    unsigned short *q)
+{
+  unsigned short u = vec_extract (p[4], 3);	/* Element 3 of p[4].  */
+  __asm__ (" # %x0" : "+wa" (u));		/* Force U into a VSX reg.  */
+
+  *q = u;
+  return;
+}
+
+/* { dg-final { scan-assembler-times {\mlxsihzx\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mlhzx?\M}      } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwz\M}    } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-12  3:08 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-12  2:29 [gcc(refs/users/meissner/heads/work124)] Add alternatives for vec_extract with constant element loading from memory Michael Meissner
2023-07-12  3:08 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).