public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-27 22:03 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-27 22:03 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5a1c1d17fa0290a08da67ffdefbf07b05cb476b6

commit 5a1c1d17fa0290a08da67ffdefbf07b05cb476b6
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 27 18:03:33 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.  It also
    will be split before register allocation.
    
    2023-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.  Do insn
            split before register allocation.

Diff:
---
 gcc/config/rs6000/vsx.md | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3a4b8cdb02a..18429a3c98a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -3971,23 +3977,26 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing.
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand" "=r,<VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m,Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
+   (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type" "load,fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,<VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-29  3:11 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-29  3:11 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:404f0dd14879585ae9625c3714d7f58190074af3

commit 404f0dd14879585ae9625c3714d7f58190074af3
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 23:11:24 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.
    
    This patch also adds support for optimizing element number 0 so that it does
    not need a base register temporary.  Likewise, if we have an offsettable
    address, we don't need to allocate a scratch register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.  Add
            optimizations for loading up element 0 and/or with an offsettable
            address.

Diff:
---
 gcc/config/rs6000/vsx.md | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f42793fe012..0118d4788cb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -4008,23 +4014,37 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing.
+;; Alternatives:
+;;       Reg:  Ele:  Cpu:   Addr:                 need scratch
+;;    1: GPR   0     any    normal address        no
+;;    2: GPR   1-3   any    offsettable address   no
+;;    3: GPR   1-3   any    single register       yes
+;;    4: wa/v  0     p8/p9  reg+reg or reg        no
+;;    5: wa/v  1-3   p8/p9  single register       yes
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand"
+			"=r,     r,     r,      <VSX_EX>,     <VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand"
+			"m,      o,     m,      Z,            Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>"
+			"O,      n,     n,      O,            n")])))
+   (clobber (match_scratch:DI 3
+			"=X,     X,     &b,     X,            &b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type"	"load,   load,  load,   fpload,       fpload")
+   (set_attr "length"	"*,      *,     8,      *,            8")
+   (set_attr "isa"	"*,      *,     *,      <VSX_EX_ISA>, <VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-28 22:56 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-28 22:56 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1941eb946d423b0ffffb66eef1d94377a559d94c

commit 1941eb946d423b0ffffb66eef1d94377a559d94c
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 18:55:39 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.
    
    This patch also adds support for optimizing element number 0 so that it does
    not need a base register temporary.  Likewise, if we have an offsettable
    address, we don't need to allocate a scratch register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.  Add
            optimizations for loading up element 0 and/or with an offsettable
            address.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 84 ++++++++++++++++++++++
 .../gcc.target/powerpc/vec-extract-mem-char-1.c    | 35 +++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-1.c     | 35 +++++++++
 .../gcc.target/powerpc/vec-extract-mem-int-2.c     | 36 ++++++++++
 .../gcc.target/powerpc/vec-extract-mem-short-1.c   | 35 +++++++++
 .../gcc.target/powerpc/vec-extract-mem-short-2.c   | 36 ++++++++++
 6 files changed, 261 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 73b1e8896fd..f25b29855f4 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4023,6 +4023,90 @@
    (set_attr "length" "4,4,8,4,8")
    (set_attr "isa" "*,*,*,<VSX_EX_ISA>,<VSX_EX_ISA>")])
 
+;; Fold extracting a V4SI element with a constant element with either sign or
+;; zero extension to DImode.
+;; Alternatives:
+;;   1: GPR, element 0, normal address, no modification
+;;   2: GPR, element 0-3, offsettable address
+;;   3: GPR, element 0-3, single register (offset to op[3])
+;;   4: VSX, element 0, X-form address, no modification
+;;   5: VSX, element 0-3, single register (offset to op[3])
+(define_insn_and_split "*vsx_extract_v4si_load_to_<su>di"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,wa,wa")
+	(any_extend:DI
+	 (vec_select:SI
+	  (match_operand:V4SI 1 "memory_operand" "m,o,m,Z,Q")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "O,n,n,O,n")]))))
+   (clobber (match_scratch:DI 3 "=X,X,&b,X,&b"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(any_extend:DI (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   SImode);
+}
+  [(set_attr "type" "load,load,load,fpload,fpload")
+   (set_attr "length" "*,*,8,*,8")])
+
+;; Fold extracting a V16QI/V8HI element with a constant element with zero
+;; extension to either DImode or SImode.
+;; Alternatives:
+;;   1: GPR, element 0, normal address, no modification
+;;   2: GPR, element 0-3, offsettable address
+;;   3: GPR, element 0-3, single register (offset to op[3])
+;;   4: VMX, element 0, X-form address, no modification
+;;   5: VMX, element 0-3, single register (offset to op[3])
+(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I2:mode>_load_to_u<GPR:mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,v,v")
+	(zero_extend:GPR
+	 (vec_select:<VEC_base>
+	  (match_operand:VSX_EXTRACT_I2 1 "memory_operand"
+		"m,o,m,Z,Q")
+	  (parallel [(match_operand:QI 2 "const_int_operand" "O,n,n,O,n")]))))
+   (clobber (match_scratch:DI 3 "=X,X,&b,X,&b"))]
+  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I2:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:GPR (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VSX_EXTRACT_I2:VEC_base>mode);
+}
+  [(set_attr "type" "load,load,load,fpload,fpload")
+   (set_attr "length" "*,*,8,*,8")
+   (set_attr "isa" "*,*,*,p9v,p9v")])
+
+;; Fold extracting a V8HI element with a constant element with sign extension
+;; to either DImode or SImode.
+;; Alternatives:
+;;   1: GPR, element 0, normal address, no modification
+;;   2: GPR, element 0-3, offsettable address
+;;   3: GPR, element 0-3, single register (offset to op[3])
+(define_insn_and_split "*vsx_extract_v8hi_load_to_s<mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r,r")
+	(sign_extend:GPR
+	 (vec_select:HI
+	  (match_operand:V8HI 1 "memory_operand" "m,o,m")
+	  (parallel [(match_operand:QI 2 "const_int_operand" "O,n,n")]))))
+   (clobber (match_scratch:DI 3 "=X,X,&b"))]
+  "VECTOR_MEM_VSX_P (V8HImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(sign_extend:GPR (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   HImode);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"
   [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,r")
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
new file mode 100644
index 00000000000..61f021ee99f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-char-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   QImode and fold zero extension into the load.  */
+
+#include <altivec.h>
+
+unsigned long long
+extract_uns_v16qi_element_0 (vector unsigned char *p)
+{
+  return vec_extract (*p, 0);		/* lbz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v16qi_element_1 (vector unsigned char *p)
+{
+  return vec_extract (*p, 1);          /* lbz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v16qi_element_0_index_4 (vector unsigned char *p)
+{
+  return vec_extract (p[4], 0);		/* lbz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v16qi_element_3_index_4 (vector unsigned char *p)
+{
+  return vec_extract (p[4], 3);		/* lbz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlbzx?\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mrlwinm\M}   } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
new file mode 100644
index 00000000000..e59ceae6866
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   SImode and fold zero extension into the load.  */
+
+#include <altivec.h>
+
+unsigned long long
+extract_uns_v4si_0 (vector unsigned int *p)
+{
+  return vec_extract (*p, 0);          /* lwz, no rldicl.  */
+}
+
+unsigned long long
+extract_uns_v4si_1 (vector unsigned int *p)
+{
+  return vec_extract (*p, 1);          /* lwz, no rldicl.  */
+}
+
+unsigned long long
+extract_uns_v4si_element_0_index_4 (vector unsigned int *p)
+{
+  return vec_extract (p[4], 0);		/* lwz, no rldicl.  */
+}
+
+unsigned long long
+extract_uns_v4si_element_3_index_4 (vector unsigned int *p)
+{
+  return vec_extract (p[4], 3);		/* lwz, no rldicl.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mrldicl\M}   } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c
new file mode 100644
index 00000000000..052371e72ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-int-2.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   SImode and fold sign extension into the load.  */
+
+#include <altivec.h>
+
+long long
+extract_sign_v4si_0 (vector int *p)
+{
+  return vec_extract (*p, 0);          /* lwa, no extsw.  */
+}
+
+long long
+extract_sign_v4si_1 (vector int *p)
+{
+  return vec_extract (*p, 1);          /* lwa, no extsw.  */
+}
+
+long long
+extract_sign_v4si_element_0_index_4 (vector int *p)
+{
+  return vec_extract (p[4], 0);		/* lwa, no extsw.  */
+}
+
+long long
+extract_sign_v4si_element_3_index_4 (vector int *p)
+{
+  return vec_extract (p[4], 3);		/* lwa, no extsw.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlwax?\M} 4 } } */
+/* { dg-final { scan-assembler-not   {\mlwzx?\M}   } } */
+/* { dg-final { scan-assembler-not   {\mextsw\M}   } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
new file mode 100644
index 00000000000..65ae21b1a1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode and fold zero extension into the load.  */
+
+#include <altivec.h>
+
+unsigned long long
+extract_uns_v8hi_0 (vector unsigned short *p)
+{
+  return vec_extract (*p, 0);          /* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_1 (vector unsigned short *p)
+{
+  return vec_extract (*p, 1);          /* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_element_0_index_4 (vector unsigned short *p)
+{
+  return vec_extract (p[4], 0);		/* lhz, no rlwinm.  */
+}
+
+unsigned long long
+extract_uns_v8hi_element_3_index_4 (vector unsigned short *p)
+{
+  return vec_extract (p[4], 3);		/* lhz, no rlwinm.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlhzx?\M}  4 } } */
+/* { dg-final { scan-assembler-not   {\mrlwinm\M}   } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-2.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-2.c
new file mode 100644
index 00000000000..6a2f23cfc57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-short-2.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   HImode and fold sign extension into the load.  */
+
+#include <altivec.h>
+
+long long
+extract_sign_v8hi_0 (vector short *p)
+{
+  return vec_extract (*p, 0);          /* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_1 (vector short *p)
+{
+  return vec_extract (*p, 1);          /* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_element_0_index_4 (vector short *p)
+{
+  return vec_extract (p[4], 0);		/* lha, no extsh.  */
+}
+
+long long
+extract_sign_v8hi_element_3_index_4 (vector short *p)
+{
+  return vec_extract (p[4], 3);		/* lha, no extsh.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlhax?\M} 4 } } */
+/* { dg-final { scan-assembler-not   {\mlhzx?\M}   } } */
+/* { dg-final { scan-assembler-not   {\mextsh\M}   } } */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-28 22:36 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-28 22:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0939f74cd682d703c4e799c3a33a3f76633ce5da

commit 0939f74cd682d703c4e799c3a33a3f76633ce5da
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 18:36:25 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.
    
    This patch also adds support for optimizing element number 0 so that it does
    not need a base register temporary.  Likewise, if we have an offsettable
    address, we don't need to allocate a scratch register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.  Add
            optimizations for loading up element 0 and/or with an offsettable
            address.

Diff:
---
 gcc/config/rs6000/vsx.md | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 5fc2510c50b..73b1e8896fd 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -3988,23 +3994,34 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing.
+;; Alternatives:
+;;  1: GPR, element 0, normal address
+;;  2: GPR, element 0-n, offsettable address (fold offset)
+;;  3: GPR, element 0-n, single register (op[3] has offset)
+;;  4: FP/VMX, element 0, X-form address
+;;  5: FP/VMX, element 0-n, single register (op[3] has offset)
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand"
+		"=r,r,r,<VSX_EX>,<VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m,o,m,Z,Q")
+	 (parallel
+	  [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "O,n,n,O,n")])))
+   (clobber (match_scratch:DI 3 "=X,X,&b,X,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type" "load,load,load,fpload,fpload")
+   (set_attr "length" "4,4,8,4,8")
+   (set_attr "isa" "*,*,*,<VSX_EX_ISA>,<VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-28 18:13 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-28 18:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:164f119d3aa1b009de56a5c69d103eb97dee8b23

commit 164f119d3aa1b009de56a5c69d103eb97dee8b23
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 14:13:33 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.  It also
    will be split before register allocation.  In doing so, I restricted the
    optimization to only occur if the memory address did not use an Altivec style
    address with AND -16.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.  Do insn
            split before register allocation.  Restrict vector addresses to not use
            Altivec addressing.

Diff:
---
 gcc/config/rs6000/vsx.md | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 42336bbf36b..4747aaa07e5 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -3971,23 +3977,26 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing.
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand" "=r,<VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "non_altivec_memory_operand" "m,Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
+   (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type" "load,fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,<VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-28 18:08 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-28 18:08 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ab2977bdf39f57e12321b53c1dcb3fc3e1b1a5fa

commit ab2977bdf39f57e12321b53c1dcb3fc3e1b1a5fa
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 14:08:08 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.  It also
    will be split before register allocation.  In doing so, I restricted the
    optimization to only occur if the memory address did not use an Altivec style
    address with AND -16.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.  Do insn
            split before register allocation.  Restrict vector addresses to not use
            Altivec addressing.

Diff:
---
 gcc/config/rs6000/vsx.md | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 42336bbf36b..ecf1279c95b 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -3971,23 +3977,26 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  For vector registers, we require X-form addressing.
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand" "=r,<VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m,Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
+   (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type" "load,fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,<VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-25  1:54 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-25  1:54 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:51f1707cd47aeb16bd0a672210d1072dc0ebb362

commit 51f1707cd47aeb16bd0a672210d1072dc0ebb362
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Apr 24 21:53:54 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.  It also
    will be split before register allocation.
    
    This patch also adds support to rs6000_adjust_vec_address to allow it to be run
    before register allocation.
    
    2023-04-24   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 56 +++++++++++++++++++++++++++++++--------------
 gcc/config/rs6000/vsx.md    | 31 +++++++++++++++++--------
 2 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..65295dbaf81 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 04877dd51f6..0b7b26c2e2f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -3971,23 +3977,28 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  If the element number is 0 or the address is offsettable, we don't
+;; need a temporary base register.  For vector registers, we require X-form
+;; addressing.
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand" "=r,r,r,<VSX_EX>,<VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m,o,Q,Z,Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "O,n,n,O,n")])))
+   (clobber (match_scratch:DI 3 "=X,X,&b,X,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
-					   operands[3], <VEC_base>mode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
+					   operands[2], operands[3],
+					   <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type" "load,load,load,fpload,fpload")
+   (set_attr "length" "*,*,8,*,8")
+   (set_attr "isa" "*,*,*,<VSX_EX_ISA>,<VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers.
@ 2023-04-24 23:20 Michael Meissner
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Meissner @ 2023-04-24 23:20 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4e2ec9fda148e138b2e14170509edd62b7ab6874

commit 4e2ec9fda148e138b2e14170509edd62b7ab6874
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Apr 24 19:20:37 2023 -0400

    Allow constant element vec_extract to be loaded into vector registers.
    
    This patch allows vec_extract of V4SI, V8HI, and V16QI vector types with a
    constant element number to be loaded into vector registers directly.  The
    extract-from-memory insn is now also split before register allocation.
    
    This patch also adds support to rs6000_adjust_vec_address to allow it to be run
    before register allocation.
    
    2023-04-24   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (VSX_EX_ISA): New mode attribute.
            (vsx_extract_<mode>_load): Allow vector registers to be loaded.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 56 +++++++++++++++++++++++++++++++--------------
 gcc/config/rs6000/vsx.md    | 26 ++++++++++++++-------
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..65295dbaf81 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 04877dd51f6..b7759b77159 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -223,6 +223,12 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode attribute to give the isa constraint for accessing Altivec registers
+;; with vector extract and insert operations.
+(define_mode_attr VSX_EX_ISA [(V16QI "p9v")
+			      (V8HI  "p9v")
+			      (V4SI  "p8v")])
+
 ;; Mode iterator for binary floating types other than double to
 ;; optimize convert to that floating point type from an extract
 ;; of an integer type
@@ -3971,23 +3977,27 @@
 }
   [(set_attr "type" "mfvsr")])
 
-;; Optimize extracting a single scalar element from memory.
+;; Extract a V16QI/V8HI/V4SI element from memory with a constant element
+;; number.  If the element number is 0 or the address is offsettable, we don't
+;; need a temporary base register.  For vector registers, we require X-form
+;; addressing.
 (define_insn_and_split "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
+  [(set (match_operand:<VEC_base> 0 "register_operand" "=r,r,r,<VSX_EX>,<VSX_EX>")
 	(vec_select:<VEC_base>
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
-	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
-   (clobber (match_scratch:DI 3 "=&b"))]
+	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m,o,Q,Z,Q")
+	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "O,n,n,O,n")])))
+   (clobber (match_scratch:DI 3 "=X,X,&b,X,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
 					   operands[3], <VEC_base>mode);
 }
-  [(set_attr "type" "load")
-   (set_attr "length" "8")])
+  [(set_attr "type" "load,load,load,fpload,fpload")
+   (set_attr "length" "*,*,8,*,8")
+   (set_attr "isa" "*,*,*,<VSX_EX_ISA>,<VSX_EX_ISA>")])
 
 ;; Variable V16QI/V8HI/V4SI extract from a register
 (define_insn_and_split "vsx_extract_<mode>_var"

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-04-29  3:11 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-27 22:03 [gcc(refs/users/meissner/heads/work119)] Allow constant element vec_extract to be loaded into vector registers Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2023-04-29  3:11 Michael Meissner
2023-04-28 22:56 Michael Meissner
2023-04-28 22:36 Michael Meissner
2023-04-28 18:13 Michael Meissner
2023-04-28 18:08 Michael Meissner
2023-04-25  1:54 Michael Meissner
2023-04-24 23:20 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).