public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
@ 2023-04-27 21:25 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-27 21:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2059b7d7a1e1f6244ca73e2ba45e5ab1c44ba159

commit 2059b7d7a1e1f6244ca73e2ba45e5ab1c44ba159
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 27 17:24:59 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch changes the splits so that they can be done before register
    allocation.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading up a SF element and
    converting it to double.
    
    2023-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow splitting
            before register allocation.  Fix ISA for loading up SFmode values to
            traditional Altivec registers.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 26 +++++++++++++++++--
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..695b5cbd126 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3565,7 +3566,28 @@
 }
   [(set_attr "type" "fpload,fpload,fpload,load")
    (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+   (set_attr "isa" "*,p8v,p9v,*")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand" "=f,v,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,Z,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v,p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
@ 2023-04-29  2:40 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-29  2:40 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b43b46429c8492deeed191c539e114840ef6dc2a

commit b43b46429c8492deeed191c539e114840ef6dc2a
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 22:40:37 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading up a SF element and
    converting it to double.
    
    It also removes the '?' from the 'r' constraint so that if the SFmode is needed
    in a GPR, it doesn't have to load it to the vector unit and then store it.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Fix ISA for loading
            up SFmode values with x-form addresses.  Remove ? from 'r' constraint.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 73 +++++++++++++++++++---
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 +++++++++
 2 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..9d3b3441ed5 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,12 +3549,33 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
+;; Alternatives:
+;;       Reg:  Ele:  Cpu: Addr:                 need scratch
+;;    1: FPR   0     any  normal address        no
+;;    2: FPR   1-3   any  offsettable address   no
+;;    3: FPR   1-3   any  single register       yes
+;;    4: VMX   0     p8   reg+reg or reg        no
+;;    5: VMX   1-3   p8   single register       yes
+;;    6: VMX   0     p9   normal address        no
+;;    7: VMX   1-3   p9   offsettable address   no
+;;    8: GPR   0     any  normal address        no
+;;    9: GPR   0-3   any  offsettable address   no
+;;   10: GPR   0-3   any  single register       yes
 (define_insn_and_split "*vsx_extract_v4sf_load"
-  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
+  [(set (match_operand:SF 0 "register_operand"
+		"=f,     f,      f,      v,      v,      v,      v,
+		 r,      r,      r")
 	(vec_select:SF
-	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
+	 (match_operand:V4SF 1 "memory_operand"
+		"m,      o,      Q,      Z,      Q,      m,      o,
+		 m,      o,      Q")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand"
+		"O,      n,      n,      O,      n,      O,      n,
+		 O,      n,      n")])))
+   (clobber (match_scratch:P 3
+		 "=X,    X,      &b,     X,      &b,     X,      X,
+		  X,      X,      &b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
   "&& reload_completed"
@@ -3563,9 +3584,47 @@
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
 					   operands[3], SFmode);
 }
-  [(set_attr "type" "fpload,fpload,fpload,load")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+  [(set_attr "type"
+		"fpload, fpload, fpload, fpload, fpload, fpload, fpload,
+		 load,   load,   load")
+   (set_attr "isa"
+		"*,      *,      *,      p8v,    p8v,    p9v,    p9v,
+		 *,      *,      *")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+;; Alternatives:
+;;       Reg:  Ele:  Cpu: Addr:                 need scratch
+;;    1: FPR   0     any  normal address        no
+;;    2: FPR   1-3   any  offsettable address   no
+;;    3: FPR   1-3   any  single register       yes
+;;    4: VMX   0     p8   reg+reg or reg        no
+;;    5: VMX   1-3   p8   single register       yes
+;;    6: VMX   0     p9   normal address        no
+;;    7: VMX   1-3   p9   offsettable address   no
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand"
+		"=f,     f,      f,      v,      v,      v,      v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand"
+		"m,      o,      Q,      Z,      Q,      m,      o")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand"
+		"O,      n,      n,      O,      n,      O,      n")]))))
+   (clobber (match_scratch:P 3
+		 "=X,    X,      &b,     X,      &b,     X,      X"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type"
+		"fpload, fpload, fpload, fpload, fpload, fpload, fpload")
+   (set_attr "isa"
+		"*,      *,      *,      p8v,    p8v,    p9v,    p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
@ 2023-04-29  0:02 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-29  0:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:590d55ae10495faf15ffaf122205d095eb3aa440

commit 590d55ae10495faf15ffaf122205d095eb3aa440
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 20:01:43 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading up a SF element and
    converting it to double.
    
    This patch expands the alternatives, so that if the element number is 0 or the
    address is offsettable, we don't need a scratch register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Fix ISA for loading
            up SFmode values with x-form addresses.  Drill down on the alternatives
            to prevent allocating a scratch register if we don't need it.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 73 +++++++++++++++++++---
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 +++++++++
 2 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7121f50a449..ce00e8a1db6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3555,12 +3555,33 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
+;; Alternatives:
+;;       Reg:  Ele:  Cpu: Addr:                 need scratch
+;;    1: FPR   0     any  normal address        no
+;;    2: FPR   1-3   any  offsettable address   no
+;;    3: FPR   1-3   any  single register       yes
+;;    4: VMX   0     p8   reg+reg or reg        no
+;;    5: VMX   1-3   p8   single register       yes
+;;    6: VMX   0     p9   normal address        no
+;;    7: VMX   1-3   p9   offsettable address   no
+;;    8: GPR   0     any  normal address        no
+;;    9: GPR   0-3   any  offsettable address   no
+;;   10: GPR   0-3   any  single register       yes
 (define_insn_and_split "*vsx_extract_v4sf_load"
-  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
+  [(set (match_operand:SF 0 "register_operand"
+		"=f,     f,      f,      v,      v,      v,      v,
+		 ?r,     ?r,    ?r")
 	(vec_select:SF
-	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
+	 (match_operand:V4SF 1 "memory_operand"
+		"m,      o,      Q,      Z,      Q,      m,      o,
+		 m,      o,      Q")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand"
+		"O,      n,      n,      O,      n,      O,      n,
+		 O,      n,      n")])))
+   (clobber (match_scratch:P 3
+		 "=X,    X,      &b,     X,      &b,     X,      X,
+		  X,      X,      &b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
   "&& reload_completed"
@@ -3569,9 +3590,47 @@
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
 					   operands[3], SFmode);
 }
-  [(set_attr "type" "fpload,fpload,fpload,load")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+  [(set_attr "type"
+		"fpload, fpload, fpload, fpload, fpload, fpload, fpload,
+		 load,   load,   load")
+   (set_attr "isa"
+		"*,      *,      *,      p8v,    p8v,    p9v,    p9v,
+		 *,      *,      *")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+;; Alternatives:
+;;       Reg:  Ele:  Cpu: Addr:                 need scratch
+;;    1: FPR   0     any  normal address        no
+;;    2: FPR   1-3   any  offsettable address   no
+;;    3: FPR   1-3   any  single register       yes
+;;    4: VMX   0     p8   reg+reg or reg        no
+;;    5: VMX   1-3   p8   single register       yes
+;;    6: VMX   0     p9   normal address        no
+;;    7: VMX   1-3   p9   offsettable address   no
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand"
+		"=f,     f,      f,      v,      v,      v,      v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand"
+		"m,      o,      Q,      Z,      Q,      m,      o")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand"
+		"O,      n,      n,      O,      n,      O,      n")]))))
+   (clobber (match_scratch:P 3
+		 "=X,    X,      &b,     X,      &b,     X,      X"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type"
+		"fpload, fpload, fpload, fpload, fpload, fpload, fpload")
+   (set_attr "isa"
+		"*,      *,      *,      p8v,    p8v,    p9v,    p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
@ 2023-04-28 22:12 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-28 22:12 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:51302d4ec98a7f197d350785dfb0ed0fc1ce6dad

commit 51302d4ec98a7f197d350785dfb0ed0fc1ce6dad
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 18:09:23 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading up a SF element and
    converting it to double.
    
    This patch expands the alternatives, so that if the element number is 0 or the
    address is offsettable, we don't need a scratch register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Fix ISA for loading
            up SFmode values with x-form addresses.  Drill down on the alternatives
            to prevent allocating a scratch register if we don't need it.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 53 +++++++++++++++++++---
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 ++++++++++++
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..4777c870514 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,12 +3549,22 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
+;; Alternatives:
+;;    1: Load FPR, index 0, normal address, no address change.
+;;    2: Load FPR, index 0-3, offsettable address, element folded into addr.
+;;    3: Load FPR, index 0-3, single register, offset in op[3].
+;;    4: Load VMX, index 0, x-form, power8, no address change.
+;;    5: Load VMX, index 0-3, single register, power8, offset in op[3].
+;;    6: Load VMX, index 0, normal address, power9, no address change.
+;;    7: Load VMX, index 0-3, offsettable address, power9, element in addr.
+;;    8: Load GPR, index 0-3, single register, offset in op[3].
 (define_insn_and_split "*vsx_extract_v4sf_load"
-  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
+  [(set (match_operand:SF 0 "register_operand" "=f,f,f,v,v,v,v,?r")
 	(vec_select:SF
-	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
+	 (match_operand:V4SF 1 "memory_operand" "m,o,Q,Z,Q,m,o,Q")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "O,n,n,O,n,O,n,n")])))
+   (clobber (match_scratch:P 3 "=X,X,&b,X,&b,X,X,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
   "&& reload_completed"
@@ -3563,9 +3573,38 @@
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
 					   operands[3], SFmode);
 }
-  [(set_attr "type" "fpload,fpload,fpload,load")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+  [(set_attr "type" "fpload,fpload,fpload,fpload,fpload,fpload,fpload,load")
+   (set_attr "length" "4,4,8,4,8,4,4,8")
+   (set_attr "isa" "*,*,*,p8v,p8v,p9v,p9v,*")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+;; Alternatives:
+;;    1: Load FPR, index 0, normal address, no address change.
+;;    2: Load FPR, index 0-3, offsettable address, element folded into addr.
+;;    3: Load FPR, index 0-3, single register, offset in op[3].
+;;    4: Load VMX, index 0, x-form, power8, no address change.
+;;    5: Load VMX, index 0-3, single register, power8, offset in op[3].
+;;    6: Load VMX, index 0, normal address, power9, no address change.
+;;    7: Load VMX, index 0-3, offsettable address, power9, element in addr.
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand" "=f,f,f,v,v,v,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,o,Q,Z,Q,m,o")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "O,n,n,O,n,O,n")]))))
+   (clobber (match_scratch:P 3 "=X,X,&b,X,&b,X,X"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "4,4,8,4,8,4,4")
+   (set_attr "isa" "*,*,*,p8v,p8v,p9v,p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
@ 2023-04-28 17:57 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-28 17:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:984b341d78ddbc4ed3ad90dad7cb607edfa1fd12

commit 984b341d78ddbc4ed3ad90dad7cb607edfa1fd12
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 28 13:57:19 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch changes the splits so that they can be done before register
    allocation.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading up a SF element and
    converting it to double.
    
    In order to do the splitting before register allocation, I modified the various
    vec_extract insns to allow the split to occur before register allocation.  This
    patch goes through the support function rs6000_adjust_vec_address and the
    functions it calls to allow them to be called before register allocation.  The
    places that take a scratch register will allocate a new pseudo register if they
    are passed a SCRATCH register.
    
    I also added a new predicate that checks if the operand is a normal memory
    address but not an Altivec vector address (i.e. with an AND -16).  These
    addresses are used in power8 as part of the vector swap optimization.  In the
    past, because we use the 'Q' constraint, ira/reload would handle the AND
    etc. so that the address was only a single register.
    
    2023-04-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/predicates.md (non_altivec_memory_operand): New
            predicate.
            * config/rs6000/rs6000.cc (get_vector_offset): Allow function to be
            called before register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow splitting
            before register allocation.  Fix ISA for loading up SFmode values to
            traditional Altivec registers.  Require that the memory being optimized
            does not use Altivec memory addresses.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/predicates.md                    | 10 ++++
 gcc/config/rs6000/rs6000.cc                        | 58 +++++++++++++++-------
 gcc/config/rs6000/vsx.md                           | 28 +++++++++--
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 +++++++++++
 4 files changed, 104 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..3b9265ef1c0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -957,6 +957,16 @@
   return memory_operand (op, mode);
 })
 
+;; Anything that matches memory_operand but does not match
+;; altivec_indexed_or_indirect_operand.  This is used by vec_extract memory
+;; optimizations.
+(define_predicate "non_altivec_memory_operand"
+  (match_code "mem")
+{
+  return (memory_operand (op, mode)
+	  && !altivec_indexed_or_indirect_operand (op, mode));
+})
+
 ;; Return 1 if the operand is a MEM with an indexed-form address.
 (define_special_predicate "indexed_address_mem"
   (match_test "(MEM_P (op)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..332cb862f54 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
-      emit_move_insn (base_tmp, addr);
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
+      emit_insn (gen_rtx_SET (base_tmp, addr));
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..ed4636f1e06 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,15 +3549,16 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
-	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
+	 (match_operand:V4SF 1 "non_altivec_memory_operand" "m,Z,m,m")
 	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3565,7 +3566,28 @@
 }
   [(set_attr "type" "fpload,fpload,fpload,load")
    (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+   (set_attr "isa" "*,p8v,p9v,*")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand" "=f,v,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "non_altivec_memory_operand" "m,Z,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v,p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers.
@ 2023-04-27 20:32 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-27 20:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5fde705eaf4633764f3dea56e8824675edafddac

commit 5fde705eaf4633764f3dea56e8824675edafddac
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 27 16:31:50 2023 -0400

    Optimize vec_extract of V4SF from memory with constant element numbers.
    
    This patch updates vec_extract of V4SF from memory with constant element
    numbers.
    
    This patch changes the splits so that they can be done before register
    allocation.
    
    This patch corrects the ISA for loading SF values to altivec registers to be
    power8 vector, and not power7.
    
    This patch adds a combiner pattern to combine loading an SF element and
    converting it to double.
    
    2023-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow splitting
            before register allocation.  Fix ISA for loading up SFmode values to
            traditional Altivec registers.
            (vsx_extract_v4sf_load_to_df): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vec-extract-mem-float-1.c: New file.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 26 +++++++++++++++++--
 .../gcc.target/powerpc/vec-extract-mem-float-1.c   | 29 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..695b5cbd126 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number.
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3565,7 +3566,28 @@
 }
   [(set_attr "type" "fpload,fpload,fpload,load")
    (set_attr "length" "8")
-   (set_attr "isa" "*,p7v,p9v,*")])
+   (set_attr "isa" "*,p8v,p9v,*")])
+
+;; V4SF extract from memory with constant element number and convert to DFmode.
+(define_insn_and_split "*vsx_extract_v4sf_load_to_df"
+  [(set (match_operand:DF 0 "register_operand" "=f,v,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,Z,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v,p9v")])
 
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
new file mode 100644
index 00000000000..4670e261ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-mem-float-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* Test to verify that the vec_extract with constant element numbers can load
+   float elements into a GPR register without doing a LFS/STFS.  */
+
+#include <altivec.h>
+
+void
+extract_v4sf_gpr_0 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 0);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+void
+extract_v4sf_gpr_1 (vector float *p, float *q)
+{
+  float x = vec_extract (*p, 1);
+  __asm__ (" # %0" : "+r" (x));		/* lwz, no lfs/stfs.  */
+  *q = x;
+}
+
+/* { dg-final { scan-assembler-times {\mlwzx?\M}               2 } } */
+/* { dg-final { scan-assembler-times {\mstw\M}                 2 } } */
+/* { dg-final { scan-assembler-not   {\mlfsx?\M|\mlxsspx?\M}     } } */
+/* { dg-final { scan-assembler-not   {\mstfsx?\M|\mstxsspx?\M}   } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-04-29  2:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-27 21:25 [gcc(refs/users/meissner/heads/work119)] Optimize vec_extract of V4SF from memory with constant element numbers Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2023-04-29  2:40 Michael Meissner
2023-04-29  0:02 Michael Meissner
2023-04-28 22:12 Michael Meissner
2023-04-28 17:57 Michael Meissner
2023-04-27 20:32 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).