public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Generate mfvsrwz for all platforms and remove redundant zero extend [PR106769]
@ 2023-06-19  1:14 HAO CHEN GUI
  2023-07-19  3:00 ` Kewen.Lin
  0 siblings, 1 reply; 2+ messages in thread
From: HAO CHEN GUI @ 2023-06-19  1:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner

Hi,
  This patch modifies vsx extract expander and generates mfvsrwz/stxsiwx
for all platforms when the mode is V4SI and the index of extracted element
is 1 for BE and 2 for LE. Also this patch adds a insn pattern for mfvsrwz
which can help eliminate redundant zero extend.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw.  So it should be generated
even with p9 vector enabled if possible.  Also the instruction is
already zero extended.  A combine pattern is needed to eliminate
redundant zero extend instructions.

gcc/
	PR target/106769
	* config/rs6000/vsx.md (expand vsx_extract_<mode>): Skip calling
	gen_vsx_extract_<mode>_p9 when it can be implemented by
	mfvsrwz/stxsiwx.
	(*vsx_extract_<mode>_di_p9): Not generate the insn when it can
	be generated by mfvsrwz.
	(mfvsrwz): New insn pattern.
	(*vsx_extract_si): Rename to...
	(vsx_extract_si): ..., remove redundant insn condition and
	generate the insn on p9 when it can be implemented by
	mfvsrwz/stxsiwx.  Add a dup alternative for simple vector moving.
	Remove reload_completed from split condition as it's unnecessary.
	Remove unnecessary checking from preparation statements.  Set
	type and length attributes for each alternative.

gcc/testsuite/
	PR target/106769
	* gcc.target/powerpc/pr106769.h: New.
	* gcc.target/powerpc/pr106769-p8.c: New.
	* gcc.target/powerpc/pr106769-p9.c: New.

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..09b0f83db86 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3728,7 +3728,9 @@ (define_expand  "vsx_extract_<mode>"
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
-  if (TARGET_P9_VECTOR)
+  if (TARGET_P9_VECTOR
+      && (<MODE>mode != V4SImode
+	  || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)))
     {
       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
 					    operands[2]));
@@ -3798,7 +3800,9 @@ (define_insn_and_split "*vsx_extract_<mode>_di_p9"
 	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
 	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
    (clobber (match_scratch:SI 3 "=r,X"))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
+  "TARGET_VEXTRACTUB
+   && (<MODE>mode != V4SImode
+       || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 4)
@@ -3830,58 +3834,67 @@ (define_insn_and_split "*vsx_extract_<mode>_store_p9"
    (set (match_dup 0)
 	(match_dup 3))])

-(define_insn_and_split  "*vsx_extract_si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
+(define_insn "mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (vec_select:SI
+	    (match_operand:V4SI 1 "vsx_register_operand" "wa")
+	    (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
+(define_insn_and_split  "vsx_extract_si"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
 	(vec_select:SI
-	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
-   (clobber (match_scratch:V4SI 3 "=v,v,v"))]
-  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
-  "#"
-  "&& reload_completed"
+	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
+{
+   if (which_alternative == 0)
+     return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+     return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+     return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
   [(const_int 0)]
 {
   rtx dest = operands[0];
   rtx src = operands[1];
   rtx element = operands[2];
-  rtx vec_tmp = operands[3];
-  int value;
+  rtx vec_tmp;
+
+  if (GET_CODE (operands[3]) == SCRATCH)
+    vec_tmp = gen_reg_rtx (V4SImode);
+  else
+    vec_tmp = operands[3];

   /* Adjust index for LE element ordering, the below minuend 3 is computed by
      GET_MODE_NUNITS (V4SImode) - 1.  */
   if (!BYTES_BIG_ENDIAN)
     element = GEN_INT (3 - INTVAL (element));

-  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
-     instruction.  */
-  value = INTVAL (element);
-  if (value != 1)
-    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
-  else
-    vec_tmp = src;
-
-  if (MEM_P (operands[0]))
-    {
-      if (can_create_pseudo_p ())
-	dest = rs6000_force_indexed_or_indirect_mem (dest);
+  emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));

-      if (TARGET_P8_VECTOR)
-	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
-      else
-	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
-    }
-
-  else if (TARGET_P8_VECTOR)
-    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
-  else
-    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
-		    gen_rtx_REG (DImode, REGNO (vec_tmp)));
+  int value = BYTES_BIG_ENDIAN ? 1 : 2;
+  emit_insn (gen_vsx_extract_si (dest, vec_tmp, GEN_INT (value)));

   DONE;
 }
-  [(set_attr "type" "mfvsr,vecperm,fpstore")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p8v,*")])
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])

 (define_insn_and_split  "*vsx_extract_<mode>_p8"
   [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
new file mode 100644
index 00000000000..e7cdbc76298
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
new file mode 100644
index 00000000000..d21c7f13382
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769.h b/gcc/testsuite/gcc.target/powerpc/pr106769.h
new file mode 100644
index 00000000000..1c8c8a024f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769.h
@@ -0,0 +1,17 @@
+#include <altivec.h>
+
+#ifdef __BIG_ENDIAN__
+#define LANE 1
+#else
+#define LANE 2
+#endif
+
+unsigned int foo1 (vector unsigned int v)
+{
+   return vec_extract(v, LANE);
+}
+
+void foo2 (vector unsigned int v, unsigned int* p)
+{
+   *p = vec_extract(v, LANE);
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-19  3:00 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-19  1:14 [PATCH, rs6000] Generate mfvsrwz for all platforms and remove redundant zero extend [PR106769] HAO CHEN GUI
2023-07-19  3:00 ` Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).