public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: HAO CHEN GUI <guihaoc@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>,
	David <dje.gcc@gmail.com>, "Kewen.Lin" <linkw@linux.ibm.com>,
	Peter Bergner <bergner@linux.ibm.com>
Subject: [PATCH, rs6000] Generate mfvsrwz for all platforms and remove redundant zero extend [PR106769]
Date: Mon, 19 Jun 2023 09:14:55 +0800	[thread overview]
Message-ID: <acc68688-7292-f739-5ad4-a2367b6e18bc@linux.ibm.com> (raw)

Hi,
  This patch modifies vsx extract expander and generates mfvsrwz/stxsiwx
for all platforms when the mode is V4SI and the index of extracted element
is 1 for BE and 2 for LE. Also this patch adds a insn pattern for mfvsrwz
which can help eliminate redundant zero extend.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw.  So it should be generated
even with p9 vector enabled if possible.  Also the instruction is
already zero extended.  A combine pattern is needed to eliminate
redundant zero extend instructions.

gcc/
	PR target/106769
	* config/rs6000/vsx.md (expand vsx_extract_<mode>): Skip calling
	gen_vsx_extract_<mode>_p9 when it can be implemented by
	mfvsrwz/stxsiwx.
	(*vsx_extract_<mode>_di_p9): Not generate the insn when it can
	be generated by mfvsrwz.
	(mfvsrwz): New insn pattern.
	(*vsx_extract_si): Rename to...
	(vsx_extract_si): ..., remove redundant insn condition and
	generate the insn on p9 when it can be implemented by
	mfvsrwz/stxsiwx.  Add a dup alternative for simple vector moving.
	Remove reload_completed from split condition as it's unnecessary.
	Remove unnecessary checking from preparation statements.  Set
	type and length attributes for each alternative.

gcc/testsuite/
	PR target/106769
	* gcc.target/powerpc/pr106769.h: New.
	* gcc.target/powerpc/pr106769-p8.c: New.
	* gcc.target/powerpc/pr106769-p9.c: New.

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..09b0f83db86 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3728,7 +3728,9 @@ (define_expand  "vsx_extract_<mode>"
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
-  if (TARGET_P9_VECTOR)
+  if (TARGET_P9_VECTOR
+      && (<MODE>mode != V4SImode
+	  || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)))
     {
       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
 					    operands[2]));
@@ -3798,7 +3800,9 @@ (define_insn_and_split "*vsx_extract_<mode>_di_p9"
 	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
 	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
    (clobber (match_scratch:SI 3 "=r,X"))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
+  "TARGET_VEXTRACTUB
+   && (<MODE>mode != V4SImode
+       || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 4)
@@ -3830,58 +3834,67 @@ (define_insn_and_split "*vsx_extract_<mode>_store_p9"
    (set (match_dup 0)
 	(match_dup 3))])

-(define_insn_and_split  "*vsx_extract_si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
+(define_insn "mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (vec_select:SI
+	    (match_operand:V4SI 1 "vsx_register_operand" "wa")
+	    (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
+(define_insn_and_split  "vsx_extract_si"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
 	(vec_select:SI
-	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
-   (clobber (match_scratch:V4SI 3 "=v,v,v"))]
-  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
-  "#"
-  "&& reload_completed"
+	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
+{
+   if (which_alternative == 0)
+     return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+     return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+     return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
   [(const_int 0)]
 {
   rtx dest = operands[0];
   rtx src = operands[1];
   rtx element = operands[2];
-  rtx vec_tmp = operands[3];
-  int value;
+  rtx vec_tmp;
+
+  if (GET_CODE (operands[3]) == SCRATCH)
+    vec_tmp = gen_reg_rtx (V4SImode);
+  else
+    vec_tmp = operands[3];

   /* Adjust index for LE element ordering, the below minuend 3 is computed by
      GET_MODE_NUNITS (V4SImode) - 1.  */
   if (!BYTES_BIG_ENDIAN)
     element = GEN_INT (3 - INTVAL (element));

-  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
-     instruction.  */
-  value = INTVAL (element);
-  if (value != 1)
-    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
-  else
-    vec_tmp = src;
-
-  if (MEM_P (operands[0]))
-    {
-      if (can_create_pseudo_p ())
-	dest = rs6000_force_indexed_or_indirect_mem (dest);
+  emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));

-      if (TARGET_P8_VECTOR)
-	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
-      else
-	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
-    }
-
-  else if (TARGET_P8_VECTOR)
-    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
-  else
-    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
-		    gen_rtx_REG (DImode, REGNO (vec_tmp)));
+  int value = BYTES_BIG_ENDIAN ? 1 : 2;
+  emit_insn (gen_vsx_extract_si (dest, vec_tmp, GEN_INT (value)));

   DONE;
 }
-  [(set_attr "type" "mfvsr,vecperm,fpstore")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p8v,*")])
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])

 (define_insn_and_split  "*vsx_extract_<mode>_p8"
   [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
new file mode 100644
index 00000000000..e7cdbc76298
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
new file mode 100644
index 00000000000..d21c7f13382
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769.h b/gcc/testsuite/gcc.target/powerpc/pr106769.h
new file mode 100644
index 00000000000..1c8c8a024f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769.h
@@ -0,0 +1,17 @@
+#include <altivec.h>
+
+#ifdef __BIG_ENDIAN__
+#define LANE 1
+#else
+#define LANE 2
+#endif
+
+unsigned int foo1 (vector unsigned int v)
+{
+   return vec_extract(v, LANE);
+}
+
+void foo2 (vector unsigned int v, unsigned int* p)
+{
+   *p = vec_extract(v, LANE);
+}

             reply	other threads:[~2023-06-19  1:16 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-19  1:14 HAO CHEN GUI [this message]
2023-07-19  3:00 ` Kewen.Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=acc68688-7292-f739-5ad4-a2367b6e18bc@linux.ibm.com \
    --to=guihaoc@linux.ibm.com \
    --cc=bergner@linux.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=linkw@linux.ibm.com \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).