From: HAO CHEN GUI <guihaoc@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>,
David <dje.gcc@gmail.com>, "Kewen.Lin" <linkw@linux.ibm.com>,
Peter Bergner <bergner@linux.ibm.com>
Subject: [PATCHv2, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]
Date: Fri, 21 Jul 2023 09:32:16 +0800 [thread overview]
Message-ID: <894768a2-5ebe-60f0-e6e9-73bdc9f1425d@linux.ibm.com> (raw)
Hi,
This patch modifies the vsx extract expander so that mfvsrwz/stxsiwx is
generated for all subtargets when the mode is V4SI and the index of the
extracted element is 1 for BE or 2 for LE. The patch also adds an insn pattern
for mfvsrwz, which helps eliminate redundant zero extends.
Compared to the last version, the main change is to add a new expand for V4SI
and to split "vsx_extract_si" into two insn patterns.
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html
Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Thanks
Gui Haochen
ChangeLog
rs6000: Generate mfvsrwz for all subtargets and remove redundant zero extend
mfvsrwz has lower latency than xxextractuw or vextuw[lr]x, so it should be
generated even with p9 vector enabled. The instruction also zero-extends its
result, so a combine pattern is needed to eliminate redundant zero extend
instructions.
gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_<mode>): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI.
(*vsx_extract_<mode>_di_p9): Do not generate the insn when it can
be generated by mfvsrwz.
(mfvsrwz): New insn pattern for zero extended vsx_extract_v4si.
(*vsx_extract_si): Remove.
(vsx_extract_v4si_0): New insn pattern to deal with V4SI extract
when the index of the extracted element is 1 on BE and 2 on LE.
(vsx_extract_v4si_1): New insn and split pattern which deals with
the cases not handled by vsx_extract_v4si_0.
gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.
patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..ad249441bcf 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2_<mode>_1"
(define_expand "vsx_extract_<mode>"
[(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
(vec_select:<VEC_base>
- (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+ (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
/* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
@@ -3736,6 +3736,23 @@ (define_expand "vsx_extract_<mode>"
}
})
+(define_expand "vsx_extract_v4si"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+ "TARGET_DIRECT_MOVE_64BIT"
+{
+ if (TARGET_P9_VECTOR
+ && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+ {
+ emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+})
+
(define_insn "vsx_extract_<mode>_p9"
[(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
(vec_select:<VEC_base>
@@ -3798,7 +3815,9 @@ (define_insn_and_split "*vsx_extract_<mode>_di_p9"
(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
(parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
(clobber (match_scratch:SI 3 "=r,X"))]
- "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
+ "TARGET_VEXTRACTUB
+ && (<MODE>mode != V4SImode
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
"#"
"&& reload_completed"
[(parallel [(set (match_dup 4)
@@ -3830,58 +3849,78 @@ (define_insn_and_split "*vsx_extract_<mode>_store_p9"
(set (match_dup 0)
(match_dup 3))])
-(define_insn_and_split "*vsx_extract_si"
+(define_insn "mfvsrwz"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (vec_select:SI
+ (match_operand:V4SI 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+ (clobber (match_scratch:V4SI 3 "=v"))]
+ "TARGET_DIRECT_MOVE_64BIT
+ && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+ "mfvsrwz %0,%x1"
+ [(set_attr "type" "mfvsr")
+ (set_attr "isa" "p8v")])
+
+(define_insn "vsx_extract_v4si_0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+ (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+ "TARGET_DIRECT_MOVE_64BIT
+ && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
+{
+ if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+ if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+ if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+ return ASM_COMMENT_START " vec_extract to same register";
+}
+ [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+ (set_attr "length" "4,4,4,0")
+ (set_attr "isa" "p8v,*,p8v,*")])
+
+(define_insn_and_split "vsx_extract_v4si_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
(vec_select:SI
(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
(clobber (match_scratch:V4SI 3 "=v,v,v"))]
- "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
+ "TARGET_DIRECT_MOVE_64BIT
+ && !TARGET_P9_VECTOR
+ && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
"#"
- "&& reload_completed"
+ "&& 1"
[(const_int 0)]
{
rtx dest = operands[0];
rtx src = operands[1];
rtx element = operands[2];
- rtx vec_tmp = operands[3];
- int value;
+ rtx vec_tmp;
+
+ if (GET_CODE (operands[3]) == SCRATCH)
+ vec_tmp = gen_reg_rtx (V4SImode);
+ else
+ vec_tmp = operands[3];
/* Adjust index for LE element ordering, the below minuend 3 is computed by
GET_MODE_NUNITS (V4SImode) - 1. */
if (!BYTES_BIG_ENDIAN)
element = GEN_INT (3 - INTVAL (element));
- /* If the value is in the correct position, we can avoid doing the VSPLT<x>
- instruction. */
- value = INTVAL (element);
- if (value != 1)
- emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
- else
- vec_tmp = src;
+ emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
- if (MEM_P (operands[0]))
- {
- if (can_create_pseudo_p ())
- dest = rs6000_force_indexed_or_indirect_mem (dest);
-
- if (TARGET_P8_VECTOR)
- emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
- else
- emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
- }
-
- else if (TARGET_P8_VECTOR)
- emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
- else
- emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
- gen_rtx_REG (DImode, REGNO (vec_tmp)));
+ int value = BYTES_BIG_ENDIAN ? 1 : 2;
+ emit_insn (gen_vsx_extract_v4si_0 (dest, vec_tmp, GEN_INT (value)));
DONE;
-}
- [(set_attr "type" "mfvsr,vecperm,fpstore")
- (set_attr "length" "8")
- (set_attr "isa" "*,p8v,*")])
+})
(define_insn_and_split "*vsx_extract_<mode>_p8"
[(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
new file mode 100644
index 00000000000..e7cdbc76298
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
new file mode 100644
index 00000000000..2205e434a86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
+/* { dg-final { scan-assembler-not "vextuw\[rl\]x" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769.h b/gcc/testsuite/gcc.target/powerpc/pr106769.h
new file mode 100644
index 00000000000..1c8c8a024f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769.h
@@ -0,0 +1,17 @@
+#include <altivec.h>
+
+#ifdef __BIG_ENDIAN__
+#define LANE 1
+#else
+#define LANE 2
+#endif
+
+unsigned int foo1 (vector unsigned int v)
+{
+ return vec_extract(v, LANE);
+}
+
+void foo2 (vector unsigned int v, unsigned int* p)
+{
+ *p = vec_extract(v, LANE);
+}
next reply other threads:[~2023-07-21 1:33 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-21 1:32 HAO CHEN GUI [this message]
2023-07-24 2:43 ` Kewen.Lin
2023-07-25 2:10 HAO CHEN GUI
2023-07-31 5:43 ` Kewen.Lin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=894768a2-5ebe-60f0-e6e9-73bdc9f1425d@linux.ibm.com \
--to=guihaoc@linux.ibm.com \
--cc=bergner@linux.ibm.com \
--cc=dje.gcc@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=linkw@linux.ibm.com \
--cc=segher@kernel.crashing.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).