* [PATCHv4, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
@ 2023-06-25 2:09 HAO CHEN GUI
2023-06-26 6:07 ` Kewen.Lin
0 siblings, 1 reply; 2+ messages in thread
From: HAO CHEN GUI @ 2023-06-25 2:09 UTC (permalink / raw)
To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner
Hi,
This patch adds a new insn for vector splat with small V2DI constants on P8.
If the value of the constant is in the range -16 to 15 and is not 0 or -1, it
can be loaded with vspltisw and vupkhsw on P8. This should be more efficient
than loading the vector from memory.
Compared to the last version, the main change is to remove the new constraint,
use a super constraint in the insn, and move the check into the insn condition.
Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Thanks
Gui Haochen
ChangeLog
2023-06-25 Haochen Gui <guihaoc@linux.ibm.com>
gcc/
PR target/104124
* config/rs6000/altivec.md (*altivec_vupkhs<VU_char>_direct): Rename
to...
(altivec_vupkhs<VU_char>_direct): ...this.
* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
predicate to test if a constant can be loaded with vspltisw and
vupkhsw.
(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to check if
a vector constant can be synthesized with a vspltisw and a vupkhsw.
* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
to return true if OP mode is V2DI and can be synthesized with vupkhsw
and vspltisw.
* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
constants with vspltisw and vupkhsw.
gcc/testsuite/
PR target/104124
* gcc.target/powerpc/pr104124.c: New.
patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..2c932854c33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs<VU_char>"
}
[(set_attr "type" "vecperm")])
-(define_insn "*altivec_vupkhs<VU_char>_direct"
+(define_insn "altivec_vupkhs<VU_char>_direct"
[(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..f62a4d9b506 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,12 @@ (define_predicate "xxspltib_constant_split"
return num_insns > 1;
})
+;; Return true if the operand is a constant that can be loaded with a vspltisw
+;; instruction and then a vupkhsw instruction.
+
+(define_predicate "vspltisw_vupkhsw_constant_split"
+ (and (match_code "const_vector")
+ (match_test "vspltisw_vupkhsw_constant_p (op, mode)")))
;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
;; instruction.
@@ -742,6 +748,11 @@ (define_predicate "easy_vector_constant"
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
+ /* V2DI constant within RANGE (-16, 15) can be synthesized with a
+ vspltisw and a vupkhsw. */
+ if (vspltisw_vupkhsw_constant_p (op, mode, &value))
+ return true;
+
return easy_altivec_constant (op, mode);
}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..00cb2d82953 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
extern int easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..ae34a02b282 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
return true;
}
+/* Return true if OP is a V2DImode constant vector that can be synthesized
+   with the ISA 2.07 instructions vspltisw and vupkhsw.
+
+   OP must be a duplicated (splat) constant whose element satisfies
+   EASY_VECTOR_15.  The values 0 and -1 are rejected, as stated in the
+   patch description; presumably they are loaded by a cheaper sequence
+   elsewhere and do not need this two-instruction form.
+
+   If CONSTANT_PTR is non-null, the element value being splatted is
+   returned through it.  */
+
+bool
+vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
+{
+  HOST_WIDE_INT value;
+  rtx elt;
+
+  if (!TARGET_P8_VECTOR)
+    return false;
+
+  if (mode != V2DImode)
+    return false;
+
+  if (!const_vec_duplicate_p (op, &elt))
+    return false;
+
+  value = INTVAL (elt);
+  /* Exclude 0 and -1 (not 1): the patch is documented as handling
+     constants that are "not 0 or -1".  */
+  if (value == 0 || value == -1
+      || !EASY_VECTOR_15 (value))
+    return false;
+
+  if (constant_ptr)
+    *constant_ptr = (int) value;
+  return true;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..4919b073e50 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1174,6 +1174,30 @@ (define_insn_and_split "*xxspltib_<mode>_split"
[(set_attr "type" "vecperm")
(set_attr "length" "8")])
+(define_insn_and_split "*vspltisw_v2di_split"
+ [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+ (match_operand:V2DI 1 "vspltisw_vupkhsw_constant_split" "W"))]
+ "TARGET_P8_VECTOR && vspltisw_vupkhsw_constant_split (operands[1], V2DImode)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp = can_create_pseudo_p ()
+ ? gen_reg_rtx (V4SImode)
+ : gen_lowpart (V4SImode, op0);
+ int value;
+
+ vspltisw_vupkhsw_constant_p (op1, V2DImode, &value);
+ emit_insn (gen_altivec_vspltisw (tmp, GEN_INT (value)));
+ emit_insn (gen_altivec_vupkhsw_direct (op0, tmp));
+
+ DONE;
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "8")])
+
;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
diff --git a/gcc/testsuite/gcc.target/powerpc/pr104124.c b/gcc/testsuite/gcc.target/powerpc/pr104124.c
new file mode 100644
index 00000000000..30e3b6f86eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr104124.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power8 -mpower8-vector -O2" } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-final { scan-assembler {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler {\mvupkhsw\M} } } */
+/* { dg-final { scan-assembler-not {\mlvx\M} } } */
+
+#include <altivec.h>
+
+vector unsigned long long
+foo ()
+{
+ return vec_splats ((unsigned long long) 12);
+}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCHv4, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
2023-06-25 2:09 [PATCHv4, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124] HAO CHEN GUI
@ 2023-06-26 6:07 ` Kewen.Lin
0 siblings, 0 replies; 2+ messages in thread
From: Kewen.Lin @ 2023-06-26 6:07 UTC (permalink / raw)
To: HAO CHEN GUI; +Cc: Segher Boessenkool, David, Peter Bergner, gcc-patches
Hi Haochen,
on 2023/6/25 10:09, HAO CHEN GUI wrote:
> Hi,
> This patch adds a new insn for vector splat with small V2DI constants on P8.
> If the value of constant is in RANGE (-16, 15) and not 0 or -1, it can be loaded
> with vspltisw and vupkhsw on P8. It should be efficient than loading vector from
> memory.
>
> Compared to last version, the main change is to remove the new constraint and
> use a super constraint in the insn and set the check into insn condition.
>
> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
This patch is ok for trunk, thanks!
BR,
Kewen
>
> Thanks
> Gui Haochen
>
> ChangeLog
> 2023-06-25 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
> PR target/104124
> * config/rs6000/altivec.md (*altivec_vupkhs<VU_char>_direct): Rename
> to...
> (altivec_vupkhs<VU_char>_direct): ...this.
> * config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
> predicate to test if a constant can be loaded with vspltisw and
> vupkhsw.
> (easy_vector_constant): Call vspltisw_vupkhsw_constant_p to Check if
> a vector constant can be synthesized with a vspltisw and a vupkhsw.
> * config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
> * config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
> to return true if OP mode is V2DI and can be synthesized with vupkhsw
> and vspltisw.
> * config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
> constants with vspltisw and vupkhsw.
>
> gcc/testsuite/
> PR target/104124
> * gcc.target/powerpc/pr104124.c: New.
>
> patch.diff
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 49b0c964f4d..2c932854c33 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs<VU_char>"
> }
> [(set_attr "type" "vecperm")])
>
> -(define_insn "*altivec_vupkhs<VU_char>_direct"
> +(define_insn "altivec_vupkhs<VU_char>_direct"
> [(set (match_operand:VP 0 "register_operand" "=v")
> (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
> UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 52c65534e51..f62a4d9b506 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -694,6 +694,12 @@ (define_predicate "xxspltib_constant_split"
> return num_insns > 1;
> })
>
> +;; Return true if the operand is a constant that can be loaded with a vspltisw
> +;; instruction and then a vupkhsw instruction.
> +
> +(define_predicate "vspltisw_vupkhsw_constant_split"
> + (and (match_code "const_vector")
> + (match_test "vspltisw_vupkhsw_constant_p (op, mode)")))
>
> ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
> ;; instruction.
> @@ -742,6 +748,11 @@ (define_predicate "easy_vector_constant"
> && xxspltib_constant_p (op, mode, &num_insns, &value))
> return true;
>
> + /* V2DI constant within RANGE (-16, 15) can be synthesized with a
> + vspltisw and a vupkhsw. */
> + if (vspltisw_vupkhsw_constant_p (op, mode, &value))
> + return true;
> +
> return easy_altivec_constant (op, mode);
> }
>
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 1a4fc1df668..00cb2d82953 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
>
> extern int easy_altivec_constant (rtx, machine_mode);
> extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
> +extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
> extern int vspltis_shifted (rtx);
> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
> extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 3be5860dd9b..ae34a02b282 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
> return true;
> }
>
> +/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
> + instructions vupkhsw and vspltisw.
> +
> + Return the constant that is being split via CONSTANT_PTR. */
> +
> +bool
> +vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
> +{
> + HOST_WIDE_INT value;
> + rtx elt;
> +
> + if (!TARGET_P8_VECTOR)
> + return false;
> +
> + if (mode != V2DImode)
> + return false;
> +
> + if (!const_vec_duplicate_p (op, &elt))
> + return false;
> +
> + value = INTVAL (elt);
> + if (value == 0 || value == 1
> + || !EASY_VECTOR_15 (value))
> + return false;
> +
> + if (constant_ptr)
> + *constant_ptr = (int) value;
> + return true;
> +}
> +
> const char *
> output_vec_const_move (rtx *operands)
> {
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 7d845df5c2d..4919b073e50 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -1174,6 +1174,30 @@ (define_insn_and_split "*xxspltib_<mode>_split"
> [(set_attr "type" "vecperm")
> (set_attr "length" "8")])
>
> +(define_insn_and_split "*vspltisw_v2di_split"
> + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> + (match_operand:V2DI 1 "vspltisw_vupkhsw_constant_split" "W"))]
> + "TARGET_P8_VECTOR && vspltisw_vupkhsw_constant_split (operands[1], V2DImode)"
> + "#"
> + "&& 1"
> + [(const_int 0)]
> +{
> + rtx op0 = operands[0];
> + rtx op1 = operands[1];
> + rtx tmp = can_create_pseudo_p ()
> + ? gen_reg_rtx (V4SImode)
> + : gen_lowpart (V4SImode, op0);
> + int value;
> +
> + vspltisw_vupkhsw_constant_p (op1, V2DImode, &value);
> + emit_insn (gen_altivec_vspltisw (tmp, GEN_INT (value)));
> + emit_insn (gen_altivec_vupkhsw_direct (op0, tmp));
> +
> + DONE;
> +}
> + [(set_attr "type" "vecperm")
> + (set_attr "length" "8")])
> +
>
> ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
> ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr104124.c b/gcc/testsuite/gcc.target/powerpc/pr104124.c
> new file mode 100644
> index 00000000000..30e3b6f86eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr104124.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power8 -mpower8-vector -O2" } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-final { scan-assembler {\mvspltisw\M} } } */
> +/* { dg-final { scan-assembler {\mvupkhsw\M} } } */
> +/* { dg-final { scan-assembler-not {\mlvx\M} } } */
> +
> +#include <altivec.h>
> +
> +vector unsigned long long
> +foo ()
> +{
> + return vec_splats ((unsigned long long) 12);
> +}
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-06-26 6:07 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-25 2:09 [PATCHv4, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124] HAO CHEN GUI
2023-06-26 6:07 ` Kewen.Lin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).