* [PATCH] aarch64: PR target/109406 Add support for SVE2 unpredicated MUL
@ 2023-04-24 8:56 Kyrylo Tkachov
2023-04-24 9:05 ` Richard Sandiford
0 siblings, 1 reply; 2+ messages in thread
From: Kyrylo Tkachov @ 2023-04-24 8:56 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Sandiford
[-- Attachment #1: Type: text/plain, Size: 1012 bytes --]
Hi all,
SVE2 supports an unpredicated vector integer MUL form that we can emit from our SVE expanders
without using up a predicate register. This patch does so.
As the SVE MUL expansion is currently templated away through a code iterator, I did not split it
off just for this case but instead special-cased it in the define_expand. It seemed somewhat less
invasive than the alternatives, but I could split it off more explicitly if others want me to.
The div-by-bitmask_1.c testcase is adjusted to expect this new MUL form.
Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill
gcc/ChangeLog:
PR target/109406
* config/aarch64/aarch64-sve.md (<optab><mode>3): Handle TARGET_SVE2 MUL
case.
* config/aarch64/aarch64-sve2.md (*aarch64_mul_unpredicated_<mode>): New
pattern.
gcc/testsuite/ChangeLog:
PR target/109406
* gcc.target/aarch64/sve2/div-by-bitmask_1.c: Adjust for unpredicated SVE2
MUL.
* gcc.target/aarch64/sve2/unpred_mul_1.c: New test.
[-- Attachment #2: sve2-mul.patch --]
[-- Type: application/octet-stream, Size: 4742 bytes --]
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b11b55f7ac718db199920b61bf3e4b4881c69660..4b4c02c90fec6ce1ff15a8b2a5df348224a307b7 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3657,6 +3657,15 @@ (define_expand "<optab><mode>3"
UNSPEC_PRED_X))]
"TARGET_SVE"
{
+ /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
+ pattern for it here rather than splitting off the MULT expander
+ separately. */
+ if (TARGET_SVE2 && <CODE> == MULT)
+ {
+ emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
+ operands[1], operands[2]));
+ DONE;
+ }
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 2346f9f835d26f5b87afd47cdc9e44f9f47604ed..da8a424dd57fc5482cb20ba417d4141148ac61b6 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -189,7 +189,7 @@ (define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
-;; Includes the lane forms of:
+;; Includes the lane and unpredicated forms of:
;; - MUL
;; -------------------------------------------------------------------------
@@ -205,6 +205,21 @@ (define_insn "@aarch64_mul_lane_<mode>"
"mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
+;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
+;; we include them here to allow matching simpler, unpredicated RTL.
+(define_insn "*aarch64_mul_unpredicated_<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w,w,?&w")
+ (mult:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "w,0,w")
+ (match_operand:SVE_I 2 "aarch64_sve_vsm_operand" "w,vsm,vsm")))]
+ "TARGET_SVE2"
+ "@
+ mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
+ mul\t%0.<Vetype>, %0.<Vetype>, #%2
+ movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c
index e6f5098c30f4e2eb8ed1af153c0bb0d204cda6d9..1e546a93906962ba2469ddb3bf2ee9c0166dbae1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c
@@ -7,7 +7,7 @@
/*
** draw_bitmap1:
** ...
-** mul z[0-9]+.h, p[0-9]+/m, z[0-9]+.h, z[0-9]+.h
+** mul z[0-9]+.h, z[0-9]+.h, z[0-9]+.h
** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h
** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h
** ...
@@ -27,7 +27,7 @@ void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, int n)
/*
** draw_bitmap3:
** ...
-** mul z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
+** mul z[0-9]+.s, z[0-9]+.s, z[0-9]+.s
** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s
** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s
** ...
@@ -41,7 +41,7 @@ void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, int n)
/*
** draw_bitmap4:
** ...
-** mul z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
+** mul z[0-9]+.d, z[0-9]+.d, z[0-9]+.d
** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d
** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d
** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..aaf0ce49c99447439146a1e17ed0533231e141c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define N 1024
+
+#define TYPE(N) int##N##_t
+
+#define TEMPLATE(SIZE) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE##_##OP \
+ (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+ TYPE(SIZE) *restrict c) \
+{ \
+ for (int i = 0; i < N; i++) \
+ a[i] = b[i] * c[i]; \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
+
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] aarch64: PR target/109406 Add support for SVE2 unpredicated MUL
2023-04-24 8:56 [PATCH] aarch64: PR target/109406 Add support for SVE2 unpredicated MUL Kyrylo Tkachov
@ 2023-04-24 9:05 ` Richard Sandiford
0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2023-04-24 9:05 UTC (permalink / raw)
To: Kyrylo Tkachov; +Cc: gcc-patches
Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> writes:
> Hi all,
>
> SVE2 supports an unpredicated vector integer MUL form that we can emit from our SVE expanders
> without using up a predicate registers. This patch does so.
> As the SVE MUL expansion currently is templated away through a code iterator I did not split it
> off just for this case but instead special-cased it in the define_expand. It seemed somewhat less
> invasive than the alternatives but I could split it off more explicitly if others want to.
> The div-by-bitmask_1.c testcase is adjusted to expect this new MUL form.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Ok for trunk?
> Thanks,
> Kyrill
>
> gcc/ChangeLog:
>
> PR target/109406
> * config/aarch64/aarch64-sve.md (<optab><mode>3): Handle TARGET_SVE2 MUL
> case.
> * config/aarch64/aarch64-sve2.md (*aarch64_mul_unpredicated_<mode>): New
> pattern.
>
> gcc/testsuite/ChangeLog:
>
> PR target/109406
> * gcc.target/aarch64/sve2/div-by-bitmask_1.c: Adjust for unpredicated SVE2
> MUL.
> * gcc.target/aarch64/sve2/unpred_mul_1.c: New test.
LGTM.
Thanks,
Richard
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index b11b55f7ac718db199920b61bf3e4b4881c69660..4b4c02c90fec6ce1ff15a8b2a5df348224a307b7 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -3657,6 +3657,15 @@ (define_expand "<optab><mode>3"
> UNSPEC_PRED_X))]
> "TARGET_SVE"
> {
> + /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
> + pattern for it here rather than splitting off the MULT expander
> + separately. */
> + if (TARGET_SVE2 && <CODE> == MULT)
> + {
> + emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
> + operands[1], operands[2]));
> + DONE;
> + }
> operands[3] = aarch64_ptrue_reg (<VPRED>mode);
> }
> )
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index 2346f9f835d26f5b87afd47cdc9e44f9f47604ed..da8a424dd57fc5482cb20ba417d4141148ac61b6 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -189,7 +189,7 @@ (define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
> ;; -------------------------------------------------------------------------
> ;; ---- [INT] Multiplication
> ;; -------------------------------------------------------------------------
> -;; Includes the lane forms of:
> +;; Includes the lane and unpredicated forms of:
> ;; - MUL
> ;; -------------------------------------------------------------------------
>
> @@ -205,6 +205,21 @@ (define_insn "@aarch64_mul_lane_<mode>"
> "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
> )
>
> +;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
> +;; we include them here to allow matching simpler, unpredicated RTL.
> +(define_insn "*aarch64_mul_unpredicated_<mode>"
> + [(set (match_operand:SVE_I 0 "register_operand" "=w,w,?&w")
> + (mult:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "w,0,w")
> + (match_operand:SVE_I 2 "aarch64_sve_vsm_operand" "w,vsm,vsm")))]
> + "TARGET_SVE2"
> + "@
> + mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
> + mul\t%0.<Vetype>, %0.<Vetype>, #%2
> + movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2"
> + [(set_attr "movprfx" "*,*,yes")]
> +)
> +
> ;; -------------------------------------------------------------------------
> ;; ---- [INT] Scaled high-part multiplication
> ;; -------------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c
> index e6f5098c30f4e2eb8ed1af153c0bb0d204cda6d9..1e546a93906962ba2469ddb3bf2ee9c0166dbae1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c
> @@ -7,7 +7,7 @@
> /*
> ** draw_bitmap1:
> ** ...
> -** mul z[0-9]+.h, p[0-9]+/m, z[0-9]+.h, z[0-9]+.h
> +** mul z[0-9]+.h, z[0-9]+.h, z[0-9]+.h
> ** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h
> ** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h
> ** ...
> @@ -27,7 +27,7 @@ void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, int n)
> /*
> ** draw_bitmap3:
> ** ...
> -** mul z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
> +** mul z[0-9]+.s, z[0-9]+.s, z[0-9]+.s
> ** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s
> ** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s
> ** ...
> @@ -41,7 +41,7 @@ void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, int n)
> /*
> ** draw_bitmap4:
> ** ...
> -** mul z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
> +** mul z[0-9]+.d, z[0-9]+.d, z[0-9]+.d
> ** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d
> ** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d
> ** ...
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..aaf0ce49c99447439146a1e17ed0533231e141c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +#include <stdint.h>
> +
> +#define N 1024
> +
> +#define TYPE(N) int##N##_t
> +
> +#define TEMPLATE(SIZE) \
> +void __attribute__ ((noinline, noclone)) \
> +f_##SIZE##_##OP \
> + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
> + TYPE(SIZE) *restrict c) \
> +{ \
> + for (int i = 0; i < N; i++) \
> + a[i] = b[i] * c[i]; \
> +}
> +
> +TEMPLATE (8);
> +TEMPLATE (16);
> +TEMPLATE (32);
> +TEMPLATE (64);
> +
> +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
> +
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-04-24 9:05 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-24 8:56 [PATCH] aarch64: PR target/109406 Add support for SVE2 unpredicated MUL Kyrylo Tkachov
2023-04-24 9:05 ` Richard Sandiford
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).