From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1816) id E79F83858409; Mon, 24 Apr 2023 09:28:49 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E79F83858409 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1682328529; bh=fvtYDjhkeKayoiYPlyhm4gppGafW7CfOl/kcnljZqZE=; h=From:To:Subject:Date:From; b=tFptx8DQhrcpj8JgrxX7OdfCfb6TKvWuaT9S7JI4ygRMIxZg5KVjmFf26JLiK6UYF q+vZJvPClvHw6PNyH+EHJQydzye5b61t574Aw10k2FO6bvzH3TE18WkrGmJM6aFzYd 2zSLs855UL7SwmJRe4TTjVlVbuq9p20d4+B/wzg8= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Kyrylo Tkachov To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-193] aarch64: PR target/109406 Add support for SVE2 unpredicated MUL X-Act-Checkin: gcc X-Git-Author: Kyrylo Tkachov X-Git-Refname: refs/heads/master X-Git-Oldrev: 636e2273aec555faa0a2f0e0b97b5f3355b25e47 X-Git-Newrev: 9fd4a38c2f30b72ad5e3df7acb1ade201d8ac2cd Message-Id: <20230424092849.E79F83858409@sourceware.org> Date: Mon, 24 Apr 2023 09:28:49 +0000 (GMT) List-Id: https://gcc.gnu.org/g:9fd4a38c2f30b72ad5e3df7acb1ade201d8ac2cd commit r14-193-g9fd4a38c2f30b72ad5e3df7acb1ade201d8ac2cd Author: Kyrylo Tkachov Date: Mon Apr 24 10:27:31 2023 +0100 aarch64: PR target/109406 Add support for SVE2 unpredicated MUL SVE2 supports an unpredicated vector integer MUL form that we can emit from our SVE expanders without using up a predicate register. This patch does so. As the SVE MUL expansion currently is templated away through a code iterator I did not split it off just for this case but instead special-cased it in the define_expand. It seemed somewhat less invasive than the alternatives but I could split it off more explicitly if others want to. The div-by-bitmask_1.c testcase is adjusted to expect this new MUL form. Bootstrapped and tested on aarch64-none-linux-gnu. 
gcc/ChangeLog: PR target/109406 * config/aarch64/aarch64-sve.md (<optab><mode>3): Handle TARGET_SVE2 MUL case. * config/aarch64/aarch64-sve2.md (*aarch64_mul_unpredicated_<mode>): New pattern. gcc/testsuite/ChangeLog: PR target/109406 * gcc.target/aarch64/sve2/div-by-bitmask_1.c: Adjust for unpredicated SVE2 MUL. * gcc.target/aarch64/sve2/unpred_mul_1.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve.md | 9 +++++++ gcc/config/aarch64/aarch64-sve2.md | 17 ++++++++++++- .../gcc.target/aarch64/sve2/div-by-bitmask_1.c | 6 ++--- .../gcc.target/aarch64/sve2/unpred_mul_1.c | 29 ++++++++++++++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b11b55f7ac7..4b4c02c90fe 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3657,6 +3657,15 @@ UNSPEC_PRED_X))] "TARGET_SVE" { + /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple + pattern for it here rather than splitting off the MULT expander + separately. */ + if (TARGET_SVE2 && <CODE> == MULT) + { + emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode, + operands[1], operands[2])); + DONE; + } operands[3] = aarch64_ptrue_reg (<VPRED>mode); } ) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 2346f9f835d..da8a424dd57 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -189,7 +189,7 @@ ;; ------------------------------------------------------------------------- ;; ---- [INT] Multiplication ;; ------------------------------------------------------------------------- -;; Includes the lane forms of: +;; Includes the lane and unpredicated forms of: ;; - MUL ;; ------------------------------------------------------------------------- @@ -205,6 +205,21 @@ "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" ) +;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but +;; we include them here to allow matching simpler, unpredicated RTL. 
+(define_insn "*aarch64_mul_unpredicated_<mode>" + [(set (match_operand:SVE_I 0 "register_operand" "=w,w,?&w") + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand" "w,0,w") + (match_operand:SVE_I 2 "aarch64_sve_vsm_operand" "w,vsm,vsm")))] + "TARGET_SVE2" + "@ + mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> + mul\t%0.<Vetype>, %0.<Vetype>, #%2 + movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2" + [(set_attr "movprfx" "*,*,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Scaled high-part multiplication ;; ------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c index e6f5098c30f..1e546a93906 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c @@ -7,7 +7,7 @@ /* ** draw_bitmap1: ** ... -** mul z[0-9]+.h, p[0-9]+/m, z[0-9]+.h, z[0-9]+.h +** mul z[0-9]+.h, z[0-9]+.h, z[0-9]+.h ** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h ** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h ** ... @@ -27,7 +27,7 @@ void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, int n) /* ** draw_bitmap3: ** ... -** mul z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s +** mul z[0-9]+.s, z[0-9]+.s, z[0-9]+.s ** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s ** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s ** ... @@ -41,7 +41,7 @@ void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, int n) /* ** draw_bitmap4: ** ... -** mul z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d +** mul z[0-9]+.d, z[0-9]+.d, z[0-9]+.d ** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d ** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d ** ... 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c new file mode 100644 index 00000000000..aaf0ce49c99 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define N 1024 + +#define TYPE(N) int##N##_t + +#define TEMPLATE(SIZE) \ +void __attribute__ ((noinline, noclone)) \ +f_##SIZE##_##OP \ + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \ + TYPE(SIZE) *restrict c) \ +{ \ + for (int i = 0; i < N; i++) \ + a[i] = b[i] * c[i]; \ +} + +TEMPLATE (8); +TEMPLATE (16); +TEMPLATE (32); +TEMPLATE (64); + +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ +