From: Richard Sandiford <richard.sandiford@arm.com>
To: Christophe Lyon <christophe.lyon@linaro.org>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH 1/2] arm: Fix vcond_mask expander for MVE (PR target/100757)
Date: Wed, 09 Jun 2021 16:04:11 +0100 [thread overview]
Message-ID: <mpta6nzm650.fsf@arm.com> (raw)
In-Reply-To: <20210608213844.6218-1-christophe.lyon@linaro.org> (Christophe Lyon's message of "Tue, 8 Jun 2021 21:38:43 +0000")
Christophe Lyon <christophe.lyon@linaro.org> writes:
> The problem in this PR is that we call VPSEL with a mask of vector
> type instead of HImode. This happens because operand 3 in vcond_mask
> is the pre-computed vector comparison and has vector type. The fix is
> to transfer this value to VPR.P0 by comparing operand 3 with a vector
> of constant 1 of the same type as operand 3.
The alternative is to implement TARGET_VECTORIZE_GET_MASK_MODE
and return HImode for MVE. This is how AVX512 handles masks.
It might be worth trying that to see how it works. I'm not sure
off-hand whether it'll produce worse code or better code. However,
using HImode as the mask mode would help when defining other
predicated optabs in future.
Thanks,
Richard
> The pr100757*.c testcases are derived from
> gcc.c-torture/compile/20160205-1.c, forcing the use of MVE, and using
> different types and return values different from 0 and 1 to avoid
> commonalization with boolean masks.
>
> Reducing the number of iterations in pr100757-3.c from 32 to 8, we
> generate the code below:
>
> float a[32];
> float fn1(int d) {
> int c = 4;
> for (int b = 0; b < 8; b++)
> if (a[b] != 2.0f)
> c = 5;
> return c;
> }
>
> fn1:
> ldr r3, .L4+80
> vpush.64 {d8, d9}
> vldrw.32 q3, [r3] // q3=a[0..3]
> vldr.64 d8, .L4 // q4=(2.0,2.0,2.0,2.0)
> vldr.64 d9, .L4+8
> adds r3, r3, #16
> vcmp.f32 eq, q3, q4 // cmp a[0..3] == (2.0,2.0,2.0,2.0)
> vldr.64 d2, .L4+16 // q1=(1,1,1,1)
> vldr.64 d3, .L4+24
> vldrw.32 q3, [r3] // q3=a[4..7]
> vldr.64 d4, .L4+32 // q2=(0,0,0,0)
> vldr.64 d5, .L4+40
> vpsel q0, q1, q2 // q0=select (a[0..3])
> vcmp.f32 eq, q3, q4 // cmp a[4..7] == (2.0,2.0,2.0,2.0)
> vldm sp!, {d8-d9}
> vpsel q2, q1, q2 // q2=select (a[4..7])
> vand q2, q0, q2 // q2=select (a[0..3]) && select (a[4..7])
> vldr.64 d6, .L4+48 // q3=(4.0,4.0,4.0,4.0)
> vldr.64 d7, .L4+56
> vldr.64 d0, .L4+64 // q0=(5.0,5.0,5.0,5.0)
> vldr.64 d1, .L4+72
> vcmp.i32 eq, q2, q1 // cmp mask(a[0..7]) == (1,1,1,1)
> vpsel q3, q3, q0 // q3= vcond_mask(4.0,5.0)
> vmov.32 r3, q3[0] // keep the scalar max
> vmov.32 r1, q3[1]
> vmov.32 r0, q3[3]
> vmov.32 r2, q3[2]
> vmov s14, r1
> vmov s15, r3
> vmaxnm.f32 s15, s15, s14
> vmov s14, r2
> vmaxnm.f32 s15, s15, s14
> vmov s14, r0
> vmaxnm.f32 s15, s15, s14
> vmov r0, s15
> bx lr
> .L5:
> .align 3
> .L4:
> .word 1073741824
> .word 1073741824
> .word 1073741824
> .word 1073741824
> .word 1
> .word 1
> .word 1
> .word 1
> .word 0
> .word 0
> .word 0
> .word 0
> .word 1082130432
> .word 1082130432
> .word 1082130432
> .word 1082130432
> .word 1084227584
> .word 1084227584
> .word 1084227584
> .word 1084227584
>
> 2021-06-09 Christophe Lyon <christophe.lyon@linaro.org>
>
> PR target/100757
> gcc/
> * config/arm/vec-common.md (vcond_mask_<mode><v_cmp_result>): Fix
> expansion for MVE.
>
> gcc/testsuite/
> * gcc.target/arm/simd/pr100757.c: New test.
> * gcc.target/arm/simd/pr100757-2.c: New test.
> * gcc.target/arm/simd/pr100757-3.c: New test.
> ---
> gcc/config/arm/vec-common.md | 24 +++++++++++++++++--
> .../gcc.target/arm/simd/pr100757-2.c | 20 ++++++++++++++++
> .../gcc.target/arm/simd/pr100757-3.c | 20 ++++++++++++++++
> gcc/testsuite/gcc.target/arm/simd/pr100757.c | 19 +++++++++++++++
> 4 files changed, 81 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
> create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
> create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757.c
>
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 0ffc7a9322c..ccdfaa8321f 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -478,8 +478,28 @@ (define_expand "vcond_mask_<mode><v_cmp_result>"
> }
> else if (TARGET_HAVE_MVE)
> {
> - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
> - operands[1], operands[2], operands[3]));
> + /* Convert pre-computed vector comparison into VPR.P0 by comparing
> + operand 3 with a vector of '1', then use VPSEL. */
> + machine_mode cmp_mode = GET_MODE (operands[3]);
> + rtx vpr_p0 = gen_reg_rtx (HImode);
> + rtx one = gen_reg_rtx (cmp_mode);
> + emit_move_insn (one, CONST1_RTX (cmp_mode));
> + emit_insn (gen_mve_vcmpq (EQ, cmp_mode, vpr_p0, operands[3], one));
> +
> + switch (GET_MODE_CLASS (<MODE>mode))
> + {
> + case MODE_VECTOR_INT:
> + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], operands[1], operands[2], vpr_p0));
> + break;
> + case MODE_VECTOR_FLOAT:
> + if (TARGET_HAVE_MVE_FLOAT)
> + emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0], operands[1], operands[2], vpr_p0));
> + else
> + gcc_unreachable ();
> + break;
> + default:
> + gcc_unreachable ();
> + }
> }
> else
> gcc_unreachable ();
> diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
> new file mode 100644
> index 00000000000..993ce369090
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
> +/* Derived from gcc.c-torture/compile/20160205-1.c. */
> +
> +float a[32];
> +int fn1(int d) {
> + int c = 4;
> + for (int b = 0; b < 32; b++)
> + if (a[b] != 2.0f)
> + c = 5;
> + return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* Constant 2.0f. */
> +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. */
> +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask. */
> +/* { dg-final { scan-assembler-times {\t.word\t4\n} 4 } } */ /* Initial value for c. */
> +/* { dg-final { scan-assembler-times {\t.word\t5\n} 4 } } */ /* Possible value for c. */
> diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
> new file mode 100644
> index 00000000000..b94a73b2d2c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
> +/* Copied from gcc.c-torture/compile/20160205-1.c. */
> +
> +float a[32];
> +float fn1(int d) {
> + float c = 4;
> + for (int b = 0; b < 32; b++)
> + if (a[b] != 2.0f)
> + c = 5;
> + return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* Constant 2.0f. */
> +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. */
> +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask. */
> +/* { dg-final { scan-assembler-times {\t.word\t1084227584\n} 4 } } */ /* Initial value for c. */
> +/* { dg-final { scan-assembler-times {\t.word\t1082130432\n} 4 } } */ /* Possible value for c. */
> diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757.c b/gcc/testsuite/gcc.target/arm/simd/pr100757.c
> new file mode 100644
> index 00000000000..e51e716b4ec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +/* Derived from gcc.c-torture/compile/20160205-1.c. */
> +
> +int a[32];
> +int fn1(int d) {
> + int c = 2;
> + for (int b = 0; b < 32; b++)
> + if (a[b])
> + c = 3;
> + return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. */
> +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask. */
> +/* { dg-final { scan-assembler-times {\t.word\t2\n} 4 } } */ /* Initial value for c. */
> +/* { dg-final { scan-assembler-times {\t.word\t3\n} 4 } } */ /* Possible value for c. */
next prev parent reply other threads:[~2021-06-09 15:04 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-08 21:38 Christophe Lyon
2021-06-08 21:38 ` [PATCH 2/2] arm: Fix fix arm_expand_vcond for MVE Christophe Lyon
2021-06-09 15:04 ` Richard Sandiford [this message]
2021-07-02 8:53 ` [PATCH 1/2] arm: Fix vcond_mask expander for MVE (PR target/100757) Christophe Lyon
2021-07-02 9:07 ` Christophe Lyon
2021-07-13 9:11 ` Christophe Lyon
2021-07-16 14:06 ` Richard Sandiford
2021-07-16 14:32 ` Christophe LYON
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mpta6nzm650.fsf@arm.com \
--to=richard.sandiford@arm.com \
--cc=christophe.lyon@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).