[PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion
@ 2021-11-10  4:06 apinski
  0 siblings, 0 replies; 5+ messages in thread
From: apinski @ 2021-11-10  4:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

From: Andrew Pinski <apinski@marvell.com>

The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, just use the
original input directly.

Committed as approved after bootstrapped and tested on
aarch64-linux-gnu with no regressions.

	PR target/101529

gcc/ChangeLog:

	* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
	as an input, use original one.

gcc/testsuite/ChangeLog:

	* c-c++-common/torture/builtin-convertvector-2.c: New test.
	* c-c++-common/torture/builtin-shufflevector-2.c: New test.
---
 gcc/config/aarch64/aarch64.c                  |  3 +--
 .../torture/builtin-convertvector-2.c         | 26 +++++++++++++++++++
 .../torture/builtin-shufflevector-2.c         | 26 +++++++++++++++++++
 3 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 506764e5cfa..0549ea36900 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23102,11 +23102,10 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
     }
   gcc_assert (extractindex < nelt);
 
-  emit_move_insn (d->target, insv);
   insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], d->target, mode);
-  create_input_operand (&ops[1], d->target, mode);
+  create_input_operand (&ops[1], insv, mode);
   create_integer_operand (&ops[2], 1 << idx);
   create_input_operand (&ops[3], extractv, mode);
   create_integer_operand (&ops[4], extractindex);
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
new file mode 100644
index 00000000000..d88f6a72b5c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* PR target/101529 */
+
+typedef unsigned char __attribute__((__vector_size__ (1))) W;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short __attribute__((__vector_size__ (16))) U;
+
+unsigned short us;
+
+/* aarch64 used to miscompile foo to just return 0. */
+W
+foo (unsigned char uc)
+{
+  V v = __builtin_convertvector ((U){ } >= us, V);
+  return __builtin_shufflevector ((W){ }, v, 4) & uc;
+}
+
+int
+main (void)
+{
+  W x = foo (5);
+  if (x[0] != 5)
+    __builtin_abort();
+  return 0;
+}
+
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
new file mode 100644
index 00000000000..7c4999ed4e9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run}  */
+/* PR target/101529 */
+typedef unsigned char C;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned char __attribute__((__vector_size__ (32))) U;
+
+C c;
+
+/* aarch64 used to miscompile foo to just return a vector of 0s */
+V
+foo (V v)
+{
+  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
+				0, 1, 8, 32, 8, 20, 36, 36);
+  return v;
+}
+
+int
+main (void)
+{
+  V v = foo ((V) { });
+  for (unsigned i = 0; i < sizeof (v); i++)
+    if (v[i] != (i >= 2 ? 0xff : 0))
+      __builtin_abort ();
+  return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion
@ 2022-01-26 17:50 apinski
  0 siblings, 0 replies; 5+ messages in thread
From: apinski @ 2022-01-26 17:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

From: Andrew Pinski <apinski@marvell.com>

The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, just use the
original input directly.

Committed as approved after bootstrapped and tested on
aarch64-linux-gnu with no regressions.
Note the testcases are not backported as __builtin_shufflevector
does not exist in GCC 11.

	PR target/101529

gcc/ChangeLog:

	* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
	as an input, use original one.

(cherry picked from commit 52fa771758635d9c53cddb9116e5a66fae592230)
---
 gcc/config/aarch64/aarch64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bbcf5ed4a61..b58a379759d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23026,11 +23026,10 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
     }
   gcc_assert (extractindex < nelt);
 
-  emit_move_insn (d->target, insv);
   insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], d->target, mode);
-  create_input_operand (&ops[1], d->target, mode);
+  create_input_operand (&ops[1], insv, mode);
   create_integer_operand (&ops[2], 1 << idx);
   create_input_operand (&ops[3], extractv, mode);
   create_integer_operand (&ops[4], extractindex);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion
  2021-11-09 14:51 ` Richard Sandiford
@ 2021-11-10  4:23   ` Andrew Pinski
  0 siblings, 0 replies; 5+ messages in thread
From: Andrew Pinski @ 2021-11-10  4:23 UTC (permalink / raw)
  To: Richard Sandiford, apinski--- via Gcc-patches, Andrew Pinski

On Tue, Nov 9, 2021 at 6:51 AM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > From: Andrew Pinski <apinski@marvell.com>
> >
> > The function aarch64_evpc_ins would reuse the target even though
> > it might be the same register as the two inputs.
> > Instead of checking to see if we can reuse the target, creating
> > a new register always is better.
> >
> > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> >
> >       PR target/101529
> >
> > gcc/ChangeLog:
> >
> >       * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
> >       as an input instead create a new reg.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * c-c++-common/torture/builtin-convertvector-2.c: New test.
> >       * c-c++-common/torture/builtin-shufflevector-2.c: New test.
> > ---
> >  gcc/config/aarch64/aarch64.c                  |  8 ++++--
> >  .../torture/builtin-convertvector-2.c         | 26 +++++++++++++++++++
> >  .../torture/builtin-shufflevector-2.c         | 26 +++++++++++++++++++
> >  3 files changed, 58 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> >  create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 2c00583e12c..e4fc546fae7 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
> >      }
> >    gcc_assert (extractindex < nelt);
> >
> > -  emit_move_insn (d->target, insv);
> > +  /* Use a new reg instead of target as one of the
> > +     operands might be target. */
> > +  rtx original = gen_reg_rtx (GET_MODE (d->target));
> > +
> > +  emit_move_insn (original, insv);
>
> Why can't we just remove the move and pass insv directly to
> create_input_operand?  That'll encourage the RA to allocate insv and
> d->target to the same register where possible (but will reload where
> tying is not possible).
>
> OK with that change if it works.

Yes that worked, committed as r12-5078-g52fa771758 .

Thanks,
Andrew

>
> Thanks,
> Richard
>
> >    insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
> >    expand_operand ops[5];
> >    create_output_operand (&ops[0], d->target, mode);
> > -  create_input_operand (&ops[1], d->target, mode);
> > +  create_input_operand (&ops[1], original, mode);
> >    create_integer_operand (&ops[2], 1 << idx);
> >    create_input_operand (&ops[3], extractv, mode);
> >    create_integer_operand (&ops[4], extractindex);
> > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> > new file mode 100644
> > index 00000000000..d88f6a72b5c
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do run } */
> > +/* PR target/101529 */
> > +
> > +typedef unsigned char __attribute__((__vector_size__ (1))) W;
> > +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> > +typedef unsigned short __attribute__((__vector_size__ (16))) U;
> > +
> > +unsigned short us;
> > +
> > +/* aarch64 used to miscompile foo to just return 0. */
> > +W
> > +foo (unsigned char uc)
> > +{
> > +  V v = __builtin_convertvector ((U){ } >= us, V);
> > +  return __builtin_shufflevector ((W){ }, v, 4) & uc;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  W x = foo (5);
> > +  if (x[0] != 5)
> > +    __builtin_abort();
> > +  return 0;
> > +}
> > +
> > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> > new file mode 100644
> > index 00000000000..7c4999ed4e9
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do run}  */
> > +/* PR target/101529 */
> > +typedef unsigned char C;
> > +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> > +typedef unsigned char __attribute__((__vector_size__ (32))) U;
> > +
> > +C c;
> > +
> > +/* aarch64 used to miscompile foo to just return a vector of 0s */
> > +V
> > +foo (V v)
> > +{
> > +  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
> > +                             0, 1, 8, 32, 8, 20, 36, 36);
> > +  return v;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  V v = foo ((V) { });
> > +  for (unsigned i = 0; i < sizeof (v); i++)
> > +    if (v[i] != (i >= 2 ? 0xff : 0))
> > +      __builtin_abort ();
> > +  return 0;
> > +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion
  2021-11-06 19:31 apinski
@ 2021-11-09 14:51 ` Richard Sandiford
  2021-11-10  4:23   ` Andrew Pinski
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Sandiford @ 2021-11-09 14:51 UTC (permalink / raw)
  To: apinski--- via Gcc-patches; +Cc: apinski

apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> From: Andrew Pinski <apinski@marvell.com>
>
> The function aarch64_evpc_ins would reuse the target even though
> it might be the same register as the two inputs.
> Instead of checking to see if we can reuse the target, creating
> a new register always is better.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> 	PR target/101529
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
> 	as an input instead create a new reg.
>
> gcc/testsuite/ChangeLog:
>
> 	* c-c++-common/torture/builtin-convertvector-2.c: New test.
> 	* c-c++-common/torture/builtin-shufflevector-2.c: New test.
> ---
>  gcc/config/aarch64/aarch64.c                  |  8 ++++--
>  .../torture/builtin-convertvector-2.c         | 26 +++++++++++++++++++
>  .../torture/builtin-shufflevector-2.c         | 26 +++++++++++++++++++
>  3 files changed, 58 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
>  create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 2c00583e12c..e4fc546fae7 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
>      }
>    gcc_assert (extractindex < nelt);
>  
> -  emit_move_insn (d->target, insv);
> +  /* Use a new reg instead of target as one of the
> +     operands might be target. */
> +  rtx original = gen_reg_rtx (GET_MODE (d->target));
> +
> +  emit_move_insn (original, insv);

Why can't we just remove the move and pass insv directly to
create_input_operand?  That'll encourage the RA to allocate insv and
d->target to the same register where possible (but will reload where
tying is not possible).

OK with that change if it works.

Thanks,
Richard

>    insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
>    expand_operand ops[5];
>    create_output_operand (&ops[0], d->target, mode);
> -  create_input_operand (&ops[1], d->target, mode);
> +  create_input_operand (&ops[1], original, mode);
>    create_integer_operand (&ops[2], 1 << idx);
>    create_input_operand (&ops[3], extractv, mode);
>    create_integer_operand (&ops[4], extractindex);
> diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> new file mode 100644
> index 00000000000..d88f6a72b5c
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* PR target/101529 */
> +
> +typedef unsigned char __attribute__((__vector_size__ (1))) W;
> +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> +typedef unsigned short __attribute__((__vector_size__ (16))) U;
> +
> +unsigned short us;
> +
> +/* aarch64 used to miscompile foo to just return 0. */
> +W
> +foo (unsigned char uc)
> +{
> +  V v = __builtin_convertvector ((U){ } >= us, V);
> +  return __builtin_shufflevector ((W){ }, v, 4) & uc;
> +}
> +
> +int
> +main (void)
> +{
> +  W x = foo (5);
> +  if (x[0] != 5)
> +    __builtin_abort();
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> new file mode 100644
> index 00000000000..7c4999ed4e9
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run}  */
> +/* PR target/101529 */
> +typedef unsigned char C;
> +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> +typedef unsigned char __attribute__((__vector_size__ (32))) U;
> +
> +C c;
> +
> +/* aarch64 used to miscompile foo to just return a vector of 0s */
> +V
> +foo (V v)
> +{
> +  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
> +				0, 1, 8, 32, 8, 20, 36, 36);
> +  return v;
> +}
> +
> +int
> +main (void)
> +{
> +  V v = foo ((V) { });
> +  for (unsigned i = 0; i < sizeof (v); i++)
> +    if (v[i] != (i >= 2 ? 0xff : 0))
> +      __builtin_abort ();
> +  return 0;
> +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion
@ 2021-11-06 19:31 apinski
  2021-11-09 14:51 ` Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: apinski @ 2021-11-06 19:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

From: Andrew Pinski <apinski@marvell.com>

The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, creating
a new register always is better.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

	PR target/101529

gcc/ChangeLog:

	* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
	as an input instead create a new reg.

gcc/testsuite/ChangeLog:

	* c-c++-common/torture/builtin-convertvector-2.c: New test.
	* c-c++-common/torture/builtin-shufflevector-2.c: New test.
---
 gcc/config/aarch64/aarch64.c                  |  8 ++++--
 .../torture/builtin-convertvector-2.c         | 26 +++++++++++++++++++
 .../torture/builtin-shufflevector-2.c         | 26 +++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2c00583e12c..e4fc546fae7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
     }
   gcc_assert (extractindex < nelt);
 
-  emit_move_insn (d->target, insv);
+  /* Use a new reg instead of target as one of the
+     operands might be target. */
+  rtx original = gen_reg_rtx (GET_MODE (d->target));
+
+  emit_move_insn (original, insv);
   insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], d->target, mode);
-  create_input_operand (&ops[1], d->target, mode);
+  create_input_operand (&ops[1], original, mode);
   create_integer_operand (&ops[2], 1 << idx);
   create_input_operand (&ops[3], extractv, mode);
   create_integer_operand (&ops[4], extractindex);
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
new file mode 100644
index 00000000000..d88f6a72b5c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* PR target/101529 */
+
+typedef unsigned char __attribute__((__vector_size__ (1))) W;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short __attribute__((__vector_size__ (16))) U;
+
+unsigned short us;
+
+/* aarch64 used to miscompile foo to just return 0. */
+W
+foo (unsigned char uc)
+{
+  V v = __builtin_convertvector ((U){ } >= us, V);
+  return __builtin_shufflevector ((W){ }, v, 4) & uc;
+}
+
+int
+main (void)
+{
+  W x = foo (5);
+  if (x[0] != 5)
+    __builtin_abort();
+  return 0;
+}
+
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
new file mode 100644
index 00000000000..7c4999ed4e9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run}  */
+/* PR target/101529 */
+typedef unsigned char C;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned char __attribute__((__vector_size__ (32))) U;
+
+C c;
+
+/* aarch64 used to miscompile foo to just return a vector of 0s */
+V
+foo (V v)
+{
+  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
+				0, 1, 8, 32, 8, 20, 36, 36);
+  return v;
+}
+
+int
+main (void)
+{
+  V v = foo ((V) { });
+  for (unsigned i = 0; i < sizeof (v); i++)
+    if (v[i] != (i >= 2 ? 0xff : 0))
+      __builtin_abort ();
+  return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-01-26 17:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-10  4:06 [PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion apinski
  -- strict thread matches above, loose matches on Subject: below --
2022-01-26 17:50 apinski
2021-11-06 19:31 apinski
2021-11-09 14:51 ` Richard Sandiford
2021-11-10  4:23   ` Andrew Pinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).