public inbox for gcc-patches@gcc.gnu.org
* [PATCH, AArch64] PR target/101609 - Use the correct iterator for AArch64 vector right shift pattern.
@ 2021-08-05 11:18 Tejas Belagod
  2021-08-06 13:07 ` Richard Sandiford
  0 siblings, 1 reply; 2+ messages in thread
From: Tejas Belagod @ 2021-08-05 11:18 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 686 bytes --]

Hi,

Loops containing long long right shifts by a variable amount fail to
vectorize because the vectorizer cannot recognize them: the vashr and
vlshr patterns in aarch64-simd.md use the VDQ_BHSI mode iterator, which
excludes the 64-bit element mode V2DI. This patch switches them to VDQ_I.
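
A loop of this shape (illustrative only; the attached testcase exercises
the same pattern) previously stayed scalar because the expanders did not
accept V2DImode:

  #include <stdint.h>

  /* 64-bit elements shifted right by per-element variable amounts;
     with VDQ_I the vectorizer can now use the vlshr expander here.  */
  void
  shr_var (uint64_t *restrict a, const uint64_t *restrict b,
           const uint64_t *restrict c)
  {
    for (int i = 0; i < 16; i++)
      a[i] = b[i] >> c[i];
  }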

Tested and bootstrapped on aarch64-linux. OK?

2021-08-05  Tejas Belagod  <tejas.belagod@arm.com>

gcc/ChangeLog:

            PR target/101609
            * config/aarch64/aarch64-simd.md (vlshr<mode>3, vashr<mode>3): Use
            the VDQ_I mode iterator instead of VDQ_BHSI.

gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/vect-shr-reg.c: New testcase.
            * gcc.target/aarch64/vect-shr-reg-run.c: Likewise.


Thanks,
Tejas Belagod.

[-- Attachment #2: shr.txt --]
[-- Type: text/plain, Size: 3431 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c5638d096fa84a27b4ea397f62cd0d05a28e7c8c..48eddf64e05afe3788abfa05141f6544a9323ea1 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1299,13 +1299,10 @@ (define_expand "vashl<mode>3"
   DONE;
 })
 
-;; Using mode VDQ_BHSI as there is no V2DImode neg!
-;; Negating individual lanes most certainly offsets the
-;; gain from vectorization.
 (define_expand "vashr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand")
-  (match_operand:VDQ_BHSI 1 "register_operand")
-  (match_operand:VDQ_BHSI 2 "register_operand")]
+ [(match_operand:VDQ_I 0 "register_operand")
+  (match_operand:VDQ_I 1 "register_operand")
+  (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
 {
   rtx neg = gen_reg_rtx (<MODE>mode);
@@ -1333,9 +1330,9 @@ (define_expand "aarch64_ashr_simddi"
 )
 
 (define_expand "vlshr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand")
-  (match_operand:VDQ_BHSI 1 "register_operand")
-  (match_operand:VDQ_BHSI 2 "register_operand")]
+ [(match_operand:VDQ_I 0 "register_operand")
+  (match_operand:VDQ_I 1 "register_operand")
+  (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
 {
   rtx neg = gen_reg_rtx (<MODE>mode);
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-shr-reg-run.c b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg-run.c
new file mode 100644
index 0000000000000000000000000000000000000000..3190448e0936b9d5265f538304f9d20f13927339
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg-run.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8.2-a" } */
+
+#include "vect-shr-reg.c"
+
+int
+main(void)
+{
+  int64_t a[16];
+  int64_t b[16];
+  int64_t c[17];
+
+  uint64_t ua[16];
+  uint64_t ub[16];
+  uint64_t uc[17];
+
+  int64_t res_a[16];
+  uint64_t res_ua[16];
+
+  int i;
+
+  /* Set up inputs.  */
+  for (i = 0; i < 16; i++)
+    {
+      b[i] = -2;
+      c[i] = 34;
+      ub[i] = 0xffffffffffffffff;
+      uc[i] = 52;
+    }
+
+  /* Set up reference values.  */
+  for (i = 0; i < 16; i++)
+    {
+      res_a[i] = -1LL;
+      res_ua[i] = 0x0fffLL;
+    }
+
+  /* Do the shifts.  */
+  f (ua, ub, uc);
+  g (a, b, c);
+
+  /* Compare outputs against reference values.  */
+  for (i = 0; i < 16; i++)
+    {
+      if (a[i] != res_a[i])
+	__builtin_abort ();
+
+      if (ua[i] != res_ua[i])
+	__builtin_abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-shr-reg.c b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg.c
new file mode 100644
index 0000000000000000000000000000000000000000..5736dafb5a19957032e7b4bc1e90b218f52788fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#pragma GCC target "+nosve"
+
+int __attribute__((noinline))
+f(uint64_t *__restrict a, uint64_t *__restrict b, uint64_t *__restrict c)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    a[i] = b[i] >> c[i];
+}
+
+
+int __attribute__((noinline))
+g(int64_t *__restrict a, int64_t *__restrict b, int64_t *__restrict c)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    a[i] = b[i] >> c[i];
+}
+
+/* { dg-final { scan-assembler "neg\\tv" } } */
+/* { dg-final { scan-assembler "ushl\\tv" } } */
+/* { dg-final { scan-assembler "sshl\\tv" } } */


* Re: [PATCH, AArch64] PR target/101609 - Use the correct iterator for AArch64 vector right shift pattern.
  2021-08-05 11:18 [PATCH, AArch64] PR target/101609 - Use the correct iterator for AArch64 vector right shift pattern Tejas Belagod
@ 2021-08-06 13:07 ` Richard Sandiford
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2021-08-06 13:07 UTC (permalink / raw)
  To: Tejas Belagod via Gcc-patches; +Cc: Tejas Belagod

Tejas Belagod via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> Loops containing long long right shifts by a variable amount fail to
> vectorize because the vectorizer cannot recognize them: the vashr and
> vlshr patterns in aarch64-simd.md use the VDQ_BHSI mode iterator, which
> excludes the 64-bit element mode V2DI. This patch switches them to VDQ_I.
>
> Tested and bootstrapped on aarch64-linux. OK?
>
> 2021-08-05  Tejas Belagod  <tejas.belagod@arm.com>
>
> gcc/ChangeLog:
>
>             PR target/101609
>             * config/aarch64/aarch64-simd.md (vlshr<mode>3, vashr<mode>3): Use
>             the VDQ_I mode iterator instead of VDQ_BHSI.
>
> gcc/testsuite/ChangeLog:
>
>             * gcc.target/aarch64/vect-shr-reg.c: New testcase.
>             * gcc.target/aarch64/vect-shr-reg-run.c: Likewise.

OK, thanks.  Nice how we're still finding these little easter eggs
from the dawn of the port. :-)

Richard

