public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Fix PEELING_FOR_NITERS calculation (PR 87288)
@ 2018-09-20 12:31 Richard Sandiford
  2018-09-20 12:51 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Richard Sandiford @ 2018-09-20 12:31 UTC (permalink / raw)
  To: gcc-patches

PEELING_FOR_GAPS now means "peel one iteration for the epilogue",
in much the same way that PEELING_FOR_ALIGNMENT > 0 means
"peel that number of iterations for the prologue".  We weren't
taking this into account when deciding whether we needed to peel
further scalar iterations beyond the iterations for "gaps" and
"alignment".

Only the first test failed before the patch.  The other two
are just for completeness.

Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
and x86_64-linux-gnu.  OK to install?

Richard


2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/87288
	* tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
	into account when determining PEELING_FOR_NITERS.

gcc/testsuite/
	PR tree-optimization/87288
	* gcc.dg/vect/pr87288-1.c: New test.
	* gcc.dg/vect/pr87288-2.c: Likewise.
	* gcc.dg/vect/pr87288-3.c: Likewise.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2018-09-20 12:39:14.541555902 +0100
+++ gcc/tree-vect-loop.c	2018-09-20 12:39:19.013518199 +0100
@@ -2074,14 +2074,22 @@ vect_analyze_loop_2 (loop_vec_info loop_
     /* The main loop handles all iterations.  */
     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
     {
-      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
-		       - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
+      /* Work out the (constant) number of iterations that need to be
+	 peeled for reasons other than niters.  */
+      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+	peel_niter += 1;
+      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
 		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
 	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
     }
   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+	   /* ??? When peeling for gaps but not alignment, we could
+	      try to check whether the (variable) niters is known to be
+	      VF * N + 1.  That's something of a niche case though.  */
+	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
 	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
 	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
 		< (unsigned) exact_log2 (const_vf))
Index: gcc/testsuite/gcc.dg/vect/pr87288-1.c
===================================================================
--- /dev/null	2018-09-14 11:16:31.122530289 +0100
+++ gcc/testsuite/gcc.dg/vect/pr87288-1.c	2018-09-20 12:39:19.009518233 +0100
@@ -0,0 +1,49 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+void __attribute__ ((noipa))
+run (int *restrict a, int *restrict b, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    {
+      a[i * 2] = b[i * 2] + count;
+      a[i * 2 + 1] = count;
+    }
+}
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  for (int i = 0; i <= MAX_COUNT; ++i)
+    {
+      a[i * 2 * N] = 999;
+      run (a, b, i);
+      check (a, i);
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
Index: gcc/testsuite/gcc.dg/vect/pr87288-2.c
===================================================================
--- /dev/null	2018-09-14 11:16:31.122530289 +0100
+++ gcc/testsuite/gcc.dg/vect/pr87288-2.c	2018-09-20 12:39:19.009518233 +0100
@@ -0,0 +1,64 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT)				\
+  void __attribute__ ((noipa))				\
+  run_##COUNT (int *restrict a, int *restrict b)	\
+  {							\
+    for (int i = 0; i < N * COUNT; ++i)			\
+      {							\
+	a[i * 2] = b[i * 2] + COUNT;			\
+	a[i * 2 + 1] = COUNT;				\
+      }							\
+  }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  a[N * 2] = 999;
+  run_1 (a, b);
+  check (a, 1);
+
+  a[N * 4] = 999;
+  run_2 (a, b);
+  check (a, 2);
+
+  a[N * 6] = 999;
+  run_3 (a, b);
+  check (a, 3);
+
+  a[N * 8] = 999;
+  run_4 (a, b);
+  check (a, 4);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
Index: gcc/testsuite/gcc.dg/vect/pr87288-3.c
===================================================================
--- /dev/null	2018-09-14 11:16:31.122530289 +0100
+++ gcc/testsuite/gcc.dg/vect/pr87288-3.c	2018-09-20 12:39:19.009518233 +0100
@@ -0,0 +1,64 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT)				\
+  void __attribute__ ((noipa))				\
+  run_##COUNT (int *restrict a, int *restrict b)	\
+  {							\
+    for (int i = 0; i < N * COUNT + 1; ++i)		\
+      {							\
+	a[i * 2] = b[i * 2] + COUNT;			\
+	a[i * 2 + 1] = COUNT;				\
+      }							\
+  }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N + 1; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N + 2] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT + 1; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  a[N * 2 + 2] = 999;
+  run_1 (a, b);
+  check (a, 1);
+
+  a[N * 4 + 2] = 999;
+  run_2 (a, b);
+  check (a, 2);
+
+  a[N * 6 + 2] = 999;
+  run_3 (a, b);
+  check (a, 3);
+
+  a[N * 8 + 2] = 999;
+  run_4 (a, b);
+  check (a, 4);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Fix PEELING_FOR_NITERS calculation (PR 87288)
  2018-09-20 12:31 Fix PEELING_FOR_NITERS calculation (PR 87288) Richard Sandiford
@ 2018-09-20 12:51 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2018-09-20 12:51 UTC (permalink / raw)
  To: GCC Patches, Richard Sandiford

On Thu, Sep 20, 2018 at 1:44 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> PEELING_FOR_GAPS now means "peel one iteration for the epilogue",
> in much the same way that PEELING_FOR_ALIGNMENT > 0 means
> "peel that number of iterations for the prologue".  We weren't
> taking this into account when deciding whether we needed to peel
> further scalar iterations beyond the iterations for "gaps" and
> "alignment".
>
> Only the first test failed before the patch.  The other two
> are just for completeness.
>
> Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
> and x86_64-linux-gnu.  OK to install?

OK.

Richard.

> Richard
>
>
> 2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         PR tree-optimization/87288
>         * tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
>         into account when determining PEELING_FOR_NITERS.
>
> gcc/testsuite/
>         PR tree-optimization/87288
>         * gcc.dg/vect/pr87288-1.c: New test.
>         * gcc.dg/vect/pr87288-2.c: Likewise.
>         * gcc.dg/vect/pr87288-3.c: Likewise.
>
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        2018-09-20 12:39:14.541555902 +0100
> +++ gcc/tree-vect-loop.c        2018-09-20 12:39:19.013518199 +0100
> @@ -2074,14 +2074,22 @@ vect_analyze_loop_2 (loop_vec_info loop_
>      /* The main loop handles all iterations.  */
>      LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
>    else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> -          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
> +          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
>      {
> -      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
> -                      - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
> +      /* Work out the (constant) number of iterations that need to be
> +        peeled for reasons other than niters.  */
> +      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
> +      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
> +       peel_niter += 1;
> +      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
>                        LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
>         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
>      }
>    else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
> +          /* ??? When peeling for gaps but not alignment, we could
> +             try to check whether the (variable) niters is known to be
> +             VF * N + 1.  That's something of a niche case though.  */
> +          || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
>            || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
>            || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
>                 < (unsigned) exact_log2 (const_vf))
> Index: gcc/testsuite/gcc.dg/vect/pr87288-1.c
> ===================================================================
> --- /dev/null   2018-09-14 11:16:31.122530289 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr87288-1.c       2018-09-20 12:39:19.009518233 +0100
> @@ -0,0 +1,49 @@
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS / 32)
> +#define MAX_COUNT 4
> +
> +void __attribute__ ((noipa))
> +run (int *restrict a, int *restrict b, int count)
> +{
> +  for (int i = 0; i < count * N; ++i)
> +    {
> +      a[i * 2] = b[i * 2] + count;
> +      a[i * 2 + 1] = count;
> +    }
> +}
> +
> +void __attribute__ ((noipa))
> +check (int *restrict a, int count)
> +{
> +  for (int i = 0; i < count * N; ++i)
> +    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
> +      __builtin_abort ();
> +  if (a[count * 2 * N] != 999)
> +    __builtin_abort ();
> +}
> +
> +int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < N * MAX_COUNT; ++i)
> +    {
> +      b[i * 2] = i * 41;
> +      asm volatile ("" ::: "memory");
> +    }
> +
> +  for (int i = 0; i <= MAX_COUNT; ++i)
> +    {
> +      a[i * 2 * N] = 999;
> +      run (a, b, i);
> +      check (a, i);
> +    }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr87288-2.c
> ===================================================================
> --- /dev/null   2018-09-14 11:16:31.122530289 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr87288-2.c       2018-09-20 12:39:19.009518233 +0100
> @@ -0,0 +1,64 @@
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS / 32)
> +#define MAX_COUNT 4
> +
> +#define RUN_COUNT(COUNT)                               \
> +  void __attribute__ ((noipa))                         \
> +  run_##COUNT (int *restrict a, int *restrict b)       \
> +  {                                                    \
> +    for (int i = 0; i < N * COUNT; ++i)                        \
> +      {                                                        \
> +       a[i * 2] = b[i * 2] + COUNT;                    \
> +       a[i * 2 + 1] = COUNT;                           \
> +      }                                                        \
> +  }
> +
> +RUN_COUNT (1)
> +RUN_COUNT (2)
> +RUN_COUNT (3)
> +RUN_COUNT (4)
> +
> +void __attribute__ ((noipa))
> +check (int *restrict a, int count)
> +{
> +  for (int i = 0; i < count * N; ++i)
> +    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
> +      __builtin_abort ();
> +  if (a[count * 2 * N] != 999)
> +    __builtin_abort ();
> +}
> +
> +int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < N * MAX_COUNT; ++i)
> +    {
> +      b[i * 2] = i * 41;
> +      asm volatile ("" ::: "memory");
> +    }
> +
> +  a[N * 2] = 999;
> +  run_1 (a, b);
> +  check (a, 1);
> +
> +  a[N * 4] = 999;
> +  run_2 (a, b);
> +  check (a, 2);
> +
> +  a[N * 6] = 999;
> +  run_3 (a, b);
> +  check (a, 3);
> +
> +  a[N * 8] = 999;
> +  run_4 (a, b);
> +  check (a, 4);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr87288-3.c
> ===================================================================
> --- /dev/null   2018-09-14 11:16:31.122530289 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr87288-3.c       2018-09-20 12:39:19.009518233 +0100
> @@ -0,0 +1,64 @@
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS / 32)
> +#define MAX_COUNT 4
> +
> +#define RUN_COUNT(COUNT)                               \
> +  void __attribute__ ((noipa))                         \
> +  run_##COUNT (int *restrict a, int *restrict b)       \
> +  {                                                    \
> +    for (int i = 0; i < N * COUNT + 1; ++i)            \
> +      {                                                        \
> +       a[i * 2] = b[i * 2] + COUNT;                    \
> +       a[i * 2 + 1] = COUNT;                           \
> +      }                                                        \
> +  }
> +
> +RUN_COUNT (1)
> +RUN_COUNT (2)
> +RUN_COUNT (3)
> +RUN_COUNT (4)
> +
> +void __attribute__ ((noipa))
> +check (int *restrict a, int count)
> +{
> +  for (int i = 0; i < count * N + 1; ++i)
> +    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
> +      __builtin_abort ();
> +  if (a[count * 2 * N + 2] != 999)
> +    __builtin_abort ();
> +}
> +
> +int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < N * MAX_COUNT + 1; ++i)
> +    {
> +      b[i * 2] = i * 41;
> +      asm volatile ("" ::: "memory");
> +    }
> +
> +  a[N * 2 + 2] = 999;
> +  run_1 (a, b);
> +  check (a, 1);
> +
> +  a[N * 4 + 2] = 999;
> +  run_2 (a, b);
> +  check (a, 2);
> +
> +  a[N * 6 + 2] = 999;
> +  run_3 (a, b);
> +  check (a, 3);
> +
> +  a[N * 8 + 2] = 999;
> +  run_4 (a, b);
> +  check (a, 4);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-09-20 12:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-20 12:31 Fix PEELING_FOR_NITERS calculation (PR 87288) Richard Sandiford
2018-09-20 12:51 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).