public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix peeling for alignment with negative step
@ 2021-09-29 11:46 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2021-09-29 11:46 UTC (permalink / raw)
  To: gcc-patches

The following fixes a regression causing us to no longer peel
negative step loops for alignment.  With dr_misalignment now
applying the bias for negative step we have to do the reverse
when adjusting the misalignment for peeled DRs.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-09-29  Richard Biener  <rguenther@suse.de>

	* tree-vect-data-refs.c (vect_dr_misalign_for_aligned_access):
	New helper.
	(vect_update_misalignment_for_peel): Use it to update
	the misalignment to the value necessary for an aligned access.
	(vect_get_peeling_costs_all_drs): Likewise.
	(vect_enhance_data_refs_alignment): Likewise.

	* gcc.target/i386/vect-alignment-peeling-1.c: New testcase.
	* gcc.target/i386/vect-alignment-peeling-2.c: Likewise.
---
 .../i386/vect-alignment-peeling-1.c           | 90 +++++++++++++++++++
 .../i386/vect-alignment-peeling-2.c           | 90 +++++++++++++++++++
 gcc/tree-vect-data-refs.c                     | 39 ++++++--
 3 files changed, 213 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-alignment-peeling-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-alignment-peeling-2.c

diff --git a/gcc/testsuite/gcc.target/i386/vect-alignment-peeling-1.c b/gcc/testsuite/gcc.target/i386/vect-alignment-peeling-1.c
new file mode 100644
index 00000000000..4aa536ba86c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-alignment-peeling-1.c
@@ -0,0 +1,90 @@
+/* { dg-do run { target lp64 } } */
+/* This is a test exercising peeling for alignment for a negative step
+   vector loop.  We're forcing atom tuning here because that has a higher
+   unaligned vs aligned cost unlike most other archs.  */
+/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */
+
+float a[1024], b[1024];
+
+void __attribute__((noipa)) foo1 ()
+{
+  for (int i = 507; i > 1; --i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo2 ()
+{
+  for (int i = 506; i > 1; --i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo3 ()
+{
+  for (int i = 505; i > 1; --i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo4 ()
+{
+  for (int i = 504; i > 1; --i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo5 (int start)
+{
+  for (int i = start; i > 1; --i)
+    a[i] = b[i] * 2.;
+}
+
+int main()
+{
+  for (int i = 2; i < 508; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo1 ();
+  for (int i = 2; i < 508; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 2; i < 507; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo2 ();
+  for (int i = 2; i < 507; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 2; i < 506; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo3 ();
+  for (int i = 2; i < 506; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 2; i < 505; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo4 ();
+  for (int i = 2; i < 505; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 2; i < 506; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo5 (505);
+  for (int i = 2; i < 506; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */ 
+/* Verify all vector accesses are emitted as aligned.  */
+/* { dg-final { scan-assembler-not "movup" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-alignment-peeling-2.c b/gcc/testsuite/gcc.target/i386/vect-alignment-peeling-2.c
new file mode 100644
index 00000000000..834bf0f770d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-alignment-peeling-2.c
@@ -0,0 +1,90 @@
+/* { dg-do run { target lp64 } } */
+/* This is a test exercising peeling for alignment for a positive step
+   vector loop.  We're forcing atom tuning here because that has a higher
+   unaligned vs aligned cost unlike most other archs.  */
+/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */
+
+float a[1024], b[1024];
+
+void __attribute__((noipa)) foo1 ()
+{
+  for (int i = 2; i < 508; ++i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo2 ()
+{
+  for (int i = 3; i < 508; ++i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo3 ()
+{
+  for (int i = 4; i < 508; ++i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo4 ()
+{
+  for (int i = 5; i < 508; ++i)
+    a[i] = b[i] * 2.;
+}
+void __attribute__((noipa)) foo5 (int start)
+{
+  for (int i = start; i < 508; ++i)
+    a[i] = b[i] * 2.;
+}
+
+int main()
+{
+  for (int i = 2; i < 508; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo1 ();
+  for (int i = 2; i < 508; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 3; i < 508; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo2 ();
+  for (int i = 3; i < 508; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 4; i < 508; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo3 ();
+  for (int i = 4; i < 508; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 5; i < 508; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo4 ();
+  for (int i = 5; i < 508; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+
+  for (int i = 3; i < 508; ++i)
+    {
+      __asm__ volatile ("" : : : "memory");
+      b[i] = i;
+    }
+  foo5 (3);
+  for (int i = 3; i < 508; ++i)
+    if (a[i] != 2*i)
+      __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */ 
+/* Verify all vector accesses are emitted as aligned.  */
+/* { dg-final { scan-assembler-not "movup" } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index a0366fddbf3..1c6fc4a8f0f 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1214,6 +1214,29 @@ vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
   return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
 }
 
+/* Compute the value for dr_info->misalign so that the access appears
+   aligned.  This is used by peeling to compensate for dr_misalignment
+   applying the offset for negative step.  */
+
+int
+vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
+{
+  if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
+    return 0;
+
+  tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
+  poly_int64 misalignment
+    = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+       * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
+
+  unsigned HOST_WIDE_INT target_alignment_c;
+  int misalign;
+  if (!dr_info->target_alignment.is_constant (&target_alignment_c)
+      || !known_misalignment (misalignment, target_alignment_c, &misalign))
+    return DR_MISALIGNMENT_UNKNOWN;
+  return misalign;
+}
+
 /* Function vect_update_misalignment_for_peel.
    Sets DR_INFO's misalignment
    - to 0 if it has the same alignment as DR_PEEL_INFO,
@@ -1233,7 +1256,8 @@ vect_update_misalignment_for_peel (dr_vec_info *dr_info,
   /* If dr_info is aligned of dr_peel_info is, then mark it so.  */
   if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
     {
-      SET_DR_MISALIGNMENT (dr_info, 0);
+      SET_DR_MISALIGNMENT (dr_info,
+			   vect_dr_misalign_for_aligned_access (dr_peel_info));
       return;
     }
 
@@ -1241,9 +1265,9 @@ vect_update_misalignment_for_peel (dr_vec_info *dr_info,
   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
   if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
       && known_alignment_for_access_p (dr_info, vectype)
-      && known_alignment_for_access_p (dr_peel_info, vectype))
+      && npeel != -1)
     {
-      int misal = dr_misalignment (dr_info, vectype);
+      int misal = dr_info->misalignment;
       misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
       misal &= alignment - 1;
       set_dr_misalignment (dr_info, misal);
@@ -1516,7 +1540,8 @@ vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
       if (npeel == 0)
 	;
       else if (unknown_misalignment && dr_info == dr0_info)
-	SET_DR_MISALIGNMENT (dr_info, 0);
+	SET_DR_MISALIGNMENT (dr_info,
+			     vect_dr_misalign_for_aligned_access (dr0_info));
       else
 	vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
       vect_get_data_access_cost (loop_vinfo, dr_info, inside_cost, outside_cost,
@@ -2278,7 +2303,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
           else
 	    LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
-	  SET_DR_MISALIGNMENT (dr0_info, 0);
+	  SET_DR_MISALIGNMENT (dr0_info,
+			       vect_dr_misalign_for_aligned_access (dr0_info));
 	  if (dump_enabled_p ())
             {
               dump_printf_loc (MSG_NOTE, vect_location,
@@ -2402,7 +2428,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
         {
 	  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
-	  SET_DR_MISALIGNMENT (dr_info, 0);
+	  SET_DR_MISALIGNMENT (dr_info,
+			       vect_dr_misalign_for_aligned_access (dr_info));
 	  if (dump_enabled_p ())
             dump_printf_loc (MSG_NOTE, vect_location,
                              "Alignment of access forced using versioning.\n");
-- 
2.31.1

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-29 11:46 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-29 11:46 [PATCH] Fix peeling for alignment with negative step Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).