public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r10-8769] store-merging: Consider also overlapping stores earlier in the by bitpos sorting [PR97053]
@ 2020-09-16  7:50 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2020-09-16  7:50 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7e97e7470e74b0d9a68000938a359a7049774d77

commit r10-8769-g7e97e7470e74b0d9a68000938a359a7049774d77
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Wed Sep 16 09:42:33 2020 +0200

    store-merging: Consider also overlapping stores earlier in the by bitpos sorting [PR97053]
    
    As the testcases show, if we have something like:
      MEM <char[12]> [&b + 8B] = {};
      MEM[(short *) &b] = 5;
      _5 = *x_4(D);
      MEM <long long unsigned int> [&b + 2B] = _5;
      MEM[(char *)&b + 16B] = 88;
      MEM[(int *)&b + 20B] = 1;
    then in sort_by_bitpos order the stores appear almost in the given order,
    except that the first store sorts after the = _5; store.
    We can't coalesce the = 5; store with = _5;, because the latter is MEM_REF,
    while the former INTEGER_CST, and we can't coalesce the = _5 store with
    the = {} store because the former is MEM_REF, the latter INTEGER_CST.
    But we happily coalesce the remaining 3 stores, which is wrong, because the
    = _5; store overlaps those and is in between them in the program order.
    We already have code to deal with similar cases in check_no_overlap, but we
    deal only with the following stores in sort_by_bitpos order, not the earlier
    ones.
    
    The following patch checks also the earlier ones.  In coalesce_immediate_stores
    it computes the first one that needs to be checked (all the ones whose
    bitpos + bitsize is smaller or equal to merged_store->start don't need to be
    checked and don't need to be checked even for any following attempts because
    of the sort_by_bitpos sorting) and the end of that (that is the first store
    in the merged_store).
    
    2020-09-16  Jakub Jelinek  <jakub@redhat.com>
    
            PR tree-optimization/97053
            * gimple-ssa-store-merging.c (check_no_overlap): Add FIRST_ORDER,
            START, FIRST_EARLIER and END_EARLIER arguments.  Return false if
            any store between FIRST_EARLIER inclusive and END_EARLIER exclusive
            has order in between FIRST_ORDER and LAST_ORDER and overlaps the
            to-be-merged store.
            (imm_store_chain_info::try_coalesce_bswap): Add FIRST_EARLIER argument.
            Adjust check_no_overlap caller.
            (imm_store_chain_info::coalesce_immediate_stores): Add first_earlier
            and last_earlier variables, adjust them during iterations.  Adjust
            check_no_overlap callers, call check_no_overlap even when extending
            overlapping stores by extra INTEGER_CST stores.
    
            * gcc.dg/store_merging_31.c: New test.
            * gcc.dg/store_merging_32.c: New test.
    
    (cherry picked from commit bd909071ac04e94f4b6f0baab64d0687ec55681d)

Diff:
---
 gcc/gimple-ssa-store-merging.c          |  76 ++++++++++++++++---
 gcc/testsuite/gcc.dg/store_merging_31.c |  27 +++++++
 gcc/testsuite/gcc.dg/store_merging_32.c | 129 ++++++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
index 25753517cc6..1678e439e8f 100644
--- a/gcc/gimple-ssa-store-merging.c
+++ b/gcc/gimple-ssa-store-merging.c
@@ -2071,7 +2071,8 @@ public:
       }
   }
   bool terminate_and_process_chain ();
-  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
+  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
+			   unsigned int);
   bool coalesce_immediate_stores ();
   bool output_merged_store (merged_store_group *);
   bool output_merged_stores ();
@@ -2398,14 +2399,39 @@ gather_bswap_load_refs (vec<tree> *refs, tree val)
    into the group.  That way it will be its own store group and will
    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
    INTEGER_CST stores, those are mergeable using merge_overlapping,
-   so don't return false for those.  */
+   so don't return false for those.
+
+   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
+   (exclusive), whether they don't overlap the bitrange START to END
+   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
+   prevent merging in cases like:
+     MEM <char[12]> [&b + 8B] = {};
+     MEM[(short *) &b] = 5;
+     _5 = *x_4(D);
+     MEM <long long unsigned int> [&b + 2B] = _5;
+     MEM[(char *)&b + 16B] = 88;
+     MEM[(int *)&b + 20B] = 1;
+   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
+   be merged with it, because the = _5 store overlaps these and is in between
+   them in sort_by_order ordering.  If it was merged, the merged store would
+   go after the = _5 store and thus change behavior.  */
 
 static bool
 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
-		  bool all_integer_cst_p, unsigned int last_order,
-		  unsigned HOST_WIDE_INT end)
+		  bool all_integer_cst_p, unsigned int first_order,
+		  unsigned int last_order, unsigned HOST_WIDE_INT start,
+		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
+		  unsigned end_earlier)
 {
   unsigned int len = m_store_info.length ();
+  for (unsigned int j = first_earlier; j < end_earlier; j++)
+    {
+      store_immediate_info *info = m_store_info[j];
+      if (info->order > first_order
+	  && info->order < last_order
+	  && info->bitpos + info->bitsize > start)
+	return false;
+    }
   for (++i; i < len; ++i)
     {
       store_immediate_info *info = m_store_info[i];
@@ -2426,7 +2452,8 @@ check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
 bool
 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
 					  unsigned int first,
-					  unsigned int try_size)
+					  unsigned int try_size,
+					  unsigned int first_earlier)
 {
   unsigned int len = m_store_info.length (), last = first;
   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
@@ -2566,7 +2593,8 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
     return false;
 
-  if (!check_no_overlap (m_store_info, last, false, last_order, end))
+  if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
+			 merged_store->start, end, first_earlier, first))
     return false;
 
   /* Don't handle memory copy this way if normal non-bswap processing
@@ -2658,6 +2686,8 @@ imm_store_chain_info::coalesce_immediate_stores ()
 
   store_immediate_info *info;
   unsigned int i, ignore = 0;
+  unsigned int first_earlier = 0;
+  unsigned int end_earlier = 0;
 
   /* Order the stores by the bitposition they write to.  */
   m_store_info.qsort (sort_by_bitpos);
@@ -2674,6 +2704,12 @@ imm_store_chain_info::coalesce_immediate_stores ()
       if (i <= ignore)
 	goto done;
 
+      while (first_earlier < end_earlier
+	     && (m_store_info[first_earlier]->bitpos
+		 + m_store_info[first_earlier]->bitsize
+		 <= merged_store->start))
+	first_earlier++;
+
       /* First try to handle group of stores like:
 	 p[0] = data >> 24;
 	 p[1] = data >> 16;
@@ -2688,7 +2724,8 @@ imm_store_chain_info::coalesce_immediate_stores ()
 	{
 	  unsigned int try_size;
 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
-	    if (try_coalesce_bswap (merged_store, i - 1, try_size))
+	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
+				    first_earlier))
 	      break;
 
 	  if (try_size >= 16)
@@ -2696,7 +2733,10 @@ imm_store_chain_info::coalesce_immediate_stores ()
 	      ignore = i + merged_store->stores.length () - 1;
 	      m_merged_store_groups.safe_push (merged_store);
 	      if (ignore < m_store_info.length ())
-		merged_store = new merged_store_group (m_store_info[ignore]);
+		{
+		  merged_store = new merged_store_group (m_store_info[ignore]);
+		  end_earlier = ignore;
+		}
 	      else
 		merged_store = NULL;
 	      goto done;
@@ -2731,12 +2771,16 @@ imm_store_chain_info::coalesce_immediate_stores ()
 	      && merged_store->only_constants
 	      && info->lp_nr == merged_store->lp_nr)
 	    {
+	      unsigned int first_order
+		= MIN (merged_store->first_order, info->order);
 	      unsigned int last_order
 		= MAX (merged_store->last_order, info->order);
 	      unsigned HOST_WIDE_INT end
 		= MAX (merged_store->start + merged_store->width,
 		       info->bitpos + info->bitsize);
-	      if (check_no_overlap (m_store_info, i, true, last_order, end))
+	      if (check_no_overlap (m_store_info, i, true, first_order,
+				    last_order, merged_store->start, end,
+				    first_earlier, end_earlier))
 		{
 		  /* check_no_overlap call above made sure there are no
 		     overlapping stores with non-INTEGER_CST rhs_code
@@ -2765,6 +2809,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
 		  do
 		    {
 		      unsigned int max_order = 0;
+		      unsigned int min_order = first_order;
 		      unsigned first_nonmergeable_int_order = ~0U;
 		      unsigned HOST_WIDE_INT this_end = end;
 		      k = i;
@@ -2791,6 +2836,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
 				  break;
 				}
 			      k = j;
+			      min_order = MIN (min_order, info2->order);
 			      this_end = MAX (this_end,
 					      info2->bitpos + info2->bitsize);
 			    }
@@ -2807,6 +2853,12 @@ imm_store_chain_info::coalesce_immediate_stores ()
 			    first_nonmergeable_order
 			      = MIN (first_nonmergeable_order, info2->order);
 			}
+		      if (k > i
+			  && !check_no_overlap (m_store_info, len - 1, true,
+						min_order, try_order,
+						merged_store->start, this_end,
+						first_earlier, end_earlier))
+			k = 0;
 		      if (k == 0)
 			{
 			  if (last_order == try_order)
@@ -2892,9 +2944,12 @@ imm_store_chain_info::coalesce_immediate_stores ()
 	      info->ops_swapped_p = true;
 	    }
 	  if (check_no_overlap (m_store_info, i, false,
+				MIN (merged_store->first_order, info->order),
 				MAX (merged_store->last_order, info->order),
+				merged_store->start,
 				MAX (merged_store->start + merged_store->width,
-				     info->bitpos + info->bitsize)))
+				     info->bitpos + info->bitsize),
+				first_earlier, end_earlier))
 	    {
 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
@@ -2939,6 +2994,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
 	delete merged_store;
 
       merged_store = new merged_store_group (info);
+      end_earlier = i;
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fputs ("New store group\n", dump_file);
 
diff --git a/gcc/testsuite/gcc.dg/store_merging_31.c b/gcc/testsuite/gcc.dg/store_merging_31.c
new file mode 100644
index 00000000000..32c21eb053c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_31.c
@@ -0,0 +1,27 @@
+/* PR tree-optimization/97053 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+struct S { short a; char b[9]; int c; char d; int e; };
+
+__attribute__((noipa)) void
+foo (char *x, char *y)
+{
+  if (__builtin_strcmp (x, "ABCDXXXX") != 0
+      || __builtin_strcmp (y, "ABCDXXXX") != 0)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  char a[9] = "XXXXXXXX";
+  struct S b = {};
+  __builtin_memcpy (a, "ABCD", 4);
+  b.a = 5;
+  __builtin_memcpy (b.b, a, 8); 
+  b.d = 'X';
+  b.e = 1;
+  foo (a, b.b);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/store_merging_32.c b/gcc/testsuite/gcc.dg/store_merging_32.c
new file mode 100644
index 00000000000..8c90489bdeb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_32.c
@@ -0,0 +1,129 @@
+/* PR tree-optimization/97053 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-tree-dse" } */
+
+struct __attribute__((packed, may_alias)) S { long long s; };
+struct __attribute__((packed, may_alias)) T { short t; };
+
+__attribute__((noipa)) void
+test (char *p, char *q, int s)
+{
+  if ((s & 1) == 0)
+    {
+      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
+	  != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
+	  || (((struct S __attribute__((may_alias)) *) &p[1])->s
+	      != ((struct S __attribute__((may_alias)) *) &q[1])->s)
+	  || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
+	      != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
+	__builtin_abort ();
+    }
+  else
+    {
+      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
+	  != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
+	  || (((struct S __attribute__((may_alias)) *) &p[1])->s
+	      != ((struct S __attribute__((may_alias)) *) &q[1])->s)
+	  || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
+	      != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
+	  || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
+	__builtin_abort ();
+    }
+}
+
+__attribute__((noipa)) void
+foo (long long *p, char *q, char *r, char *s)
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
+  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
+  test (a, q, 0);
+}
+
+__attribute__((noipa)) void
+bar (long long *p, char *q, char *r, char *s, char *t)
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+  ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
+  a[3 * sizeof (short) - 2] = 3;
+  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+  ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
+  t[3 * sizeof (short) - 2] = 3;
+  test (a, q, 1);
+}
+
+__attribute__((noipa)) void
+baz (long long *p, char *q, char *r, char *s)
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
+  test (a, q, 2);
+}
+
+__attribute__((noipa)) void
+qux (long long *p, char *q, char *r, char *s, char *t)
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+  a[3 * sizeof (short) - 2] = 3;
+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+  ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+  s[3 * sizeof (short) - 2] = 3;
+  ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
+  test (a, q, 3);
+}
+
+__attribute__((noipa)) void
+corge (long long *p, char *q, char *r, char *s, short u[3])
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
+  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
+  test (a, q, 4);
+}
+
+__attribute__((noipa)) void
+garply (long long *p, char *q, char *r, char *s, short u[3])
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
+  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
+  test (a, q, 6);
+}
+
+int
+main ()
+{
+  char a[64] __attribute__((aligned (__alignof (short))));
+  long long p = -1LL;
+  short u[] = { 1, 2, 3 };
+  foo (&p, &a[0], &a[0], &a[0]);
+  bar (&p, &a[0], &a[0], &a[0], &a[0]);
+  baz (&p, &a[0], &a[0], &a[0]);
+  qux (&p, &a[0], &a[0], &a[0], &a[0]);
+  corge (&p, &a[0], &a[0], &a[0], u);
+  garply (&p, &a[0], &a[0], &a[0], u);
+  return 0;
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-09-16  7:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-16  7:50 [gcc r10-8769] store-merging: Consider also overlapping stores earlier in the by bitpos sorting [PR97053] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).