public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V2] Split loop for NE condition.
@ 2021-05-17  2:01 Jiufu Guo
  2021-05-18  6:36 ` Bernd Edlinger
  2021-05-26  9:50 ` Richard Biener
  0 siblings, 2 replies; 11+ messages in thread
From: Jiufu Guo @ 2021-05-17  2:01 UTC (permalink / raw)
  To: gcc-patches; +Cc: guojiufu, wschmidt, segher, dje.gcc, rguenther, jlaw

When there is the possibility that overflow/wrap may happen on the loop index,
a few optimizations would not happen. For example code:

foo (int *a, int *b, unsigned k, unsigned n)
{
  while (++k != n)
    a[k] = b[k]  + 1;
}

For this code, if "k > n", k would wrap.  if "k < n" at begining,
it could be optimized (e.g. vectorization).

We can split the loop into two loops:

  while (++k > n)
    a[k] = b[k]  + 1;
  while (l++ < n)
    a[k] = b[k]  + 1;

then for the second loop, it could be optimized.

This patch is spltting this kind of small loop to achieve better performance.

Bootstrap and regtest pass on ppc64le.  Is this ok for trunk?

Thanks!

Jiufu Guo.

gcc/ChangeLog:

2021-05-15  Jiufu Guo  <guojiufu@linux.ibm.com>

	* tree-ssa-loop-split.c (connect_loop_phis): Add new param.
	(get_ne_cond_branch): New function.
	(split_ne_loop): New function.
	(split_loop_on_ne_cond): New function.
	(tree_ssa_split_loops): Use split_loop_on_ne_cond.

gcc/testsuite/ChangeLog:

2021-05-15  Jiufu Guo  <guojiufu@linux.ibm.com>

	* gcc.dg/loop-split1.c: New test.
	* g++.dg/vect/pr98064.cc: Suppress warning.
---
 gcc/testsuite/g++.dg/vect/pr98064.cc |   4 +-
 gcc/testsuite/gcc.dg/loop-split1.c   | 108 +++++++++++++++
 gcc/tree-ssa-loop-split.c            | 188 ++++++++++++++++++++++++++-
 3 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c

diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc b/gcc/testsuite/g++.dg/vect/pr98064.cc
index 74043ce7725..dcb2985d05a 100644
--- a/gcc/testsuite/g++.dg/vect/pr98064.cc
+++ b/gcc/testsuite/g++.dg/vect/pr98064.cc
@@ -1,5 +1,7 @@
 // { dg-do compile }
-// { dg-additional-options "-O3" }
+// { dg-additional-options "-O3 -Wno-stringop-overflow" }
+/* There is warning message when "short g = var_8; g; g++"
+   is optimized/analyzed as string operation,e.g. memset.  */
 
 const long long &min(const long long &__a, long long &__b) {
   if (__b < __a)
diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c
new file mode 100644
index 00000000000..30b006b1b14
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split1.c
@@ -0,0 +1,108 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+void
+foo (int *a, int *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    a[l] = b[l]  + 1;
+}
+void
+foo_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (++l != n)
+    a[l] = b[l]  + 1;
+}
+
+void
+foo1 (int *a, int *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    a[l] = b[l]  + 1;
+}
+
+/* No wrap.  */
+void
+foo1_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (l++ != n)
+    a[l] = b[l]  + 1;
+}
+
+unsigned
+foo2 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo2_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo3 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+/* No wrap.  */
+unsigned
+foo3_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+void bar();
+void foo4(unsigned n,  unsigned i)
+{
+  do
+    {
+      if (i == n)
+        return;
+      bar();
+      ++i;
+    }
+  while (1);
+}
+
+unsigned
+foo5 (double *a, unsigned n, unsigned i)
+{
+  while (a[i] > 0 && i != n)
+    i++;
+
+  return i;
+}
+
+unsigned
+find_skip_diff (char *p, char *q, unsigned n, unsigned i)
+{
+  while (p[i] == q[i] && ++i != n)
+    p++,q++;
+
+  return i;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop split" 9 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..5c1742b5ff4 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "gimple-fold.h"
 #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
 
 /* This file implements two kinds of loop splitting.
 
@@ -233,7 +234,8 @@ easy_exit_values (class loop *loop)
    this.  The loops need to fulfill easy_exit_values().  */
 
 static void
-connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
+connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
+		   bool use_prev = false)
 {
   basic_block rest = loop_preheader_edge (loop2)->src;
   gcc_assert (new_e->dest == rest);
@@ -279,7 +281,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
 
       gphi * newphi = create_phi_node (new_init, rest);
       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
-      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
+      add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e,
+		   UNKNOWN_LOCATION);
       SET_USE (op, new_init);
     }
 }
@@ -1593,6 +1596,184 @@ split_loop_on_cond (struct loop *loop)
   return do_split;
 }
 
+/* Check if the LOOP exit branch likes "if (idx != bound)",
+   Return the branch edge which exit loop, if overflow/wrap
+   may happen on "idx".  */
+
+static edge
+get_ne_cond_branch (struct loop *loop)
+{
+  int i;
+  edge e;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      /* Check if there is possible wrap/overflow.  */
+      class tree_niter_desc niter;
+      if (!number_of_iterations_exit (loop, e, &niter, false, false))
+	continue;
+      if (niter.control.no_overflow)
+	return NULL;
+      if (niter.cmp != NE_EXPR)
+	continue;
+
+      /* Check loop is simple to split.  */
+      if (single_pred_p (loop->latch)
+	  && single_pred_edge (loop->latch)->src == e->src
+	  && (gsi_end_p (gsi_start_nondebug_bb (loop->latch))))
+	return e;
+
+      /* Simple header.  */
+      if (e->src == loop->header)
+	{
+	  if (get_virtual_phi (e->src))
+	    continue;
+
+	  /* Only one phi.  */
+	  gphi_iterator psi = gsi_start_phis (e->src);
+	  if (gsi_end_p (psi))
+	    continue;
+	  gsi_next (&psi);
+	  if (!gsi_end_p (psi))
+	    continue;
+
+	  /* ++i or ++i */
+	  gimple_stmt_iterator gsi = gsi_start_bb (e->src);
+	  if (gsi_end_p (gsi))
+	    continue;
+
+	  gimple *gc = last_stmt (e->src);
+	  tree idx = gimple_cond_lhs (gc);
+	  if (expr_invariant_in_loop_p (loop, idx))
+	    idx = gimple_cond_rhs (gc);
+
+	  gimple *s1 = gsi_stmt (gsi);
+	  if (!(is_gimple_assign (s1) && idx
+		&& (idx == gimple_assign_lhs (s1)
+		    || idx == gimple_assign_rhs1 (s1))))
+	    continue;
+
+	  gsi_next (&gsi);
+	  if (!gsi_end_p (gsi) && gsi_stmt (gsi) == gc)
+	    return e;
+	}
+    }
+
+  return NULL;
+}
+
+/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR.  */
+
+static bool
+split_ne_loop (struct loop *loop, edge cond_e)
+{
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
+				     profile_probability::always (),
+				     profile_probability::never (),
+				     profile_probability::always (),
+				     profile_probability::always (), true);
+
+  gcc_assert (loop2);
+  update_ssa (TODO_update_ssa);
+
+  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
+  free_original_copy_tables ();
+
+  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
+  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
+
+  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;
+
+  gimple_cond_set_code (gc, up_code);
+  gimple_cond_set_code (dup_gc, down_code);
+
+  /* Link the exit cond edge to new loop.  */
+  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
+  edge pred_e = single_pred_edge (loop->latch);
+  bool simple_loop = pred_e && pred_e->src == cond_e->src
+		     && (gsi_end_p (gsi_start_nondebug_bb (loop->latch)));
+  if (simple_loop)
+    gimple_cond_set_code (break_cond, down_code);
+  else
+    gimple_cond_make_true (break_cond);
+
+  basic_block break_bb = split_edge (cond_e);
+  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_exit = single_succ_edge (break_bb);
+  edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
+  to_new_loop->flags |= EDGE_TRUE_VALUE;
+  to_exit->flags |= EDGE_FALSE_VALUE;
+  to_exit->flags &= ~EDGE_FALLTHRU;
+  to_exit->probability = cond_e->probability;
+  to_new_loop->probability = to_exit->probability.invert ();
+
+  update_ssa (TODO_update_ssa);
+
+  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split on wrap index.\n");
+
+  return true;
+}
+
+/* Checks if LOOP contains a suitable NE_EXPR conditional block to split.
+L_H:
+ if (i!=N)
+   S;
+ i++;
+ goto L_H;
+
+The "i!=N" is like "i>N || i<N", then it could be transform to:
+
+L_H:
+ if (i>N)
+   S;
+ i++;
+ goto L_H;
+L1_H:
+ if (i<N)
+   S;
+ i++;
+ goto L1_H;
+
+The loop with "i<N" is in favor both GIMPLE and RTL passes.  */
+
+static bool
+split_loop_on_ne_cond (class loop *loop)
+{
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+  free (bbs);
+
+  if (num > param_max_peeled_insns)
+    return false;
+
+  edge branch_edge = get_ne_cond_branch (loop);
+  if (branch_edge && split_ne_loop (loop, branch_edge))
+    return true;
+
+  return false;
+}
+
 /* Main entry point.  Perform loop splitting on all suitable loops.  */
 
 static unsigned int
@@ -1622,7 +1803,8 @@ tree_ssa_split_loops (void)
       if (optimize_loop_for_size_p (loop))
 	continue;
 
-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+	  || split_loop_on_ne_cond (loop))
 	{
 	  /* Mark our containing loop as having had some split inner loops.  */
 	  loop_outer (loop)->aux = loop;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-06-01  8:23 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-17  2:01 [PATCH V2] Split loop for NE condition Jiufu Guo
2021-05-18  6:36 ` Bernd Edlinger
2021-05-18  9:28   ` guojiufu
2021-05-18 10:32     ` guojiufu
2021-05-18 11:04       ` guojiufu
2021-05-18 11:00   ` Segher Boessenkool
2021-05-18 11:14     ` Bernd Edlinger
2021-05-26  9:50 ` Richard Biener
2021-05-26 13:22   ` Segher Boessenkool
2021-06-01  3:28   ` guojiufu
2021-06-01  8:23     ` guojiufu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).