public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] split loop for NE condition.
@ 2021-04-29  9:50 Jiufu Guo
  2021-04-30 16:27 ` Jeff Law
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Jiufu Guo @ 2021-04-29  9:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: guojiufu, wschmidt, segher, dje.gcc, rguenther, jlaw

When there is the possibility that overflow may happen on the loop index,
a few optimizations would not happen. For example code:

foo (int *a, int *b, unsigned k, unsigned n)
{
  while (++k != n)
    a[k] = b[k]  + 1;
}

For this code, if "l > n", overflow may happen.  if "l < n" at begining,
it could be optimized (e.g. vectorization).

We can split the loop into two loops:

  while (++k > n)
    a[k] = b[k]  + 1;
  while (l++ < n)
    a[k] = b[k]  + 1;

then for the second loop, it could be optimized.

This patch is spltting this kind of small loop to achieve better performance.

Bootstrap and regtest pass on ppc64le.  Is this ok for trunk?

Thanks!

Jiufu Guo.

gcc/ChangeLog:

2021-04-29  Jiufu Guo  <guojiufu@linux.ibm.com>

	* params.opt (max-insns-ne-cond-split): New.
	* tree-ssa-loop-split.c (connect_loop_phis): Add new param.
	(get_ne_cond_branch): New function.
	(split_ne_loop): New function.
	(split_loop_on_ne_cond): New function.
	(tree_ssa_split_loops): Use split_loop_on_ne_cond.
	
gcc/testsuite/ChangeLog:
2021-04-29  Jiufu Guo  <guojiufu@linux.ibm.com>

	* gcc.dg/loop-split1.c: New test.
    
---
 gcc/params.opt                     |   4 +
 gcc/testsuite/gcc.dg/loop-split1.c |  28 ++++
 gcc/tree-ssa-loop-split.c          | 219 ++++++++++++++++++++++++++++-
 3 files changed, 247 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c

diff --git a/gcc/params.opt b/gcc/params.opt
index 2e4cbdd7a71..900b59b5136 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -766,6 +766,10 @@ Min. ratio of insns to prefetches to enable prefetching for a loop with an unkno
 Common Joined UInteger Var(param_min_loop_cond_split_prob) Init(30) IntegerRange(0, 100) Param Optimization
 The minimum threshold for probability of semi-invariant condition statement to trigger loop split.
 
+-param=max-insns-ne-cond-split=
+Common Joined UInteger Var(param_max_insn_ne_cond_split) Init(64) Param Optimization
+The maximum threshold for insnstructions number of a loop with ne condition to split.
+
 -param=min-nondebug-insn-uid=
 Common Joined UInteger Var(param_min_nondebug_insn_uid) Param
 The minimum UID to be used for a nondebug insn.
diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c
new file mode 100644
index 00000000000..4c466aa9f54
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+void
+foo (int *a, int *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    a[l] = b[l]  + 1;
+}
+
+void
+foo1 (int *a, int *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    a[l] = b[l]  + 1;
+}
+
+unsigned
+foo2 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop split" 3 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index b80b6a75e62..a6d28078e5e 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "gimple-fold.h"
 #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
 
 /* This file implements two kinds of loop splitting.
 
@@ -233,7 +234,8 @@ easy_exit_values (class loop *loop)
    this.  The loops need to fulfill easy_exit_values().  */
 
 static void
-connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
+connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
+		   bool use_prev = false)
 {
   basic_block rest = loop_preheader_edge (loop2)->src;
   gcc_assert (new_e->dest == rest);
@@ -248,13 +250,14 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
        !gsi_end_p (psi_first);
        gsi_next (&psi_first), gsi_next (&psi_second))
     {
-      tree init, next, new_init;
+      tree init, next, new_init, prev;
       use_operand_p op;
       gphi *phi_first = psi_first.phi ();
       gphi *phi_second = psi_second.phi ();
 
       init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
       next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
+      prev = PHI_RESULT (phi_first);
       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
 
@@ -279,7 +282,7 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
 
       gphi * newphi = create_phi_node (new_init, rest);
       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
-      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
+      add_phi_arg (newphi, use_prev ? prev : next, new_e, UNKNOWN_LOCATION);
       SET_USE (op, new_init);
     }
 }
@@ -1599,6 +1602,213 @@ split_loop_on_cond (struct loop *loop)
   return do_split;
 }
 
+/* Check if the LOOP exit branch likes "if (idx != bound)".
+   if INV is not NULL and the branch is "if (bound != idx)", set *INV to true.
+   return the branch edge which exit loop.  */
+
+static edge
+get_ne_cond_branch (struct loop *loop, bool *inv)
+{
+  int i;
+  edge e;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      basic_block bb = e->src;
+
+      /* Check gcond.  */
+      gimple *last = last_stmt (bb);
+      if (!last || gimple_code (last) != GIMPLE_COND)
+	continue;
+      gcond *cond = as_a<gcond *> (last);
+      enum tree_code code = gimple_cond_code (cond);
+      if (code != NE_EXPR)
+	continue;
+
+      /* Make sure idx and bound.  */
+      tree idx = gimple_cond_lhs (cond);
+      tree bnd = gimple_cond_rhs (cond);
+      if (expr_invariant_in_loop_p (loop, idx))
+	{
+	  std::swap (idx, bnd);
+	  if (inv)
+	    *inv = true;
+	}
+      else if (!expr_invariant_in_loop_p (loop, bnd))
+	continue;
+
+      /* Extract conversion.  */
+      if (TREE_CODE (idx) == SSA_NAME)
+	{
+	  gimple *stmt = SSA_NAME_DEF_STMT (idx);
+	  if (is_gimple_assign (stmt)
+	      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
+	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
+	    idx = gimple_assign_rhs1 (stmt);
+	}
+
+      /* Make sure idx is iv.  */
+      class loop *useloop = loop_containing_stmt (cond);
+      affine_iv iv;
+      if (!simple_iv (loop, useloop, idx, &iv, false))
+	continue;
+
+      /* No need to split loop, if base is know value.
+	 Or check range info.  */
+      if (TREE_CODE (iv.base) == INTEGER_CST)
+	continue;
+
+      /* There is type conversion on idx(or rhs of idx's def).
+	 And there is converting shorter to longer type. */
+      tree type = TREE_TYPE (idx);
+      if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME
+	  || !TYPE_UNSIGNED (type)
+	  || TYPE_PRECISION (type) == TYPE_PRECISION (sizetype))
+	continue;
+
+      /* Check loop is simple to split.  */
+      gcc_assert (bb != loop->latch);
+
+      if (single_pred_p (loop->latch)
+	  && single_pred_edge (loop->latch)->src == bb
+	  && empty_block_p (loop->latch))
+	return e;
+
+      /* Splitting is cheap for idx increase header.  */
+      if (bb == loop->header)
+	{
+	  if (get_virtual_phi (loop->header))
+	    continue;
+
+	  /* In loop header: i++ or ++i.  */
+	  gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	  if (gsi_end_p (gsi))
+	    return e;
+
+	  gimple *s1 = gsi_stmt (gsi);
+	  if (!(is_gimple_assign (s1)
+		&& (idx == gimple_assign_lhs (s1)
+		    || idx == gimple_assign_rhs1 (s1))))
+	    continue;
+
+	  gsi_next (&gsi);
+	  if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond)
+	    return e;
+	}
+    }
+
+  return NULL;
+}
+
+/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR.  */
+
+static bool
+split_ne_loop (struct loop *loop, edge cond_e)
+{
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
+				     profile_probability::always (),
+				     profile_probability::never (),
+				     profile_probability::always (),
+				     profile_probability::always (), true);
+
+  gcc_assert (loop2);
+  update_ssa (TODO_update_ssa);
+
+  free_original_copy_tables ();
+
+  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
+  bool inv = false;
+  edge dup_cond = get_ne_cond_branch (loop2, &inv);
+  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;
+
+  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
+  gimple_cond_set_code (gc, up_code);
+
+  gcond *dup_gc = as_a<gcond *> (last_stmt (dup_cond->src));
+  gimple_cond_set_code (dup_gc, down_code);
+
+  /* Link the exit cond edge to new loop.  */
+  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
+  edge pred_e = single_pred_edge (loop->latch);
+  gcc_assert (pred_e);
+  bool simple_loop = pred_e->src == cond_e->src && empty_block_p (loop->latch);
+  if (simple_loop)
+    gimple_cond_set_code (break_cond, down_code);
+  else
+    gimple_cond_make_true (break_cond);
+
+  basic_block break_bb = split_edge (cond_e);
+  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_exit = single_succ_edge (break_bb);
+  edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
+  to_new_loop->flags |= EDGE_TRUE_VALUE;
+  to_exit->flags |= EDGE_FALSE_VALUE;
+  to_exit->flags &= ~EDGE_FALLTHRU;
+  to_exit->probability = cond_e->probability;
+  to_new_loop->probability = to_exit->probability.invert ();
+
+  update_ssa (TODO_update_ssa);
+
+  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split.\n");
+
+  return true;
+}
+
+/* Checks if LOOP contains a suitable NE_EXPR conditional block to split.
+L_H:
+ if (i!=N)
+   S;
+ i++;
+ goto L_H;
+
+The "i!=N" is like "i>N || i<N", then it could be transform to:
+
+L_H:
+ if (i>N)
+   S;
+ i++;
+ goto L_H;
+L1_H:
+ if (i<N)
+   S;
+ i++;
+ goto L1_H;
+
+The loop with "i<N" is in favor both GIMPLE and RTL passes.  */
+
+static bool
+split_loop_on_ne_cond (class loop *loop)
+{
+  if (!can_duplicate_loop_p (loop))
+    return false;
+
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+  free (bbs);
+
+  if (num > param_max_insn_ne_cond_split)
+    return false;
+
+  edge branch_edge = get_ne_cond_branch (loop, NULL);
+  if (branch_edge && split_ne_loop (loop, branch_edge))
+    return true;
+
+  return false;
+}
+
 /* Main entry point.  Perform loop splitting on all suitable loops.  */
 
 static unsigned int
@@ -1628,7 +1838,8 @@ tree_ssa_split_loops (void)
       if (optimize_loop_for_size_p (loop))
 	continue;
 
-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+	  || split_loop_on_ne_cond (loop))
 	{
 	  /* Mark our containing loop as having had some split inner loops.  */
 	  loop_outer (loop)->aux = loop;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-05-14 14:58 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-29  9:50 [PATCH] split loop for NE condition Jiufu Guo
2021-04-30 16:27 ` Jeff Law
2021-05-06  1:05   ` guojiufu
2021-04-30 21:37 ` Segher Boessenkool
2021-05-06  1:37   ` guojiufu
2021-05-03 12:18 ` Richard Biener
2021-05-06  7:57   ` guojiufu
2021-05-06  8:27     ` Richard Biener
2021-05-07  8:27       ` guojiufu
2021-05-07  9:52         ` Richard Biener
2021-05-14 14:58           ` [RFC] " guojiufu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).