public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Unswitching outer loops.
@ 2015-07-10 10:03 Yuri Rumyantsev
  2015-07-14 11:07 ` Richard Biener
  0 siblings, 1 reply; 17+ messages in thread
From: Yuri Rumyantsev @ 2015-07-10 10:03 UTC (permalink / raw)
  To: gcc-patches, Richard Biener, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 1136 bytes --]

Hi All,

Here is presented simple transformation which tries to hoist out of
outer-loop a check on zero trip count for inner-loop. This is very
restricted transformation since it accepts outer-loops with very
simple cfg, as for example:
    acc = 0;
   for (i = 1; i <= m; i++) {
      for (j = 0; j < n; j++)
         if (l[j] == i) { v[j] = acc; acc++; };
      acc <<= 1;
   }

Note that degenerative outer loop (without inner loop) will be
completely deleted as dead code.
The main goal of this transformation was to convert outer-loop to form
accepted by outer-loop vectorization (such test-case is also included
to patch).

Bootstrap and regression testing did not show any new failures.

Is it OK for trunk?

ChangeLog:
2015-07-10  Yuri Rumyantsev  <ysrumyan@gmail.com>

* tree-ssa-loop-unswitch.c: Include "tree-cfgcleanup.h" and
"gimple-iterator.h", add prototype for tree_unswitch_outer_loop.
(tree_ssa_unswitch_loops): Add invoke of tree_unswitch_outer_loop.
(tree_unswitch_outer_loop): New function.

gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/unswitch-outer-loop-1.c: New test.
* gcc.dg/vect/vect-outer-simd-3.c: New test.

[-- Attachment #2: patch.3 --]
[-- Type: application/octet-stream, Size: 9639 bytes --]

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/unswitch-outer-loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/unswitch-outer-loop-1.c
new file mode 100755
index 0000000..296d18f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/unswitch-outer-loop-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+
+void foo (int *v, int n, int m, int *l)
+{
+   int acc, i, j;
+
+   acc = 0;
+   for (i = 1; i <= m; i++) {
+      for (j = 0; j < n; j++)
+	if (l[j] == i)
+	  {
+	    v[j] = acc; acc++;
+	  }
+      acc <<= 1;
+   }
+}
+
+/* Outer loop should be unswitched.  */
+/* { dg-final { scan-tree-dump "Unswitching outer loop" "unswitch" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-simd-3.c b/gcc/testsuite/gcc.dg/vect/vect-outer-simd-3.c
new file mode 100644
index 0000000..891afbc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-simd-3.c
@@ -0,0 +1,78 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-funswitch-loops -fopenmp-simd -ffast-math" } */
+#include <stdlib.h>
+#include "tree-vect.h"
+#define N 64
+
+float *px, *py;
+float *tx, *ty;
+float *x1, *z1, *t1, *t2;
+int bound[N];
+
+static void inline bar(const float cx, float cy,
+                         float *vx, float *vy, int n)
+{
+  int j;
+    for (j = 0; j < n; ++j)
+    {
+        const float dx  = cx - px[j];
+        const float dy  = cy - py[j];
+        *vx               -= dx * tx[j];
+        *vy               -= dy * ty[j];
+    }
+}
+
+__attribute__((noinline, noclone)) void foo1 ()
+{
+  int i;
+  int n = bound[63];
+#pragma omp simd
+  for (i=0; i<N; i++)
+    bar(px[i], py[i], x1+i, z1+i, n);
+}
+
+__attribute__((noinline, noclone)) void foo2 ()
+{
+  volatile int i;
+  int n = bound[63];
+  for (i=0; i<N; i++)
+    bar(px[i], py[i], x1+i, z1+i, n);
+}
+
+
+int main()
+{
+  float *X = (float*)malloc(N * 8 * sizeof (float));
+  int i;
+  check_vect ();
+  px = &X[0];
+  py = &X[N * 1];
+  tx = &X[N * 2];
+  ty = &X[N * 3];
+  x1 = &X[N * 4];
+  z1 = &X[N * 5];
+  t1 = &X[N * 6];
+  t2 = &X[N * 7];
+
+  for (i=0; i<N; i++)
+    {
+      px[i] = (float) (i+2);
+      tx[i] = (float) (i+1);
+      py[i] = (float) (i+4);
+      ty[i] = (float) (i+3);
+      x1[i] = z1[i] = 1.0f;
+      bound[i] = i + 1;
+    }
+  foo1 ();  /* vector variant.  */
+  for (i=0; i<N;i++)
+    {
+      t1[i] = x1[i]; x1[i] = 1.0f;
+      t2[i] = z1[i]; z1[i] = 1.0f;
+    }
+  foo2 ();  /* scalar variant.  */
+  for (i=0; i<N; i++)
+    if (x1[i] != t1[i] || z1[i] != t2[i])
+      abort ();
+  return 0;
+}
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c
index 0be9142..befde7a 100644
--- a/gcc/tree-ssa-loop-unswitch.c
+++ b/gcc/tree-ssa-loop-unswitch.c
@@ -48,6 +48,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "tree-pass.h"
 #include "tree-inline.h"
+#include "tree-cfgcleanup.h"
+#include "gimple-iterator.h"
 
 /* This file implements the loop unswitching, i.e. transformation of loops like
 
@@ -88,6 +90,7 @@ along with GCC; see the file COPYING3.  If not see
 static struct loop *tree_unswitch_loop (struct loop *, basic_block, tree);
 static bool tree_unswitch_single_loop (struct loop *, int);
 static tree tree_may_unswitch_on (basic_block, struct loop *);
+static bool tree_unswitch_outer_loop (struct loop *);
 
 /* Main entry point.  Perform loop unswitching on all suitable loops.  */
 
@@ -112,6 +115,13 @@ tree_ssa_unswitch_loops (void)
           continue;
         }
 
+      /* Try to unswitch outer loop.  */
+      if (tree_unswitch_outer_loop (loop))
+	{
+	  changed = true;
+	  continue;
+	}
+
       /* The loop should not be too large, to limit code growth. */
       if (tree_num_loop_insns (loop, &eni_size_weights)
           > (unsigned) PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
@@ -412,6 +422,180 @@ tree_unswitch_loop (struct loop *loop,
 		       REG_BR_PROB_BASE - prob_true, false);
 }
 
+/* Try to unswitch outer loop if any to support vectorization of outer loop
+   with non-constant invariant upper bound of innermost loop.  In fact it tries
+   to hoist out a check on zero trip count for innermost loop.
+   Returns true if unswitching took place.  */
+static bool
+tree_unswitch_outer_loop (struct loop *inner_loop)
+{
+  struct loop *loop = loop_outer (inner_loop);
+  basic_block header;
+  basic_block inner_header;
+  gimple stmt;
+  basic_block exit_bb;
+  tree cond, cond_new;
+  ssa_op_iter iter;
+  tree use;
+  basic_block def_bb, merge_bb;
+  gimple def;
+  struct loop *nloop;
+  unsigned prob_true;
+  edge edge_true, edge_false;
+  gcond *cond_stmt;
+  gphi_iterator gsi;
+
+  if (!loop || loop->next)
+    return false;
+
+  if (!single_exit (inner_loop) || !single_exit (loop))
+    return false;
+  header = loop->header;
+  stmt = last_stmt (header);
+  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
+    return false;
+  /* Determine exit bb and header of inner loop.  */
+  inner_header = EDGE_SUCC (header, 0)->dest;
+  if (!flow_bb_inside_loop_p (loop, inner_header))
+    return false;
+  /* Skip empty bb.  */
+  if (empty_block_p (inner_header))
+    {
+      if (!single_succ_p (inner_header))
+	return false;
+      inner_header = single_succ (inner_header);
+    }
+  exit_bb = EDGE_SUCC (header, 1)->dest;
+  if (!flow_bb_inside_loop_p (loop, exit_bb))
+    return false;
+  if (empty_block_p (exit_bb))
+    {
+      if (!single_succ_p (exit_bb))
+	return false;
+      exit_bb = single_succ (exit_bb);
+    }
+  if (inner_header != inner_loop->header)
+    {
+      basic_block tmp = inner_header;
+      if (exit_bb != inner_loop->header)
+	return false;
+      inner_header = exit_bb;
+      exit_bb = tmp;
+    }
+  if (EDGE_COUNT (exit_bb->succs) != 2)
+    return false;;
+  gcc_assert (last_stmt (inner_header));
+  if (EDGE_COUNT (exit_bb->preds) != 2)
+    return false;
+  gcc_assert (last_stmt (exit_bb));
+  if (!loop_exit_edge_p (loop, EDGE_SUCC (exit_bb, 0)))
+    {
+      if (!loop_exit_edge_p (loop, EDGE_SUCC (exit_bb, 1))
+	  || EDGE_SUCC (exit_bb, 0)->dest != loop->latch)
+	return false;
+    }
+  else if (EDGE_SUCC (exit_bb, 1)->dest != loop->latch)
+    return false;
+  /* Is condition loop invariant?  */
+  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+    {
+      def = SSA_NAME_DEF_STMT (use);
+      def_bb = gimple_bb (def);
+      if (def_bb && flow_bb_inside_loop_p (loop, def_bb))
+	return false;
+    }
+
+  cond = build2 (gimple_cond_code(stmt), boolean_type_node,
+		 gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Unswitching outer loop\n");
+  initialize_original_copy_tables ();
+  extract_true_false_edges_from_block (header, &edge_true, &edge_false);
+  prob_true = edge_true->probability;
+  nloop = loop_version (loop, unshare_expr (cond), NULL, prob_true, prob_true,
+			REG_BR_PROB_BASE - prob_true, false);
+  if (!nloop)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf(dump_file, "Can't create version!\n");
+      free_original_copy_tables ();
+      return false;
+    }
+
+  if (!nloop->force_vectorize)
+    nloop->force_vectorize = true;
+  if (loop->safelen != 0)
+    nloop->safelen = loop->safelen;
+  header = loop->header;
+  stmt = last_stmt (header);
+  cond_new = simplify_using_entry_checks (loop, cond);
+  cond_stmt = as_a <gcond *> (stmt);
+  if (integer_nonzerop (cond_new))
+    gimple_cond_set_condition_from_tree (cond_stmt, boolean_true_node);
+  else if (integer_zerop (cond_new))
+    gimple_cond_set_condition_from_tree (cond_stmt, boolean_false_node);
+  else
+    gcc_unreachable ();
+
+  header = nloop->header;
+  stmt = last_stmt (header);
+  cond_new = simplify_using_entry_checks (nloop, cond);
+  cond_stmt = as_a <gcond *> (stmt);
+  if (integer_nonzerop (cond_new))
+    gimple_cond_set_condition_from_tree (cond_stmt, boolean_true_node);
+  else if (integer_zerop (cond_new))
+    gimple_cond_set_condition_from_tree (cond_stmt, boolean_false_node);
+  else
+    gcc_unreachable ();
+
+  merge_bb = single_exit (loop)->dest;
+  if (EDGE_COUNT (merge_bb->preds) >= 2)
+    split_edge (single_exit (loop));
+  /* Clean-up cfg to remove useless one-argument phi in exit block of
+     outer-loop.  */
+  cleanup_tree_cfg ();
+  mark_virtual_operands_for_renaming (cfun);
+  update_ssa (TODO_update_ssa);
+
+  /* Determine which loop is loop nest.  */
+  if (!loop->inner)
+    {
+      gcc_assert (nloop->inner);
+      loop = nloop;
+    }
+  exit_bb = single_exit (loop)->src;
+  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
+  /* Perform substitution of rhs of phi nodes aka copy propagation since
+     vectorizer does not accept phi nodes in non-header bb.  */
+  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi);)
+    {
+      gphi *phi;
+      tree arg;
+      gimple stmt;
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+
+      phi = gsi.phi ();
+      arg = gimple_phi_arg_def (phi, 0);
+      if (TREE_CODE (arg) == SSA_NAME)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf(dump_file, "Remove phi in exit block of outer-loop\n");
+	  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, PHI_RESULT (phi))
+	    {
+	      if (stmt != phi)
+		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+		  SET_USE (use_p, arg);
+	    }
+          remove_phi_node (&gsi, true);
+	}
+      else
+	gsi_next (&gsi);
+    }
+  free_original_copy_tables ();
+  return true;
+}
+
 /* Loop unswitching pass.  */
 
 namespace {

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2015-10-09 19:05 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-10 10:03 [PATCH] Unswitching outer loops Yuri Rumyantsev
2015-07-14 11:07 ` Richard Biener
2015-07-23 15:21   ` Yuri Rumyantsev
2015-07-28 11:00     ` Richard Biener
2015-07-31 12:07       ` Yuri Rumyantsev
2015-07-31 15:54         ` Jeff Law
2015-08-03  7:27         ` Richard Biener
     [not found]           ` <CAEoMCqSorkh1WmFtVB_huC2hbcVr8uc1EYaRaNVe1g+5hVuzPw@mail.gmail.com>
     [not found]             ` <CAFiYyc1nCCyF-4BH2hPWkKpmXnaQFQ34RMM5TTuHjZxZ25crrA@mail.gmail.com>
     [not found]               ` <CAEoMCqSRsER9ZGgnX9eJgZJyN4EwkpxzWWk1FHRxWNiEW0HVCg@mail.gmail.com>
     [not found]                 ` <CAFiYyc2O9i690A0LZ0+jEOP8nkyz8Btc0YAb469aMgnRaVsmsQ@mail.gmail.com>
2015-09-30 11:40                   ` Yuri Rumyantsev
2015-10-05 10:57                     ` Richard Biener
2015-10-05 13:13                       ` Yuri Rumyantsev
2015-10-06  7:59                         ` Richard Biener
2015-10-06 11:41                           ` Yuri Rumyantsev
2015-10-06 12:21                             ` Richard Biener
2015-10-07  9:53                               ` Yuri Rumyantsev
2015-10-07 15:26                                 ` Yuri Rumyantsev
2015-10-08 12:31                                   ` Richard Biener
2015-10-09 19:05                                 ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).