public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-3875] unswitching of outer loops
@ 2022-11-10 10:53 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2022-11-10 10:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9e11ceef165bc074c2fc85b8ddece606b24e710b

commit r13-3875-g9e11ceef165bc074c2fc85b8ddece606b24e710b
Author: Richard Biener <rguenther@suse.de>
Date:   Tue Sep 27 10:16:52 2022 +0200

    unswitching of outer loops
    
    This allows loop unswitching to unswitch outer loops conditions are
    invariant in.  We restrict ourselves to unswitch conditions in innermost
    loops and will only unswitch loop nests that do not contain any sibling loops.
    To simplify the implementation the loop nest unswitched is the deepest all
    unswitching candidates are invariant in.
    
    For 507.cactuBSSN_r it can be observed we unswitch the outer loops
    of the compute kernels for the fdOrder parameter.  It seems to be within
    the existing growth limitations to perform the unswitchings, a performance
    benefit is not seen.
    
            * tree-ssa-loop-unswitch.cc (init_loop_unswitch_info): First collect
            candidates and determine the outermost loop to unswitch.
            (tree_ssa_unswitch_loops): First perform all guard hoisting,
            then perform unswitching on innermost loop predicates.
            (find_unswitching_predicates_for_bb): Keep track of the
            outermost loop to unswitch.
            (evaluate_bbs): Adjust exit test.
            (tree_unswitch_single_loop): Dump whether we unswitched an outer
            loop.
            (tree_unswitch_loop): Remove assert we unswitch only innermost
            loops.
    
            * gcc.dg/loop-unswitch-18.c: New testcase.
            * gcc.dg/tree-ssa/loopclosedphi.c: Disable unswitching,
            adjust expected counts.
            * gcc.dg/torture/pr71462.c: Add -w to ignore undefined
            behavior diagnostics after now unswitching outer loops.

Diff:
---
 gcc/testsuite/gcc.dg/loop-unswitch-18.c       |  13 ++
 gcc/testsuite/gcc.dg/torture/pr71462.c        |   1 +
 gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c |   4 +-
 gcc/tree-ssa-loop-unswitch.cc                 | 203 ++++++++++++++++----------
 4 files changed, 140 insertions(+), 81 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-18.c b/gcc/testsuite/gcc.dg/loop-unswitch-18.c
new file mode 100644
index 00000000000..91dc2014922
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-18.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-unswitch-optimized" } */
+
+void bar();
+void foo (int x, int n, int m)
+{
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < m; ++j)
+      if (x)
+        bar ();
+}
+
+/* { dg-final { scan-tree-dump "unswitching outer loop" "unswitch" } } */
diff --git a/gcc/testsuite/gcc.dg/torture/pr71462.c b/gcc/testsuite/gcc.dg/torture/pr71462.c
index 390b88673e3..996596321ca 100644
--- a/gcc/testsuite/gcc.dg/torture/pr71462.c
+++ b/gcc/testsuite/gcc.dg/torture/pr71462.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-additional-options "-w" } */
 
 short a;
 long b;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c b/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c
index d71b757fbca..0a8a4174f5f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fno-tree-ch -w -fdump-tree-loopdone-details" } */
+/* { dg-options "-O3 -fno-tree-ch -fno-unswitch-loops -w -fdump-tree-loopdone-details" } */
 
 void
 t6 (int qz, int wh)
@@ -18,4 +18,4 @@ t6 (int qz, int wh)
     qz = jl * wh;
 }
 
-/* { dg-final { scan-tree-dump-times "Replacing" 2 "loopdone"} } */
+/* { dg-final { scan-tree-dump-times "Replacing" 3 "loopdone"} } */
diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc
index dfe75f52f12..186ae953f04 100644
--- a/gcc/tree-ssa-loop-unswitch.cc
+++ b/gcc/tree-ssa-loop-unswitch.cc
@@ -217,6 +217,7 @@ static bool tree_unswitch_single_loop (class loop *, dump_user_location_t,
 				       basic_block = NULL);
 static void
 find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
+				    class loop *&outer_loop,
 				    vec<unswitch_predicate *> &candidates,
 				    unswitch_predicate *&hottest,
 				    basic_block &hottest_bb);
@@ -249,36 +250,31 @@ set_predicates_for_bb (basic_block bb, vec<unswitch_predicate *> predicates)
 }
 
 /* Initialize LOOP information reused during the unswitching pass.
-   Return total number of instructions in the loop.  */
+   Return total number of instructions in the loop.  Adjusts LOOP to
+   the outermost loop all candidates are invariant in.  */
 
 static unsigned
-init_loop_unswitch_info (class loop *loop, unswitch_predicate *&hottest,
+init_loop_unswitch_info (class loop *&loop, unswitch_predicate *&hottest,
 			 basic_block &hottest_bb)
 {
   unsigned total_insns = 0;
 
-  /* Calculate instruction count.  */
   basic_block *bbs = get_loop_body (loop);
-  for (unsigned i = 0; i < loop->num_nodes; i++)
-    {
-      unsigned insns = 0;
-      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
-	   gsi_next (&gsi))
-	insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
-
-      bbs[i]->aux = (void *)(uintptr_t)insns;
-      total_insns += insns;
-    }
 
+  /* Unswitch only nests with no sibling loops.  */
+  class loop *outer_loop = loop;
+  while (loop_outer (outer_loop)->num != 0
+	 && !loop_outer (outer_loop)->inner->next)
+    outer_loop = loop_outer (outer_loop);
   hottest = NULL;
   hottest_bb = NULL;
-  /* Find all unswitching candidates.  */
+  /* Find all unswitching candidates in the innermost loop.  */
   for (unsigned i = 0; i != loop->num_nodes; i++)
     {
       /* Find a bb to unswitch on.  */
       vec<unswitch_predicate *> candidates;
       candidates.create (1);
-      find_unswitching_predicates_for_bb (bbs[i], loop, candidates,
+      find_unswitching_predicates_for_bb (bbs[i], loop, outer_loop, candidates,
 					  hottest, hottest_bb);
       if (!candidates.is_empty ())
 	set_predicates_for_bb (bbs[i], candidates);
@@ -291,8 +287,34 @@ init_loop_unswitch_info (class loop *loop, unswitch_predicate *&hottest,
 	}
     }
 
+  if (outer_loop != loop)
+    {
+      free (bbs);
+      bbs = get_loop_body (outer_loop);
+    }
+
+  /* Calculate instruction count.  */
+  for (unsigned i = 0; i < outer_loop->num_nodes; i++)
+    {
+      unsigned insns = 0;
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+      /* No predicates to unswitch on in the outer loops.  */
+      if (!flow_bb_inside_loop_p (loop, bbs[i]))
+	{
+	  gimple *last = last_stmt (bbs[i]);
+	  if (last != NULL)
+	    gimple_set_uid (last, 0);
+	}
+
+      bbs[i]->aux = (void *)(uintptr_t)insns;
+      total_insns += insns;
+    }
+
   free (bbs);
 
+  loop = outer_loop;
   return total_insns;
 }
 
@@ -307,72 +329,74 @@ tree_ssa_unswitch_loops (function *fun)
 
   ranger = enable_ranger (fun);
 
-  /* Go through all loops starting from innermost.  */
+  /* Go through all loops starting from innermost, hoisting guards.  */
   for (auto loop : loops_list (fun, LI_FROM_INNERMOST))
     {
-      if (!loop->inner)
-	{
-	  /* Perform initial tests if unswitch is eligible.  */
-	  dump_user_location_t loc = find_loop_location (loop);
+      if (loop->inner)
+	changed_hoist |= tree_unswitch_outer_loop (loop);
+    }
 
-	  /* Do not unswitch in cold regions. */
-	  if (optimize_loop_for_size_p (loop))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_NOTE, loc,
-				 "Not unswitching cold loops\n");
-	      continue;
-	    }
+  /* Go through innermost loops, unswitching on invariant predicates
+     within those.  */
+  for (auto loop : loops_list (fun, LI_ONLY_INNERMOST))
+    {
+      /* Perform initial tests if unswitch is eligible.  */
+      dump_user_location_t loc = find_loop_location (loop);
 
-	  /* If the loop is not expected to iterate, there is no need
-	     for unswitching.  */
-	  HOST_WIDE_INT iterations = estimated_loop_iterations_int (loop);
-	  if (iterations < 0)
-	    iterations = likely_max_loop_iterations_int (loop);
-	  if (iterations >= 0 && iterations <= 1)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_NOTE, loc,
-				 "Not unswitching, loop is not expected"
-				 " to iterate\n");
-	      continue;
-	    }
+      /* Do not unswitch in cold regions. */
+      if (optimize_loop_for_size_p (loop))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, loc,
+			     "Not unswitching cold loops\n");
+	  continue;
+	}
 
-	  bb_predicates = new vec<vec<unswitch_predicate *>> ();
-	  bb_predicates->safe_push (vec<unswitch_predicate *> ());
-	  unswitch_predicate::predicates = new vec<unswitch_predicate *> ();
-
-	  /* Unswitch innermost loop.  */
-	  unswitch_predicate *hottest;
-	  basic_block hottest_bb;
-	  unsigned int loop_size = init_loop_unswitch_info (loop, hottest,
-							    hottest_bb);
-	  unsigned int budget = loop_size + param_max_unswitch_insns;
-
-	  predicate_vector predicate_path;
-	  predicate_path.create (8);
-	  auto_bitmap handled;
-	  changed_unswitch
-	    |= tree_unswitch_single_loop (loop, loc, predicate_path,
-					  loop_size, budget,
-					  ignored_edge_flag, handled,
-					  hottest, hottest_bb);
-	  predicate_path.release ();
-
-	  for (auto predlist : bb_predicates)
-	    predlist.release ();
-	  bb_predicates->release ();
-	  delete bb_predicates;
-	  bb_predicates = NULL;
-
-	  for (auto pred : unswitch_predicate::predicates)
-	    delete pred;
-	  unswitch_predicate::predicates->release ();
-	  delete unswitch_predicate::predicates;
-	  unswitch_predicate::predicates = NULL;
+      /* If the loop is not expected to iterate, there is no need
+	 for unswitching.  */
+      HOST_WIDE_INT iterations = estimated_loop_iterations_int (loop);
+      if (iterations < 0)
+	iterations = likely_max_loop_iterations_int (loop);
+      if (iterations >= 0 && iterations <= 1)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, loc,
+			     "Not unswitching, loop is not expected"
+			     " to iterate\n");
+	  continue;
 	}
-      else
-	changed_hoist |= tree_unswitch_outer_loop (loop);
+
+      bb_predicates = new vec<vec<unswitch_predicate *>> ();
+      bb_predicates->safe_push (vec<unswitch_predicate *> ());
+      unswitch_predicate::predicates = new vec<unswitch_predicate *> ();
+
+      /* Unswitch loop.  */
+      unswitch_predicate *hottest;
+      basic_block hottest_bb;
+      unsigned int loop_size = init_loop_unswitch_info (loop, hottest,
+							hottest_bb);
+      unsigned int budget = loop_size + param_max_unswitch_insns;
+
+      predicate_vector predicate_path;
+      predicate_path.create (8);
+      auto_bitmap handled;
+      changed_unswitch |= tree_unswitch_single_loop (loop, loc, predicate_path,
+						     loop_size, budget,
+						     ignored_edge_flag, handled,
+						     hottest, hottest_bb);
+      predicate_path.release ();
+
+      for (auto predlist : bb_predicates)
+	predlist.release ();
+      bb_predicates->release ();
+      delete bb_predicates;
+      bb_predicates = NULL;
+
+      for (auto pred : unswitch_predicate::predicates)
+	delete pred;
+      unswitch_predicate::predicates->release ();
+      delete unswitch_predicate::predicates;
+      unswitch_predicate::predicates = NULL;
     }
 
   disable_ranger (fun);
@@ -463,10 +487,13 @@ is_maybe_undefined (const tree name, gimple *stmt, class loop *loop)
 
 /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
    basic blocks (for what it means see comments below).
-   All candidates all filled to the provided vector CANDIDATES.  */
+   All candidates all filled to the provided vector CANDIDATES.
+   OUTER_LOOP is updated to the innermost loop all found candidates are
+   invariant in.  */
 
 static void
 find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
+				    class loop *&outer_loop,
 				    vec<unswitch_predicate *> &candidates,
 				    unswitch_predicate *&hottest,
 				    basic_block &hottest_bb)
@@ -506,6 +533,18 @@ find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
 	  if (is_maybe_undefined (use, stmt, loop))
 	    return;
 	}
+      /* Narrow OUTER_LOOP.  */
+      if (outer_loop != loop)
+	FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+	  {
+	    def = SSA_NAME_DEF_STMT (use);
+	    def_bb = gimple_bb (def);
+	    while (outer_loop != loop
+		   && ((def_bb && flow_bb_inside_loop_p (outer_loop, def_bb))
+		       || is_maybe_undefined (use, stmt, outer_loop)))
+	      outer_loop = superloop_at_depth (loop,
+					       loop_depth (outer_loop) + 1);
+	  }
 
       unswitch_predicate *predicate = new unswitch_predicate (stmt);
       candidates.safe_push (predicate);
@@ -535,6 +574,12 @@ find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
 	 behavior that the original program might never exercise.  */
       if (is_maybe_undefined (idx, stmt, loop))
 	return;
+      /* Narrow OUTER_LOOP.  */
+      while (outer_loop != loop
+	     && ((def_bb && flow_bb_inside_loop_p (outer_loop, def_bb))
+		 || is_maybe_undefined (idx, stmt, outer_loop)))
+	outer_loop = superloop_at_depth (loop,
+					 loop_depth (outer_loop) + 1);
 
       /* Build compound expression for all outgoing edges of the switch.  */
       auto_vec<tree, 16> preds;
@@ -872,7 +917,7 @@ evaluate_bbs (class loop *loop, predicate_vector *predicate_path,
 	{
 	  basic_block dest = e->dest;
 
-	  if (dest->loop_father == loop
+	  if (flow_bb_inside_loop_p (loop, dest)
 	      && !(dest->flags & reachable_flag)
 	      && !(e->flags & flags)
 	      && !ignored_edges.contains (e))
@@ -992,7 +1037,8 @@ tree_unswitch_single_loop (class loop *loop, dump_user_location_t loc,
       if (dump_enabled_p ())
 	{
 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
-			   "unswitching loop %d on %qs with condition: %T\n",
+			   "unswitching %sloop %d on %qs with condition: %T\n",
+			   loop->inner ? "outer " : "",
 			   loop->num, predicate->switch_p ? "switch" : "if",
 			   predicate->condition);
 	  dump_printf_loc (MSG_NOTE, loc,
@@ -1064,7 +1110,6 @@ tree_unswitch_loop (class loop *loop, edge edge_true, tree cond)
   /* Some sanity checking.  */
   gcc_assert (flow_bb_inside_loop_p (loop, edge_true->src));
   gcc_assert (EDGE_COUNT (edge_true->src->succs) >= 2);
-  gcc_assert (loop->inner == NULL);
 
   profile_probability prob_true = edge_true->probability;
   return loop_version (loop, unshare_expr (cond),

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-11-10 10:53 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-10 10:53 [gcc r13-3875] unswitching of outer loops Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).