public inbox for gcc-patches@gcc.gnu.org
* [PATCH] tree-optimization/102155 - fix LIM fill_always_executed_in CFG walk
@ 2021-09-01  9:58 Richard Biener
From: Richard Biener @ 2021-09-01  9:58 UTC (permalink / raw)
  To: gcc-patches; +Cc: luoxhu

This fixes the CFG walk order of fill_always_executed_in to use
RPO order rather than the dominator-based order computed by
get_loop_body_in_dom_order.  That fixes correctness issues with
unordered dominator children: siblings in the dominator tree can be
returned in an order that does not follow control flow, which the
early-exit logic of the walk relies on.

The RPO order computed by rev_post_order_and_mark_dfs_back_seme in
its for-iteration mode is a good match for the algorithm.
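
To make that concrete, here is a condensed sketch of how the new
order is computed and consumed; it mirrors the hunks below, with
process_loop standing in as a placeholder for the real work done
by fill_always_executed_in_1:

  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS);
  auto_bitmap exit_bbs;
  bitmap_set_bit (exit_bbs, EXIT_BLOCK);
  edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun));
  /* 'true' selects the for-iteration mode, which keeps all blocks of
     a CFG cycle contiguous in the output order.  */
  int n = rev_post_order_and_mark_dfs_back_seme (fun, entry, exit_bbs,
						 true, rpo, NULL);
  for (int i = 0; i < n; ++i)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]);
      /* A loop's blocks start at its header and follow contiguously,
	 so running into a header means rpo[i..] covers that loop.  */
      if (bb->loop_father->header == bb)
	process_loop (bb->loop_father, rpo, i, n);
    }
  free (rpo);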

Xionghu, I've tried to fix only the CFG walk order issue and not
change anything else here, so we have a more correct base to work
against.  The code still walks inner loop bodies up to loop depth
times and thus is quadratic in the loop depth.
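
To illustrate that remaining cost on a hypothetical nest where A
contains B and B contains C:

  loop A      <- the walk started at A's header visits A's, B's and C's blocks
    loop B    <- the walk started at B's header re-visits B's and C's blocks
      loop C  <- C's blocks end up visited a third time

so a block at loop depth d can be visited up to d times.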

Bootstrapped and tested on x86_64-unknown-linux-gnu.  If you don't
have any comments, I plan to push this and then revisit what we
were circling around.

Richard.

2021-09-01  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/102155
	* tree-ssa-loop-im.c (fill_always_executed_in_1): Iterate
	over a part of the RPO array and do not recurse here.
	Dump blocks marked as always executed.
	(fill_always_executed_in): Walk over the RPO array and
	process loops whose header we run into.
	(loop_invariant_motion_in_fun): Compute the first RPO
	using rev_post_order_and_mark_dfs_back_seme in iteration
	order and pass that to fill_always_executed_in.
---
 gcc/tree-ssa-loop-im.c | 136 ++++++++++++++++++++++-------------------
 1 file changed, 73 insertions(+), 63 deletions(-)

diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index d9f75d5025e..f3706dcdb8a 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -3025,77 +3025,74 @@ do_store_motion (void)
 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
    for each such basic block bb records the outermost loop for that execution
    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
-   blocks that contain a nonpure call.  */
+   blocks that contain a nonpure call.  The blocks of LOOP start at index
+   START of the RPO array of size N.  */
 
 static void
-fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
+fill_always_executed_in_1 (function *fun, class loop *loop,
+			   int *rpo, int start, int n, sbitmap contains_call)
 {
-  basic_block bb = NULL, *bbs, last = NULL;
-  unsigned i;
-  edge e;
+  basic_block last = NULL;
   class loop *inn_loop = loop;
 
-  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
+  for (int i = start; i < n; i++)
     {
-      bbs = get_loop_body_in_dom_order (loop);
-
-      for (i = 0; i < loop->num_nodes; i++)
-	{
-	  edge_iterator ei;
-	  bb = bbs[i];
-
-	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
-	    last = bb;
+      basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]);
+      /* Stop once we have iterated over all blocks in this loop.  */
+      if (!flow_bb_inside_loop_p (loop, bb))
+	break;
 
-	  if (bitmap_bit_p (contains_call, bb->index))
-	    break;
+      if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
+	last = bb;
 
-	  FOR_EACH_EDGE (e, ei, bb->succs)
-	    {
-	      /* If there is an exit from this BB.  */
-	      if (!flow_bb_inside_loop_p (loop, e->dest))
-		break;
-	      /* Or we enter a possibly non-finite loop.  */
-	      if (flow_loop_nested_p (bb->loop_father,
-				      e->dest->loop_father)
-		  && ! finite_loop_p (e->dest->loop_father))
-		break;
-	    }
-	  if (e)
-	    break;
+      if (bitmap_bit_p (contains_call, bb->index))
+	break;
 
-	  /* A loop might be infinite (TODO use simple loop analysis
-	     to disprove this if possible).  */
-	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
+      edge_iterator ei;
+      edge e;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  /* If there is an exit from this BB.  */
+	  if (!flow_bb_inside_loop_p (loop, e->dest))
 	    break;
-
-	  if (!flow_bb_inside_loop_p (inn_loop, bb))
+	  /* Or we enter a possibly non-finite loop.  */
+	  if (flow_loop_nested_p (bb->loop_father,
+				  e->dest->loop_father)
+	      && ! finite_loop_p (e->dest->loop_father))
 	    break;
+	}
+      if (e)
+	break;
 
-	  if (bb->loop_father->header == bb)
-	    {
-	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
-		break;
+      /* A loop might be infinite (TODO use simple loop analysis
+	 to disprove this if possible).  */
+      if (bb->flags & BB_IRREDUCIBLE_LOOP)
+	break;
 
-	      /* In a loop that is always entered we may proceed anyway.
-		 But record that we entered it and stop once we leave it.  */
-	      inn_loop = bb->loop_father;
-	    }
-	}
+      if (!flow_bb_inside_loop_p (inn_loop, bb))
+	break;
 
-      while (1)
+      if (bb->loop_father->header == bb)
 	{
-	  SET_ALWAYS_EXECUTED_IN (last, loop);
-	  if (last == loop->header)
+	  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
 	    break;
-	  last = get_immediate_dominator (CDI_DOMINATORS, last);
-	}
 
-      free (bbs);
+	  /* In a loop that is always entered we may proceed anyway.
+	     But record that we entered it and stop once we leave it.  */
+	  inn_loop = bb->loop_father;
+	}
     }
 
-  for (loop = loop->inner; loop; loop = loop->next)
-    fill_always_executed_in_1 (loop, contains_call);
+  while (1)
+    {
+      SET_ALWAYS_EXECUTED_IN (last, loop);
+      if (dump_enabled_p ())
+	dump_printf (MSG_NOTE, "bb %d is always executed in loop %d\n",
+		     last->index, loop->num);
+      if (last == loop->header)
+	break;
+      last = get_immediate_dominator (CDI_DOMINATORS, last);
+    }
 }
 
 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
@@ -3103,14 +3100,13 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
    of its header implies execution of bb.  */
 
 static void
-fill_always_executed_in (void)
+fill_always_executed_in (function *fun, int *rpo, int n)
 {
   basic_block bb;
-  class loop *loop;
 
-  auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
+  auto_sbitmap contains_call (last_basic_block_for_fn (fun));
   bitmap_clear (contains_call);
-  FOR_EACH_BB_FN (bb, cfun)
+  FOR_EACH_BB_FN (bb, fun)
     {
       gimple_stmt_iterator gsi;
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -3123,8 +3119,18 @@ fill_always_executed_in (void)
 	bitmap_set_bit (contains_call, bb->index);
     }
 
-  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
-    fill_always_executed_in_1 (loop, contains_call);
+  /* The RPO order we iterate over is one that visits all blocks of a CFG
+     cycle before leaving it.  That means we can visit a loop once we
+     run into its header and we can skip it if it was determined as
+     always entered when processing the containing loop.  */
+  for (int i = 0; i < n; ++i)
+    {
+      basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]);
+      if (bb->loop_father->header == bb
+	  && !ALWAYS_EXECUTED_IN (bb))
+	fill_always_executed_in_1 (fun, bb->loop_father,
+				   rpo, i, n, contains_call);
+    }
 }
 
 
@@ -3227,23 +3233,27 @@ loop_invariant_motion_in_fun (function *fun, bool store_motion)
   /* Gathers information about memory accesses in the loops.  */
   analyze_memory_references (store_motion);
 
-  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
-  fill_always_executed_in ();
+  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS);
+  auto_bitmap exit_bbs;
+  bitmap_set_bit (exit_bbs, EXIT_BLOCK);
+  edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun));
+  int n = rev_post_order_and_mark_dfs_back_seme (fun, entry, exit_bbs, true,
+						 rpo, NULL);
 
-  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
-  int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
+  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
+  fill_always_executed_in (fun, rpo, n);
 
   /* For each statement determine the outermost loop in that it is
      invariant and cost for computing the invariant.  */
   for (int i = 0; i < n; ++i)
     compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
+  free (rpo);
 
   /* Execute store motion.  Force the necessary invariants to be moved
      out of the loops as well.  */
   if (store_motion)
     do_store_motion ();
 
-  free (rpo);
   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
 
-- 
2.31.1
