public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-5215] Fix wrong code issues with ipa-sra
@ 2023-01-16 17:16 Jan Hubicka
  0 siblings, 0 replies; only message in thread
From: Jan Hubicka @ 2023-01-16 17:16 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b1f30bf42d8d47228e52de998f3172b2f5dd7265

commit r13-5215-gb1f30bf42d8d47228e52de998f3172b2f5dd7265
Author: Jan Hubicka <jh@suse.cz>
Date:   Mon Jan 16 18:14:45 2023 +0100

    Fix wrong code issues with ipa-sra
    
    Fix wrong code issues in ipa-sra where we are trying to prove that on every
    execution of a given function a call to other function will happen.  The code
    uses post dominators and makes a wrong query (which passes only for first BB in
    function). Hoever post-dominators are only valid if fake edges for every
    possible reason for fuction execution to terminate are added.
    
    Fixing this using postdominators is somewhat costy since one needs to walk
    whole body and add a lot of fake edges. I ended up implementing a special
    purpose function for this which is also useful in ipa-modref and other places
    that does similar analysis.  One does not need to modify CFG to use it and
    moreover for complex functions it usually stops on first unanalyzed function
    call and ends up being relatively cheap.
    
    Bootstrapped/regtested x86_64-linux, plan to commit it shortly.
    
    gcc/ChangeLog:
    
    2023-01-16  Jan Hubicka  <hubicka@ucw.cz>
    
            PR ipa/106077
            * ipa-modref.cc (modref_access_analysis::analyze): Use
            find_always_executed_bbs.
            * ipa-sra.cc (process_scan_results): Likewise.
            * ipa-utils.cc (stmt_may_terminate_function_p): New function.
            (find_always_executed_bbs): New function.
            * ipa-utils.h (stmt_may_terminate_function_p): Declare.
            (find_always_executed_bbs): Declare.
    
    gcc/testsuite/ChangeLog:
    
    2023-01-16  Jan Hubicka  <hubicka@ucw.cz>
    
            * g++.dg/tree-ssa/pr106077.C: New test.

Diff:
---
 gcc/ipa-modref.cc                        |   5 +-
 gcc/ipa-sra.cc                           |  52 +++++---
 gcc/ipa-utils.cc                         | 222 +++++++++++++++++++++++++++++++
 gcc/ipa-utils.h                          |   2 +
 gcc/testsuite/g++.dg/tree-ssa/pr106077.C |  22 +++
 5 files changed, 285 insertions(+), 18 deletions(-)

diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
index 16e8bfb97a6..e3196df8aa9 100644
--- a/gcc/ipa-modref.cc
+++ b/gcc/ipa-modref.cc
@@ -1875,11 +1875,11 @@ modref_access_analysis::analyze ()
      statement cannot be analyzed (for any reason), the entire function cannot
      be analyzed by modref.  */
   basic_block bb;
+  bitmap always_executed_bbs = find_always_executed_bbs (cfun, true);
   FOR_EACH_BB_FN (bb, cfun)
     {
       gimple_stmt_iterator si;
-      bool always_executed
-	      = bb == single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->dest;
+      bool always_executed = bitmap_bit_p (always_executed_bbs, bb->index);
 
       for (si = gsi_start_nondebug_after_labels_bb (bb);
 	   !gsi_end_p (si); gsi_next_nondebug (&si))
@@ -1926,6 +1926,7 @@ modref_access_analysis::analyze ()
 	  && !finite_function_p ())
 	m_summary_lto->side_effects = true;
     }
+  BITMAP_FREE (always_executed_bbs);
 }
 
 /* Return true if OP accesses memory pointed to by SSA_NAME.  */
diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 4d7c25c8784..81b75910db1 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -2529,7 +2529,8 @@ process_scan_results (cgraph_node *node, struct function *fun,
      TODO: Measure the overhead and the effect of just being pessimistic.
      Maybe this is only -O3 material?  */
 
-  bool pdoms_calculated = false;
+  hash_map<gimple *, bool> analyzed_stmts;
+  bitmap always_executed_bbs = NULL;
   if (check_pass_throughs)
     for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
       {
@@ -2566,27 +2567,46 @@ process_scan_results (cgraph_node *node, struct function *fun,
 		continue;
 	      }
 
-	    /* Post-dominator check placed last, hoping that it usually won't
-	       be needed.  */
-	    if (!pdoms_calculated)
+	    /* Walk basic block and see if its execution can terminate earlier.
+	       Keep the info for later re-use to avoid quadratic behavoiur here.  */
+	    gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
+	    bool safe = true;
+	    int n = 0;
+	    for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
 	      {
-		gcc_checking_assert (cfun);
-		connect_infinite_loops_to_exit ();
-		calculate_dominance_info (CDI_POST_DOMINATORS);
-		pdoms_calculated = true;
+		bool *b = analyzed_stmts.get (gsi_stmt (gsi));
+		if (b)
+		  {
+		    safe = *b;
+		    gsi_next (&gsi);
+		    break;
+		  }
+		n++;
+		if (stmt_may_terminate_function_p (fun, gsi_stmt (gsi), false))
+		  {
+		    safe = false;
+		    break;
+		  }
 	      }
-	    if (dominated_by_p (CDI_POST_DOMINATORS,
-				gimple_bb (call_stmt),
-				single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
+	    if (n)
+	      {
+		if (gsi_end_p (gsi))
+		  gsi = gsi_start_bb (gimple_bb (call_stmt));
+		for (; gsi_stmt (gsi) != call_stmt; gsi_next (&gsi))
+		  analyzed_stmts.get_or_insert (gsi_stmt (gsi)) = safe;
+	      }
+
+	    if (safe && !always_executed_bbs)
+	      {
+		mark_dfs_back_edges ();
+		always_executed_bbs = find_always_executed_bbs (fun, false);
+	      }
+	    if (safe && bitmap_bit_p (always_executed_bbs, gimple_bb (call_stmt)->index))
 	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
 	  }
 
       }
-  if (pdoms_calculated)
-    {
-      free_dominance_info (CDI_POST_DOMINATORS);
-      remove_fake_exit_edges ();
-    }
+  BITMAP_FREE (always_executed_bbs);
 
   /* TODO: Add early exit if we disqualified everything.  This also requires
      that we either relax the restriction that
diff --git a/gcc/ipa-utils.cc b/gcc/ipa-utils.cc
index 163899af6af..3d5633340f1 100644
--- a/gcc/ipa-utils.cc
+++ b/gcc/ipa-utils.cc
@@ -35,6 +35,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-vrp.h"
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "ipa-modref-tree.h"
+#include "ipa-modref.h"
+#include "tree-ssa-loop-niter.h"
 
 /* Debugging function for postorder and inorder code. NOTE is a string
    that is printed before the nodes are printed.  ORDER is an array of
@@ -781,3 +786,220 @@ recursive_call_p (tree func, tree dest)
       return false;
   return true;
 }
+
+/* Return true if stmt may terminate execution of function.
+   If assume_return_or_eh we can further assume that the function ends
+   either by retrn statement or EH (no trapping or infinite loops).  */
+
+bool
+stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
+{
+  if (stmt_can_throw_external (fun, stmt))
+    return true;
+  gasm *astmt = dyn_cast <gasm *> (stmt);
+  if (astmt && gimple_asm_volatile_p (astmt))
+    return true;
+  if (assume_return_or_eh)
+    return false;
+  if (gimple_could_trap_p (stmt))
+    return true;
+  if (gcall *call = dyn_cast <gcall *> (stmt))
+    {
+      int flags = gimple_call_flags (call);
+      if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
+	return false;
+      modref_summary *s = get_modref_function_summary (call, NULL);
+      if (s && !s->side_effects)
+	return false;
+      return true;
+    }
+  return false;
+}
+
+/* Return bitmap of all basic blocks whose first statements are known to
+   execute on every invocation of the function.
+
+   If assume_return_or_eh we can further assume that the function ends
+   either by retrn statement or EH (no trapping or infinite loops).
+   This is useful when sumarizing function in passes like ipa-modref.
+ 
+   Seeing assume_return_or_eh to false is used to prove that given
+   statmeent will be executed even if the function gets into infinite
+   loop or trap.  */
+bitmap
+find_always_executed_bbs (function *fun, bool assume_return_or_eh)
+{
+  auto_vec<basic_block, 20> stack;
+  auto_vec<basic_block, 20> terminating_bbs;
+  hash_set<basic_block> visited;
+  edge e;
+  edge_iterator ei;
+
+  /* First walk all BBs reachable from entry stopping on statements that may
+     terminate execution.  Everything past this statement is not going to be executed
+     each invocation.  */
+  stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
+  while (!stack.is_empty ())
+    {
+      basic_block bb = stack.pop ();
+      bool found = false, found_exit = false;
+      if (!assume_return_or_eh
+	  && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
+	found = true;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
+	    {
+	      found_exit = true;
+	      break;
+	    }
+	  /* Watch for infinite loops.  */
+	  if (!found && (assume_return_or_eh & EDGE_DFS_BACK)
+	      && !finite_loop_p (e->src->loop_father))
+	    found = true;
+	}
+      for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
+	   !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
+	if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
+	  {
+	    found = true;
+	    break;
+	  }
+      if (found)
+	{
+	  visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
+	  if (!found_exit)
+	    terminating_bbs.safe_push (bb);
+	}
+      else
+	FOR_EACH_EDGE (e, ei, bb->succs)
+	  if (!visited.add (e->dest))
+	    stack.safe_push (e->dest);
+    }
+
+  /* Next walk from exit block and find all articulations in the CFG.
+     Add all terminating basic blocks as "fake" predecessors of the
+     exit block.  */
+
+  bitmap ret = BITMAP_ALLOC (NULL);
+  /* A degenerated case when there is no path to exit.  */
+  if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun))
+      && terminating_bbs.is_empty ())
+    {
+      bitmap_set_bit (ret,
+		      single_succ_edge
+		        (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
+      return ret;
+    }
+
+  struct astate
+  {
+    unsigned int dfs_preorder;
+    unsigned int dfs_postorder;
+
+    unsigned int low, high;
+  };
+
+  struct worklist
+  {
+    basic_block bb;
+    astate *cstate;
+  };
+
+  struct obstack state_obstack;
+  gcc_obstack_init (&state_obstack);
+  hash_map<basic_block, astate *> state;
+  auto_vec<worklist, 32> worklist_vec;
+  unsigned int next_dfs_num = 1;
+
+  /* Always executed blocks are blocks that are on every path from entry to exit.
+     We proceed in two steps.  First we do backward DFS walk (so we know that entry
+     is always reached) and record preorder and postorder visiting times.
+
+     In second step we proceed in postorder and for every block A we compute
+     minimal preorder (A.low) and maximal postorder (A.high) of block reachable
+     from the BBs in DFS subtree of A.  If A is always executed there are no
+     edges out of this subtree.  This can be tested by checking that A.low == A.preorder
+     and B.high == A.postorder.
+    
+     This is first step. Do backward DFS walk and record preorder, postorder
+     and predecessor info.  Initialize stack in postorder.  */
+  worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
+  worklist_vec.safe_push (we);
+  while (!worklist_vec.is_empty ())
+    {
+      worklist &w = worklist_vec.last ();
+      basic_block bb = w.bb;
+      astate *cstate = w.cstate;
+
+      if (!cstate)
+	{
+	  astate **slot = &state.get_or_insert (bb);
+
+	  cstate = *slot;
+	  /* Already processed by DFS?  */
+	  if (cstate)
+	    {
+	      worklist_vec.pop ();
+	      continue;
+	    }
+	  /* DFS is visiting BB for first time.  */
+	  *slot = cstate = XOBNEW (&state_obstack, struct astate);
+	  cstate->low = cstate->dfs_preorder = next_dfs_num++;
+	  w.cstate = cstate;
+	  /* Exit block is special; process all fake edges we identified.  */
+	  if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
+	    for (basic_block bb2 : terminating_bbs)
+	      {
+		worklist we = {bb2, NULL};
+		worklist_vec.safe_push (we);
+	      }
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    if (visited.contains (e->src))
+	      {
+		worklist we = {e->src, NULL};
+		worklist_vec.safe_push (we);
+	      }
+	  /* Keep BB on worklist so we process it last time.  */
+	  continue;
+	}
+      /* We are finished with processing reachable BBs, see if we have articulation.  */
+      worklist_vec.pop ();
+      cstate->high = cstate->dfs_postorder = next_dfs_num++;
+      stack.safe_push (bb);
+    }
+  /* This is the final postorder walk.  Determine low and high values and mark
+     always executed blocks.  */
+  for (basic_block bb : stack)
+    {
+      astate *cstate = *state.get (bb);
+      FOR_EACH_EDGE (e, ei, bb->preds)
+	{
+	  astate **cstate2 = state.get (e->src);
+	  /* We skip walking part of CFG reached only after first edge to exit.
+	     No BB reachable from the skipped part is always executed */
+	  if (!cstate2)
+	    {
+	      if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
+	        cstate->low = 0;
+	      continue;
+	    }
+	  cstate->low = MIN (cstate->low, (*cstate2)->low);
+	  cstate->high = MAX (cstate->high, (*cstate2)->high);
+	}
+      if (cstate->low == cstate->dfs_preorder && cstate->high == cstate->dfs_postorder
+	  && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
+	bitmap_set_bit (ret, bb->index);
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  astate **cstate2 = state.get (e->dest);
+	  if (!cstate2)
+	    continue;
+	  cstate->low = MIN (cstate->low, (*cstate2)->low);
+	  cstate->high = MAX (cstate->high, (*cstate2)->high);
+	}
+    }
+  obstack_free (&state_obstack, NULL);
+
+  return ret;
+}
diff --git a/gcc/ipa-utils.h b/gcc/ipa-utils.h
index e7322e1bf4c..0eefcf40d44 100644
--- a/gcc/ipa-utils.h
+++ b/gcc/ipa-utils.h
@@ -46,6 +46,8 @@ tree get_base_var (tree);
 void ipa_merge_profiles (struct cgraph_node *dst,
 			 struct cgraph_node *src, bool preserve_body = false);
 bool recursive_call_p (tree, tree);
+bool stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh);
+bitmap find_always_executed_bbs (function *fun, bool assume_return_or_eh);
 
 /* In ipa-pure-const.cc  */
 bool finite_function_p ();
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr106077.C b/gcc/testsuite/g++.dg/tree-ssa/pr106077.C
new file mode 100644
index 00000000000..2c2f7293fa0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr106077.C
@@ -0,0 +1,22 @@
+// { dg-do compile }
+// { dg-options "-O2 -fno-ipa-cp -fdump-tree-optimized" }
+short e,f;
+static __attribute__ ((noinline))
+int a(int *b)
+{
+        return *b;
+}
+static __attribute__ ((noinline))
+__attribute__ ((optimize("non-call-exceptions")))
+int wrap(int *b,int e, int f)
+{
+        e/=f;
+        return a(b)+e;
+}
+
+int
+t()
+{
+        return wrap(0,1,0);
+}
+// { dg-final { scan-tree-dump-not "builtin_trap" "optimized" } }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-01-16 17:16 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-16 17:16 [gcc r13-5215] Fix wrong code issues with ipa-sra Jan Hubicka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).