public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Improve sinking with unrelated defs
@ 2023-07-31 12:39 Richard Biener
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2023-07-31 12:39 UTC (permalink / raw)
  To: gcc-patches

statement_sink_location for loads is currently confused about
stores that are not on the paths we are sinking across.  The
following avoids this by explicitly checking whether a block
with a store is on any of those paths.  To not perform too many
walks over the sub-part of the CFG between the original stmt
location and the found sinking candidate we first collect all
blocks to check and then perform a single walk from the sinking
candidate location to the original stmt location.  We avoid enlarging
the region by conservatively handling backedges.

The original heuristics what store locations to ignore have been
refactored, some can possibly be removed now.

If anybody knows a cheaper way to check whether a BB is on a path
from block A to block B which is dominated by A I'd be happy to
know (or if there would be a clever caching method at least - I'm
probably going to limit the number of blocks to walk to avoid
quadratic behavior).

Bootstrapped and tested on x86_64-unknown-linux-gnu.  This depends
on the previously sent RFC to limit testsuite fallout.

	* tree-ssa-sink.cc (pass_sink_code::execute): Mark backedges.
	(statement_sink_location): Do not consider
	stores that are not on any path from the original to the
	destination location.

	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 +++
 gcc/tree-ssa-sink.cc                        | 125 ++++++++++++++++----
 2 files changed, 121 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 00000000000..266ceb000a5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink1-details" } */
+
+void bar ();
+int foo (int *p, int x)
+{
+  int res = *p;
+  if (x)
+    {
+      bar ();
+      res = 1;
+    }
+  return res;
+}
+
+/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink1" } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index cf0a32a954b..e996f46c864 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -388,13 +388,32 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 
 	  imm_use_iterator imm_iter;
 	  use_operand_p use_p;
+	  auto_bitmap bbs_to_check;
 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt))
 	    {
 	      gimple *use_stmt = USE_STMT (use_p);
 	      basic_block bb = gimple_bb (use_stmt);
+
+	      /* If there is no virtual definition here, continue.  */
+	      if (gimple_code (use_stmt) != GIMPLE_PHI
+		  && !gimple_vdef (use_stmt))
+		continue;
+
+	      /* When the virtual definition is possibly within the same
+		 irreducible region as the current sinking location all
+		 bets are off.  */
+	      if (((bb->flags & commondom->flags & BB_IRREDUCIBLE_LOOP)
+		   && bb->loop_father == commondom->loop_father)
+		  || ((commondom->flags & BB_IRREDUCIBLE_LOOP)
+		      && flow_loop_nested_p (commondom->loop_father,
+					     bb->loop_father))
+		  || ((bb->flags & BB_IRREDUCIBLE_LOOP)
+		      && flow_loop_nested_p (bb->loop_father,
+					     commondom->loop_father)))
+		;
 	      /* For PHI nodes the block we know sth about is the incoming block
 		 with the use.  */
-	      if (gimple_code (use_stmt) == GIMPLE_PHI)
+	      else if (gimple_code (use_stmt) == GIMPLE_PHI)
 		{
 		  /* If the PHI defines the virtual operand, ignore it.  */
 		  if (gimple_phi_result (use_stmt) == gimple_vuse (stmt))
@@ -402,32 +421,97 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 		  /* In case the PHI node post-dominates the current insert
 		     location we can disregard it.  But make sure it is not
 		     dominating it as well as can happen in a CFG cycle.  */
-		  if (commondom != bb
-		      && !dominated_by_p (CDI_DOMINATORS, commondom, bb)
-		      && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb)
-		      /* If the blocks are possibly within the same irreducible
-			 cycle the above check breaks down.  */
-		      && !((bb->flags & commondom->flags & BB_IRREDUCIBLE_LOOP)
-			   && bb->loop_father == commondom->loop_father)
-		      && !((commondom->flags & BB_IRREDUCIBLE_LOOP)
-			   && flow_loop_nested_p (commondom->loop_father,
-						  bb->loop_father))
-		      && !((bb->flags & BB_IRREDUCIBLE_LOOP)
-			   && flow_loop_nested_p (bb->loop_father,
-						  commondom->loop_father)))
+		  if (!dominated_by_p (CDI_DOMINATORS, commondom, bb)
+		      && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb))
 		    continue;
-		  bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
 		}
-	      else if (!gimple_vdef (use_stmt))
+
+	      /* For PHIs the relevant block is the edge source.  */
+	      if (gimple_code (use_stmt) == GIMPLE_PHI)
+		bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
+	      if (bb == commondom)
 		continue;
+	      if (bb == frombb)
+		return false;
+
 	      /* If the use is not dominated by the path entry it is not on
 		 the path.  */
 	      if (!dominated_by_p (CDI_DOMINATORS, bb, frombb))
 		continue;
-	      /* There is no easy way to disregard defs not on the path from
-		 frombb to commondom so just consider them all.  */
-	      commondom = nearest_common_dominator (CDI_DOMINATORS,
-						    bb, commondom);
+
+	      /* If BB is definitely on the path, shrink it.  Else defer
+		 processing.  */
+	      if (dominated_by_p (CDI_DOMINATORS, commondom, bb))
+		commondom = bb;
+	      else
+		bitmap_set_bit (bbs_to_check, bb->index);
+	    }
+
+	  /* Now check whether any of the BBs recorded in bbs_to_check
+	     is on a path from frombb to commondom.  If so, adjust
+	     commondom accordingly.  */
+	  if (!bitmap_empty_p (bbs_to_check))
+	    {
+	      auto_bb_flag visited (cfun);
+	      auto_vec<basic_block, 3> worklist;
+	      auto_vec<basic_block, 3> toclean;
+	      worklist.quick_push (commondom);
+	      do
+		{
+		  basic_block pb = worklist.pop ();
+		  edge_iterator ei;
+		  edge e;
+		  FOR_EACH_EDGE (e, ei, pb->preds)
+		    /* When we run into backedges instead of checking paths
+		       covering those (esp. considering irreducible regions)
+		       simply adjust commondom to the non-backedge common
+		       dominators.  */
+		    if (e->flags & EDGE_DFS_BACK)
+		      {
+			FOR_EACH_EDGE (e, ei, pb->preds)
+			  if (!(e->flags & EDGE_DFS_BACK))
+			    commondom
+			      = nearest_common_dominator (CDI_DOMINATORS,
+							  e->src, commondom);
+			if (!(commondom->flags & visited))
+			  {
+			    commondom->flags |= visited;
+			    toclean.safe_push (commondom);
+			    worklist.safe_push (commondom);
+			  }
+			break;
+		      }
+		    else if (e->src == frombb)
+		      ;
+		    else if (!(e->src->flags & visited))
+		      {
+			if (bitmap_clear_bit (bbs_to_check, e->src->index))
+			  {
+			    commondom = nearest_common_dominator (CDI_DOMINATORS,
+								  e->src,
+								  commondom);
+			    if (commondom == frombb)
+			      break;
+			    if (!(commondom->flags & visited))
+			      {
+				commondom->flags |= visited;
+				toclean.safe_push (commondom);
+				worklist.safe_push (commondom);
+			      }
+			  }
+			else
+			  {
+			    e->src->flags |= visited;
+			    toclean.safe_push (e->src);
+			    worklist.safe_push (e->src);
+			  }
+		      }
+		}
+	      while (commondom != frombb
+		     && !worklist.is_empty ()
+		     && !bitmap_empty_p (bbs_to_check));
+	      for (basic_block cb : toclean)
+		cb->flags &= ~visited;
 	      if (commondom == frombb)
 		return false;
 	    }
@@ -848,6 +932,7 @@ pass_sink_code::execute (function *fun)
   /* Arrange for the critical edge splitting to be undone if requested.  */
   unsigned todo = unsplit_edges ? TODO_cleanup_cfg : 0;
   connect_infinite_loops_to_exit ();
+  mark_dfs_back_edges (fun);
   memset (&sink_stats, 0, sizeof (sink_stats));
   calculate_dominance_info (CDI_DOMINATORS);
   calculate_dominance_info (CDI_POST_DOMINATORS);
-- 
2.35.3

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Improve sinking with unrelated defs
  2023-08-02 12:50 Richard Biener
@ 2023-08-02 13:57 ` Jeff Law
  0 siblings, 0 replies; 3+ messages in thread
From: Jeff Law @ 2023-08-02 13:57 UTC (permalink / raw)
  To: Richard Biener, gcc-patches



On 8/2/23 06:50, Richard Biener via Gcc-patches wrote:
> On Mon, 31 Jul 2023, Richard Biener wrote:
> 
>> statement_sink_location for loads is currently confused about
>> stores that are not on the paths we are sinking across.  The
>> following avoids this by explicitly checking whether a block
>> with a store is on any of those paths.  To not perform too many
>> walks over the sub-part of the CFG between the original stmt
>> location and the found sinking candidate we first collect all
>> blocks to check and then perform a single walk from the sinking
>> candidate location to the original stmt location.  We avoid enlarging
>> the region by conservatively handling backedges.
>>
>> The original heuristics what store locations to ignore have been
>> refactored, some can possibly be removed now.
>>
>> If anybody knows a cheaper way to check whether a BB is on a path
>> from block A to block B which is dominated by A I'd be happy to
>> know (or if there would be a clever caching method at least - I'm
>> probably going to limit the number of blocks to walk to avoid
>> quadratic behavior).
> 
> I've replaced the whole thing with something based on virtual
> operand liveness.
Good.  I was trying to form something based on lowest common ancestor 
walks in the dominator tree, but couldn't convince myself that it would 
actually solve your problem.

jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Improve sinking with unrelated defs
@ 2023-08-02 12:50 Richard Biener
  2023-08-02 13:57 ` Jeff Law
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2023-08-02 12:50 UTC (permalink / raw)
  To: gcc-patches

On Mon, 31 Jul 2023, Richard Biener wrote:

> statement_sink_location for loads is currently confused about
> stores that are not on the paths we are sinking across.  The
> following avoids this by explicitly checking whether a block
> with a store is on any of those paths.  To not perform too many
> walks over the sub-part of the CFG between the original stmt
> location and the found sinking candidate we first collect all
> blocks to check and then perform a single walk from the sinking
> candidate location to the original stmt location.  We avoid enlarging
> the region by conservatively handling backedges.
> 
> The original heuristics what store locations to ignore have been
> refactored, some can possibly be removed now.
> 
> If anybody knows a cheaper way to check whether a BB is on a path
> from block A to block B which is dominated by A I'd be happy to
> know (or if there would be a clever caching method at least - I'm
> probably going to limit the number of blocks to walk to avoid
> quadratic behavior).

I've replaced the whole thing with something based on virtual
operand liveness.

Richard.

> Bootstrapped and tested on x86_64-unknown-linux-gnu.  This depends
> on the previously sent RFC to limit testsuite fallout.
> 
> 	* tree-ssa-sink.cc (pass_sink_code::execute): Mark backedges.
> 	(statement_sink_location): Do not consider
> 	stores that are not on any path from the original to the
> 	destination location.
> 
> 	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 +++
>  gcc/tree-ssa-sink.cc                        | 125 ++++++++++++++++----
>  2 files changed, 121 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> new file mode 100644
> index 00000000000..266ceb000a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sink1-details" } */
> +
> +void bar ();
> +int foo (int *p, int x)
> +{
> +  int res = *p;
> +  if (x)
> +    {
> +      bar ();
> +      res = 1;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink1" } } */
> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
> index cf0a32a954b..e996f46c864 100644
> --- a/gcc/tree-ssa-sink.cc
> +++ b/gcc/tree-ssa-sink.cc
> @@ -388,13 +388,32 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>  
>  	  imm_use_iterator imm_iter;
>  	  use_operand_p use_p;
> +	  auto_bitmap bbs_to_check;
>  	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt))
>  	    {
>  	      gimple *use_stmt = USE_STMT (use_p);
>  	      basic_block bb = gimple_bb (use_stmt);
> +
> +	      /* If there is no virtual definition here, continue.  */
> +	      if (gimple_code (use_stmt) != GIMPLE_PHI
> +		  && !gimple_vdef (use_stmt))
> +		continue;
> +
> +	      /* When the virtual definition is possibly within the same
> +		 irreducible region as the current sinking location all
> +		 bets are off.  */
> +	      if (((bb->flags & commondom->flags & BB_IRREDUCIBLE_LOOP)
> +		   && bb->loop_father == commondom->loop_father)
> +		  || ((commondom->flags & BB_IRREDUCIBLE_LOOP)
> +		      && flow_loop_nested_p (commondom->loop_father,
> +					     bb->loop_father))
> +		  || ((bb->flags & BB_IRREDUCIBLE_LOOP)
> +		      && flow_loop_nested_p (bb->loop_father,
> +					     commondom->loop_father)))
> +		;
>  	      /* For PHI nodes the block we know sth about is the incoming block
>  		 with the use.  */
> -	      if (gimple_code (use_stmt) == GIMPLE_PHI)
> +	      else if (gimple_code (use_stmt) == GIMPLE_PHI)
>  		{
>  		  /* If the PHI defines the virtual operand, ignore it.  */
>  		  if (gimple_phi_result (use_stmt) == gimple_vuse (stmt))
> @@ -402,32 +421,97 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>  		  /* In case the PHI node post-dominates the current insert
>  		     location we can disregard it.  But make sure it is not
>  		     dominating it as well as can happen in a CFG cycle.  */
> -		  if (commondom != bb
> -		      && !dominated_by_p (CDI_DOMINATORS, commondom, bb)
> -		      && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb)
> -		      /* If the blocks are possibly within the same irreducible
> -			 cycle the above check breaks down.  */
> -		      && !((bb->flags & commondom->flags & BB_IRREDUCIBLE_LOOP)
> -			   && bb->loop_father == commondom->loop_father)
> -		      && !((commondom->flags & BB_IRREDUCIBLE_LOOP)
> -			   && flow_loop_nested_p (commondom->loop_father,
> -						  bb->loop_father))
> -		      && !((bb->flags & BB_IRREDUCIBLE_LOOP)
> -			   && flow_loop_nested_p (bb->loop_father,
> -						  commondom->loop_father)))
> +		  if (!dominated_by_p (CDI_DOMINATORS, commondom, bb)
> +		      && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb))
>  		    continue;
> -		  bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
>  		}
> -	      else if (!gimple_vdef (use_stmt))
> +
> +	      /* For PHIs the relevant block is the edge source.  */
> +	      if (gimple_code (use_stmt) == GIMPLE_PHI)
> +		bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
> +	      if (bb == commondom)
>  		continue;
> +	      if (bb == frombb)
> +		return false;
> +
>  	      /* If the use is not dominated by the path entry it is not on
>  		 the path.  */
>  	      if (!dominated_by_p (CDI_DOMINATORS, bb, frombb))
>  		continue;
> -	      /* There is no easy way to disregard defs not on the path from
> -		 frombb to commondom so just consider them all.  */
> -	      commondom = nearest_common_dominator (CDI_DOMINATORS,
> -						    bb, commondom);
> +
> +	      /* If BB is definitely on the path, shrink it.  Else defer
> +		 processing.  */
> +	      if (dominated_by_p (CDI_DOMINATORS, commondom, bb))
> +		commondom = bb;
> +	      else
> +		bitmap_set_bit (bbs_to_check, bb->index);
> +	    }
> +
> +	  /* Now check whether any of the BBs recorded in bbs_to_check
> +	     is on a path from frombb to commondom.  If so, adjust
> +	     commondom accordingly.  */
> +	  if (!bitmap_empty_p (bbs_to_check))
> +	    {
> +	      auto_bb_flag visited (cfun);
> +	      auto_vec<basic_block, 3> worklist;
> +	      auto_vec<basic_block, 3> toclean;
> +	      worklist.quick_push (commondom);
> +	      do
> +		{
> +		  basic_block pb = worklist.pop ();
> +		  edge_iterator ei;
> +		  edge e;
> +		  FOR_EACH_EDGE (e, ei, pb->preds)
> +		    /* When we run into backedges instead of checking paths
> +		       covering those (esp. considering irreducible regions)
> +		       simply adjust commondom to the non-backedge common
> +		       dominators.  */
> +		    if (e->flags & EDGE_DFS_BACK)
> +		      {
> +			FOR_EACH_EDGE (e, ei, pb->preds)
> +			  if (!(e->flags & EDGE_DFS_BACK))
> +			    commondom
> +			      = nearest_common_dominator (CDI_DOMINATORS,
> +							  e->src, commondom);
> +			if (!(commondom->flags & visited))
> +			  {
> +			    commondom->flags |= visited;
> +			    toclean.safe_push (commondom);
> +			    worklist.safe_push (commondom);
> +			  }
> +			break;
> +		      }
> +		    else if (e->src == frombb)
> +		      ;
> +		    else if (!(e->src->flags & visited))
> +		      {
> +			if (bitmap_clear_bit (bbs_to_check, e->src->index))
> +			  {
> +			    commondom = nearest_common_dominator (CDI_DOMINATORS,
> +								  e->src,
> +								  commondom);
> +			    if (commondom == frombb)
> +			      break;
> +			    if (!(commondom->flags & visited))
> +			      {
> +				commondom->flags |= visited;
> +				toclean.safe_push (commondom);
> +				worklist.safe_push (commondom);
> +			      }
> +			  }
> +			else
> +			  {
> +			    e->src->flags |= visited;
> +			    toclean.safe_push (e->src);
> +			    worklist.safe_push (e->src);
> +			  }
> +		      }
> +		}
> +	      while (commondom != frombb
> +		     && !worklist.is_empty ()
> +		     && !bitmap_empty_p (bbs_to_check));
> +	      for (basic_block cb : toclean)
> +		cb->flags &= ~visited;
>  	      if (commondom == frombb)
>  		return false;
>  	    }
> @@ -848,6 +932,7 @@ pass_sink_code::execute (function *fun)
>    /* Arrange for the critical edge splitting to be undone if requested.  */
>    unsigned todo = unsplit_edges ? TODO_cleanup_cfg : 0;
>    connect_infinite_loops_to_exit ();
> +  mark_dfs_back_edges (fun);
>    memset (&sink_stats, 0, sizeof (sink_stats));
>    calculate_dominance_info (CDI_DOMINATORS);
>    calculate_dominance_info (CDI_POST_DOMINATORS);
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-08-02 13:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-31 12:39 [PATCH] Improve sinking with unrelated defs Richard Biener
2023-08-02 12:50 Richard Biener
2023-08-02 13:57 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).