public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix more PR80928 fallout
@ 2017-06-23 11:40 Richard Biener
  2017-06-23 13:56 ` Rainer Orth
  2017-06-23 15:18 ` Jeff Law
  0 siblings, 2 replies; 4+ messages in thread
From: Richard Biener @ 2017-06-23 11:40 UTC (permalink / raw)
  To: gcc-patches


SLP induction vectorization runs into the issue that it remembers
pointers to PHI nodes in the SLP tree during analysis.  But those
may get invalidated by loop copying (for prologue/epilogue peeling
or versioning) as the low-level CFG helper copy_bbs works in the
way of copying individual BBs plus their outgoing edges but with
old destinations and at the end re-directing the edges to the
desired location.  In SSA this triggers the whole machinery of
making room for new PHI nodes -- that is undesirable because it
causes re-allocation of PHI nodes in the set of source blocks.

After much pondering I arrived at the following (least ugly) solution
to this "problem" (well, I define it as a problem, it's at least
an inefficiency and a workaround in the vectorizer would be way
uglier).  Namely simply do not trigger the SSA machinery for
blocks with BB_DUPLICATED (I skimmed all other users and they seem
fine).

In the process I also implemented some poisoning of the old PHI node
when we reallocate (well, free) PHI nodes.  But that triggers some
other issues, one fixed by the tree-ssa-phionlycoprop.c hunk below.
So I'm not submitting it as part of this fix.

Bootstrapped (with the poisoning sofar, plain patch still running)
on x86_64-unknown-linux-gnu, testing in progress.

Comments welcome, testing won't finish before I leave for the
weekend.

Thanks,
Richard.

2017-06-23  Richard Biener  <rguenther@suse.de>

	* cfghooks.c (duplicate_block): Do not copy BB_DUPLICATED flag.
	(copy_bbs): Set BB_DUPLICATED flag early.
	(execute_on_growing_pred): Do not execute for BB_DUPLICATED
	marked blocks.
	(execute_on_shrinking_pred): Likewise.
	* tree-ssa.c (ssa_redirect_edge): Do not look for PHI args in
	BB_DUPLICATED blocks.
	* tree-ssa-phionlycoprop.c (eliminate_degenerate_phis_1): Properly
	iterate over all PHIs considering removal of *gsi.

Index: gcc/cfghooks.c
===================================================================
--- gcc/cfghooks.c	(revision 249552)
+++ gcc/cfghooks.c	(working copy)
@@ -1087,7 +1087,7 @@ duplicate_block (basic_block bb, edge e,
   if (after)
     move_block_after (new_bb, after);
 
-  new_bb->flags = bb->flags;
+  new_bb->flags = (bb->flags & ~BB_DUPLICATED);
   FOR_EACH_EDGE (s, ei, bb->succs)
     {
       /* Since we are creating edges from a new block to successors
@@ -1207,7 +1207,8 @@ flow_call_edges_add (sbitmap blocks)
 void
 execute_on_growing_pred (edge e)
 {
-  if (cfg_hooks->execute_on_growing_pred)
+  if (! (e->dest->flags & BB_DUPLICATED)
+      && cfg_hooks->execute_on_growing_pred)
     cfg_hooks->execute_on_growing_pred (e);
 }
 
@@ -1217,7 +1218,8 @@ execute_on_growing_pred (edge e)
 void
 execute_on_shrinking_pred (edge e)
 {
-  if (cfg_hooks->execute_on_shrinking_pred)
+  if (! (e->dest->flags & BB_DUPLICATED)
+      && cfg_hooks->execute_on_shrinking_pred)
     cfg_hooks->execute_on_shrinking_pred (e);
 }
 
@@ -1353,6 +1355,12 @@ copy_bbs (basic_block *bbs, unsigned n,
   basic_block bb, new_bb, dom_bb;
   edge e;
 
+  /* Mark the blocks to be copied.  This is used by edge creation hooks
+     to decide whether to reallocate PHI nodes capacity to avoid reallocating
+     PHIs in the set of source BBs.  */
+  for (i = 0; i < n; i++)
+    bbs[i]->flags |= BB_DUPLICATED;
+
   /* Duplicate bbs, update dominators, assign bbs to loops.  */
   for (i = 0; i < n; i++)
     {
@@ -1360,7 +1368,6 @@ copy_bbs (basic_block *bbs, unsigned n,
       bb = bbs[i];
       new_bb = new_bbs[i] = duplicate_block (bb, NULL, after);
       after = new_bb;
-      bb->flags |= BB_DUPLICATED;
       if (bb->loop_father)
 	{
 	  /* Possibly set loop header.  */
Index: gcc/tree-ssa-phionlycprop.c
===================================================================
--- gcc/tree-ssa-phionlycprop.c	(revision 249552)
+++ gcc/tree-ssa-phionlycprop.c	(working copy)
@@ -420,10 +420,11 @@ eliminate_degenerate_phis_1 (basic_block
   basic_block son;
   bool cfg_altered = false;
 
-  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi);)
     {
       gphi *phi = gsi.phi ();
-
+      /* We might end up removing PHI so advance the iterator now.  */
+      gsi_next (&gsi);
       cfg_altered |= eliminate_const_or_copy (phi, interesting_names,
 					      need_eh_cleanup);
     }

Index: gcc/tree-ssa.c
===================================================================
--- gcc/tree-ssa.c	(revision 249552)
+++ gcc/tree-ssa.c	(working copy)
@@ -142,21 +142,24 @@ ssa_redirect_edge (edge e, basic_block d
 
   redirect_edge_var_map_clear (e);
 
-  /* Remove the appropriate PHI arguments in E's destination block.  */
-  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      tree def;
-      source_location locus ;
-
-      phi = gsi.phi ();
-      def = gimple_phi_arg_def (phi, e->dest_idx);
-      locus = gimple_phi_arg_location (phi, e->dest_idx);
+  /* Remove the appropriate PHI arguments in E's destination block.
+     If we are redirecting a copied edge the destination has not
+     got PHI argument space reserved nor an interesting argument.  */
+  if (! (e->dest->flags & BB_DUPLICATED))
+    for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+	tree def;
+	source_location locus ;
+
+	phi = gsi.phi ();
+	def = gimple_phi_arg_def (phi, e->dest_idx);
+	locus = gimple_phi_arg_location (phi, e->dest_idx);
 
-      if (def == NULL_TREE)
-	continue;
+	if (def == NULL_TREE)
+	  continue;
 
-      redirect_edge_var_map_add (e, gimple_phi_result (phi), def, locus);
-    }
+	redirect_edge_var_map_add (e, gimple_phi_result (phi), def, locus);
+      }
 
   e = redirect_edge_succ_nodup (e, dest);
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix more PR80928 fallout
  2017-06-23 11:40 [PATCH] Fix more PR80928 fallout Richard Biener
@ 2017-06-23 13:56 ` Rainer Orth
  2017-06-23 15:18 ` Jeff Law
  1 sibling, 0 replies; 4+ messages in thread
From: Rainer Orth @ 2017-06-23 13:56 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi Richard,

> Bootstrapped (with the poisoning sofar, plain patch still running)
> on x86_64-unknown-linux-gnu, testing in progress.
>
> Comments welcome, testing won't finish before I leave for the
> weekend.

a i686-pc-linux-gnu bootstrap just completed with the 64-bit libgomp
failues gone and no regressions.

Thanks.
        Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix more PR80928 fallout
  2017-06-23 11:40 [PATCH] Fix more PR80928 fallout Richard Biener
  2017-06-23 13:56 ` Rainer Orth
@ 2017-06-23 15:18 ` Jeff Law
  2017-06-26  7:25   ` Richard Biener
  1 sibling, 1 reply; 4+ messages in thread
From: Jeff Law @ 2017-06-23 15:18 UTC (permalink / raw)
  To: Richard Biener, gcc-patches

On 06/23/2017 05:39 AM, Richard Biener wrote:
> 
> SLP induction vectorization runs into the issue that it remembers
> pointers to PHI nodes in the SLP tree during analysis.  But those
> may get invalidated by loop copying (for prologue/epilogue peeling
> or versioning) as the low-level CFG helper copy_bbs works in the
> way of copying individual BBs plus their outgoing edges but with
> old destinations and at the end re-directing the edges to the
> desired location.  In SSA this triggers the whole machinery of
> making room for new PHI nodes -- that is undesirable because it
> causes re-allocation of PHI nodes in the set of source blocks.
> 
> After much pondering I arrived at the following (least ugly) solution
> to this "problem" (well, I define it as a problem, it's at least
> an inefficiency and a workaround in the vectorizer would be way
> uglier).  Namely simply do not trigger the SSA machinery for
> blocks with BB_DUPLICATED (I skimmed all other users and they seem
> fine).
> 
> In the process I also implemented some poisoning of the old PHI node
> when we reallocate (well, free) PHI nodes.  But that triggers some
> other issues, one fixed by the tree-ssa-phionlycoprop.c hunk below.
> So I'm not submitting it as part of this fix.
> 
> Bootstrapped (with the poisoning sofar, plain patch still running)
> on x86_64-unknown-linux-gnu, testing in progress.
> 
> Comments welcome, testing won't finish before I leave for the
> weekend.
I fully support poisoning the old PHI nodes -- I tracked down a similar
problem just a few months back that probably would have been obvious if
we had poisoned the old nodes (79621 which is now a missed optimization
bug).

I wouldn't be surprised if there's others lurking and given the general
trend of using block duplication to enable various optimizations,
catching this stuff early would definitely be good.

Jeff

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix more PR80928 fallout
  2017-06-23 15:18 ` Jeff Law
@ 2017-06-26  7:25   ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2017-06-26  7:25 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

On Fri, 23 Jun 2017, Jeff Law wrote:

> On 06/23/2017 05:39 AM, Richard Biener wrote:
> > 
> > SLP induction vectorization runs into the issue that it remembers
> > pointers to PHI nodes in the SLP tree during analysis.  But those
> > may get invalidated by loop copying (for prologue/epilogue peeling
> > or versioning) as the low-level CFG helper copy_bbs works in the
> > way of copying individual BBs plus their outgoing edges but with
> > old destinations and at the end re-directing the edges to the
> > desired location.  In SSA this triggers the whole machinery of
> > making room for new PHI nodes -- that is undesirable because it
> > causes re-allocation of PHI nodes in the set of source blocks.
> > 
> > After much pondering I arrived at the following (least ugly) solution
> > to this "problem" (well, I define it as a problem, it's at least
> > an inefficiency and a workaround in the vectorizer would be way
> > uglier).  Namely simply do not trigger the SSA machinery for
> > blocks with BB_DUPLICATED (I skimmed all other users and they seem
> > fine).
> > 
> > In the process I also implemented some poisoning of the old PHI node
> > when we reallocate (well, free) PHI nodes.  But that triggers some
> > other issues, one fixed by the tree-ssa-phionlycoprop.c hunk below.
> > So I'm not submitting it as part of this fix.
> > 
> > Bootstrapped (with the poisoning sofar, plain patch still running)
> > on x86_64-unknown-linux-gnu, testing in progress.
> > 
> > Comments welcome, testing won't finish before I leave for the
> > weekend.
> I fully support poisoning the old PHI nodes -- I tracked down a similar
> problem just a few months back that probably would have been obvious if
> we had poisoned the old nodes (79621 which is now a missed optimization
> bug).
> 
> I wouldn't be surprised if there's others lurking and given the general
> trend of using block duplication to enable various optimizations,
> catching this stuff early would definitely be good.

I've applied this fix.  For reference, below is what passed bootstrap
and regtest minus a few PRE related testsuite fallouts (stupid PRE
simple-minded DCE is giving me a hard time here ... the interaction
between remove_dead_inserted_code and el_to_remove is quite ugly).

When looking at this I also wondered why/if the cache of allocated
PHI nodes is worth the extra trouble of duplicated API like
remove_phi_node vs. gsi_remove.  Not something I have time right now
to clean up, so I'll sit on the patch below for some more time as well.

Richard.

Index: gcc/tree-phinodes.c
===================================================================
--- gcc/tree-phinodes.c	(revision 249638)
+++ gcc/tree-phinodes.c	(working copy)
@@ -67,7 +67,7 @@ along with GCC; see the file COPYING3.
    the -2 on all the calculations below.  */
 
 #define NUM_BUCKETS 10
-static GTY ((deletable (""))) vec<gimple *, va_gc> *free_phinodes[NUM_BUCKETS - 2];
+static GTY ((deletable (""))) vec<gphi *, va_gc> *free_phinodes[NUM_BUCKETS - 2];
 static unsigned long free_phinode_count;
 
 static int ideal_phi_node_len (int);
@@ -103,10 +103,10 @@ allocate_phi_node (size_t len)
 
   /* If our free list has an element, then use it.  */
   if (bucket < NUM_BUCKETS - 2
-      && gimple_phi_capacity ((*free_phinodes[bucket])[0]) >= len)
+      && (*free_phinodes[bucket]).last()->capacity >= len)
     {
       free_phinode_count--;
-      phi = as_a <gphi *> (free_phinodes[bucket]->pop ());
+      phi = free_phinodes[bucket]->pop ();
       if (free_phinodes[bucket]->is_empty ())
 	vec_free (free_phinodes[bucket]);
       if (GATHER_STATISTICS)
@@ -208,7 +208,7 @@ make_phi_node (tree var, int len)
 /* We no longer need PHI, release it so that it may be reused.  */
 
 static void
-release_phi_node (gimple *phi)
+release_phi_node (gphi *phi)
 {
   size_t bucket;
   size_t len = gimple_phi_capacity (phi);
@@ -220,6 +220,13 @@ release_phi_node (gimple *phi)
       imm = gimple_phi_arg_imm_use_ptr (phi, x);
       delink_imm_use (imm);
     }
+  if (flag_checking)
+    {
+      memset (phi, 0xfe, (sizeof (struct gphi)
+			  - sizeof (struct phi_arg_d)
+			  + sizeof (struct phi_arg_d) * len));
+      phi->capacity = len;
+    }
 
   bucket = len > NUM_BUCKETS - 1 ? NUM_BUCKETS - 1 : len;
   bucket -= 2;
@@ -438,7 +445,7 @@ remove_phi_args (edge e)
 void
 remove_phi_node (gimple_stmt_iterator *gsi, bool release_lhs_p)
 {
-  gimple *phi = gsi_stmt (*gsi);
+  gphi *phi = as_a <gphi *> (gsi_stmt (*gsi));
 
   if (release_lhs_p)
     insert_debug_temps_for_defs (gsi);
@@ -447,9 +454,9 @@ remove_phi_node (gimple_stmt_iterator *g
 
   /* If we are deleting the PHI node, then we should release the
      SSA_NAME node so that it can be reused.  */
-  release_phi_node (phi);
   if (release_lhs_p)
     release_ssa_name (gimple_phi_result (phi));
+  release_phi_node (phi);
 }
 
 /* Remove all the phi nodes from BB.  */
Index: gcc/tree-ssa-pre.c
===================================================================
--- gcc/tree-ssa-pre.c	(revision 249638)
+++ gcc/tree-ssa-pre.c	(working copy)
@@ -4240,7 +4240,9 @@ eliminate_dom_walker::before_dom_childre
 	  if (may_propagate_copy (res, sprime))
 	    {
 	      /* Mark the PHI for removal.  */
-	      el_to_remove.safe_push (phi);
+	      if (inserted_exprs
+		  && !bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (res)))
+		el_to_remove.safe_push (phi);
 	      gsi_next (&gsi);
 	      continue;
 	    }

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-06-26  7:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-23 11:40 [PATCH] Fix more PR80928 fallout Richard Biener
2017-06-23 13:56 ` Rainer Orth
2017-06-23 15:18 ` Jeff Law
2017-06-26  7:25   ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).