public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR51528
@ 2012-01-27 13:52 Richard Guenther
  2012-01-30 13:25 ` Richard Guenther
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Guenther @ 2012-01-27 13:52 UTC (permalink / raw)
  To: gcc-patches


This fixes PR51528 by hiding the issue that SRA generates
copy in/out with a type not suitable for preserving the data
(any non-mode-precision thing).  It fixes it by instead of
emitting

  x$i_8 = x.i;
  D.1720 = x;
  D.1720.i = x$i_8;

for an assign to an unscalarized D.1720 from a partly scalarized x
(so the aggregate assignment prevails) emit

  x.i = x$i_8;
  D.1720 = x;

instead.  This allows the unfortunate copy in/out to be optimized
away (and thus its undefined behavior).

Basically whenever the aggregate copy will prevail prefer to
restore the RHS and reload the LHS components - just like we
do for aggregate uses/destinations of call statements.

I think we can still clean up the aggregate copy path a lot,
but that's for 4.8 (unless new bugs pop up).

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

Richard.

2012-01-27  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/51528
	* tree-sra.c (sra_modify_assign): Handle the default fallback
	for aggregate assigns better.

	* gcc.dg/torture/pr51528.c: New testcase.

Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c	(revision 183616)
--- gcc/tree-sra.c	(working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3060,3104 ****
  	}
        else
  	{
! 	  if (racc)
  	    {
! 	      if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
  		{
! 		  if (dump_file)
! 		    {
! 		      fprintf (dump_file, "Removing load: ");
! 		      print_gimple_stmt (dump_file, *stmt, 0, 0);
! 		    }
! 
! 		  if (TREE_CODE (lhs) == SSA_NAME)
! 		    {
! 		      rhs = get_repl_default_def_ssa_name (racc);
! 		      if (!useless_type_conversion_p (TREE_TYPE (lhs),
! 						      TREE_TYPE (rhs)))
! 			rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
! 					       TREE_TYPE (lhs), rhs);
! 		    }
! 		  else
! 		    {
! 		      if (racc->first_child)
! 			generate_subtree_copies (racc->first_child, lhs,
! 						 racc->offset, 0, 0, gsi,
! 						 false, false, loc);
! 
! 		      gcc_assert (*stmt == gsi_stmt (*gsi));
! 		      unlink_stmt_vdef (*stmt);
! 		      gsi_remove (gsi, true);
! 		      sra_stats.deleted++;
! 		      return SRA_AM_REMOVED;
! 		    }
  		}
- 	      else if (racc->first_child)
- 		generate_subtree_copies (racc->first_child, lhs, racc->offset,
- 					 0, 0, gsi, false, true, loc);
  	    }
  	  if (access_has_children_p (lacc))
! 	    generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
! 				     0, 0, gsi, true, true, loc);
  	}
      }
  
--- 3061,3124 ----
  	}
        else
  	{
! 	  if (racc
! 	      && !racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
  	    {
! 	      if (dump_file)
! 		{
! 		  fprintf (dump_file, "Removing load: ");
! 		  print_gimple_stmt (dump_file, *stmt, 0, 0);
! 		}
! 
! 	      if (TREE_CODE (lhs) == SSA_NAME)
! 		{
! 		  rhs = get_repl_default_def_ssa_name (racc);
! 		  if (!useless_type_conversion_p (TREE_TYPE (lhs),
! 						  TREE_TYPE (rhs)))
! 		    rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
! 					   TREE_TYPE (lhs), rhs);
! 		}
! 	      else
  		{
! 		  if (racc->first_child)
! 		    generate_subtree_copies (racc->first_child, lhs,
! 					     racc->offset, 0, 0, gsi,
! 					     false, false, loc);
! 
! 		  gcc_assert (*stmt == gsi_stmt (*gsi));
! 		  unlink_stmt_vdef (*stmt);
! 		  gsi_remove (gsi, true);
! 		  sra_stats.deleted++;
! 		  return SRA_AM_REMOVED;
  		}
  	    }
+ 
+ 	  /* If there is no unscalarized data on the RHS replace
+ 	     the aggregate copy by piecewise storing into the LHS.  */
+ 	  if (access_has_children_p (racc) && racc->grp_covered)
+ 	    {
+ 	      generate_subtree_copies (racc->first_child, lhs, racc->offset,
+ 				       0, 0, gsi, false, false, loc);
+ 	      gcc_assert (!access_has_children_p (lacc));
+ 	      gcc_assert (*stmt == gsi_stmt (*gsi));
+ 	      unlink_stmt_vdef (*stmt);
+ 	      gsi_remove (gsi, true);
+ 	      sra_stats.deleted++;
+ 	      return SRA_AM_REMOVED;
+ 	    }
+ 
+ 	  /* If there is unscalarized data on the RHS the aggregate
+ 	     copy will prevail, so simply update the RHS from its
+ 	     scalarized components.  */
+ 	  if (access_has_children_p (racc))
+ 	    generate_subtree_copies (racc->first_child, racc->base,
+ 				     0, 0, 0, gsi, false, false, loc);
+ 
+ 	  /* The aggregate copy has prevailed.  Re-load the LHS components
+ 	     from the LHS.  */
  	  if (access_has_children_p (lacc))
! 	    generate_subtree_copies (lacc->first_child, lhs,
! 				     0, 0, 0, gsi, true, true, loc);
  	}
      }
  
Index: gcc/testsuite/gcc.dg/torture/pr51528.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr51528.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr51528.c	(revision 0)
***************
*** 0 ****
--- 1,46 ----
+ /* { dg-do run } */
+ /* { dg-options "-fno-early-inlining" } */
+ 
+ extern void abort (void);
+ 
+ union U
+ {
+   int i;
+   _Bool b;
+ };
+ 
+ _Bool gb;
+ 
+ void  __attribute__ ((noinline))
+ use_bool (union U u)
+ {
+   gb = u.b;
+ }
+ 
+ union U
+ bar (void)
+ {
+   union U u;
+   u.i = 0xFFFE;
+   return u;
+ }
+ 
+ union U  __attribute__ ((noinline))
+ foo (void)
+ {
+   union U u,v;
+ 
+   u.b = 1;
+   use_bool (u);
+   u = bar ();
+ 
+   return u;
+ }
+ 
+ int main (int argc, char **argv)
+ {
+   union U u = foo ();
+   if (u.i != 0xFFFE)
+     abort ();
+   return 0;
+ }

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix PR51528
  2012-01-27 13:52 [PATCH] Fix PR51528 Richard Guenther
@ 2012-01-30 13:25 ` Richard Guenther
  2012-01-31  9:44   ` Richard Guenther
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Guenther @ 2012-01-30 13:25 UTC (permalink / raw)
  To: gcc-patches

On Fri, 27 Jan 2012, Richard Guenther wrote:

> 
> This fixes PR51528 by hiding the issue that SRA generates
> copy in/out with a type not suitable for preserving the data
> (any non-mode-precision thing).  It fixes it by instead of
> emitting
> 
>   x$i_8 = x.i;
>   D.1720 = x;
>   D.1720.i = x$i_8;
> 
> for an assign to an unscalarized D.1720 from a partly scalarized x
> (so the aggregate assignment prevails) emit
> 
>   x.i = x$i_8;
>   D.1720 = x;
> 
> instead.  This allows the unfortunate copy in/out to be optimized
> away (and thus its undefined behavior).
> 
> Basically whenever the aggregate copy will prevail prefer to
> restore the RHS and reload the LHS components - just like we
> do for aggregate uses/destinations of call statements.
> 
> I think we can still clean up the aggregate copy path a lot,
> but that's for 4.8 (unless new bugs pop up).
> 
> Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

So it turns out this wasn't that easy and I botched up some
of the intricate execution flow in sra_modify_assign.  As a
preparation for a smaller fix the following patch refactors
this function to be easier to understand and hopefully make
the real fix obvious in what it does.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2012-01-30  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/51528
	* tree-sra.c (sra_modify_assign): Re-factor in preparation
	for PR51528 fix.

Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c	(revision 183694)
--- gcc/tree-sra.c	(working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 2991,2996 ****
--- 2991,3006 ----
  	force_gimple_rhs = true;
        sra_stats.exprs++;
      }
+   else if (racc
+ 	   && !access_has_children_p (racc)
+ 	   && !racc->grp_to_be_replaced
+ 	   && !racc->grp_unscalarized_data
+ 	   && TREE_CODE (lhs) == SSA_NAME)
+     {
+       rhs = get_repl_default_def_ssa_name (racc);
+       modify_this_stmt = true;
+       sra_stats.exprs++;
+     }
  
    if (modify_this_stmt)
      {
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3067,3072 ****
--- 3077,3097 ----
  	generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0,
  				 gsi, true, true, loc);
        sra_stats.separate_lhs_rhs_handling++;
+ 
+       /* This gimplification must be done after generate_subtree_copies,
+ 	 lest we insert the subtree copies in the middle of the gimplified
+ 	 sequence.  */
+       if (force_gimple_rhs)
+ 	rhs = force_gimple_operand_gsi (&orig_gsi, rhs, true, NULL_TREE,
+ 					true, GSI_SAME_STMT);
+       if (gimple_assign_rhs1 (*stmt) != rhs)
+ 	{
+ 	  modify_this_stmt = true;
+ 	  gimple_assign_set_rhs_from_tree (&orig_gsi, rhs);
+ 	  gcc_assert (*stmt == gsi_stmt (orig_gsi));
+ 	}
+ 
+       return modify_this_stmt ? SRA_AM_MODIFIED : SRA_AM_NONE;
      }
    else
      {
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3093,3153 ****
  	}
        else
  	{
! 	  if (racc)
  	    {
! 	      if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
  		{
! 		  if (dump_file)
! 		    {
! 		      fprintf (dump_file, "Removing load: ");
! 		      print_gimple_stmt (dump_file, *stmt, 0, 0);
! 		    }
! 
! 		  if (TREE_CODE (lhs) == SSA_NAME)
! 		    {
! 		      rhs = get_repl_default_def_ssa_name (racc);
! 		      if (!useless_type_conversion_p (TREE_TYPE (lhs),
! 						      TREE_TYPE (rhs)))
! 			rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
! 					       TREE_TYPE (lhs), rhs);
! 		    }
! 		  else
! 		    {
! 		      if (racc->first_child)
! 			generate_subtree_copies (racc->first_child, lhs,
! 						 racc->offset, 0, 0, gsi,
! 						 false, false, loc);
! 
! 		      gcc_assert (*stmt == gsi_stmt (*gsi));
! 		      unlink_stmt_vdef (*stmt);
! 		      gsi_remove (gsi, true);
! 		      sra_stats.deleted++;
! 		      return SRA_AM_REMOVED;
! 		    }
  		}
! 	      else if (racc->first_child)
! 		generate_subtree_copies (racc->first_child, lhs, racc->offset,
! 					 0, 0, gsi, false, true, loc);
  	    }
  	  if (access_has_children_p (lacc))
  	    generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
  				     0, 0, gsi, true, true, loc);
  	}
-     }
  
!   /* This gimplification must be done after generate_subtree_copies, lest we
!      insert the subtree copies in the middle of the gimplified sequence.  */
!   if (force_gimple_rhs)
!     rhs = force_gimple_operand_gsi (&orig_gsi, rhs, true, NULL_TREE,
! 				    true, GSI_SAME_STMT);
!   if (gimple_assign_rhs1 (*stmt) != rhs)
!     {
!       modify_this_stmt = true;
!       gimple_assign_set_rhs_from_tree (&orig_gsi, rhs);
!       gcc_assert (*stmt == gsi_stmt (orig_gsi));
      }
- 
-   return modify_this_stmt ? SRA_AM_MODIFIED : SRA_AM_NONE;
  }
  
  /* Traverse the function body and all modifications as decided in
--- 3118,3150 ----
  	}
        else
  	{
! 	  if (access_has_children_p (racc)
! 	      && !racc->grp_unscalarized_data)
  	    {
! 	      if (dump_file)
  		{
! 		  fprintf (dump_file, "Removing load: ");
! 		  print_gimple_stmt (dump_file, *stmt, 0, 0);
  		}
! 	      generate_subtree_copies (racc->first_child, lhs,
! 				       racc->offset, 0, 0, gsi,
! 				       false, false, loc);
! 	      gcc_assert (*stmt == gsi_stmt (*gsi));
! 	      unlink_stmt_vdef (*stmt);
! 	      gsi_remove (gsi, true);
! 	      sra_stats.deleted++;
! 	      return SRA_AM_REMOVED;
  	    }
+ 	  if (access_has_children_p (racc))
+ 	    generate_subtree_copies (racc->first_child, lhs, racc->offset,
+ 				     0, 0, gsi, false, true, loc);
  	  if (access_has_children_p (lacc))
  	    generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
  				     0, 0, gsi, true, true, loc);
  	}
  
!       return SRA_AM_NONE;
      }
  }
  
  /* Traverse the function body and all modifications as decided in

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix PR51528
  2012-01-30 13:25 ` Richard Guenther
@ 2012-01-31  9:44   ` Richard Guenther
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Guenther @ 2012-01-31  9:44 UTC (permalink / raw)
  To: gcc-patches

On Mon, 30 Jan 2012, Richard Guenther wrote:

> On Fri, 27 Jan 2012, Richard Guenther wrote:
> 
> > 
> > This fixes PR51528 by hiding the issue that SRA generates
> > copy in/out with a type not suitable for preserving the data
> > (any non-mode-precision thing).  It fixes it by instead of
> > emitting
> > 
> >   x$i_8 = x.i;
> >   D.1720 = x;
> >   D.1720.i = x$i_8;
> > 
> > for an assign to an unscalarized D.1720 from a partly scalarized x
> > (so the aggregate assignment prevails) emit
> > 
> >   x.i = x$i_8;
> >   D.1720 = x;
> > 
> > instead.  This allows the unfortunate copy in/out to be optimized
> > away (and thus its undefined behavior).
> > 
> > Basically whenever the aggregate copy will prevail prefer to
> > restore the RHS and reload the LHS components - just like we
> > do for aggregate uses/destinations of call statements.
> > 
> > I think we can still clean up the aggregate copy path a lot,
> > but that's for 4.8 (unless new bugs pop up).
> > 
> > Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
> 
> So it turns out this wasn't that easy and I botched up some
> of the intricate execution flow in sra_modify_assign.  As a
> preparation for a smaller fix the following patch refactors
> this function to be easier to understand and hopefully make
> the real fix obvious in what it does.

And this is the "real" fix - well, avoid the copy in/out we
currently generate which we are not able to optimize and instead
emit sth we are able to remove the redundant (and harmful) stores.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2012-01-30  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/51528
	* tree-sra.c (sra_modify_assign): Avoid copy-in/out for aggregate
	assigns.

	* gcc.dg/torture/pr51528.c: New testcase.

Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c	(revision 183720)
--- gcc/tree-sra.c	(working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3135,3143 ****
  	      sra_stats.deleted++;
  	      return SRA_AM_REMOVED;
  	    }
  	  if (access_has_children_p (racc))
! 	    generate_subtree_copies (racc->first_child, lhs, racc->offset,
! 				     0, 0, gsi, false, true, loc);
  	  if (access_has_children_p (lacc))
  	    generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
  				     0, 0, gsi, true, true, loc);
--- 3135,3148 ----
  	      sra_stats.deleted++;
  	      return SRA_AM_REMOVED;
  	    }
+ 	  /* Restore the aggregate RHS from its components so the
+ 	     prevailing aggregate copy does the right thing.  */
  	  if (access_has_children_p (racc))
! 	    generate_subtree_copies (racc->first_child, racc->base, 0, 0, 0,
! 				     gsi, false, false, loc);
! 	  /* Re-load the components of the aggregate copy destination.
! 	     But use the RHS aggregate to load from to expose more
! 	     optimization opportunities.  */
  	  if (access_has_children_p (lacc))
  	    generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
  				     0, 0, gsi, true, true, loc);
Index: gcc/testsuite/gcc.dg/torture/pr51528.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr51528.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr51528.c	(revision 0)
***************
*** 0 ****
--- 1,46 ----
+ /* { dg-do run } */
+ /* { dg-options "-fno-early-inlining" } */
+ 
+ extern void abort (void);
+ 
+ union U
+ {
+   int i;
+   _Bool b;
+ };
+ 
+ _Bool gb;
+ 
+ void  __attribute__ ((noinline))
+ use_bool (union U u)
+ {
+   gb = u.b;
+ }
+ 
+ union U
+ bar (void)
+ {
+   union U u;
+   u.i = 0xFFFE;
+   return u;
+ }
+ 
+ union U  __attribute__ ((noinline))
+ foo (void)
+ {
+   union U u,v;
+ 
+   u.b = 1;
+   use_bool (u);
+   u = bar ();
+ 
+   return u;
+ }
+ 
+ int main (int argc, char **argv)
+ {
+   union U u = foo ();
+   if (u.i != 0xFFFE)
+     abort ();
+   return 0;
+ }

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-01-31  9:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-27 13:52 [PATCH] Fix PR51528 Richard Guenther
2012-01-30 13:25 ` Richard Guenther
2012-01-31  9:44   ` Richard Guenther

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).