* [PATCH] Fix PR51528
@ 2012-01-27 13:52 Richard Guenther
2012-01-30 13:25 ` Richard Guenther
0 siblings, 1 reply; 3+ messages in thread
From: Richard Guenther @ 2012-01-27 13:52 UTC (permalink / raw)
To: gcc-patches
This fixes PR51528 by hiding the issue that SRA generates
copy in/out with a type not suitable for preserving the data
(any non-mode-precision thing). It fixes it by instead of
emitting
x$i_8 = x.i;
D.1720 = x;
D.1720.i = x$i_8;
for an assign to an unscalarized D.1720 from a partly scalarized x
(so the aggregate assignment prevails) emit
x.i = x$i_8;
D.1720 = x;
instead. This allows the unfortunate copy in/out to be optimized
away (and thus its undefined behavior).
Basically whenever the aggregate copy will prevail prefer to
restore the RHS and reload the LHS components - just like we
do for aggregate uses/destinations of call statements.
I think we can still clean up the aggregate copy path a lot,
but that's for 4.8 (unless new bugs pop up).
Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
Richard.
2012-01-27 Richard Guenther <rguenther@suse.de>
PR tree-optimization/51528
* tree-sra.c (sra_modify_assign): Handle the default fallback
for aggregate assigns better.
* gcc.dg/torture/pr51528.c: New testcase.
Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c (revision 183616)
--- gcc/tree-sra.c (working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3060,3104 ****
}
else
{
! if (racc)
{
! if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
{
! if (dump_file)
! {
! fprintf (dump_file, "Removing load: ");
! print_gimple_stmt (dump_file, *stmt, 0, 0);
! }
!
! if (TREE_CODE (lhs) == SSA_NAME)
! {
! rhs = get_repl_default_def_ssa_name (racc);
! if (!useless_type_conversion_p (TREE_TYPE (lhs),
! TREE_TYPE (rhs)))
! rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
! TREE_TYPE (lhs), rhs);
! }
! else
! {
! if (racc->first_child)
! generate_subtree_copies (racc->first_child, lhs,
! racc->offset, 0, 0, gsi,
! false, false, loc);
!
! gcc_assert (*stmt == gsi_stmt (*gsi));
! unlink_stmt_vdef (*stmt);
! gsi_remove (gsi, true);
! sra_stats.deleted++;
! return SRA_AM_REMOVED;
! }
}
- else if (racc->first_child)
- generate_subtree_copies (racc->first_child, lhs, racc->offset,
- 0, 0, gsi, false, true, loc);
}
if (access_has_children_p (lacc))
! generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
! 0, 0, gsi, true, true, loc);
}
}
--- 3061,3124 ----
}
else
{
! if (racc
! && !racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
{
! if (dump_file)
! {
! fprintf (dump_file, "Removing load: ");
! print_gimple_stmt (dump_file, *stmt, 0, 0);
! }
!
! if (TREE_CODE (lhs) == SSA_NAME)
! {
! rhs = get_repl_default_def_ssa_name (racc);
! if (!useless_type_conversion_p (TREE_TYPE (lhs),
! TREE_TYPE (rhs)))
! rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
! TREE_TYPE (lhs), rhs);
! }
! else
{
! if (racc->first_child)
! generate_subtree_copies (racc->first_child, lhs,
! racc->offset, 0, 0, gsi,
! false, false, loc);
!
! gcc_assert (*stmt == gsi_stmt (*gsi));
! unlink_stmt_vdef (*stmt);
! gsi_remove (gsi, true);
! sra_stats.deleted++;
! return SRA_AM_REMOVED;
}
}
+
+ /* If there is no unscalarized data on the RHS replace
+ the aggregate copy by piecewise storing into the LHS. */
+ if (access_has_children_p (racc) && racc->grp_covered)
+ {
+ generate_subtree_copies (racc->first_child, lhs, racc->offset,
+ 0, 0, gsi, false, false, loc);
+ gcc_assert (!access_has_children_p (lacc));
+ gcc_assert (*stmt == gsi_stmt (*gsi));
+ unlink_stmt_vdef (*stmt);
+ gsi_remove (gsi, true);
+ sra_stats.deleted++;
+ return SRA_AM_REMOVED;
+ }
+
+ /* If there is unscalarized data on the RHS the aggregate
+ copy will prevail, so simply update the RHS from its
+ scalarized components. */
+ if (access_has_children_p (racc))
+ generate_subtree_copies (racc->first_child, racc->base,
+ 0, 0, 0, gsi, false, false, loc);
+
+ /* The aggregate copy has prevailed. Re-load the LHS components
+ from the LHS. */
if (access_has_children_p (lacc))
! generate_subtree_copies (lacc->first_child, lhs,
! 0, 0, 0, gsi, true, true, loc);
}
}
Index: gcc/testsuite/gcc.dg/torture/pr51528.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr51528.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr51528.c (revision 0)
***************
*** 0 ****
--- 1,46 ----
+ /* { dg-do run } */
+ /* { dg-options "-fno-early-inlining" } */
+
+ extern void abort (void);
+
+ union U
+ {
+ int i;
+ _Bool b;
+ };
+
+ _Bool gb;
+
+ void __attribute__ ((noinline))
+ use_bool (union U u)
+ {
+ gb = u.b;
+ }
+
+ union U
+ bar (void)
+ {
+ union U u;
+ u.i = 0xFFFE;
+ return u;
+ }
+
+ union U __attribute__ ((noinline))
+ foo (void)
+ {
+ union U u,v;
+
+ u.b = 1;
+ use_bool (u);
+ u = bar ();
+
+ return u;
+ }
+
+ int main (int argc, char **argv)
+ {
+ union U u = foo ();
+ if (u.i != 0xFFFE)
+ abort ();
+ return 0;
+ }
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Fix PR51528
2012-01-27 13:52 [PATCH] Fix PR51528 Richard Guenther
@ 2012-01-30 13:25 ` Richard Guenther
2012-01-31 9:44 ` Richard Guenther
0 siblings, 1 reply; 3+ messages in thread
From: Richard Guenther @ 2012-01-30 13:25 UTC (permalink / raw)
To: gcc-patches
On Fri, 27 Jan 2012, Richard Guenther wrote:
>
> This fixes PR51528 by hiding the issue that SRA generates
> copy in/out with a type not suitable for preserving the data
> (any non-mode-precision thing). It fixes it by instead of
> emitting
>
> x$i_8 = x.i;
> D.1720 = x;
> D.1720.i = x$i_8;
>
> for an assign to an unscalarized D.1720 from a partly scalarized x
> (so the aggregate assignment prevails) emit
>
> x.i = x$i_8;
> D.1720 = x;
>
> instead. This allows the unfortunate copy in/out to be optimized
> away (and thus its undefined behavior).
>
> Basically whenever the aggregate copy will prevail prefer to
> restore the RHS and reload the LHS components - just like we
> do for aggregate uses/destinations of call statements.
>
> I think we can still clean up the aggregate copy path a lot,
> but that's for 4.8 (unless new bugs pop up).
>
> Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
So it turns out this wasn't that easy and I botched up some
of the intricate execution flow in sra_modify_assign. As a
preparation for a smaller fix the following patch refactors
this function to be easier to understand and hopefully make
the real fix obvious in what it does.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2012-01-30 Richard Guenther <rguenther@suse.de>
PR tree-optimization/51528
* tree-sra.c (sra_modify_assign): Re-factor in preparation
for PR51528 fix.
Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c (revision 183694)
--- gcc/tree-sra.c (working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 2991,2996 ****
--- 2991,3006 ----
force_gimple_rhs = true;
sra_stats.exprs++;
}
+ else if (racc
+ && !access_has_children_p (racc)
+ && !racc->grp_to_be_replaced
+ && !racc->grp_unscalarized_data
+ && TREE_CODE (lhs) == SSA_NAME)
+ {
+ rhs = get_repl_default_def_ssa_name (racc);
+ modify_this_stmt = true;
+ sra_stats.exprs++;
+ }
if (modify_this_stmt)
{
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3067,3072 ****
--- 3077,3097 ----
generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0,
gsi, true, true, loc);
sra_stats.separate_lhs_rhs_handling++;
+
+ /* This gimplification must be done after generate_subtree_copies,
+ lest we insert the subtree copies in the middle of the gimplified
+ sequence. */
+ if (force_gimple_rhs)
+ rhs = force_gimple_operand_gsi (&orig_gsi, rhs, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ if (gimple_assign_rhs1 (*stmt) != rhs)
+ {
+ modify_this_stmt = true;
+ gimple_assign_set_rhs_from_tree (&orig_gsi, rhs);
+ gcc_assert (*stmt == gsi_stmt (orig_gsi));
+ }
+
+ return modify_this_stmt ? SRA_AM_MODIFIED : SRA_AM_NONE;
}
else
{
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3093,3153 ****
}
else
{
! if (racc)
{
! if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
{
! if (dump_file)
! {
! fprintf (dump_file, "Removing load: ");
! print_gimple_stmt (dump_file, *stmt, 0, 0);
! }
!
! if (TREE_CODE (lhs) == SSA_NAME)
! {
! rhs = get_repl_default_def_ssa_name (racc);
! if (!useless_type_conversion_p (TREE_TYPE (lhs),
! TREE_TYPE (rhs)))
! rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
! TREE_TYPE (lhs), rhs);
! }
! else
! {
! if (racc->first_child)
! generate_subtree_copies (racc->first_child, lhs,
! racc->offset, 0, 0, gsi,
! false, false, loc);
!
! gcc_assert (*stmt == gsi_stmt (*gsi));
! unlink_stmt_vdef (*stmt);
! gsi_remove (gsi, true);
! sra_stats.deleted++;
! return SRA_AM_REMOVED;
! }
}
! else if (racc->first_child)
! generate_subtree_copies (racc->first_child, lhs, racc->offset,
! 0, 0, gsi, false, true, loc);
}
if (access_has_children_p (lacc))
generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
0, 0, gsi, true, true, loc);
}
- }
! /* This gimplification must be done after generate_subtree_copies, lest we
! insert the subtree copies in the middle of the gimplified sequence. */
! if (force_gimple_rhs)
! rhs = force_gimple_operand_gsi (&orig_gsi, rhs, true, NULL_TREE,
! true, GSI_SAME_STMT);
! if (gimple_assign_rhs1 (*stmt) != rhs)
! {
! modify_this_stmt = true;
! gimple_assign_set_rhs_from_tree (&orig_gsi, rhs);
! gcc_assert (*stmt == gsi_stmt (orig_gsi));
}
-
- return modify_this_stmt ? SRA_AM_MODIFIED : SRA_AM_NONE;
}
/* Traverse the function body and all modifications as decided in
--- 3118,3150 ----
}
else
{
! if (access_has_children_p (racc)
! && !racc->grp_unscalarized_data)
{
! if (dump_file)
{
! fprintf (dump_file, "Removing load: ");
! print_gimple_stmt (dump_file, *stmt, 0, 0);
}
! generate_subtree_copies (racc->first_child, lhs,
! racc->offset, 0, 0, gsi,
! false, false, loc);
! gcc_assert (*stmt == gsi_stmt (*gsi));
! unlink_stmt_vdef (*stmt);
! gsi_remove (gsi, true);
! sra_stats.deleted++;
! return SRA_AM_REMOVED;
}
+ if (access_has_children_p (racc))
+ generate_subtree_copies (racc->first_child, lhs, racc->offset,
+ 0, 0, gsi, false, true, loc);
if (access_has_children_p (lacc))
generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
0, 0, gsi, true, true, loc);
}
! return SRA_AM_NONE;
}
}
/* Traverse the function body and all modifications as decided in
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Fix PR51528
2012-01-30 13:25 ` Richard Guenther
@ 2012-01-31 9:44 ` Richard Guenther
0 siblings, 0 replies; 3+ messages in thread
From: Richard Guenther @ 2012-01-31 9:44 UTC (permalink / raw)
To: gcc-patches
On Mon, 30 Jan 2012, Richard Guenther wrote:
> On Fri, 27 Jan 2012, Richard Guenther wrote:
>
> >
> > This fixes PR51528 by hiding the issue that SRA generates
> > copy in/out with a type not suitable for preserving the data
> > (any non-mode-precision thing). It fixes it by instead of
> > emitting
> >
> > x$i_8 = x.i;
> > D.1720 = x;
> > D.1720.i = x$i_8;
> >
> > for an assign to an unscalarized D.1720 from a partly scalarized x
> > (so the aggregate assignment prevails) emit
> >
> > x.i = x$i_8;
> > D.1720 = x;
> >
> > instead. This allows the unfortunate copy in/out to be optimized
> > away (and thus its undefined behavior).
> >
> > Basically whenever the aggregate copy will prevail prefer to
> > restore the RHS and reload the LHS components - just like we
> > do for aggregate uses/destinations of call statements.
> >
> > I think we can still clean up the aggregate copy path a lot,
> > but that's for 4.8 (unless new bugs pop up).
> >
> > Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
>
> So it turns out this wasn't that easy and I botched up some
> of the intricate execution flow in sra_modify_assign. As a
> preparation for a smaller fix the following patch refactors
> this function to be easier to understand and hopefully make
> the real fix obvious in what it does.
And this is the "real" fix - well, avoid the copy in/out we
currently generate which we are not able to optimize and instead
emit sth we are able to remove the redundant (and harmful) stores.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2012-01-30 Richard Guenther <rguenther@suse.de>
PR tree-optimization/51528
* tree-sra.c (sra_modify_assign): Avoid copy-in/out for aggregate
assigns.
* gcc.dg/torture/pr51528.c: New testcase.
Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c (revision 183720)
--- gcc/tree-sra.c (working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3135,3143 ****
sra_stats.deleted++;
return SRA_AM_REMOVED;
}
if (access_has_children_p (racc))
! generate_subtree_copies (racc->first_child, lhs, racc->offset,
! 0, 0, gsi, false, true, loc);
if (access_has_children_p (lacc))
generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
0, 0, gsi, true, true, loc);
--- 3135,3148 ----
sra_stats.deleted++;
return SRA_AM_REMOVED;
}
+ /* Restore the aggregate RHS from its components so the
+ prevailing aggregate copy does the right thing. */
if (access_has_children_p (racc))
! generate_subtree_copies (racc->first_child, racc->base, 0, 0, 0,
! gsi, false, false, loc);
! /* Re-load the components of the aggregate copy destination.
! But use the RHS aggregate to load from to expose more
! optimization opportunities. */
if (access_has_children_p (lacc))
generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
0, 0, gsi, true, true, loc);
Index: gcc/testsuite/gcc.dg/torture/pr51528.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr51528.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr51528.c (revision 0)
***************
*** 0 ****
--- 1,46 ----
+ /* { dg-do run } */
+ /* { dg-options "-fno-early-inlining" } */
+
+ extern void abort (void);
+
+ union U
+ {
+ int i;
+ _Bool b;
+ };
+
+ _Bool gb;
+
+ void __attribute__ ((noinline))
+ use_bool (union U u)
+ {
+ gb = u.b;
+ }
+
+ union U
+ bar (void)
+ {
+ union U u;
+ u.i = 0xFFFE;
+ return u;
+ }
+
+ union U __attribute__ ((noinline))
+ foo (void)
+ {
+ union U u,v;
+
+ u.b = 1;
+ use_bool (u);
+ u = bar ();
+
+ return u;
+ }
+
+ int main (int argc, char **argv)
+ {
+ union U u = foo ();
+ if (u.i != 0xFFFE)
+ abort ();
+ return 0;
+ }
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-01-31 9:44 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-27 13:52 [PATCH] Fix PR51528 Richard Guenther
2012-01-30 13:25 ` Richard Guenther
2012-01-31 9:44 ` Richard Guenther
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).