public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFC] Move ehcleanup pass to before early SRA
@ 2014-01-15  9:25 Eric Botcazou
  2014-01-15 11:23 ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Botcazou @ 2014-01-15  9:25 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 898 bytes --]

Hi,

we have run into optimization regressions in Ada caused by the interaction 
between the GIMPLE clobbers and SRA: on the one hand GIMPLE clobbers create 
artificial EH handlers for aggregate variables, on the other hand SRA refuses 
to scalarize objects that appear in a statement that can throw internally.  
The result is that GIMPLE clobbers block the scalarization of variables that 
used to be possible up to the 4.6 series.  Testcase attached, compile p.adb on 
x86-64 with -O2 -fdump-tree-ealias -fdump-tree-esra.

This can be solved by moving the ehcleanup pass to before early SRA in 
the pipeline.  It has a small but measurable positive effect on some of our 
benchmarks (with a 4.7-based compiler).  Tested on x86-64/Linux.

Thoughts?


2014-01-15  Eric Botcazou  <ebotcazou@adacore.com>

	* passes.def (pass_early_local_passes): Run first EH cleanup pass early.


-- 
Eric Botcazou

[-- Attachment #2: p.diff --]
[-- Type: text/x-patch, Size: 1029 bytes --]

Index: passes.def
===================================================================
--- passes.def	(revision 206563)
+++ passes.def	(working copy)
@@ -71,6 +71,10 @@ along with GCC; see the file COPYING3.
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
+	  /* SRA refuses to scalarize objects that appear in a statement
+	     that can throw internally so we need to cleanup the EH tree
+	     early to remove handlers that contain only clobbers.  */
+	  NEXT_PASS (pass_cleanup_eh);
 	  NEXT_PASS (pass_sra_early);
 	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_copy_prop);
@@ -79,7 +83,6 @@ along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_early_ipa_sra);
 	  NEXT_PASS (pass_tail_recursion);
 	  NEXT_PASS (pass_convert_switch);
-          NEXT_PASS (pass_cleanup_eh);
           NEXT_PASS (pass_profile);
           NEXT_PASS (pass_local_pure_const);
 	  /* Split functions creates parts that are not run through

[-- Attachment #3: p.adb --]
[-- Type: text/x-adasrc, Size: 302 bytes --]

with Decls_Pack_2;  use Decls_Pack_2;
with Decls_Support; use Decls_Support;
with Support;       use Support;

procedure P is
   package Decls_Pack_Private is new Decls_Pack_Private_G;
   use Decls_Pack_Private;
begin
   Assert (Get_Integer (Decls_Pack_Private.Local_Fun (Get_Private (1))) = 100);
end;

[-- Attachment #4: decls_pack_2.adb --]
[-- Type: text/x-adasrc, Size: 1678 bytes --]

package body Decls_Pack_2 is

   procedure Local_Swap (V1, V2 : in out Access_Integer) is
      Tmp : Access_Integer := V1;
   begin
      V1  := V2;
      V2  := Tmp;
   end Local_Swap;

   function Local_Fun (I : Integer) return Access_Const_Integer is
      Result : Access_Const_Integer := new Integer'(I);
   begin

      if I < 0 then
        Result := null;
      end if;

      return Result;
   end Local_Fun;


   package body Decls_Pack_Derived_Records_G is

      procedure Local_Swap (V1, V2 : in out Derived_Coordinate) is
         Tmp : Derived_Coordinate;
      begin
         if V1 /= V2 then
            Tmp := V1;
            V1  := V2;
            V2  := Tmp;
         end if;
      end Local_Swap;

      function Local_Fun (C1, C2 : Float) return Derived_Coordinate is
         Result : Derived_Coordinate;
      begin

         if C1 > 0.0 and then C2 > 0.0 then
           Result.X := C1;
           Result.Y := C2;
         end if;

         return Result;
      end Local_Fun;

   end Decls_Pack_Derived_Records_G;

   package body Decls_Pack_Private_G is

      procedure Local_Swap (V1, V2 : in out T_Private) is
         Tmp : T_Private := V1;
      begin
         V1 := V2;
         V2 := Tmp;
      end Local_Swap;

      function Local_Fun (Arg : T_Private) return T_Private is
         Result : T_Private;
      begin

         case Get_Integer (Arg) is
            when 1 =>
               Result := Get_Private (100);
            when 2 =>
               Result := T_Private_Zero;
            when others =>
               null;
         end case;

         return Result;
      end Local_Fun;

   end Decls_Pack_Private_G;

end Decls_Pack_2;

[-- Attachment #5: decls_pack_2.ads --]
[-- Type: text/x-adasrc, Size: 894 bytes --]

with Decls_Support; use Decls_Support;
with Support;       use Support;
package Decls_Pack_2 is

   Access_All_Integer_Var : Access_All_Integer := Integer_Var'Access;

   I : Integer := Identity (1);

   procedure Local_Swap (V1, V2 : in out Access_Integer);

   function Local_Fun (I : Integer) return Access_Const_Integer;

   generic
   package Decls_Pack_Derived_Records_G is

      Derived_Discrete_Coordinate_V : Derived_Discrete_Coordinate;

      procedure Local_Swap (V1, V2 : in out Derived_Coordinate);

      function Local_Fun (C1, C2 : Float) return Derived_Coordinate;
   end Decls_Pack_Derived_Records_G;

   generic
   package Decls_Pack_Private_G is

      T_Private_C : constant T_Private := T_Private_Zero;

      procedure Local_Swap (V1, V2 : in out T_Private);

      function Local_Fun (Arg : T_Private) return T_Private;
   end Decls_Pack_Private_G;

end Decls_Pack_2;

[-- Attachment #6: decls_support.ads --]
[-- Type: text/x-adasrc, Size: 1377 bytes --]

with Support; use Support;
package Decls_Support is

   type Week_Day is (Mon, Tue, Wen, Thu, Fri, Sat, Sun);

   subtype Index is Natural range 0 .. 100;

   type Var_String (Len : Index := 0) is record
      Data : String (1 .. Len);
   end record;

   type Coordinate is record
      X, Y : Float := 0.0;
   end record;

   Coordinate_Zero : constant Coordinate := (X => 0.0, Y => 0.0);

   type Discrete_Coordinate is record
      X : Integer := Identity (0);
      Y : Integer := Identity (0);
   end record;

   type Vector is array (Integer range <>) of Integer;

   type Matrix is array (Integer range <>, Integer range <>) of Integer;
   subtype Small_Matrix is Matrix (1 .. 2, 1 .. 2);

   type T_Private is private;
   T_Private_Zero : constant T_Private;

   function Get_Private (I : Integer) return T_Private;
   function Get_Integer (X : T_Private) return Integer;
   type Access_Integer is access Integer;
   type Access_All_Integer is access all Integer;
   type Access_Const_Integer is access constant Integer;
   type Access_Coordinate is access Coordinate;

   type Derived_Discrete_Coordinate is new Discrete_Coordinate;
   type Derived_Coordinate is new Coordinate;

   Integer_Var : aliased Integer := 1;

private
   type T_Private is record
      I : Integer := 0;
   end record;

   T_Private_Zero : constant T_Private := (I => 0);

end Decls_Support;

[-- Attachment #7: support.ads --]
[-- Type: text/x-adasrc, Size: 295 bytes --]

package Support is
   procedure Assert (Cond : Boolean);
   function Identity (X : Integer) return Integer;
   function Identity (B : Boolean) return Boolean;

   function Value (X : Integer) return Integer renames Identity;
   function Value (B : Boolean) return Boolean renames Identity;
end;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15  9:25 [RFC] Move ehcleanup pass to before early SRA Eric Botcazou
@ 2014-01-15 11:23 ` Richard Biener
  2014-01-15 12:06   ` Eric Botcazou
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Biener @ 2014-01-15 11:23 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: GCC Patches

On Wed, Jan 15, 2014 at 10:24 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
> Hi,
>
> we have run into optimization regressions in Ada caused by the interaction
> between the GIMPLE clobbers and SRA: on the one hand GIMPLE clobbers create
> artificial EH handlers for aggregate variables, on the other hand SRA refuses
> to scalarize objects that appear in a statement that can throw internally.
> The result is that GIMPLE clobbers block the scalarization of variables that
> used to be possible up to the 4.6 series.  Testcase attached, compile p.adb on
> x86-64 with -O2 -fdump-tree-ealias -fdump-tree-esra.
>
> This can be solved by moving the ehcleanup pass to before early SRA in
> the pipeline.  It has a small but measurable positive effect on some of our
> benchmarks (with a 4.7-based compiler).  Tested on x86-64/Linux.
>
> Thoughts?

What you want is tree-eh.c:optimize_clobbers, right?  Can't we
do this optimization during EH refactoring / lowering as well?

Also why does SRA refuse to scalarize here?  It will end up
scalarizing during regular opts as

<L1>:
  result = decls_support.get_private (100);
  result$i_12 = MEM[(struct decls_support__t_private *)&result];

where it can do the scalarization just fine if it inserts on all
non-EH successor edges (so not doing the scalarization only
if there is more than one non-EH successor edge would make
sense).

The current placement of EH cleanup is to make it catch
all opportunities before early inlining sees the function as
callee.  And I guess both FRE and DCE will expose quite
some EH cleanup opportunities especially with non-call-exceptions.

Richard.

>
> 2014-01-15  Eric Botcazou  <ebotcazou@adacore.com>
>
>         * passes.def (pass_early_local_passes): Run first EH cleanup pass early.
>
>
> --
> Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15 11:23 ` Richard Biener
@ 2014-01-15 12:06   ` Eric Botcazou
  2014-01-15 14:39     ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Botcazou @ 2014-01-15 12:06 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

> What you want is tree-eh.c:optimize_clobbers, right?  Can't we
> do this optimization during EH refactoring / lowering as well?

We need the SSA form for sink_clobbers.

> Also why does SRA refuse to scalarize here?

Because of the EH handler and the following predicate:

/* Disqualify LHS and RHS for scalarization if STMT must end its basic block
   in modes in which it matters, return true iff they have been disqualified.
   RHS may be NULL, in that case ignore it.  If we scalarize an aggregate in
   intra-SRA we may need to add statements after each statement.  This is not
   possible if a statement unconditionally has to end the basic block.  */
static bool
disqualify_ops_if_throwing_stmt (gimple stmt, tree lhs, tree rhs)
{
  if ((sra_mode == SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA)
      && (stmt_can_throw_internal (stmt) || stmt_ends_bb_p (stmt)))
    {
      disqualify_base_of_expr (lhs, "LHS of a throwing stmt.");
      if (rhs)
	disqualify_base_of_expr (rhs, "RHS of a throwing stmt.");
      return true;
    }
  return false;
}

> It will end up scalarizing during regular opts as
> 
> <L1>:
>   result = decls_support.get_private (100);
>   result$i_12 = MEM[(struct decls_support__t_private *)&result];

Yes, it's scalarized during SRA but not ESRA because there is an ehcleanup 
pass in-between.  It used to be scalarized during ESRA up to 4.6.x.

> The current placement of EH cleanup is to make it catch
> all opportunities before early inlining sees the function as
> callee.  And I guess both FRE and DCE will expose quite
> some EH cleanup opportunities especially with non-call-exceptions.

Early inlining or regular inlining (I'm always slightly confused here)?

p.adb.003t.original
p.adb.004t.gimple
p.adb.005t.nested
p.adb.006t.omplower
p.adb.007t.lower
p.adb.009t.ehopt
p.adb.010t.eh
p.adb.011t.cfg
p.adb.015t.ssa
p.adb.017t.inline_param1
p.adb.018t.einline
p.adb.019t.early_optimizations
p.adb.020t.copyrename1
p.adb.021t.ccp1
p.adb.022t.forwprop1
p.adb.023t.ealias
p.adb.024t.esra
p.adb.025t.fre1
p.adb.026t.copyprop1
p.adb.027t.mergephi1
p.adb.028t.cddce1
p.adb.029t.eipa_sra
p.adb.030t.tailr1
p.adb.031t.switchconv
p.adb.032t.ehcleanup1
p.adb.033t.profile_estimate
p.adb.034t.local-pure-const1
p.adb.035t.fnsplit
p.adb.036t.release_ssa
p.adb.037t.inline_param2
p.adb.054t.copyrename2

Then we could add another ehcleanup pass before early inlining, i.e. between 
015t.ssa and 017t.inline_param1, to achieve the same goal.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15 12:06   ` Eric Botcazou
@ 2014-01-15 14:39     ` Richard Biener
  2014-01-15 16:39       ` Martin Jambor
  2014-01-15 17:38       ` Eric Botcazou
  0 siblings, 2 replies; 10+ messages in thread
From: Richard Biener @ 2014-01-15 14:39 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: GCC Patches

On Wed, Jan 15, 2014 at 1:06 PM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> What you want is tree-eh.c:optimize_clobbers, right?  Can't we
>> do this optimization during EH refactoring / lowering as well?
>
> We need the SSA form for sink_clobbers.

I know, I'm saying it may be possible to implement an equivalent
optimization without SSA form.

>> Also why does SRA refuse to scalarize here?
>
> Because of the EH handler and the following predicate:
>
> /* Disqualify LHS and RHS for scalarization if STMT must end its basic block
>    in modes in which it matters, return true iff they have been disqualified.
>    RHS may be NULL, in that case ignore it.  If we scalarize an aggregate in
>    intra-SRA we may need to add statements after each statement.  This is not
>    possible if a statement unconditionally has to end the basic block.  */
> static bool
> disqualify_ops_if_throwing_stmt (gimple stmt, tree lhs, tree rhs)
> {
>   if ((sra_mode == SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA)
>       && (stmt_can_throw_internal (stmt) || stmt_ends_bb_p (stmt)))
>     {
>       disqualify_base_of_expr (lhs, "LHS of a throwing stmt.");
>       if (rhs)
>         disqualify_base_of_expr (rhs, "RHS of a throwing stmt.");
>       return true;
>     }
>   return false;
> }
>
>> It will end up scalarizing during regular opts as
>>
>> <L1>:
>>   result = decls_support.get_private (100);
>>   result$i_12 = MEM[(struct decls_support__t_private *)&result];
>
> Yes, it's scalarized during SRA but not ESRA because there is an ehcleanup
> pass in-between.  It used to be scalarized during ESRA up to 4.6.x.

I'm saying even ESRA should be able to scalarize it just fine.  It just
needs to be careful where to insert the loads from the aggregate result
(on the single outgoing non-EH edge).

Wouldn't that fix your issue?

>> The current placement of EH cleanup is to make it catch
>> all opportunities before early inlining sees the function as
>> callee.  And I guess both FRE and DCE will expose quite
>> some EH cleanup opportunities especially with non-call-exceptions.
>
> Early inlining or regular inlining (I'm always slightly confused here)?

Early inlining.

> p.adb.003t.original
> p.adb.004t.gimple
> p.adb.005t.nested
> p.adb.006t.omplower
> p.adb.007t.lower
> p.adb.009t.ehopt
> p.adb.010t.eh
> p.adb.011t.cfg
> p.adb.015t.ssa
> p.adb.017t.inline_param1
> p.adb.018t.einline
> p.adb.019t.early_optimizations
> p.adb.020t.copyrename1
> p.adb.021t.ccp1
> p.adb.022t.forwprop1
> p.adb.023t.ealias
> p.adb.024t.esra
> p.adb.025t.fre1
> p.adb.026t.copyprop1
> p.adb.027t.mergephi1
> p.adb.028t.cddce1
> p.adb.029t.eipa_sra
> p.adb.030t.tailr1
> p.adb.031t.switchconv
> p.adb.032t.ehcleanup1
> p.adb.033t.profile_estimate
> p.adb.034t.local-pure-const1
> p.adb.035t.fnsplit
> p.adb.036t.release_ssa
> p.adb.037t.inline_param2
> p.adb.054t.copyrename2
>
> Then we could add another ehcleanup pass before early inlining, i.e. between
> 015t.ssa and 017t.inline_param1, to achieve the same goal.
>
> --
> Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15 14:39     ` Richard Biener
@ 2014-01-15 16:39       ` Martin Jambor
  2014-01-15 17:38       ` Eric Botcazou
  1 sibling, 0 replies; 10+ messages in thread
From: Martin Jambor @ 2014-01-15 16:39 UTC (permalink / raw)
  To: Richard Biener; +Cc: Eric Botcazou, GCC Patches

Hi,

On Wed, Jan 15, 2014 at 03:39:09PM +0100, Richard Biener wrote:
> On Wed, Jan 15, 2014 at 1:06 PM, Eric Botcazou <ebotcazou@adacore.com> wrote:
> > Yes, it's scalarized during SRA but not ESRA because there is an ehcleanup
> > pass in-between.  It used to be scalarized during ESRA up to 4.6.x.
> 
> I'm saying even ESRA should be able to scalarize it just fine.  It just
> needs to be careful where to insert the loads from the aggregate result
> (on the single outgoing non-EH edge).
> 
> Wouldn't that fix your issue?

Eric, more specifically, would the (untested proof of concept) patch
below help?

Martin


diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 4992b4c..a4c0e21 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1142,6 +1142,26 @@ build_access_from_expr (tree expr, gimple stmt, bool write)
   return false;
 }
 
+/* Return the single non-EH successor edge of BB or NULL if there is none or
+   more than one.  */
+
+static edge
+single_non_eh_succ (basic_block bb)
+{
+  edge e, res = NULL;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    if (!(e->flags & EDGE_EH))
+      {
+	if (res)
+	  return NULL;
+	res = e;
+      }
+
+  return res;
+}
+
 /* Disqualify LHS and RHS for scalarization if STMT must end its basic block in
    modes in which it matters, return true iff they have been disqualified.  RHS
    may be NULL, in that case ignore it.  If we scalarize an aggregate in
@@ -1153,6 +1173,9 @@ disqualify_ops_if_throwing_stmt (gimple stmt, tree lhs, tree rhs)
   if ((sra_mode == SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA)
       && (stmt_can_throw_internal (stmt) || stmt_ends_bb_p (stmt)))
     {
+      if (single_non_eh_succ (gimple_bb (stmt)))
+	return false;
+
       disqualify_base_of_expr (lhs, "LHS of a throwing stmt.");
       if (rhs)
 	disqualify_base_of_expr (rhs, "RHS of a throwing stmt.");
@@ -2720,6 +2743,19 @@ get_access_for_expr (tree expr)
   return get_var_base_offset_size_access (base, offset, max_size);
 }
 
+/* Split the single non-EH successor edge from BB (there must be exactly one)
+   and return a gsi to the new block.  */
+
+static gimple_stmt_iterator
+gsi_for_eh_followups (basic_block bb)
+{
+  edge e = single_non_eh_succ (bb);
+  gcc_assert (e);
+
+  basic_block new_bb = split_edge (e);
+  return gsi_start_bb (new_bb);
+}
+
 /* Replace the expression EXPR with a scalar replacement if there is one and
    generate other statements to do type conversion or subtree copying if
    necessary.  GSI is used to place newly created statements, WRITE is true if
@@ -2749,6 +2785,14 @@ sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write)
   type = TREE_TYPE (*expr);
 
   loc = gimple_location (gsi_stmt (*gsi));
+  gimple_stmt_iterator alt_gsi = gsi_none ();
+  if (write && (stmt_can_throw_internal (gsi_stmt (*gsi))
+		|| stmt_ends_bb_p (gsi_stmt (*gsi))))
+    {
+      alt_gsi = gsi_for_eh_followups (gsi_bb (*gsi));
+      gsi = &alt_gsi;
+    }
+
   if (access->grp_to_be_replaced)
     {
       tree repl = get_access_replacement (access);
@@ -3208,14 +3252,25 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
   if (modify_this_stmt
       || gimple_has_volatile_ops (*stmt)
       || contains_vce_or_bfcref_p (rhs)
-      || contains_vce_or_bfcref_p (lhs))
+      || contains_vce_or_bfcref_p (lhs)
+      || stmt_can_throw_internal (*stmt)
+      || stmt_ends_bb_p (*stmt))
     {
       if (access_has_children_p (racc))
 	generate_subtree_copies (racc->first_child, racc->base, 0, 0, 0,
 				 gsi, false, false, loc);
       if (access_has_children_p (lacc))
-	generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0,
-				 gsi, true, true, loc);
+	{
+	  gimple_stmt_iterator alt_gsi = gsi_none ();
+	  if (stmt_can_throw_internal (*stmt)
+	      || stmt_ends_bb_p (*stmt))
+	    {
+	      alt_gsi = gsi_for_eh_followups (gsi_bb (*gsi));
+	      gsi = &alt_gsi;
+	    }
+	  generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0,
+				   gsi, true, true, loc);
+	}
       sra_stats.separate_lhs_rhs_handling++;
 
       /* This gimplification must be done after generate_subtree_copies,
@@ -3309,8 +3364,13 @@ sra_modify_function_body (void)
 {
   bool cfg_changed = false;
   basic_block bb;
+  vec<basic_block> bb_list = vNULL;
 
   FOR_EACH_BB_FN (bb, cfun)
+    bb_list.safe_push (bb);
+
+  unsigned bb_vec_index;
+  FOR_EACH_VEC_ELT (bb_list, bb_vec_index, bb)
     {
       gimple_stmt_iterator gsi = gsi_start_bb (bb);
       while (!gsi_end_p (gsi))
@@ -3379,6 +3439,7 @@ sra_modify_function_body (void)
 	}
     }
 
+  bb_list.release ();
   return cfg_changed;
 }
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15 14:39     ` Richard Biener
  2014-01-15 16:39       ` Martin Jambor
@ 2014-01-15 17:38       ` Eric Botcazou
  2014-01-15 18:04         ` Martin Jambor
  2014-01-16 11:30         ` Richard Biener
  1 sibling, 2 replies; 10+ messages in thread
From: Eric Botcazou @ 2014-01-15 17:38 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

> I know, I'm saying it may be possible to implement an equivalent
> optimization without SSA form.

Sure, but what would be the point of this duplication exactly?

> I'm saying even ESRA should be able to scalarize it just fine.  It just
> needs to be careful where to insert the loads from the aggregate result
> (on the single outgoing non-EH edge).
> 
> Wouldn't that fix your issue?

Very likely, yes, but there might be other passes with the same problem before 
the first EH cleanup pass (e.g. FRE, DCE, etc).

> Early inlining.

Are you sure?  I don't see how 032t.ehcleanup1 can have any effects on 
p.adb.018t.einline (unless something mysterious happens here).

> > p.adb.003t.original
> > p.adb.004t.gimple
> > p.adb.005t.nested
> > p.adb.006t.omplower
> > p.adb.007t.lower
> > p.adb.009t.ehopt
> > p.adb.010t.eh
> > p.adb.011t.cfg
> > p.adb.015t.ssa
> > p.adb.017t.inline_param1
> > p.adb.018t.einline
> > p.adb.019t.early_optimizations
> > p.adb.020t.copyrename1
> > p.adb.021t.ccp1
> > p.adb.022t.forwprop1
> > p.adb.023t.ealias
> > p.adb.024t.esra
> > p.adb.025t.fre1
> > p.adb.026t.copyprop1
> > p.adb.027t.mergephi1
> > p.adb.028t.cddce1
> > p.adb.029t.eipa_sra
> > p.adb.030t.tailr1
> > p.adb.031t.switchconv
> > p.adb.032t.ehcleanup1
> > p.adb.033t.profile_estimate
> > p.adb.034t.local-pure-const1
> > p.adb.035t.fnsplit
> > p.adb.036t.release_ssa
> > p.adb.037t.inline_param2
> > p.adb.054t.copyrename2

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15 17:38       ` Eric Botcazou
@ 2014-01-15 18:04         ` Martin Jambor
  2014-01-16 11:30         ` Richard Biener
  1 sibling, 0 replies; 10+ messages in thread
From: Martin Jambor @ 2014-01-15 18:04 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

Hi,

On Wed, Jan 15, 2014 at 06:37:30PM +0100, Eric Botcazou wrote:
> > I know, I'm saying it may be possible to implement an equivalent
> > optimization without SSA form.
> 
> Sure, but what would be the point of this duplication exactly?
> 
> > I'm saying even ESRA should be able to scalarize it just fine.  It just
> > needs to be careful where to insert the loads from the aggregate result
> > (on the single outgoing non-EH edge).
> > 
> > Wouldn't that fix your issue?
> 
> Very likely, yes, but there might be other passes with the same problem before 
> the first EH cleanup pass (e.g. FRE, DCE, etc).
> 
> > Early inlining.
> 
> Are you sure?  I don't see how 032t.ehcleanup1 can have any effects on 
> p.adb.018t.einline (unless something mysterious happens here).

We early-optimize functions in topological order so generally (except
for SCCs), so when 018t.einline runs all the callees have already been
early-optimized, and it is really the callees it looks at most.

(But it of course prepares the function for ipa inlining analysis too
since the standalone functions will be untouched.)

Martin

> 
> > > p.adb.003t.original
> > > p.adb.004t.gimple
> > > p.adb.005t.nested
> > > p.adb.006t.omplower
> > > p.adb.007t.lower
> > > p.adb.009t.ehopt
> > > p.adb.010t.eh
> > > p.adb.011t.cfg
> > > p.adb.015t.ssa
> > > p.adb.017t.inline_param1
> > > p.adb.018t.einline
> > > p.adb.019t.early_optimizations
> > > p.adb.020t.copyrename1
> > > p.adb.021t.ccp1
> > > p.adb.022t.forwprop1
> > > p.adb.023t.ealias
> > > p.adb.024t.esra
> > > p.adb.025t.fre1
> > > p.adb.026t.copyprop1
> > > p.adb.027t.mergephi1
> > > p.adb.028t.cddce1
> > > p.adb.029t.eipa_sra
> > > p.adb.030t.tailr1
> > > p.adb.031t.switchconv
> > > p.adb.032t.ehcleanup1
> > > p.adb.033t.profile_estimate
> > > p.adb.034t.local-pure-const1
> > > p.adb.035t.fnsplit
> > > p.adb.036t.release_ssa
> > > p.adb.037t.inline_param2
> > > p.adb.054t.copyrename2
> 
> -- 
> Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2014-01-15 17:38       ` Eric Botcazou
  2014-01-15 18:04         ` Martin Jambor
@ 2014-01-16 11:30         ` Richard Biener
  1 sibling, 0 replies; 10+ messages in thread
From: Richard Biener @ 2014-01-16 11:30 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: GCC Patches

On Wed, Jan 15, 2014 at 6:37 PM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> I know, I'm saying it may be possible to implement an equivalent
>> optimization without SSA form.
>
> Sure, but what would be the point of this duplication exactly?

To avoid ...

>> I'm saying even ESRA should be able to scalarize it just fine.  It just
>> needs to be careful where to insert the loads from the aggregate result
>> (on the single outgoing non-EH edge).
>>
>> Wouldn't that fix your issue?
>
> Very likely, yes, but there might be other passes with the same problem before
> the first EH cleanup pass (e.g. FRE, DCE, etc).

... these and avoid adding another EH cleanup pass.

>> Early inlining.
>
> Are you sure?  I don't see how 032t.ehcleanup1 can have any effects on
> p.adb.018t.einline (unless something mysterious happens here).
>
>> > p.adb.003t.original
>> > p.adb.004t.gimple
>> > p.adb.005t.nested
>> > p.adb.006t.omplower
>> > p.adb.007t.lower
>> > p.adb.009t.ehopt
>> > p.adb.010t.eh
>> > p.adb.011t.cfg
>> > p.adb.015t.ssa
>> > p.adb.017t.inline_param1
>> > p.adb.018t.einline
>> > p.adb.019t.early_optimizations
>> > p.adb.020t.copyrename1
>> > p.adb.021t.ccp1
>> > p.adb.022t.forwprop1
>> > p.adb.023t.ealias
>> > p.adb.024t.esra
>> > p.adb.025t.fre1
>> > p.adb.026t.copyprop1
>> > p.adb.027t.mergephi1
>> > p.adb.028t.cddce1
>> > p.adb.029t.eipa_sra
>> > p.adb.030t.tailr1
>> > p.adb.031t.switchconv
>> > p.adb.032t.ehcleanup1
>> > p.adb.033t.profile_estimate
>> > p.adb.034t.local-pure-const1
>> > p.adb.035t.fnsplit
>> > p.adb.036t.release_ssa
>> > p.adb.037t.inline_param2
>> > p.adb.054t.copyrename2
>
> --
> Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] Move ehcleanup pass to before early SRA
  2012-09-23 11:00 Eric Botcazou
@ 2012-09-24 12:49 ` Richard Guenther
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Guenther @ 2012-09-24 12:49 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On Sun, Sep 23, 2012 at 10:46 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
> Hi,
>
> we have run into optimization regressions in Ada caused by the interaction
> between the new GIMPLE clobbers and SRA: on the one hand GIMPLE clobbers
> create artificial EH handlers for aggregate variables, on the other hand SRA
> refuses to scalarize objects that appear in a statement that can throw
> internally.  The result is that GIMPLE clobbers block the scalarization of
> variables that used to be possible up to the 4.6 series.
>
> Therefore the attached patch moves the ehcleanup pass to before early SRA in
> the pipeline.  It has a small but measurable positive effect on some of our
> benchmarks (with a 4.7-based compiler).  Tested on x86-64/Linux.

Hmm.  I think cleanup EH is scheduled late to help early inlining.  Do you
have a testcase where I can quickly look at the issue you describe?

Thanks,
Richard.

>
> 2012-09-23  Eric Botcazou  <ebotcazou@adacore.com>
>
>         * passes.c (init_optimization_passes): Run first EH cleanup pass early.
>
>
> --
> Eric Botcazou

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RFC] Move ehcleanup pass to before early SRA
@ 2012-09-23 11:00 Eric Botcazou
  2012-09-24 12:49 ` Richard Guenther
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Botcazou @ 2012-09-23 11:00 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 805 bytes --]

Hi,

we have run into optimization regressions in Ada caused by the interaction 
between the new GIMPLE clobbers and SRA: on the one hand GIMPLE clobbers 
create artificial EH handlers for aggregate variables, on the other hand SRA 
refuses to scalarize objects that appear in a statement that can throw 
internally.  The result is that GIMPLE clobbers block the scalarization of 
variables that used to be possible up to the 4.6 series.

Therefore the attached patch moves the ehcleanup pass to before early SRA in 
the pipeline.  It has a small but measurable positive effect on some of our 
benchmarks (with a 4.7-based compiler).  Tested on x86-64/Linux.


2012-09-23  Eric Botcazou  <ebotcazou@adacore.com>

	* passes.c (init_optimization_passes): Run first EH cleanup pass early.


-- 
Eric Botcazou

[-- Attachment #2: p.diff --]
[-- Type: text/x-patch, Size: 1018 bytes --]

Index: passes.c
===================================================================
--- passes.c	(revision 191365)
+++ passes.c	(working copy)
@@ -1303,6 +1303,10 @@ init_optimization_passes (void)
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
+	  /* SRA refuses to scalarize objects that appear in a statement
+	     that can throw internally so we need to cleanup the EH tree
+	     early to remove handlers that contain only clobbers.  */
+	  NEXT_PASS (pass_cleanup_eh);
 	  NEXT_PASS (pass_sra_early);
 	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_copy_prop);
@@ -1311,7 +1315,6 @@ init_optimization_passes (void)
 	  NEXT_PASS (pass_early_ipa_sra);
 	  NEXT_PASS (pass_tail_recursion);
 	  NEXT_PASS (pass_convert_switch);
-          NEXT_PASS (pass_cleanup_eh);
           NEXT_PASS (pass_profile);
           NEXT_PASS (pass_local_pure_const);
 	  /* Split functions creates parts that are not run through

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2014-01-16 11:30 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-15  9:25 [RFC] Move ehcleanup pass to before early SRA Eric Botcazou
2014-01-15 11:23 ` Richard Biener
2014-01-15 12:06   ` Eric Botcazou
2014-01-15 14:39     ` Richard Biener
2014-01-15 16:39       ` Martin Jambor
2014-01-15 17:38       ` Eric Botcazou
2014-01-15 18:04         ` Martin Jambor
2014-01-16 11:30         ` Richard Biener
  -- strict thread matches above, loose matches on Subject: below --
2012-09-23 11:00 Eric Botcazou
2012-09-24 12:49 ` Richard Guenther

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).