public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 4/7] Break up determine_known_aggregate_parts
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  2014-05-26  0:54   ` Jan Hubicka
  2014-05-21 13:31 ` [PATCH 6/7] Real aggregate contents merge and application of deltas Martin Jambor
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: known_agg_parts_changes.diff --]
[-- Type: text/plain, Size: 6879 bytes --]

Hi,

the main purpose of this patch is to break up function
determine_known_aggregate_parts so that the next patch can use the
standalone bits and to make the changes slightly easier for review.

However, this patch also removes some of the offset checks which Honza
correctly thought superfluous and even possibly filtering out useful
information.
 
Bootstrapped and tested and LTO-bootstrapped on x86_64-linux.
OK for trunk after the preceding patches get in?

Thanks,

Martin


2014-02-19  Martin Jambor  <mjambor@suse.cz>

	* ipa-prop.c (get_place_in_agg_contents_list): New function.
	(build_agg_jump_func_from_list): Likewise.
	(determine_known_aggregate_parts): Renamed to
	determine_locally_known_aggregate_parts.  Moved some functionality
	to the two functions above, removed bound checks.

Index: src/gcc/ipa-prop.c
===================================================================
--- src.orig/gcc/ipa-prop.c
+++ src/gcc/ipa-prop.c
@@ -1679,14 +1679,72 @@ struct ipa_known_agg_contents_list
   struct ipa_known_agg_contents_list *next;
 };
 
+/* Find the proper place in linked list of ipa_known_agg_contents_list
+   structures where to put a new one with the given LHS_OFFSET and LHS_SIZE,
+   unless there is a partial overlap, in which case return NULL, or such
+   element is already there, in which case set *ALREADY_THERE to true.  */
+
+static struct ipa_known_agg_contents_list **
+get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list,
+				HOST_WIDE_INT lhs_offset,
+				HOST_WIDE_INT lhs_size,
+				bool *already_there)
+{
+  struct ipa_known_agg_contents_list **p = list;
+  while (*p && (*p)->offset < lhs_offset)
+    {
+      if ((*p)->offset + (*p)->size > lhs_offset)
+	return NULL;
+      p = &(*p)->next;
+    }
+
+  if (*p && (*p)->offset < lhs_offset + lhs_size)
+    {
+      if ((*p)->offset == lhs_offset && (*p)->size == lhs_size)
+	/* We already know this value is subsequently overwritten with
+	   something else.  */
+	*already_there = true;
+      else
+	/* Otherwise this is a partial overlap which we cannot
+	   represent.  */
+	return NULL;
+    }
+  return p;
+}
+
+/* Build aggregate jump function from LIST, assuming there are exactly
+   CONST_COUNT constant entries there and that the offset of the passed argument
+   is ARG_OFFSET and store it into JFUNC.  */
+
+static void
+build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
+			       int const_count, HOST_WIDE_INT arg_offset,
+			       struct ipa_jump_func *jfunc)
+{
+  vec_alloc (jfunc->agg.items, const_count);
+  while (list)
+    {
+      if (list->constant)
+	{
+	  struct ipa_agg_jf_item item;
+	  item.offset = list->offset - arg_offset;
+	  gcc_assert ((item.offset % BITS_PER_UNIT) == 0);
+	  item.value = unshare_expr_without_location (list->constant);
+	  jfunc->agg.items->quick_push (item);
+	}
+      list = list->next;
+    }
+}
+
 /* Traverse statements from CALL backwards, scanning whether an aggregate given
    in ARG is filled in with constant values.  ARG can either be an aggregate
-   expression or a pointer to an aggregate.  ARG_TYPE is the type of the aggregate.
-   JFUNC is the jump function into which the constants are subsequently stored.  */
+   expression or a pointer to an aggregate.  ARG_TYPE is the type of the
+   aggregate.  JFUNC is the jump function into which the constants are
+   subsequently stored.  */
 
 static void
-determine_known_aggregate_parts (gimple call, tree arg, tree arg_type,
-				 struct ipa_jump_func *jfunc)
+determine_locally_known_aggregate_parts (gimple call, tree arg, tree arg_type,
+					 struct ipa_jump_func *jfunc)
 {
   struct ipa_known_agg_contents_list *list = NULL;
   int item_count = 0, const_count = 0;
@@ -1728,10 +1786,8 @@ determine_known_aggregate_parts (gimple
 	    return;
 	  if (DECL_P (arg_base))
 	    {
-	      tree size;
 	      check_ref = false;
-	      size = build_int_cst (integer_type_node, arg_size);
-	      ao_ref_init_from_ptr_and_size (&r, arg_base, size);
+	      ao_ref_init (&r, arg_base);
 	    }
 	  else
 	    return;
@@ -1769,7 +1825,6 @@ determine_known_aggregate_parts (gimple
       gimple stmt = gsi_stmt (gsi);
       HOST_WIDE_INT lhs_offset, lhs_size, lhs_max_size;
       tree lhs, rhs, lhs_base;
-      bool partial_overlap;
 
       if (!stmt_may_clobber_ref_p_1 (stmt, &r))
 	continue;
@@ -1786,11 +1841,7 @@ determine_known_aggregate_parts (gimple
       lhs_base = get_ref_base_and_extent (lhs, &lhs_offset, &lhs_size,
 					  &lhs_max_size);
       if (lhs_max_size == -1
-	  || lhs_max_size != lhs_size
-	  || (lhs_offset < arg_offset
-	      && lhs_offset + lhs_size > arg_offset)
-	  || (lhs_offset < arg_offset + arg_size
-	      && lhs_offset + lhs_size > arg_offset + arg_size))
+	  || lhs_max_size != lhs_size)
 	break;
 
       if (check_ref)
@@ -1808,34 +1859,13 @@ determine_known_aggregate_parts (gimple
 	    break;
 	}
 
-      if (lhs_offset + lhs_size < arg_offset
-	  || lhs_offset >= (arg_offset + arg_size))
-	continue;
-
-      partial_overlap = false;
-      p = &list;
-      while (*p && (*p)->offset < lhs_offset)
-	{
-	  if ((*p)->offset + (*p)->size > lhs_offset)
-	    {
-	      partial_overlap = true;
-	      break;
-	    }
-	  p = &(*p)->next;
-	}
-      if (partial_overlap)
+      bool already_there = false;
+      p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size,
+					  &already_there);
+      if (!p)
 	break;
-      if (*p && (*p)->offset < lhs_offset + lhs_size)
-	{
-	  if ((*p)->offset == lhs_offset && (*p)->size == lhs_size)
-	    /* We already know this value is subsequently overwritten with
-	       something else.  */
-	    continue;
-	  else
-	    /* Otherwise this is a partial overlap which we cannot
-	       represent.  */
-	    break;
-	}
+      if (already_there)
+	continue;
 
       rhs = get_ssa_def_if_simple_copy (rhs);
       n = XALLOCA (struct ipa_known_agg_contents_list);
@@ -1864,19 +1894,7 @@ determine_known_aggregate_parts (gimple
   if (const_count)
     {
       jfunc->agg.by_ref = by_ref;
-      vec_alloc (jfunc->agg.items, const_count);
-      while (list)
-	{
-	  if (list->constant)
-	    {
-	      struct ipa_agg_jf_item item;
-	      item.offset = list->offset - arg_offset;
-	      gcc_assert ((item.offset % BITS_PER_UNIT) == 0);
-	      item.value = unshare_expr_without_location (list->constant);
-	      jfunc->agg.items->quick_push (item);
-	    }
-	  list = list->next;
-	}
+      build_agg_jump_func_from_list (list, const_count, arg_offset, jfunc);
     }
 }
 
@@ -2113,7 +2131,7 @@ ipa_compute_jump_functions_for_edge (str
 	  && jfunc->type != IPA_JF_ANCESTOR
 	  && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
 	      || POINTER_TYPE_P (param_type)))
-	determine_known_aggregate_parts (call, arg, param_type, jfunc);
+	determine_locally_known_aggregate_parts (call, arg, param_type, jfunc);
     }
 }
 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 6/7] Real aggregate contents merge and application of deltas
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
  2014-05-21 13:31 ` [PATCH 4/7] Break up determine_known_aggregate_parts Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  2014-05-21 13:31 ` [PATCH 3/7] IPA-CP escape and clobber analysis Martin Jambor
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: real_merge_agg_contents.diff --]
[-- Type: text/plain, Size: 16528 bytes --]

Hi,

the previous patch used a very simplistic merging and delta
application of aggregate contents.  This patch replaces it with a real
one.

Because there are potentially many basic blocks and the contents of a
particular aggregate are very likely to be the same for many of them,
the description of the contents are shared among BBs.  In order to
facilitate this, each block knows whether it owns a particular
description and thus whether it can change it in place or has to copy
the list (list lengths are capped at PARAM_IPA_MAX_AGG_ITEMS).

I have gathered some information on the count of aggregate jump
functions (again I should have counted only pointer and aggregate
parameters but I realized it too late, other scalars arguments cannot
have jump function aggregate items so it needlessly makes the
per-jump-function columns even more puny).  Everything is with -Ofast
-flto:

 |                    |           | Unpatched |      |           |      |          |
 | Testcase           |      Jump | Aggregate |  per |   Patched |  per | increase |
 |                    | functions |     items |   jf | agg items |   jf |        % |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | FF libxul.so       |   1756545 |     82066 | 0.05 |    102779 | 0.06 |    25.24 |
 | Tramp 3D           |     19421 |       330 | 0.02 |       577 | 0.03 |    74.85 |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | perlbench          |     28169 |        76 | 0.00 |        85 | 0.00 |    11.84 |
 | bzip2              |       858 |         0 | 0.00 |         0 | 0.00 |     0.00 |
 | gcc                |    105977 |       156 | 0.00 |       166 | 0.00 |     6.41 |
 | mcf                |       161 |         1 | 0.01 |         2 | 0.01 |   100.00 |
 | gobmk              |     28843 |        41 | 0.00 |        42 | 0.00 |     2.44 |
 | hmmer              |      5122 |        11 | 0.00 |        14 | 0.00 |    27.27 |
 | sjeng              |      2089 |         6 | 0.00 |         6 | 0.00 |     0.00 |
 | libquantum         |       820 |         0 | 0.00 |         0 | 0.00 |     0.00 |
 | h264ref            |      8316 |        30 | 0.00 |        31 | 0.00 |     3.33 |
 | omnetpp            |       738 |        19 | 0.03 |        20 | 0.03 |     5.26 |
 | xalancbmk          |    121014 |      1220 | 0.01 |      1535 | 0.01 |    25.82 |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | bwaves             |       384 |        92 | 0.24 |        97 | 0.25 |     5.43 |
 | gamess             |    194795 |     44085 | 0.23 |     45319 | 0.23 |     2.80 |
 | milc               |      3027 |         0 | 0.00 |         1 | 0.00 |   100.00 |
 | zeusmp             |      4849 |      1011 | 0.21 |      1063 | 0.22 |     5.14 |
 | gromacs            |     27421 |        57 | 0.00 |        79 | 0.00 |    38.60 |
 | cactusADM          |     15693 |       230 | 0.01 |       237 | 0.02 |     3.04 |
 | leslie3d           |      1694 |       466 | 0.28 |       466 | 0.28 |     0.00 |
 | namd               |      3050 |        12 | 0.00 |       102 | 0.03 |   750.00 |
 | soplex             |      7904 |        59 | 0.01 |       109 | 0.01 |    84.75 |
 | povray             |     23981 |       317 | 0.01 |       351 | 0.01 |    10.73 |
 | calculix           |     42451 |      6175 | 0.15 |      7228 | 0.17 |    17.05 |
 | GemsFDTD           |      5782 |      1289 | 0.22 |      1519 | 0.26 |    17.84 |
 | tonto              |     81853 |      7900 | 0.10 |      8521 | 0.10 |     7.86 |
 | lbm                |       171 |         0 | 0.00 |         0 | 0.00 |     0.00 |
 | wrf                |    121094 |      4330 | 0.04 |      4423 | 0.04 |     2.15 |
 | sphinx3            |      5880 |         9 | 0.00 |        10 | 0.00 |    11.11 |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | ac.f90             |       474 |       287 | 0.61 |       300 | 0.63 |     4.53 |
 | aermod.f90         |     33296 |      6923 | 0.21 |      6866 | 0.21 |    -0.82 |
 | air.f90            |      1010 |       366 | 0.36 |       376 | 0.37 |     2.73 |
 | capacita.f90       |       487 |        80 | 0.16 |       153 | 0.31 |    91.25 |
 | channel2.f90       |       379 |       242 | 0.64 |       242 | 0.64 |     0.00 |
 | doduc.f90          |       938 |       240 | 0.26 |       258 | 0.28 |     7.50 |
 | fatigue2.f90       |      1936 |      1251 | 0.65 |      1259 | 0.65 |     0.64 |
 | gas_dyn2.f90       |      1033 |       500 | 0.48 |       500 | 0.48 |     0.00 |
 | induct2.f90        |      3982 |      2179 | 0.55 |      2183 | 0.55 |     0.18 |
 | linpk.f90          |        85 |        12 | 0.14 |        12 | 0.14 |     0.00 |
 | mdbx.f90           |       491 |       181 | 0.37 |       204 | 0.42 |    12.71 |
 | mp_prop_design.f90 |       518 |       334 | 0.64 |       334 | 0.64 |     0.00 |
 | nf.f90             |       398 |        48 | 0.12 |        48 | 0.12 |     0.00 |
 | protein.f90        |      1322 |       909 | 0.69 |       917 | 0.69 |     0.88 |
 | rnflow.f90         |      1282 |       132 | 0.10 |       145 | 0.11 |     9.85 |
 | test_fpu2.f90      |       991 |        70 | 0.07 |        70 | 0.07 |     0.00 |
 | tfft2.f90          |        65 |        30 | 0.46 |        30 | 0.46 |     0.00 |

As you can see, jump functions are very much a Fortran thing which is
quite expected.  The increases are, however, smaller than what I had
hoped for.  The reason is that the array descriptors which are not
already handled by our current technique are not filled with constants
but with data copied from other descriptors, sometimes after checks
for non-zero values.  Therefore, in addition to these patches, they
will require more complex aggregate jump functions allowing
pass-throughs and simple arithmetic ones in addition to simple values.

Bootstrapped and tested on x86_64 where it also passes LTO bootstrap
and is able to LTO build Firefox.

Thanks,

Martin


2014-02-25  Martin Jambor  <mjambor@suse.cz>

	* ipa-prop.c (ipa_bb_info): New field own_begin_agg_cnt.
	(apply_agg_contents_deltas): New parameters fbi and own.
	Reimplemented properly.  Adjust callers.
	(ipa_analyze_bb_statements): Allocate bi->own_begin_agg_cnt.
	(free_ipa_bb_info): Free own_begin_agg_cnt.
	(prune_agg_contents): New function.
	(merge_agg_contents): New parameters fbi, inc_own and target_own,
	adjusted all callers.  Reimplemented.
	(propagate_agg_cnts_through_bb): Manage own flags.

testsuite/
	* gcc.dg/ipa/ipcp-agg-15.c: New test.

Index: src/gcc/ipa-prop.c
===================================================================
--- src.orig/gcc/ipa-prop.c
+++ src/gcc/ipa-prop.c
@@ -172,6 +172,10 @@ struct ipa_bb_info
   /* Aggregate contents of tracked references at the beginning of each BB.  */
   vec<ipa_known_agg_contents_list *> begin_agg_cnt;
 
+  /* True if this BB is the designated owner of the corresponding pointer in
+     begin_agg_cnt. */
+  vec<bool> own_begin_agg_cnt;
+
   /* Changes in aggregate contents of tracked references.  */
   vec<ipa_known_agg_contents_list *> agg_deltas;
 
@@ -1993,26 +1997,77 @@ determine_locally_known_aggregate_parts
     }
 }
 
-/* Apply basic block DELTAS to INITial aggregate contents description.  */
+/* Apply basic block DELTAS to INITial aggregate contents description.  FBI
+   describes the current function.  Set *OWN to true if the result is
+   exclusively held and can be modified in place.  */
 
 static struct ipa_known_agg_contents_list *
-apply_agg_contents_deltas (struct ipa_known_agg_contents_list *init,
-			   struct ipa_known_agg_contents_list *deltas)
+apply_agg_contents_deltas (struct func_body_info *fbi,
+			   struct ipa_known_agg_contents_list *init,
+			   struct ipa_known_agg_contents_list *deltas,
+			   bool *own)
 {
-  /* TODO: This over-conservative but should work for Fortran descriptors.
-     Will be replaced in a subsequent patches with real merging.  */
-
-  gcc_assert (init != AGG_CONTENTS_TOP);
-  if (deltas)
-    return deltas;
-  else
-    {
+  gcc_checking_assert (init != AGG_CONTENTS_TOP);
 #ifdef ENABLE_CHECKING
-      for (struct ipa_known_agg_contents_list *p = init; p; p = p->next)
-	gcc_assert (p->only_unescaped);
+  for (struct ipa_known_agg_contents_list *p = init; p; p = p->next)
+    gcc_assert (p->only_unescaped);
 #endif
+
+  if (!init)
+    {
+      *own = false;
+      return deltas;
+    }
+  if (!deltas)
+    {
+      *own = false;
       return init;
     }
+
+  *own = true;
+  struct ipa_known_agg_contents_list *list = NULL, **r = &list;
+  while (init || deltas)
+    {
+      struct ipa_known_agg_contents_list *p = NULL;
+      if (deltas && (!init || deltas->offset < init->offset))
+	{
+	  if (deltas->constant)
+	    p = deltas;
+	  while (init && init->offset < deltas->offset + deltas->size)
+	    init = init->next;
+	  deltas = deltas->next;
+	}
+      else if (init && (!deltas || init->offset < deltas->offset))
+	{
+	  if (init->constant
+	      && (!deltas
+		  || init->offset + init->size >= deltas->offset))
+	    p = init;
+	  init = init->next;
+	}
+      else
+	{
+	  gcc_checking_assert (init->offset == deltas->offset);
+	  if (deltas->constant)
+	    p = deltas;
+	  while (init && init->offset < deltas->offset + deltas->size)
+	    init = init->next;
+	  deltas = deltas->next;
+	}
+
+      if (p)
+	{
+	  *r = (struct ipa_known_agg_contents_list *)
+	    pool_alloc (fbi->agg_contents_pool);
+	  (*r)->offset = p->offset;
+	  (*r)->size = p->size;
+	  (*r)->constant = p->constant;
+	  (*r)->only_unescaped = p->only_unescaped;
+	  (*r)->next = NULL;
+	  r = &(*r)->next;
+	}
+    }
+  return list;
 }
 
 static tree
@@ -2264,9 +2319,10 @@ ipa_compute_jump_functions_for_edge (str
 		  struct ipa_bb_info *bi;
 		  bi = ipa_get_bb_info (fbi, gimple_bb (cs->call_stmt));
 		  struct ipa_known_agg_contents_list *begin, *final, *p;
+		  bool dummy;
 		  begin = bi->begin_agg_cnt[ref_index];
-		  final = apply_agg_contents_deltas (begin, (*dvec)[n]);
-
+		  final = apply_agg_contents_deltas (fbi, begin, (*dvec)[n],
+						     &dummy);
 		  int const_count = 0;
 		  for (p = final; p; p = p->next)
 		    if (p->constant)
@@ -2892,6 +2948,8 @@ ipa_analyze_bb_statements (struct func_b
 	      sizeof (int) * ipa_get_tracked_refs_count (fbi->info));
 
       bi->begin_agg_cnt.safe_grow (ipa_get_tracked_refs_count (fbi->info));
+      bi->own_begin_agg_cnt.safe_grow_cleared
+	(ipa_get_tracked_refs_count (fbi->info));
       for (int i = 0; i < ipa_get_tracked_refs_count (fbi->info); ++i)
 	bi->begin_agg_cnt[i] = AGG_CONTENTS_TOP;
     }
@@ -2982,6 +3040,7 @@ free_ipa_bb_info (struct ipa_bb_info *bi
   bi->cg_edges.release ();
   bi->param_aa_statuses.release ();
   bi->begin_agg_cnt.release ();
+  bi->own_begin_agg_cnt.release ();
   bi->agg_deltas.release ();
 }
 
@@ -3313,31 +3372,123 @@ gather_picked_escapes (struct func_body_
     }
 }
 
-/* Merge aggregate contents FINAL with those in *TARGET.  Return true if those
-   in *TARGET have changed.  */
+/* Remove all items from the list in *TARGET that are not also in INCOMING.
+   Return true if any was actually removed.  */
 
 static bool
-merge_agg_contents (struct ipa_known_agg_contents_list *final,
+prune_agg_contents (struct ipa_known_agg_contents_list *incoming,
 		    struct ipa_known_agg_contents_list **target)
 {
-  /* TODO: This over-conservative but should work for Fortran descriptors.
-     Will be replaced in a subsequent patches by real merging.  */
+  bool res = false;
+  while (*target && incoming)
+    {
+      if ((*target)->offset > incoming->offset)
+	{
+	  while (*target
+		 && (*target)->offset > incoming->offset
+		 && (*target)->offset < incoming->offset + incoming->size)
+	    {
+	      *target = (*target)->next;
+	      res = true;
+	    }
+
+	  incoming = incoming->next;
+	}
+      else if ((*target)->offset < incoming->offset)
+	{
+	  while (incoming
+		 && (*target)->offset < incoming->offset
+		 && (*target)->offset + (*target)->size > incoming->size)
+	    incoming = incoming->next;
+
+	  *target = (*target)->next;
+	  res = true;
+	}
+      else if ((*target)->size != incoming->size
+	       || !(*target)->constant
+	       || !incoming->constant
+	       || !operand_equal_p ((*target)->constant, incoming->constant, 0))
+	{
+	  *target = (*target)->next;
+	  incoming = incoming->next;
+	  res = true;
+	}
+      else
+	{
+	  gcc_checking_assert ((*target)->only_unescaped);
+	  target = &(*target)->next;
+	  incoming = incoming->next;
+	}
+    }
+
+  res |= *target != NULL;
+  *target = NULL;
+  return res;
+}
+
+/* Merge aggregate contents INCOMING with those in *TARGET.  Return true if
+   those in *TARGET have changed.  FBI describes the current function.  INC_OWN
+   determines whether the INCOMING list is exclusively owned and can be
+   modified in place or not.  TARGET_OWN points to a flag that describes
+   *TARGET in the same way and which may be modified.  */
+
+static bool
+merge_agg_contents (struct func_body_info *fbi,
+		    struct ipa_known_agg_contents_list *incoming, bool inc_own,
+		    struct ipa_known_agg_contents_list **target,
+		    bool *target_own)
+{
   if (*target == AGG_CONTENTS_TOP)
     {
-      *target = final;
+      *target = incoming;
+      *target_own = inc_own;
       return true;
     }
-  else if (*target != final)
+  if (*target == NULL
+      || *target == incoming)
+    return false;
+
+  if (*target_own)
+    return prune_agg_contents (incoming, target);
+
+  /* TODO: This may save some memory but is it worthwhile? */
+  for (struct ipa_known_agg_contents_list *p = (*target)->next; p; p = p->next)
+    if (p == incoming)
+      {
+	*target = p;
+	return true;
+      }
+
+  struct ipa_known_agg_contents_list *p = *target, *list = NULL, **r = &list;
+  while (p && incoming)
     {
-      if (*target)
+      if (p->offset > incoming->offset)
+	incoming = incoming->next;
+      else if (p->offset < incoming->offset)
+	p = p->next;
+      else
 	{
-	  *target = NULL;
-	  return true;
+	  if (p->size == incoming->size
+	      && p->constant
+	      && incoming->constant
+	      && operand_equal_p (p->constant, incoming->constant, 0))
+	    {
+	      *r = (struct ipa_known_agg_contents_list *)
+		pool_alloc (fbi->agg_contents_pool);
+	      (*r)->offset = p->offset;
+	      (*r)->size = p->size;
+	      (*r)->constant = p->constant;
+	      (*r)->only_unescaped = true;
+	      (*r)->next = NULL;
+	      r = &(*r)->next;
+	    }
+	  p = p->next;
+	  incoming = incoming->next;
 	}
-      else
-	return false;
     }
-  return false;
+  *target_own = true;
+  *target = list;
+  return true;
 }
 
 /* Apply all computed aggregate deltas for the given BB and merge results into
@@ -3356,7 +3507,10 @@ propagate_agg_cnts_through_bb (struct fu
     else
       deltas = bi->agg_deltas[i];
 
-    final = apply_agg_contents_deltas (bi->begin_agg_cnt[i], deltas);
+    bool fin_own;
+    final = apply_agg_contents_deltas (fbi, bi->begin_agg_cnt[i], deltas,
+				       &fin_own);
+    fin_own = fin_own && single_succ_p (bb);
 
     edge e;
     edge_iterator ei;
@@ -3371,7 +3525,9 @@ propagate_agg_cnts_through_bb (struct fu
 
 	gcc_checking_assert (succ_info->begin_agg_cnt.length ()
 			     >= (unsigned) i);
-	if (merge_agg_contents (final, &succ_info->begin_agg_cnt[i])
+	if (merge_agg_contents (fbi, final, fin_own,
+				&succ_info->begin_agg_cnt[i],
+				&succ_info->own_begin_agg_cnt[i])
 	    && !succ_info->queued)
 	  {
 	    succ_info->queued = true;
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-15.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-15.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+struct S
+{
+  int i,j;
+};
+
+volatile int g;
+int *p;
+
+static int __attribute__ ((noinline, noclone))
+something_unpredictable (void)
+{
+  *p = 6;
+  return 1;
+}
+
+
+static void __attribute__ ((noinline))
+bar (struct S *ps)
+{
+  something_unpredictable ();
+  g = ps->j;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S s;
+
+  s.i = 2;
+  s.j = 8;
+
+  if (something_unpredictable ())
+    s.i = 6;
+
+  bar (&s);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of bar.*for all known contexts" "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 0/7] ipa-prop escape analysis
@ 2014-05-21 13:31 Martin Jambor
  2014-05-21 13:31 ` [PATCH 4/7] Break up determine_known_aggregate_parts Martin Jambor
                   ` (6 more replies)
  0 siblings, 7 replies; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

Hi,

this patch series implements ipa-prop escape and clobber analysis and
then more advanced jump function building on top of them.  Better
descriptions of individual patches are in their corresponding email
messages, they however need to be applied in this order and so I'm
sending them in this thread.

Thanks,

Martin

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 2/7] Analyze BBs in DOM order in ipa-prop.c
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
                   ` (2 preceding siblings ...)
  2014-05-21 13:31 ` [PATCH 3/7] IPA-CP escape and clobber analysis Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  2014-05-21 13:31 ` [PATCH 1/7] Add missing documentation of four IPA-CP params Martin Jambor
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: analyze_edges_in_dom_order.diff --]
[-- Type: text/plain, Size: 46590 bytes --]

Hi,

this patch has already been reviewed and pre-approved by Honza, so I'm
including this mainly for reference and I will commit it once the
previous (documentation) patch is approved.  The original submission
can be found at http://gcc.gnu.org/ml/gcc-patches/2014-04/msg01681.html.

This patch introduce a better structure for holding intermediate
information about the current function during analysis called
function_body_info and above all analyzes BBs in DOM order which alone
improves results as seen in PR 53787.

Bootstrapped and tested on x86_64-linux.  I have also LTO-bootstrapped
it and LTO built Firefox with it. 

Thanks,

Martin



2014-05-15  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/53787
	* params.def (PARAM_IPA_CP_LOOP_HINT_BONUS): New param.
	* ipa-prop.h (ipa_node_params): Rename uses_analysis_done to
	analysis_done, update all uses.
	* ipa-prop.c: Include domwalk.h
	(param_analysis_info): Removed.
	(param_aa_status): New type.
	(ipa_bb_info): Likewise.
	(func_body_info): Likewise.
	(ipa_get_bb_info): New function.
	(aa_overwalked): Likewise.
	(find_dominating_aa_status): Likewise.
	(parm_bb_aa_status_for_bb): Likewise.
	(parm_preserved_before_stmt_p): Changed to use new param AA info.
	(load_from_unmodified_param): Accept func_body_info as a parameter
	instead of parms_ainfo.
	(parm_ref_data_preserved_p): Changed to use new param AA info.
	(parm_ref_data_pass_through_p): Likewise.
	(ipa_load_from_parm_agg_1): Likewise.  Update callers.
	(compute_complex_assign_jump_func): Changed to use new param AA info.
	(compute_complex_ancestor_jump_func): Likewise.
	(ipa_compute_jump_functions_for_edge): Likewise.
	(ipa_compute_jump_functions): Removed.
	(ipa_compute_jump_functions_for_bb): New function.
	(ipa_analyze_indirect_call_uses): Likewise, moved variable
	declarations down.
	(ipa_analyze_virtual_call_uses): Accept func_body_info instead of node
	and info, moved variable declarations down.
	(ipa_analyze_call_uses): Accept and pass on func_body_info instead of
	node and info.
	(ipa_analyze_stmt_uses): Likewise.
	(ipa_analyze_params_uses): Removed.
	(ipa_analyze_params_uses_in_bb): New function.
	(ipa_analyze_controlled_uses): Likewise.
	(free_ipa_bb_info): Likewise.
	(analysis_dom_walker): New class.
	(ipa_analyze_node): Handle node-specific forbidden analysis,
	initialize and free func_body_info, use dominator walker.
	(ipcp_modif_dom_walker): New class.
	(ipcp_transform_function): Create and free func_body_info, use
	ipcp_modif_dom_walker, moved a lot of functionality there.

Index: src/gcc/ipa-prop.c
===================================================================
--- src.orig/gcc/ipa-prop.c
+++ src/gcc/ipa-prop.c
@@ -59,14 +59,57 @@ along with GCC; see the file COPYING3.
 #include "ipa-utils.h"
 #include "stringpool.h"
 #include "tree-ssanames.h"
+#include "domwalk.h"
 
-/* Intermediate information about a parameter that is only useful during the
-   run of ipa_analyze_node and is not kept afterwards.  */
+/* Intermediate information that we get from alias analysis about a particular
+   parameter in a particular basic_block.  When a parameter or the memory it
+   references is marked modified, we use that information in all dominated
+   blocks without consulting alias analysis oracle.  */
+
+struct param_aa_status
+{
+  /* Set when this structure contains meaningful information.  If not, the
+     structure describing a dominating BB should be used instead.  */
+  bool valid;
+
+  /* Whether we have seen something which might have modified the data in
+     question.  PARM is for the parameter itself, REF is for data it points to
+     but using the alias type of individual accesses and PT is the same thing
+     but for computing aggregate pass-through functions using a very inclusive
+     ao_ref.  */
+  bool parm_modified, ref_modified, pt_modified;
+};
 
-struct param_analysis_info
+/* Information related to a given BB that used only when looking at function
+   body.  */
+
+struct ipa_bb_info
 {
-  bool parm_modified, ref_modified, pt_modified;
-  bitmap parm_visited_statements, pt_visited_statements;
+  /* Call graph edges going out of this BB.  */
+  vec<cgraph_edge_p> cg_edges;
+  /* Alias analysis statuses of each formal parameter at this bb.  */
+  vec<param_aa_status> param_aa_statuses;
+};
+
+/* Structure with global information that is only used when looking at function
+   body. */
+
+struct func_body_info
+{
+  /* The node that is being analyzed.  */
+  cgraph_node *node;
+
+  /* Its info.  */
+  struct ipa_node_params *info;
+
+  /* Information about individual BBs. */
+  vec<ipa_bb_info> bb_infos;
+
+  /* Number of parameters.  */
+  int param_count;
+
+  /* Number of statements already walked by when analyzing this function.  */
+  unsigned int aa_walked;
 };
 
 /* Vector where the parameter infos are actually stored. */
@@ -510,6 +553,16 @@ ipa_binfo_from_known_type_jfunc (struct
 			      jfunc->value.known_type.component_type);
 }
 
+/* Get IPA BB information about the given BB.  FBI is the context of analysis
+   of this function body.  */
+
+static struct ipa_bb_info *
+ipa_get_bb_info (struct func_body_info *fbi, basic_block bb)
+{
+  gcc_checking_assert (fbi);
+  return &fbi->bb_infos[bb->index];
+}
+
 /* Structure to be passed in between detect_type_change and
    check_stmt_for_type_change.  */
 
@@ -769,34 +822,101 @@ mark_modified (ao_ref *ao ATTRIBUTE_UNUS
   return true;
 }
 
+/* Return true if we have already walked so many statements in AA that we
+   should really just start giving up.  */
+
+static bool
+aa_overwalked (struct func_body_info *fbi)
+{
+  gcc_checking_assert (fbi);
+  return fbi->aa_walked > (unsigned) PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS);
+}
+
+/* Find the nearest valid aa status for parameter specified by INDEX that
+   dominates BB.  */
+
+static struct param_aa_status *
+find_dominating_aa_status (struct func_body_info *fbi, basic_block bb,
+			   int index)
+{
+  while (true)
+    {
+      bb = get_immediate_dominator (CDI_DOMINATORS, bb);
+      if (!bb)
+	return NULL;
+      struct ipa_bb_info *bi = ipa_get_bb_info (fbi, bb);
+      if (!bi->param_aa_statuses.is_empty ()
+	  && bi->param_aa_statuses[index].valid)
+	return &bi->param_aa_statuses[index];
+    }
+}
+
+/* Get AA status structure for the given BB and parameter with INDEX.  Allocate
+   structures and/or initialize the result with a dominating description as
+   necessary.  */
+
+static struct param_aa_status *
+parm_bb_aa_status_for_bb (struct func_body_info *fbi, basic_block bb,
+			  int index)
+{
+  gcc_checking_assert (fbi);
+  struct ipa_bb_info *bi = ipa_get_bb_info (fbi, bb);
+  if (bi->param_aa_statuses.is_empty ())
+    bi->param_aa_statuses.safe_grow_cleared (fbi->param_count);
+  struct param_aa_status *paa = &bi->param_aa_statuses[index];
+  if (!paa->valid)
+    {
+      gcc_checking_assert (!paa->parm_modified
+			   && !paa->ref_modified
+			   && !paa->pt_modified);
+      struct param_aa_status *dom_paa;
+      dom_paa = find_dominating_aa_status (fbi, bb, index);
+      if (dom_paa)
+	*paa = *dom_paa;
+      else
+	paa->valid = true;
+    }
+
+  return paa;
+}
+
 /* Return true if a load from a formal parameter PARM_LOAD is known to retrieve
    a value known not to be modified in this function before reaching the
-   statement STMT.  PARM_AINFO is a pointer to a structure containing temporary
-   information about the parameter.  */
+   statement STMT.  FBI holds information about the function we have so far
+   gathered but which does not survive the summary building stage.  */
 
 static bool
-parm_preserved_before_stmt_p (struct param_analysis_info *parm_ainfo,
-			       gimple stmt, tree parm_load)
+parm_preserved_before_stmt_p (struct func_body_info *fbi, int index,
+			      gimple stmt, tree parm_load)
 {
+  struct param_aa_status *paa;
   bool modified = false;
-  bitmap *visited_stmts;
   ao_ref refd;
 
-  if (parm_ainfo && parm_ainfo->parm_modified)
-    return false;
+  /* FIXME: FBI can be NULL if we are being called from outside
+     ipa_node_analysis or ipcp_transform_function, which currently happens
+     during inlining analysis.  It would be great to extend fbi's lifetime and
+     always have it.  Currently, we are just not afraid of too much walking in
+     that case.  */
+  if (fbi)
+    {
+      if (aa_overwalked (fbi))
+	return false;
+      paa = parm_bb_aa_status_for_bb (fbi, gimple_bb (stmt), index);
+      if (paa->parm_modified)
+	return false;
+    }
+  else
+    paa = NULL;
 
   gcc_checking_assert (gimple_vuse (stmt) != NULL_TREE);
   ao_ref_init (&refd, parm_load);
-  /* We can cache visited statements only when parm_ainfo is available and when
-     we are looking at a naked load of the whole parameter.  */
-  if (!parm_ainfo || TREE_CODE (parm_load) != PARM_DECL)
-    visited_stmts = NULL;
-  else
-    visited_stmts = &parm_ainfo->parm_visited_statements;
-  walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
-		      visited_stmts);
-  if (parm_ainfo && modified)
-    parm_ainfo->parm_modified = true;
+  int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified,
+				   &modified, NULL);
+  if (fbi)
+    fbi->aa_walked += walked;
+  if (paa && modified)
+    paa->parm_modified = true;
   return !modified;
 }
 
@@ -805,8 +925,8 @@ parm_preserved_before_stmt_p (struct par
    modified.  Otherwise return -1.  */
 
 static int
-load_from_unmodified_param (vec<ipa_param_descriptor> descriptors,
-			    struct param_analysis_info *parms_ainfo,
+load_from_unmodified_param (struct func_body_info *fbi,
+			    vec<ipa_param_descriptor> descriptors,
 			    gimple stmt)
 {
   int index;
@@ -821,45 +941,58 @@ load_from_unmodified_param (vec<ipa_para
 
   index = ipa_get_param_decl_index_1 (descriptors, op1);
   if (index < 0
-      || !parm_preserved_before_stmt_p (parms_ainfo ? &parms_ainfo[index]
-					: NULL, stmt, op1))
+      || !parm_preserved_before_stmt_p (fbi, index, stmt, op1))
     return -1;
 
   return index;
 }
 
-/* Return true if memory reference REF loads data that are known to be
-   unmodified in this function before reaching statement STMT.  PARM_AINFO, if
-   non-NULL, is a pointer to a structure containing temporary information about
-   PARM.  */
+/* Return true if memory reference REF (which must be a load through parameter
+   with INDEX) loads data that are known to be unmodified in this function
+   before reaching statement STMT.  */
 
 static bool
-parm_ref_data_preserved_p (struct param_analysis_info *parm_ainfo,
-			      gimple stmt, tree ref)
+parm_ref_data_preserved_p (struct func_body_info *fbi,
+			   int index, gimple stmt, tree ref)
 {
+  struct param_aa_status *paa;
   bool modified = false;
   ao_ref refd;
 
-  gcc_checking_assert (gimple_vuse (stmt));
-  if (parm_ainfo && parm_ainfo->ref_modified)
-    return false;
+  /* FIXME: FBI can be NULL if we are being called from outside
+     ipa_node_analysis or ipcp_transform_function, which currently happens
+     during inlining analysis.  It would be great to extend fbi's lifetime and
+     always have it.  Currently, we are just not afraid of too much walking in
+     that case.  */
+  if (fbi)
+    {
+      if (aa_overwalked (fbi))
+	return false;
+      paa = parm_bb_aa_status_for_bb (fbi, gimple_bb (stmt), index);
+      if (paa->ref_modified)
+	return false;
+    }
+  else
+    paa = NULL;
 
+  gcc_checking_assert (gimple_vuse (stmt));
   ao_ref_init (&refd, ref);
-  walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
-		      NULL);
-  if (parm_ainfo && modified)
-    parm_ainfo->ref_modified = true;
+  int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified,
+				   &modified, NULL);
+  if (fbi)
+    fbi->aa_walked += walked;
+  if (paa && modified)
+    paa->ref_modified = true;
   return !modified;
 }
 
-/* Return true if the data pointed to by PARM is known to be unmodified in this
-   function before reaching call statement CALL into which it is passed.
-   PARM_AINFO is a pointer to a structure containing temporary information
-   about PARM.  */
+/* Return true if the data pointed to by PARM (which is a parameter with INDEX)
+   is known to be unmodified in this function before reaching call statement
+   CALL into which it is passed.  FBI describes the function body.  */
 
 static bool
-parm_ref_data_pass_through_p (struct param_analysis_info *parm_ainfo,
-			       gimple call, tree parm)
+parm_ref_data_pass_through_p (struct func_body_info *fbi, int index,
+			      gimple call, tree parm)
 {
   bool modified = false;
   ao_ref refd;
@@ -868,17 +1001,21 @@ parm_ref_data_pass_through_p (struct par
      function because it is not goin to use it.  But do not cache the result
      either.  Also, no such calculations for non-pointers.  */
   if (!gimple_vuse (call)
-      || !POINTER_TYPE_P (TREE_TYPE (parm)))
+      || !POINTER_TYPE_P (TREE_TYPE (parm))
+      || aa_overwalked (fbi))
     return false;
 
-  if (parm_ainfo->pt_modified)
+  struct param_aa_status *paa = parm_bb_aa_status_for_bb (fbi, gimple_bb (call),
+							  index);
+  if (paa->pt_modified)
     return false;
 
   ao_ref_init_from_ptr_and_size (&refd, parm, NULL_TREE);
-  walk_aliased_vdefs (&refd, gimple_vuse (call), mark_modified, &modified,
-		      parm_ainfo ? &parm_ainfo->pt_visited_statements : NULL);
+  int walked = walk_aliased_vdefs (&refd, gimple_vuse (call), mark_modified,
+				   &modified, NULL);
+  fbi->aa_walked += walked;
   if (modified)
-    parm_ainfo->pt_modified = true;
+    paa->pt_modified = true;
   return !modified;
 }
 
@@ -893,10 +1030,11 @@ parm_ref_data_pass_through_p (struct par
    reference respectively.  */
 
 static bool
-ipa_load_from_parm_agg_1 (vec<ipa_param_descriptor> descriptors,
-			  struct param_analysis_info *parms_ainfo, gimple stmt,
-			  tree op, int *index_p, HOST_WIDE_INT *offset_p,
-			  HOST_WIDE_INT *size_p, bool *by_ref_p)
+ipa_load_from_parm_agg_1 (struct func_body_info *fbi,
+			  vec<ipa_param_descriptor> descriptors,
+			  gimple stmt, tree op, int *index_p,
+			  HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p,
+			  bool *by_ref_p)
 {
   int index;
   HOST_WIDE_INT size, max_size;
@@ -909,8 +1047,7 @@ ipa_load_from_parm_agg_1 (vec<ipa_param_
     {
       int index = ipa_get_param_decl_index_1 (descriptors, base);
       if (index >= 0
-	  && parm_preserved_before_stmt_p (parms_ainfo ? &parms_ainfo[index]
-					   : NULL, stmt, op))
+	  && parm_preserved_before_stmt_p (fbi, index, stmt, op))
 	{
 	  *index_p = index;
 	  *by_ref_p = false;
@@ -949,12 +1086,11 @@ ipa_load_from_parm_agg_1 (vec<ipa_param_
       */
 
       gimple def = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
-      index = load_from_unmodified_param (descriptors, parms_ainfo, def);
+      index = load_from_unmodified_param (fbi, descriptors, def);
     }
 
   if (index >= 0
-      && parm_ref_data_preserved_p (parms_ainfo ? &parms_ainfo[index] : NULL,
-				    stmt, op))
+      && parm_ref_data_preserved_p (fbi, index, stmt, op))
     {
       *index_p = index;
       *by_ref_p = true;
@@ -973,7 +1109,7 @@ ipa_load_from_parm_agg (struct ipa_node_
 			tree op, int *index_p, HOST_WIDE_INT *offset_p,
 			bool *by_ref_p)
 {
-  return ipa_load_from_parm_agg_1 (info->descriptors, NULL, stmt, op, index_p,
+  return ipa_load_from_parm_agg_1 (NULL, info->descriptors, stmt, op, index_p,
 				   offset_p, NULL, by_ref_p);
 }
 
@@ -1031,8 +1167,8 @@ ipa_load_from_parm_agg (struct ipa_node_
    only needed for intraprocedural analysis.  */
 
 static void
-compute_complex_assign_jump_func (struct ipa_node_params *info,
-				  struct param_analysis_info *parms_ainfo,
+compute_complex_assign_jump_func (struct func_body_info *fbi,
+				  struct ipa_node_params *info,
 				  struct ipa_jump_func *jfunc,
 				  gimple call, gimple stmt, tree name,
 				  tree param_type)
@@ -1048,13 +1184,13 @@ compute_complex_assign_jump_func (struct
       if (SSA_NAME_IS_DEFAULT_DEF (op1))
 	index = ipa_get_param_decl_index (info, SSA_NAME_VAR (op1));
       else
-	index = load_from_unmodified_param (info->descriptors, parms_ainfo,
+	index = load_from_unmodified_param (fbi, info->descriptors,
 					    SSA_NAME_DEF_STMT (op1));
       tc_ssa = op1;
     }
   else
     {
-      index = load_from_unmodified_param (info->descriptors, parms_ainfo, stmt);
+      index = load_from_unmodified_param (fbi, info->descriptors, stmt);
       tc_ssa = gimple_assign_lhs (stmt);
     }
 
@@ -1075,8 +1211,7 @@ compute_complex_assign_jump_func (struct
 	}
       else if (gimple_assign_single_p (stmt))
 	{
-	  bool agg_p = parm_ref_data_pass_through_p (&parms_ainfo[index],
-						     call, tc_ssa);
+	  bool agg_p = parm_ref_data_pass_through_p (fbi, index, call, tc_ssa);
 	  bool type_p = false;
 
 	  if (param_type && POINTER_TYPE_P (param_type))
@@ -1115,7 +1250,7 @@ compute_complex_assign_jump_func (struct
       if (type_p || jfunc->type == IPA_JF_UNKNOWN)
 	ipa_set_ancestor_jf (jfunc, offset,
 			     type_p ? TREE_TYPE (param_type) : NULL, index,
-			     parm_ref_data_pass_through_p (&parms_ainfo[index],
+			     parm_ref_data_pass_through_p (fbi, index,
 							   call, ssa), type_p);
     }
 }
@@ -1187,8 +1322,8 @@ get_ancestor_addr_info (gimple assign, t
      return D.1879_6;  */
 
 static void
-compute_complex_ancestor_jump_func (struct ipa_node_params *info,
-				    struct param_analysis_info *parms_ainfo,
+compute_complex_ancestor_jump_func (struct func_body_info *fbi,
+				    struct ipa_node_params *info,
 				    struct ipa_jump_func *jfunc,
 				    gimple call, gimple phi, tree param_type)
 {
@@ -1247,9 +1382,10 @@ compute_complex_ancestor_jump_func (stru
     type_p = !detect_type_change (obj, expr, TREE_TYPE (param_type),
 				  call, jfunc, offset);
   if (type_p || jfunc->type == IPA_JF_UNKNOWN)
-    ipa_set_ancestor_jf (jfunc, offset, type_p ? TREE_TYPE (param_type) : NULL, index,
-			 parm_ref_data_pass_through_p (&parms_ainfo[index],
-						       call, parm), type_p);
+    ipa_set_ancestor_jf (jfunc, offset, type_p ? TREE_TYPE (param_type) : NULL,
+			 index,
+			 parm_ref_data_pass_through_p (fbi, index, call, parm),
+			 type_p);
 }
 
 /* Given OP which is passed as an actual argument to a called function,
@@ -1594,7 +1730,7 @@ ipa_get_callee_param_type (struct cgraph
    to this callsite.  */
 
 static void
-ipa_compute_jump_functions_for_edge (struct param_analysis_info *parms_ainfo,
+ipa_compute_jump_functions_for_edge (struct func_body_info *fbi,
 				     struct cgraph_edge *cs)
 {
   struct ipa_node_params *info = IPA_NODE_REF (cs->caller);
@@ -1628,7 +1764,7 @@ ipa_compute_jump_functions_for_edge (str
 	  /* Aggregate passed by value, check for pass-through, otherwise we
 	     will attempt to fill in aggregate contents later in this
 	     for cycle.  */
-	  if (parm_preserved_before_stmt_p (&parms_ainfo[index], call, arg))
+	  if (parm_preserved_before_stmt_p (fbi, index, call, arg))
 	    {
 	      ipa_set_jf_simple_pass_through (jfunc, index, false, false);
 	      continue;
@@ -1642,8 +1778,7 @@ ipa_compute_jump_functions_for_edge (str
 	      if (index >= 0)
 		{
 		  bool agg_p, type_p;
-		  agg_p = parm_ref_data_pass_through_p (&parms_ainfo[index],
-							call, arg);
+		  agg_p = parm_ref_data_pass_through_p (fbi, index, call, arg);
 		  if (param_type && POINTER_TYPE_P (param_type))
 		    type_p = !detect_type_change_ssa (arg, TREE_TYPE (param_type),
 						      call, jfunc);
@@ -1658,10 +1793,10 @@ ipa_compute_jump_functions_for_edge (str
 	    {
 	      gimple stmt = SSA_NAME_DEF_STMT (arg);
 	      if (is_gimple_assign (stmt))
-		compute_complex_assign_jump_func (info, parms_ainfo, jfunc,
+		compute_complex_assign_jump_func (fbi, info, jfunc,
 						  call, stmt, arg, param_type);
 	      else if (gimple_code (stmt) == GIMPLE_PHI)
-		compute_complex_ancestor_jump_func (info, parms_ainfo, jfunc,
+		compute_complex_ancestor_jump_func (fbi, info, jfunc,
 						    call, stmt, param_type);
 	    }
 	}
@@ -1692,27 +1827,29 @@ ipa_compute_jump_functions_for_edge (str
 }
 
 /* Compute jump functions for all edges - both direct and indirect - outgoing
-   from NODE.  Also count the actual arguments in the process.  */
+   from BB.  */
 
 static void
-ipa_compute_jump_functions (struct cgraph_node *node,
-			    struct param_analysis_info *parms_ainfo)
+ipa_compute_jump_functions_for_bb (struct func_body_info *fbi, basic_block bb)
 {
+  struct ipa_bb_info *bi = ipa_get_bb_info (fbi, bb);
+  int i;
   struct cgraph_edge *cs;
 
-  for (cs = node->callees; cs; cs = cs->next_callee)
+  FOR_EACH_VEC_ELT_REVERSE (bi->cg_edges, i, cs)
     {
-      struct cgraph_node *callee = cgraph_function_or_thunk_node (cs->callee,
-								  NULL);
-      /* We do not need to bother analyzing calls to unknown
-	 functions unless they may become known during lto/whopr.  */
-      if (!callee->definition && !flag_lto)
-	continue;
-      ipa_compute_jump_functions_for_edge (parms_ainfo, cs);
-    }
+      struct cgraph_node *callee = cs->callee;
 
-  for (cs = node->indirect_calls; cs; cs = cs->next_callee)
-    ipa_compute_jump_functions_for_edge (parms_ainfo, cs);
+      if (callee)
+	{
+	  cgraph_function_or_thunk_node (callee, NULL);
+	  /* We do not need to bother analyzing calls to unknown functions
+	     unless they may become known during lto/whopr.  */
+	  if (!callee->definition && !flag_lto)
+	    continue;
+	}
+      ipa_compute_jump_functions_for_edge (fbi, cs);
+    }
 }
 
 /* If STMT looks like a statement loading a value from a member pointer formal
@@ -1855,37 +1992,30 @@ ipa_note_param_call (struct cgraph_node
    passed by value or reference.  */
 
 static void
-ipa_analyze_indirect_call_uses (struct cgraph_node *node,
-				struct ipa_node_params *info,
-				struct param_analysis_info *parms_ainfo,
-				gimple call, tree target)
-{
-  gimple def;
-  tree n1, n2;
-  gimple d1, d2;
-  tree rec, rec2, cond;
-  gimple branch;
-  int index;
-  basic_block bb, virt_bb, join;
+ipa_analyze_indirect_call_uses (struct func_body_info *fbi, gimple call,
+				tree target)
+{
+  struct ipa_node_params *info = fbi->info;
   HOST_WIDE_INT offset;
   bool by_ref;
 
   if (SSA_NAME_IS_DEFAULT_DEF (target))
     {
       tree var = SSA_NAME_VAR (target);
-      index = ipa_get_param_decl_index (info, var);
+      int index = ipa_get_param_decl_index (info, var);
       if (index >= 0)
-	ipa_note_param_call (node, index, call);
+	ipa_note_param_call (fbi->node, index, call);
       return;
     }
 
-  def = SSA_NAME_DEF_STMT (target);
+  int index;
+  gimple def = SSA_NAME_DEF_STMT (target);
   if (gimple_assign_single_p (def)
-      && ipa_load_from_parm_agg_1 (info->descriptors, parms_ainfo, def,
+      && ipa_load_from_parm_agg_1 (fbi, info->descriptors, def,
 				   gimple_assign_rhs1 (def), &index, &offset,
 				   NULL, &by_ref))
     {
-      struct cgraph_edge *cs = ipa_note_param_call (node, index, call);
+      struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call);
       if (cs->indirect_info->offset != offset)
 	cs->indirect_info->outer_type = NULL;
       cs->indirect_info->offset = offset;
@@ -1904,14 +2034,16 @@ ipa_analyze_indirect_call_uses (struct c
 
   /* First, we need to check whether one of these is a load from a member
      pointer that is a parameter to this function. */
-  n1 = PHI_ARG_DEF (def, 0);
-  n2 = PHI_ARG_DEF (def, 1);
+  tree n1 = PHI_ARG_DEF (def, 0);
+  tree n2 = PHI_ARG_DEF (def, 1);
   if (!ipa_is_ssa_with_stmt_def (n1) || !ipa_is_ssa_with_stmt_def (n2))
     return;
-  d1 = SSA_NAME_DEF_STMT (n1);
-  d2 = SSA_NAME_DEF_STMT (n2);
+  gimple d1 = SSA_NAME_DEF_STMT (n1);
+  gimple d2 = SSA_NAME_DEF_STMT (n2);
 
-  join = gimple_bb (def);
+  tree rec;
+  basic_block bb, virt_bb;
+  basic_block join = gimple_bb (def);
   if ((rec = ipa_get_stmt_member_ptr_load_param (d1, false, &offset)))
     {
       if (ipa_get_stmt_member_ptr_load_param (d2, false, NULL))
@@ -1939,7 +2071,7 @@ ipa_analyze_indirect_call_uses (struct c
   /* Third, let's see that the branching is done depending on the least
      significant bit of the pfn. */
 
-  branch = last_stmt (bb);
+  gimple branch = last_stmt (bb);
   if (!branch || gimple_code (branch) != GIMPLE_COND)
     return;
 
@@ -1948,7 +2080,7 @@ ipa_analyze_indirect_call_uses (struct c
       || !integer_zerop (gimple_cond_rhs (branch)))
     return;
 
-  cond = gimple_cond_lhs (branch);
+  tree cond = gimple_cond_lhs (branch);
   if (!ipa_is_ssa_with_stmt_def (cond))
     return;
 
@@ -1973,6 +2105,7 @@ ipa_analyze_indirect_call_uses (struct c
       def = SSA_NAME_DEF_STMT (cond);
     }
 
+  tree rec2;
   rec2 = ipa_get_stmt_member_ptr_load_param (def,
 					     (TARGET_PTRMEMFUNC_VBIT_LOCATION
 					      == ptrmemfunc_vbit_in_delta),
@@ -1982,9 +2115,9 @@ ipa_analyze_indirect_call_uses (struct c
 
   index = ipa_get_param_decl_index (info, rec);
   if (index >= 0
-      && parm_preserved_before_stmt_p (&parms_ainfo[index], call, rec))
+      && parm_preserved_before_stmt_p (fbi, index, call, rec))
     {
-      struct cgraph_edge *cs = ipa_note_param_call (node, index, call);
+      struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call);
       if (cs->indirect_info->offset != offset)
 	cs->indirect_info->outer_type = NULL;
       cs->indirect_info->offset = offset;
@@ -1997,16 +2130,13 @@ ipa_analyze_indirect_call_uses (struct c
 
 /* Analyze a CALL to an OBJ_TYPE_REF which is passed in TARGET and if the
    object referenced in the expression is a formal parameter of the caller
-   (described by INFO), create a call note for the statement. */
+   FBI->node (described by FBI->info), create a call note for the
+   statement.  */
 
 static void
-ipa_analyze_virtual_call_uses (struct cgraph_node *node,
-			       struct ipa_node_params *info, gimple call,
-			       tree target)
+ipa_analyze_virtual_call_uses (struct func_body_info *fbi,
+			       gimple call, tree target)
 {
-  struct cgraph_edge *cs;
-  struct cgraph_indirect_call_info *ii;
-  struct ipa_jump_func jfunc;
   tree obj = OBJ_TYPE_REF_OBJECT (target);
   int index;
   HOST_WIDE_INT anc_offset;
@@ -2017,8 +2147,10 @@ ipa_analyze_virtual_call_uses (struct cg
   if (TREE_CODE (obj) != SSA_NAME)
     return;
 
+  struct ipa_node_params *info = fbi->info;
   if (SSA_NAME_IS_DEFAULT_DEF (obj))
     {
+      struct ipa_jump_func jfunc;
       if (TREE_CODE (SSA_NAME_VAR (obj)) != PARM_DECL)
 	return;
 
@@ -2031,6 +2163,7 @@ ipa_analyze_virtual_call_uses (struct cg
     }
   else
     {
+      struct ipa_jump_func jfunc;
       gimple stmt = SSA_NAME_DEF_STMT (obj);
       tree expr;
 
@@ -2045,8 +2178,8 @@ ipa_analyze_virtual_call_uses (struct cg
 	return;
     }
 
-  cs = ipa_note_param_call (node, index, call);
-  ii = cs->indirect_info;
+  struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call);
+  struct cgraph_indirect_call_info *ii = cs->indirect_info;
   ii->offset = anc_offset;
   ii->otr_token = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (target));
   ii->otr_type = obj_type_ref_class (target);
@@ -2058,12 +2191,9 @@ ipa_analyze_virtual_call_uses (struct cg
    containing intermediate information about each formal parameter.  */
 
 static void
-ipa_analyze_call_uses (struct cgraph_node *node,
-		       struct ipa_node_params *info,
-		       struct param_analysis_info *parms_ainfo, gimple call)
+ipa_analyze_call_uses (struct func_body_info *fbi, gimple call)
 {
   tree target = gimple_call_fn (call);
-  struct cgraph_edge *cs;
 
   if (!target
       || (TREE_CODE (target) != SSA_NAME
@@ -2072,27 +2202,25 @@ ipa_analyze_call_uses (struct cgraph_nod
 
   /* If we previously turned the call into a direct call, there is
      no need to analyze.  */
-  cs = cgraph_edge (node, call);
+  struct cgraph_edge *cs = cgraph_edge (fbi->node, call);
   if (cs && !cs->indirect_unknown_callee)
     return;
   if (TREE_CODE (target) == SSA_NAME)
-    ipa_analyze_indirect_call_uses (node, info, parms_ainfo, call, target);
+    ipa_analyze_indirect_call_uses (fbi, call, target);
   else if (virtual_method_call_p (target))
-    ipa_analyze_virtual_call_uses (node, info, call, target);
+    ipa_analyze_virtual_call_uses (fbi, call, target);
 }
 
 
 /* Analyze the call statement STMT with respect to formal parameters (described
-   in INFO) of caller given by NODE.  Currently it only checks whether formal
-   parameters are called.  PARMS_AINFO is a pointer to a vector containing
-   intermediate information about each formal parameter.  */
+   in INFO) of caller given by FBI->NODE.  Currently it only checks whether
+   formal parameters are called.  */
 
 static void
-ipa_analyze_stmt_uses (struct cgraph_node *node, struct ipa_node_params *info,
-		       struct param_analysis_info *parms_ainfo, gimple stmt)
+ipa_analyze_stmt_uses (struct func_body_info *fbi, gimple stmt)
 {
   if (is_gimple_call (stmt))
-    ipa_analyze_call_uses (node, info, parms_ainfo, stmt);
+    ipa_analyze_call_uses (fbi, stmt);
 }
 
 /* Callback of walk_stmt_load_store_addr_ops for the visit_load.
@@ -2116,37 +2244,43 @@ visit_ref_for_mod_analysis (gimple, tree
   return false;
 }
 
-/* Scan the function body of NODE and inspect the uses of formal parameters.
-   Store the findings in various structures of the associated ipa_node_params
-   structure, such as parameter flags, notes etc.  PARMS_AINFO is a pointer to a
-   vector containing intermediate information about each formal parameter.   */
+/* Scan the statements in BB and inspect the uses of formal parameters.  Store
+   the findings in various structures of the associated ipa_node_params
+   structure, such as parameter flags, notes etc.  FBI holds various data about
+   the function being analyzed.  */
 
 static void
-ipa_analyze_params_uses (struct cgraph_node *node,
-			 struct param_analysis_info *parms_ainfo)
+ipa_analyze_params_uses_in_bb (struct func_body_info *fbi, basic_block bb)
 {
-  tree decl = node->decl;
-  basic_block bb;
-  struct function *func;
   gimple_stmt_iterator gsi;
-  struct ipa_node_params *info = IPA_NODE_REF (node);
-  int i;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
 
-  if (ipa_get_param_count (info) == 0 || info->uses_analysis_done)
-    return;
+      if (is_gimple_debug (stmt))
+	continue;
 
-  info->uses_analysis_done = 1;
-  if (ipa_func_spec_opts_forbid_analysis_p (node))
-    {
-      for (i = 0; i < ipa_get_param_count (info); i++)
-	{
-	  ipa_set_param_used (info, i, true);
-	  ipa_set_controlled_uses (info, i, IPA_UNDESCRIBED_USE);
-	}
-      return;
-    }
+      ipa_analyze_stmt_uses (fbi, stmt);
+      walk_stmt_load_store_addr_ops (stmt, fbi->info,
+				     visit_ref_for_mod_analysis,
+				     visit_ref_for_mod_analysis,
+				     visit_ref_for_mod_analysis);
+    }
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    walk_stmt_load_store_addr_ops (gsi_stmt (gsi), fbi->info,
+				   visit_ref_for_mod_analysis,
+				   visit_ref_for_mod_analysis,
+				   visit_ref_for_mod_analysis);
+}
+
+/* Calculate controlled uses of parameters of NODE.  */
+
+static void
+ipa_analyze_controlled_uses (struct cgraph_node *node)
+{
+  struct ipa_node_params *info = IPA_NODE_REF (node);
 
-  for (i = 0; i < ipa_get_param_count (info); i++)
+  for (int i = 0; i < ipa_get_param_count (info); i++)
     {
       tree parm = ipa_get_param (info, i);
       int controlled_uses = 0;
@@ -2182,45 +2316,36 @@ ipa_analyze_params_uses (struct cgraph_n
 	controlled_uses = IPA_UNDESCRIBED_USE;
       ipa_set_controlled_uses (info, i, controlled_uses);
     }
+}
 
-  func = DECL_STRUCT_FUNCTION (decl);
-  FOR_EACH_BB_FN (bb, func)
-    {
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	{
-	  gimple stmt = gsi_stmt (gsi);
-
-	  if (is_gimple_debug (stmt))
-	    continue;
+/* Free stuff in BI.  */
 
-	  ipa_analyze_stmt_uses (node, info, parms_ainfo, stmt);
-	  walk_stmt_load_store_addr_ops (stmt, info,
-					 visit_ref_for_mod_analysis,
-					 visit_ref_for_mod_analysis,
-					 visit_ref_for_mod_analysis);
-	}
-      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	walk_stmt_load_store_addr_ops (gsi_stmt (gsi), info,
-				       visit_ref_for_mod_analysis,
-				       visit_ref_for_mod_analysis,
-				       visit_ref_for_mod_analysis);
-    }
+static void
+free_ipa_bb_info (struct ipa_bb_info *bi)
+{
+  bi->cg_edges.release ();
+  bi->param_aa_statuses.release ();
 }
 
-/* Free stuff in PARMS_AINFO, assume there are PARAM_COUNT parameters.  */
+/* Dominator walker driving the analysis.  */
 
-static void
-free_parms_ainfo (struct param_analysis_info *parms_ainfo, int param_count)
+class analysis_dom_walker : public dom_walker
 {
-  int i;
+public:
+  analysis_dom_walker (struct func_body_info *fbi)
+    : dom_walker (CDI_DOMINATORS), m_fbi (fbi) {}
 
-  for (i = 0; i < param_count; i++)
-    {
-      if (parms_ainfo[i].parm_visited_statements)
-	BITMAP_FREE (parms_ainfo[i].parm_visited_statements);
-      if (parms_ainfo[i].pt_visited_statements)
-	BITMAP_FREE (parms_ainfo[i].pt_visited_statements);
-    }
+  virtual void before_dom_children (basic_block);
+
+private:
+  struct func_body_info *m_fbi;
+};
+
+void
+analysis_dom_walker::before_dom_children (basic_block bb)
+{
+  ipa_analyze_params_uses_in_bb (m_fbi, bb);
+  ipa_compute_jump_functions_for_bb (m_fbi, bb);
 }
 
 /* Initialize the array describing properties of of formal parameters
@@ -2230,24 +2355,60 @@ free_parms_ainfo (struct param_analysis_
 void
 ipa_analyze_node (struct cgraph_node *node)
 {
+  struct func_body_info fbi;
   struct ipa_node_params *info;
-  struct param_analysis_info *parms_ainfo;
-  int param_count;
 
   ipa_check_create_node_params ();
   ipa_check_create_edge_args ();
   info = IPA_NODE_REF (node);
-  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+  if (info->analysis_done)
+    return;
+  info->analysis_done = 1;
+
+  if (ipa_func_spec_opts_forbid_analysis_p (node))
+    {
+      for (int i = 0; i < ipa_get_param_count (info); i++)
+	{
+	  ipa_set_param_used (info, i, true);
+	  ipa_set_controlled_uses (info, i, IPA_UNDESCRIBED_USE);
+	}
+      return;
+    }
+
+  struct function *func = DECL_STRUCT_FUNCTION (node->decl);
+  push_cfun (func);
+  calculate_dominance_info (CDI_DOMINATORS);
   ipa_initialize_node_params (node);
+  ipa_analyze_controlled_uses (node);
+
+  fbi.node = node;
+  fbi.info = IPA_NODE_REF (node);
+  fbi.bb_infos = vNULL;
+  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
+  fbi.param_count = ipa_get_param_count (info);
+  fbi.aa_walked = 0;
+
+  for (struct cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
+    {
+      ipa_bb_info *bi = ipa_get_bb_info (&fbi, gimple_bb (cs->call_stmt));
+      bi->cg_edges.safe_push (cs);
+    }
 
-  param_count = ipa_get_param_count (info);
-  parms_ainfo = XALLOCAVEC (struct param_analysis_info, param_count);
-  memset (parms_ainfo, 0, sizeof (struct param_analysis_info) * param_count);
+  for (struct cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
+    {
+      ipa_bb_info *bi = ipa_get_bb_info (&fbi, gimple_bb (cs->call_stmt));
+      bi->cg_edges.safe_push (cs);
+    }
 
-  ipa_analyze_params_uses (node, parms_ainfo);
-  ipa_compute_jump_functions (node, parms_ainfo);
+  analysis_dom_walker (&fbi).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 
-  free_parms_ainfo (parms_ainfo, param_count);
+  int i;
+  struct ipa_bb_info *bi;
+  FOR_EACH_VEC_ELT (fbi.bb_infos, i, bi)
+    free_ipa_bb_info (bi);
+  fbi.bb_infos.release ();
+  free_dominance_info (CDI_DOMINATORS);
   pop_cfun ();
 }
 
@@ -3303,7 +3464,7 @@ ipa_node_duplication_hook (struct cgraph
   new_info->lattices = NULL;
   new_info->ipcp_orig_node = old_info->ipcp_orig_node;
 
-  new_info->uses_analysis_done = old_info->uses_analysis_done;
+  new_info->analysis_done = old_info->analysis_done;
   new_info->node_enqueued = old_info->node_enqueued;
 
   old_av = ipa_get_agg_replacements_for_node (src);
@@ -4428,7 +4589,7 @@ ipa_write_node_info (struct output_block
   for (j = 0; j < ipa_get_param_count (info); j++)
     streamer_write_uhwi (ob, ipa_get_param_move_cost (info, j));
   bp = bitpack_create (ob->main_stream);
-  gcc_assert (info->uses_analysis_done
+  gcc_assert (info->analysis_done
 	      || ipa_get_param_count (info) == 0);
   gcc_assert (!info->node_enqueued);
   gcc_assert (!info->ipcp_orig_node);
@@ -4474,7 +4635,7 @@ ipa_read_node_info (struct lto_input_blo
     
   bp = streamer_read_bitpack (ib);
   if (ipa_get_param_count (info) != 0)
-    info->uses_analysis_done = true;
+    info->analysis_done = true;
   info->node_enqueued = false;
   for (k = 0; k < ipa_get_param_count (info); k++)
     ipa_set_param_used (info, k, bp_unpack_value (&bp, 1));
@@ -4824,17 +4985,129 @@ adjust_agg_replacement_values (struct cg
     v->index = adj[v->index];
 }
 
+/* Dominator walker driving the ipcp modification phase.  */
+
+class ipcp_modif_dom_walker : public dom_walker
+{
+public:
+  ipcp_modif_dom_walker (struct func_body_info *fbi,
+			 vec<ipa_param_descriptor> descs,
+			 struct ipa_agg_replacement_value *av,
+			 bool *sc, bool *cc)
+    : dom_walker (CDI_DOMINATORS), m_fbi (fbi), m_descriptors (descs),
+      m_aggval (av), m_something_changed (sc), m_cfg_changed (cc) {}
+
+  virtual void before_dom_children (basic_block);
+
+private:
+  struct func_body_info *m_fbi;
+  vec<ipa_param_descriptor> m_descriptors;
+  struct ipa_agg_replacement_value *m_aggval;
+  bool *m_something_changed, *m_cfg_changed;
+};
+
+void
+ipcp_modif_dom_walker::before_dom_children (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      struct ipa_agg_replacement_value *v;
+      gimple stmt = gsi_stmt (gsi);
+      tree rhs, val, t;
+      HOST_WIDE_INT offset, size;
+      int index;
+      bool by_ref, vce;
+
+      if (!gimple_assign_load_p (stmt))
+	continue;
+      rhs = gimple_assign_rhs1 (stmt);
+      if (!is_gimple_reg_type (TREE_TYPE (rhs)))
+	continue;
+
+      vce = false;
+      t = rhs;
+      while (handled_component_p (t))
+	{
+	  /* V_C_E can do things like convert an array of integers to one
+	     bigger integer and similar things we do not handle below.  */
+	  if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR)
+	    {
+	      vce = true;
+	      break;
+	    }
+	  t = TREE_OPERAND (t, 0);
+	}
+      if (vce)
+	continue;
+
+      if (!ipa_load_from_parm_agg_1 (m_fbi, m_descriptors, stmt, rhs, &index,
+				     &offset, &size, &by_ref))
+	continue;
+      for (v = m_aggval; v; v = v->next)
+	if (v->index == index
+	    && v->offset == offset)
+	  break;
+      if (!v
+	  || v->by_ref != by_ref
+	  || tree_to_shwi (TYPE_SIZE (TREE_TYPE (v->value))) != size)
+	continue;
+
+      gcc_checking_assert (is_gimple_ip_invariant (v->value));
+      if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (v->value)))
+	{
+	  if (fold_convertible_p (TREE_TYPE (rhs), v->value))
+	    val = fold_build1 (NOP_EXPR, TREE_TYPE (rhs), v->value);
+	  else if (TYPE_SIZE (TREE_TYPE (rhs))
+		   == TYPE_SIZE (TREE_TYPE (v->value)))
+	    val = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (rhs), v->value);
+	  else
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "    const ");
+		  print_generic_expr (dump_file, v->value, 0);
+		  fprintf (dump_file, "  can't be converted to type of ");
+		  print_generic_expr (dump_file, rhs, 0);
+		  fprintf (dump_file, "\n");
+		}
+	      continue;
+	    }
+	}
+      else
+	val = v->value;
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Modifying stmt:\n  ");
+	  print_gimple_stmt (dump_file, stmt, 0, 0);
+	}
+      gimple_assign_set_rhs_from_tree (&gsi, val);
+      update_stmt (stmt);
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "into:\n  ");
+	  print_gimple_stmt (dump_file, stmt, 0, 0);
+	  fprintf (dump_file, "\n");
+	}
+
+      *m_something_changed = true;
+      if (maybe_clean_eh_stmt (stmt)
+	  && gimple_purge_dead_eh_edges (gimple_bb (stmt)))
+	*m_cfg_changed = true;
+    }
 
-/* Function body transformation phase.  */
+}
+
+/* IPCP transformation phase doing propagation of aggregate values.  */
 
 unsigned int
 ipcp_transform_function (struct cgraph_node *node)
 {
   vec<ipa_param_descriptor> descriptors = vNULL;
-  struct param_analysis_info *parms_ainfo;
+  struct func_body_info fbi;
   struct ipa_agg_replacement_value *aggval;
-  gimple_stmt_iterator gsi;
-  basic_block bb;
   int param_count;
   bool cfg_changed = false, something_changed = false;
 
@@ -4854,102 +5127,27 @@ ipcp_transform_function (struct cgraph_n
   adjust_agg_replacement_values (node, aggval);
   if (dump_file)
     ipa_dump_agg_replacement_values (dump_file, aggval);
-  parms_ainfo = XALLOCAVEC (struct param_analysis_info, param_count);
-  memset (parms_ainfo, 0, sizeof (struct param_analysis_info) * param_count);
-  descriptors.safe_grow_cleared (param_count);
-  ipa_populate_param_decls (node, descriptors);
-
-  FOR_EACH_BB_FN (bb, cfun)
-    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-      {
-	struct ipa_agg_replacement_value *v;
-	gimple stmt = gsi_stmt (gsi);
-	tree rhs, val, t;
-	HOST_WIDE_INT offset, size;
-	int index;
-	bool by_ref, vce;
-
-	if (!gimple_assign_load_p (stmt))
-	  continue;
-	rhs = gimple_assign_rhs1 (stmt);
-	if (!is_gimple_reg_type (TREE_TYPE (rhs)))
-	  continue;
-
-	vce = false;
-	t = rhs;
-	while (handled_component_p (t))
-	  {
-	    /* V_C_E can do things like convert an array of integers to one
-               bigger integer and similar things we do not handle below.  */
-            if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR)
-	      {
-		vce = true;
-		break;
-	      }
-	    t = TREE_OPERAND (t, 0);
-	  }
-	if (vce)
-	  continue;
 
-	if (!ipa_load_from_parm_agg_1 (descriptors, parms_ainfo, stmt,
-				       rhs, &index, &offset, &size, &by_ref))
-	  continue;
-	for (v = aggval; v; v = v->next)
-	  if (v->index == index
-	      && v->offset == offset)
-	    break;
-	if (!v
-	    || v->by_ref != by_ref
-	    || tree_to_shwi (TYPE_SIZE (TREE_TYPE (v->value))) != size)
-	  continue;
+  fbi.node = node;
+  fbi.info = NULL;
+  fbi.bb_infos = vNULL;
+  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
+  fbi.param_count = param_count;
+  fbi.aa_walked = 0;
 
-	gcc_checking_assert (is_gimple_ip_invariant (v->value));
-	if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (v->value)))
-	  {
-	    if (fold_convertible_p (TREE_TYPE (rhs), v->value))
-	      val = fold_build1 (NOP_EXPR, TREE_TYPE (rhs), v->value);
-	    else if (TYPE_SIZE (TREE_TYPE (rhs))
-		     == TYPE_SIZE (TREE_TYPE (v->value)))
-	      val = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (rhs), v->value);
-	    else
-	      {
-		if (dump_file)
-		  {
-		    fprintf (dump_file, "    const ");
-		    print_generic_expr (dump_file, v->value, 0);
-		    fprintf (dump_file, "  can't be converted to type of ");
-		    print_generic_expr (dump_file, rhs, 0);
-		    fprintf (dump_file, "\n");
-		  }
-		continue;
-	      }
-	  }
-	else
-	  val = v->value;
-
-	if (dump_file && (dump_flags & TDF_DETAILS))
-	  {
-	    fprintf (dump_file, "Modifying stmt:\n  ");
-	    print_gimple_stmt (dump_file, stmt, 0, 0);
-	  }
-	gimple_assign_set_rhs_from_tree (&gsi, val);
-	update_stmt (stmt);
-
-	if (dump_file && (dump_flags & TDF_DETAILS))
-	  {
-	    fprintf (dump_file, "into:\n  ");
-	    print_gimple_stmt (dump_file, stmt, 0, 0);
-	    fprintf (dump_file, "\n");
-	  }
-
-	something_changed = true;
-	if (maybe_clean_eh_stmt (stmt)
-	    && gimple_purge_dead_eh_edges (gimple_bb (stmt)))
-	  cfg_changed = true;
-      }
+  descriptors.safe_grow_cleared (param_count);
+  ipa_populate_param_decls (node, descriptors);
+  calculate_dominance_info (CDI_DOMINATORS);
+  ipcp_modif_dom_walker (&fbi, descriptors, aggval, &something_changed,
+			 &cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 
+  int i;
+  struct ipa_bb_info *bi;
+  FOR_EACH_VEC_ELT (fbi.bb_infos, i, bi)
+    free_ipa_bb_info (bi);
+  fbi.bb_infos.release ();
+  free_dominance_info (CDI_DOMINATORS);
   (*ipa_node_agg_replacements)[node->uid] = NULL;
-  free_parms_ainfo (parms_ainfo, param_count);
   descriptors.release ();
 
   if (!something_changed)
Index: src/gcc/ipa-prop.h
===================================================================
--- src.orig/gcc/ipa-prop.h
+++ src/gcc/ipa-prop.h
@@ -371,8 +371,9 @@ struct ipa_node_params
   /* If this node is an ipa-cp clone, these are the known values that describe
      what it has been specialized for.  */
   vec<tree> known_vals;
-  /* Whether the param uses analysis has already been performed.  */
-  unsigned uses_analysis_done : 1;
+  /* Whether the param uses analysis and jump function computation has already
+     been performed.  */
+  unsigned analysis_done : 1;
   /* Whether the function is enqueued in ipa-cp propagation stack.  */
   unsigned node_enqueued : 1;
   /* Whether we should create a specialized version based on values that are
Index: src/gcc/params.def
===================================================================
--- src.orig/gcc/params.def
+++ src/gcc/params.def
@@ -959,6 +959,12 @@ DEFPARAM (PARAM_IPA_CP_ARRAY_INDEX_HINT_
 	  "index known.",
 	  48, 0, 0)
 
+DEFPARAM (PARAM_IPA_MAX_AA_STEPS,
+	  "ipa-max-aa-steps",
+	  "Maximum number of statements that will be visited by IPA formal "
+	  "parameter analysis based on alias analysis in any given function",
+	  25000, 0, 0)
+
 /* WHOPR partitioning configuration.  */
 
 DEFPARAM (PARAM_LTO_PARTITIONS,
Index: src/gcc/doc/invoke.texi
===================================================================
--- src.orig/gcc/doc/invoke.texi
+++ src/gcc/doc/invoke.texi
@@ -10098,6 +10098,13 @@ an array access known, it adds a bonus o
 @option{ipa-cp-array-index-hint-bonus} bonus to the profitability
 score of the candidate.
 
+@item ipa-max-aa-steps
+During its analysis of function bodies, IPA-CP employs alias analysis
+in order to track values pointed to by function parameters.  In order
+not spend too much time analyzing huge functions, it will give up and
+consider all memory clobbered after examining
+@option{ipa-max-aa-steps} statements modifying memory.
+
 @item lto-partitions
 Specify desired number of partitions produced during WHOPR compilation.
 The number of partitions should exceed the number of CPUs used for compilation.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/7] Add missing documentation of four IPA-CP params
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
                   ` (3 preceding siblings ...)
  2014-05-21 13:31 ` [PATCH 2/7] Analyze BBs in DOM order in ipa-prop.c Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  2014-05-21 15:58   ` Jeff Law
  2014-06-10 12:13   ` Gerald Pfeifer
  2014-05-21 13:31 ` [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags Martin Jambor
  2014-05-21 13:31 ` [PATCH 5/7] Advanced aggregate jump function construction Martin Jambor
  6 siblings, 2 replies; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: doc_missing_params.diff --]
[-- Type: text/plain, Size: 2101 bytes --]

Hi,

in his review of the next patch of this series, Honza pointed out that
the patch did not include documentation of a parameter it was adding.
When I set out to correct this I found out I had already been guilty
of not adding documentation of four other parameters before.

I'd like to correct this by this separate patch which I'd like to
propose also for 4.9 because it is missing there as well.  Checked by
running make info.  OK for both branches?

Thanks,

Martin


2014-05-16  Martin Jambor  <mjambor@suse.cz>

	* doc/invoke.texi (Optimize Options): Document parameters
	ipa-cp-eval-threshold, ipa-max-agg-items, ipa-cp-loop-hint-bonus and
	ipa-cp-array-index-hint-bonus.

Index: src/gcc/doc/invoke.texi
===================================================================
--- src.orig/gcc/doc/invoke.texi
+++ src/gcc/doc/invoke.texi
@@ -10076,6 +10076,28 @@ parameter in order to propagate them and
 @option{ipa-cp-value-list-size} is the maximum number of values and types it
 stores per one formal parameter of a function.
 
+@item ipa-cp-eval-threshold
+IPA-CP calculates its own score of cloning profitability heuristics
+and performs those cloning opportunities with scores that exceed
+@option{ipa-cp-eval-threshold}.
+
+@item ipa-max-agg-items
+IPA-CP is also capable of propagating a number of scalar values passed
+in an aggregate. @option{ipa-max-agg-items} controls the maximum
+number of such values per one parameter.
+
+@item ipa-cp-loop-hint-bonus
+When IPA-CP determines that a cloning candidate would make the number
+of iterations of a loop known, it adds a bonus of
+@option{ipa-cp-loop-hint-bonus} to the profitability score of
+the candidate.
+
+@item ipa-cp-array-index-hint-bonus
+When IPA-CP determines that a cloning candidate would make the index of
+an array access known, it adds a bonus of
+@option{ipa-cp-array-index-hint-bonus} to the profitability
+score of the candidate.
+
 @item lto-partitions
 Specify desired number of partitions produced during WHOPR compilation.
 The number of partitions should exceed the number of CPUs used for compilation.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
                   ` (4 preceding siblings ...)
  2014-05-21 13:31 ` [PATCH 1/7] Add missing documentation of four IPA-CP params Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  2014-05-21 14:27   ` Richard Biener
  2014-05-21 13:31 ` [PATCH 5/7] Advanced aggregate jump function construction Martin Jambor
  6 siblings, 1 reply; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: plug_ipa_escape_into_pta.diff --]
[-- Type: text/plain, Size: 2782 bytes --]

Hi,

this demonstrates how results of ipa-prop escape analysis from
previous patches can be used at a later stage of compilation by
directly returning them from gimple_call_arg_flags which currently
relies on fnspec annotations.

Bootstrapped and tested on x86_64-linux and also passes LTO bootstrap.
I have only had a brief look at behavior of this in SPEC 2006 and for
example in astar 1.19% of invocations of gimple_call_arg_flags return
noescape where we previously never did and in calculix this increases
from 15.62% (from annotations) to 18.14%.  Noclobber flag is reported
far less often still but for example in gamess that number raises from
5.21% to 7.66%.

Thanks,

Martin


2014-04-30  Martin Jambor  <mjambor@suse.cz>

	* gimple.c: Include cgraph.h.
	(gimple_call_arg_flags): Also query bitmaps in cgraph_node.

Index: src/gcc/gimple.c
===================================================================
--- src.orig/gcc/gimple.c
+++ src/gcc/gimple.c
@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.
 #include "demangle.h"
 #include "langhooks.h"
 #include "bitmap.h"
-
+#include "cgraph.h"
 
 /* All the tuples have their operand vector (if present) at the very bottom
    of the structure.  Therefore, the offset required to find the
@@ -1349,32 +1349,50 @@ int
 gimple_call_arg_flags (const_gimple stmt, unsigned arg)
 {
   tree attr = gimple_call_fnspec (stmt);
+  int ret;
 
-  if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
-    return 0;
-
-  switch (TREE_STRING_POINTER (attr)[1 + arg])
+  if (attr && 1 + arg < (unsigned) TREE_STRING_LENGTH (attr))
     {
-    case 'x':
-    case 'X':
-      return EAF_UNUSED;
-
-    case 'R':
-      return EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
-
-    case 'r':
-      return EAF_NOCLOBBER | EAF_NOESCAPE;
-
-    case 'W':
-      return EAF_DIRECT | EAF_NOESCAPE;
-
-    case 'w':
-      return EAF_NOESCAPE;
+      switch (TREE_STRING_POINTER (attr)[1 + arg])
+	{
+	case 'x':
+	case 'X':
+	  ret = EAF_UNUSED;
+	  break;
+	case 'R':
+	  ret = EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
+	  break;
+	case 'r':
+	  ret = EAF_NOCLOBBER | EAF_NOESCAPE;
+	  break;
+	case 'W':
+	  ret = EAF_DIRECT | EAF_NOESCAPE;
+	  break;
+	case 'w':
+	  ret = EAF_NOESCAPE;
+	  break;
+	case '.':
+	default:
+	  ret = 0;
+	}
+    }
+  else
+    ret = 0;
 
-    case '.':
-    default:
-      return 0;
+  tree callee_decl = gimple_call_fndecl (stmt);
+  if (callee_decl)
+    {
+      cgraph_node *callee_node = cgraph_get_node (callee_decl);
+      if (callee_node)
+	{
+	  if (cgraph_param_noescape_p (callee_node, arg))
+	    ret |= EAF_NOESCAPE;
+	  if (cgraph_param_noclobber_p (callee_node, arg))
+	    ret |= EAF_NOCLOBBER;
+	}
     }
+
+  return ret;
 }
 
 /* Detects return flags for the call STMT.  */

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 5/7] Advanced aggregate jump function construction
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
                   ` (5 preceding siblings ...)
  2014-05-21 13:31 ` [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  6 siblings, 0 replies; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: advanced_agg_jfunc_construction.diff --]
[-- Type: text/plain, Size: 35877 bytes --]

Hi,

this patch brings about more sophisticated building of aggregate jump
functions under the optimistic assumption the aggregate will not
escape.  If it is then discovered to escape, the appropriate jump
functions are invalidated at IPA time before anything else can see
them.  Invalidating is done by storing NULL_TREE value, some other
functions had to be made aware of the possibility.

The implementation is not surprising, the patch keeps information
about aggregate contents at the beginning of each BB and deltas from
there to each call and BB end.  Merging information from different BB
predecessors and applying deltas is very simplistic and replaced in
the subsequent patch.

The only thing which could be perhaps unexpected is that whether an
aggregate jump function requires non-escaped-ness is stored on an item by
item basis in order not to regress.  This way, all stores to one
particular aggregate performed immediately before the call will result
in a valid jump function regardless of whether the aggregate somehow
escapes - exactly the previous behavior.

Bootstrapped and tested on x86_64-linux, also passes LTO-bootstrap and
I have successfully built Firefox with LTO with it.  I'm sure there
will be comments but eventually I'd like to commit this to trunk.

Thanks,

Martin


2014-02-18  Martin Jambor  <mjambor@suse.cz>

	* ipa-prop.h (ipa_agg_jf_item): New fields size and only_unescaped.
	* ipa-prop.c (ipa_known_agg_contents_list): Moved up, new fields
	counter and only_unescaped.
	(AGG_CONTENTS_TOP): New macro.
	(dump_agg_contents_list): New function.
	(debug_agg_contents_list): Likewise.
	(ipa_bb_info): New fields begin_agg_cnt, agg_deltas and queued.
	(ipa_escape): New field decl_p.
	(func_body_info): New fields call_agg_deltas_map, agg_contents_pool and
	worklist.
	(ipa_print_node_jump_functions_for_edge): Expect NULL aggregate
	values, also dump only_unescaped flag.
	(build_agg_jump_func_from_list): New parameter honor_unescaped_flag,
	also fill in new fields only_unescaped and size.  Assert that the
	actual size does not exceed precision of size field.
	(determine_locally_known_aggregate_parts): Fill in new field
	only_unescaped, pass true honor_unescaped_flag.
	(apply_agg_contents_deltas): New function.
	(ipa_compute_jump_functions_for_edge): Build aggregate jump functions
	from global information.
	(ipa_analyze_stmt_uses): Removed, functionality integrated to
	ipa_analyze_bb_statements.
	(present_tracked_refs_p): New function.
	(escape_all_tracked_references): Likewise.
	(copy_aggg_deltas_to_cg_edge): Likewise.
	(update_agg_deltas_for_stmt): Likewise.
	(ipa_analyze_bb_statements): Create BB and call statement aggregate
	deltas.
	(free_ipa_bb_info): Also free aggregate contents vectors.
	(analysis_dom_walker): Put into anonymous namespace.  Do not compute
	jump functions.
	(jfunc_builder_dom_walker): New class.
	(create_escape_structures): Initialize call_agg_deltas_map,
	agg_contents_pool and worklist fields.
	(free_escape_structures): Deallocate the same three data structures.
	(merge_agg_contents): New function.
	(propagate_agg_cnts_through_bb): Likewise.
	(propagate_agg_contents_accross_bbs): Likewise.
	(ipa_verify_escaped_flags_match): Likewise.
	(ipa_analyze_node): Propagate aggregate contents across the function
	and only then build jump functions.
	(kill_invalid_escaped_agg_jfs): New function.
	(ipa_spread_escapes): Call kill_invalid_escaped_agg_jfs on all call
	graph edges.
	(ipa_find_agg_cst_for_param): Ignore NULL values.
	(ipa_write_jump_function): Stream new fields size and only_unescaped.
	(ipa_read_jump_function): Likewise.
	(ipcp_transform_function): Initialize new func_body_info fields.
	* ipa-cp.c (propagate_aggs_accross_jump_function): Use size directly
	from the aggregate jump function, check it fits the constant.  Ignore
	NULL values.

testsuite/
	* gcc.dg/ipa/ipcp-agg-11.c: New test.
	* gcc.dg/ipa/ipcp-agg-12.c: Likewise.
	* gcc.dg/ipa/ipcp-agg-13.c: Likewise.
	* gcc.dg/ipa/ipcp-agg-14.c: Likewise.
	* g++.dg/ipa/devirt-24.C: Bump scan-times to two times.


Index: src/gcc/ipa-prop.c
===================================================================
--- src.orig/gcc/ipa-prop.c
+++ src/gcc/ipa-prop.c
@@ -83,6 +83,82 @@ struct param_aa_status
   bool parm_modified, ref_modified, pt_modified;
 };
 
+/* Simple linked list describing known contents of an aggregate.  */
+
+struct ipa_known_agg_contents_list
+{
+  /* Offset and size of the described part of the aggregate.  */
+  HOST_WIDE_INT offset, size;
+  /* Known constant value or NULL if the contents is known to be unknown.  */
+  tree constant;
+  /* Pointer to the next structure in the list.  */
+  struct ipa_known_agg_contents_list *next;
+
+  /* In chains describing BBs, this field is a BB local counter of
+     possibly-clobbering statements.  */
+  unsigned counter : 31;
+
+  /* In chains describing call-deltas, this is set if the contents is only
+     valid if the memory referenced is not escaped.  */
+  unsigned only_unescaped : 1;
+};
+
+/* Special value for an aggregate description during the phase propagating them
+   over BBs.  */
+
+#define AGG_CONTENTS_TOP ((struct ipa_known_agg_contents_list *) -1)
+
+/* Dump list P into file F in human readable form beginning each new line with
+   INDENT spaces.  */
+
+static void
+dump_agg_contents_list (FILE *f, struct ipa_known_agg_contents_list *p,
+			int indent)
+{
+  if (p == AGG_CONTENTS_TOP)
+    {
+      for (int i = 0; i < indent; ++i)
+	fputc (' ', f);
+      fprintf (f, "AGG_CONTENTS_TOP\n");
+      return;
+    }
+
+  if (!p)
+    {
+      for (int i = 0; i < indent; ++i)
+	fputc (' ', f);
+      fprintf (f, "EMPTY\n");
+      return;
+    }
+
+  while (p)
+    {
+      for (int i = 0; i < indent; ++i)
+	fputc (' ', f);
+      fprintf (f, "offset: " HOST_WIDE_INT_PRINT_DEC
+	       " size: " HOST_WIDE_INT_PRINT_DEC " ", p->offset, p->size);
+      if (p->constant)
+	{
+	  fprintf (f, "cst: ");
+	  print_generic_expr (f, p->constant, 0);
+	  fprintf (f, " (counter: %u, only_unesc: %u)\n",
+		   p->counter, p->only_unescaped);
+	}
+      else
+	fprintf (f, "VARIABLE\n");
+      p = p->next;
+    }
+}
+
+/* Dump P to stderr in human readable format.  */
+
+DEBUG_FUNCTION void
+debug_agg_contents_list (struct ipa_known_agg_contents_list *p)
+{
+  dump_agg_contents_list (stderr, p, 0);
+}
+
+
 /* Information related to a given BB that used only when looking at function
    body.  */
 
@@ -92,6 +168,15 @@ struct ipa_bb_info
   vec<cgraph_edge_p> cg_edges;
   /* Alias analysis statuses of each formal parameter at this bb.  */
   vec<param_aa_status> param_aa_statuses;
+
+  /* Aggregate contents of tracked references at the beginning of each BB.  */
+  vec<ipa_known_agg_contents_list *> begin_agg_cnt;
+
+  /* Changes in aggregate contents of tracked references.  */
+  vec<ipa_known_agg_contents_list *> agg_deltas;
+
+  /* Set when the associated BB is already present in the work list.  */
+  bool queued;
 };
 
 /* Structure used for intra-procedural escape analysis (and associated
@@ -119,6 +204,10 @@ struct ipa_escape
      one.  Zero means this structure will remain unused.  */
   int result_index;
 
+  /* Set when this structure represents a declaration rather than memory
+     pointed at by an SSA_NAME.  */
+  bool decl_p;
+
   /* True if we have already dealt with this SSA name.  Valid even if target is
      non-NULL.  */
   bool analyzed;
@@ -168,6 +257,15 @@ struct func_body_info
   /* Mapping from VAR_DECLS to escape information.  */
   pointer_map <ipa_escape *> *decl_escapes;
 
+  /* Mapping from call statements to aggregate content deltas.  */
+  pointer_map <vec <ipa_known_agg_contents_list *> > *call_agg_deltas_map;
+
+  /* Allocation pool for instances of ipa_known_agg_contents_list.  */
+  alloc_pool agg_contents_pool;
+
+  /* Propagation work list of basic blocks.  */
+  vec <basic_block> worklist;
+
   /* Number of parameters.  */
   int param_count;
 
@@ -423,6 +521,9 @@ ipa_print_node_jump_functions_for_edge (
 		   jump_func->agg.by_ref ? "reference" : "value");
 	  FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, j, item)
 	    {
+	      if (!item->value)
+		continue;
+
 	      fprintf (f, "           offset: " HOST_WIDE_INT_PRINT_DEC ", ",
 		       item->offset);
 	      if (TYPE_P (item->value))
@@ -433,6 +534,8 @@ ipa_print_node_jump_functions_for_edge (
 		  fprintf (f, "cst: ");
 		  print_generic_expr (f, item->value, 0);
 		}
+	      if (item->only_unescaped)
+		fprintf (f, " only_unescaped");
 	      fprintf (f, "\n");
 	    }
 	}
@@ -1666,19 +1769,6 @@ get_ssa_def_if_simple_copy (tree rhs)
   return rhs;
 }
 
-/* Simple linked list, describing known contents of an aggregate beforere
-   call.  */
-
-struct ipa_known_agg_contents_list
-{
-  /* Offset and size of the described part of the aggregate.  */
-  HOST_WIDE_INT offset, size;
-  /* Known constant value or NULL if the contents is known to be unknown.  */
-  tree constant;
-  /* Pointer to the next structure in the list.  */
-  struct ipa_known_agg_contents_list *next;
-};
-
 /* Find the proper place in linked list of ipa_known_agg_contents_list
    structures where to put a new one with the given LHS_OFFSET and LHS_SIZE,
    unless there is a partial overlap, in which case return NULL, or such
@@ -1729,7 +1819,11 @@ build_agg_jump_func_from_list (struct ip
 	  struct ipa_agg_jf_item item;
 	  item.offset = list->offset - arg_offset;
 	  gcc_assert ((item.offset % BITS_PER_UNIT) == 0);
+	  gcc_checking_assert (list->size
+			       == (HOST_WIDE_INT) (unsigned) list->size);
+	  item.size = (unsigned) list->size;
 	  item.value = unshare_expr_without_location (list->constant);
+	  item.only_unescaped = list->only_unescaped;
 	  jfunc->agg.items->quick_push (item);
 	}
       list = list->next;
@@ -1871,6 +1965,7 @@ determine_locally_known_aggregate_parts
       n = XALLOCA (struct ipa_known_agg_contents_list);
       n->size = lhs_size;
       n->offset = lhs_offset;
+      n->only_unescaped = false;
       if (is_gimple_ip_invariant (rhs))
 	{
 	  n->constant = rhs;
@@ -1898,6 +1993,28 @@ determine_locally_known_aggregate_parts
     }
 }
 
+/* Apply basic block DELTAS to INITial aggregate contents description.  */
+
+static struct ipa_known_agg_contents_list *
+apply_agg_contents_deltas (struct ipa_known_agg_contents_list *init,
+			   struct ipa_known_agg_contents_list *deltas)
+{
+  /* TODO: This is over-conservative but should work for Fortran descriptors.
+     Will be replaced in subsequent patches with real merging.  */
+
+  gcc_assert (init != AGG_CONTENTS_TOP);
+  if (deltas)
+    return deltas;
+  else
+    {
+#ifdef ENABLE_CHECKING
+      for (struct ipa_known_agg_contents_list *p = init; p; p = p->next)
+	gcc_assert (p->only_unescaped);
+#endif
+      return init;
+    }
+}
+
 static tree
 ipa_get_callee_param_type (struct cgraph_edge *e, int i)
 {
@@ -2103,19 +2220,30 @@ ipa_compute_jump_functions_for_edge (str
       if (!param_type)
 	param_type = TREE_TYPE (arg);
 
-      HOST_WIDE_INT dummy_offset;
-      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &dummy_offset);
+      HOST_WIDE_INT arg_offset;
+      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &arg_offset);
       int ref_index;
-      if (esc && valid_escape_result_index (esc, &ref_index))
+      if (esc && !esc->escaped && valid_escape_result_index (esc, &ref_index))
 	{
+	  bool build_agg_jfs;
 	  if (jfunc->type == IPA_JF_UNKNOWN)
-	    ipa_set_jf_unknown_ref_index (jfunc, ref_index);
+	    {
+	      ipa_set_jf_unknown_ref_index (jfunc, ref_index);
+	      build_agg_jfs = true;
+	    }
 	  else if (jfunc->type == IPA_JF_KNOWN_TYPE)
-	    ipa_set_jf_known_type_ref_index (jfunc, ref_index);
+	    {
+	      ipa_set_jf_known_type_ref_index (jfunc, ref_index);
+	      build_agg_jfs = true;
+	    }
 	  else if (jfunc->type == IPA_JF_CONST)
-	    ipa_set_jf_constant_ref_index (jfunc, ref_index);
+	    {
+	      ipa_set_jf_constant_ref_index (jfunc, ref_index);
+	      build_agg_jfs = true;
+	    }
 	  else
 	    {
+	      build_agg_jfs = false;
 	      gcc_checking_assert
 		(jfunc->type != IPA_JF_PASS_THROUGH
 		 || ipa_get_jf_pass_through_formal_id (jfunc) == ref_index);
@@ -2123,14 +2251,39 @@ ipa_compute_jump_functions_for_edge (str
 		(jfunc->type != IPA_JF_ANCESTOR
 		 || ipa_get_jf_ancestor_formal_id (jfunc) == ref_index);
 	    }
-	}
 
-      /* TODO: We should allow aggregate jump functions even for these types of
-	 jump functions but we need to be able to combine them first.  */
-      if (jfunc->type != IPA_JF_PASS_THROUGH
-	  && jfunc->type != IPA_JF_ANCESTOR
-	  && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
-	      || POINTER_TYPE_P (param_type)))
+	  /* TODO: We should allow aggregate jump functions even for other
+	     types of jump functions but we need to be able to combine them
+	     first.  */
+	  if (build_agg_jfs)
+	    {
+	      vec <ipa_known_agg_contents_list *> *dvec;
+	      dvec = fbi->call_agg_deltas_map->contains (cs->call_stmt);
+	      if (dvec)
+		{
+		  struct ipa_bb_info *bi;
+		  bi = ipa_get_bb_info (fbi, gimple_bb (cs->call_stmt));
+		  struct ipa_known_agg_contents_list *begin, *final, *p;
+		  begin = bi->begin_agg_cnt[ref_index];
+		  final = apply_agg_contents_deltas (begin, (*dvec)[n]);
+
+		  int const_count = 0;
+		  for (p = final; p; p = p->next)
+		    if (p->constant)
+		      const_count++;
+		  if (const_count)
+		    {
+		      jfunc->agg.by_ref = true;
+		      build_agg_jump_func_from_list (final, const_count,
+						     arg_offset, jfunc);
+		    }
+		}
+	    }
+	}
+      else if (jfunc->type != IPA_JF_PASS_THROUGH
+	       && jfunc->type != IPA_JF_ANCESTOR
+	       && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
+		   || POINTER_TYPE_P (param_type)))
 	determine_locally_known_aggregate_parts (call, arg, param_type, jfunc);
     }
 }
@@ -2520,18 +2673,6 @@ ipa_analyze_call_uses (struct func_body_
     ipa_analyze_virtual_call_uses (fbi, call, target);
 }
 
-
-/* Analyze the call statement STMT with respect to formal parameters (described
-   in INFO) of caller given by FBI->NODE.  Currently it only checks whether
-   formal parameters are called.  */
-
-static void
-ipa_analyze_stmt_uses (struct func_body_info *fbi, gimple stmt)
-{
-  if (is_gimple_call (stmt))
-    ipa_analyze_call_uses (fbi, stmt);
-}
-
 /* Callback of walk_stmt_load_store_addr_ops.  If OP is a parameter
    declaration, mark it as used in the info structure passed in DATA.  */
 
@@ -2552,6 +2693,186 @@ visit_ref_mark_it_used (gimple, tree op,
   return false;
 }
 
+/* Return true if there are any tracked references.  */
+
+static bool
+present_tracked_refs_p (struct func_body_info *fbi)
+{
+  return ipa_get_tracked_refs_count (fbi->info) != 0;
+}
+
+/* Mark all tracked references as escaped.  */
+
+static void
+escape_all_tracked_references (struct func_body_info *fbi)
+{
+  for (int i = 0; i < ipa_get_tracked_refs_count (fbi->info); ++i)
+    ipa_set_ref_escaped (fbi->info, i, 1);
+  unsigned int ui;
+  struct ipa_escape *esc;
+  FOR_EACH_VEC_ELT (fbi->escapes, ui, esc)
+    esc->escaped = true;
+}
+
+/* Copy aggregate deltas gathered at this point to the
+   FBI->call_agg_deltas_map.  FBI describes the current function and BI the
+   info of the current basic block.  CUR_COUNTER is the current value of a
+   BB-local counter of possibly clobbering statements.  */
+
+static void
+copy_aggg_deltas_to_cg_edge (struct func_body_info *fbi,
+			     struct ipa_bb_info *bi,
+			     unsigned cur_counter,
+			     gimple call)
+{
+  vec<ipa_known_agg_contents_list *> deltas = vNULL;
+  int arg_num = gimple_call_num_args (call);
+
+  for (int i = 0; i < arg_num; ++i)
+    {
+      ipa_known_agg_contents_list *list = NULL;
+      HOST_WIDE_INT dummy_offset;
+      int ri;
+      tree arg = gimple_call_arg (call, i);
+      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &dummy_offset);
+      if (esc && !esc->escaped
+	  && valid_escape_result_index (esc, &ri)
+	  && !bi->agg_deltas.is_empty ())
+	{
+	  ipa_known_agg_contents_list *o, **p = &list;
+	  for (o = bi->agg_deltas[ri]; o; o = o->next)
+	    {
+	      struct ipa_known_agg_contents_list *n;
+	      n = (struct ipa_known_agg_contents_list *)
+		pool_alloc (fbi->agg_contents_pool);
+	      n->offset = o->offset;
+	      n->size = o->size;
+	      n->constant = o->constant;
+	      n->only_unescaped = o->counter != cur_counter;
+	      n->next = NULL;
+	      *p = n;
+	      p = &n->next;
+	    }
+	}
+      deltas.safe_push (list);
+    }
+  *fbi->call_agg_deltas_map->insert (call) = deltas;
+}
+
+/* Update current aggregate offset deltas.  Return true if the statement might
+   have modified also an escaped memory reference.  */
+
+static bool
+update_agg_deltas_for_stmt (struct func_body_info *fbi,
+			    struct ipa_bb_info *bi,
+			    int *cnt_desc_counts,
+			    unsigned counter, gimple stmt)
+{
+  tree lhs, rhs;
+  HOST_WIDE_INT offset, size, max_size;
+  bool res;
+
+  if (!gimple_vdef (stmt) || !present_tracked_refs_p (fbi))
+    return false;
+
+  if (gimple_assign_single_p (stmt))
+    {
+      lhs = gimple_assign_lhs (stmt);
+      rhs = gimple_assign_rhs1 (stmt);
+      res = false;
+    }
+  else if (is_gimple_call (stmt))
+    {
+      lhs = gimple_call_lhs (stmt);
+      if (!lhs)
+	return true;
+      rhs = NULL_TREE;
+      res = true;
+    }
+  else
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, "Marking all tracked references in %s/%i "
+		   "because of statement: \n", fbi->node->name (),
+		   fbi->node->order);
+	  print_gimple_stmt (dump_file, stmt, 2, TDF_SLIM);
+	  fprintf (dump_file, "\n");
+	}
+      escape_all_tracked_references (fbi);
+      return true;
+    }
+
+  struct ipa_escape *esc = get_escape_for_ref (fbi, lhs, &offset, &size,
+					       &max_size);
+
+  int ri;
+  if (!esc || esc->escaped || !valid_escape_result_index (esc, &ri))
+    return true;
+
+  gcc_checking_assert (ri < ipa_get_tracked_refs_count (fbi->info));
+  if (max_size < 0 || size < 0
+      || TREE_CODE (lhs) == BIT_FIELD_REF
+      || contains_bitfld_component_ref_p (lhs))
+    {
+      /* TODO: Eventually this should only be a clobber on paths through the
+	 BB.  */
+      ipa_set_ref_escaped (fbi->info, ri, 1);
+      esc->escaped = true;
+      return true;
+    }
+
+  ipa_set_ref_clobbered (fbi->info, ri, 1);
+  if (bi->agg_deltas.is_empty ())
+    bi->agg_deltas.safe_grow_cleared (ipa_get_tracked_refs_count (fbi->info));
+
+  struct ipa_known_agg_contents_list *n, **p, **list = &bi->agg_deltas[ri];
+  bool already_there = false;
+  p = get_place_in_agg_contents_list (list, offset, max_size, &already_there);
+  if (!p)
+    {
+      /* TODO: Eventually this should only be a clobber on paths through the
+	 BB.  */
+      ipa_set_ref_escaped (fbi->info, ri, 1);
+      esc->escaped = true;
+      return true;
+    }
+
+  if (already_there)
+    n = *p;
+  else
+    {
+      cnt_desc_counts[ri]++;
+      if (cnt_desc_counts[ri] > PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
+	{
+	  /* TODO: Eventually this should only be a clobber on paths
+	     through the BB.  */
+	  ipa_set_ref_escaped (fbi->info, ri, 1);
+	  esc->escaped = true;
+	  return true;
+	}
+
+      n = (struct ipa_known_agg_contents_list *)
+	pool_alloc (fbi->agg_contents_pool);
+      n->size = max_size;
+      n->offset = offset;
+      n->next = *p;
+      *p = n;
+    }
+
+  if (rhs
+      && size == max_size
+      && is_gimple_reg_type (TREE_TYPE (rhs))
+      && is_gimple_ip_invariant (rhs))
+    n->constant = rhs;
+  else
+    n->constant = NULL_TREE;
+
+  n->counter = counter;
+  n->only_unescaped = true;
+  return res && esc->decl_p;
+}
+
 /* Scan the statements in BB and inspect the uses of formal parameters, escape
    analysis and so on.  FBI holds various data about the function being
    analyzed.  */
@@ -2559,6 +2880,24 @@ visit_ref_mark_it_used (gimple, tree op,
 static void
 ipa_analyze_bb_statements (struct func_body_info *fbi, basic_block bb)
 {
+  struct ipa_bb_info *bi = ipa_get_bb_info (fbi, bb);
+  unsigned clobbering_counter = 0;
+  int *cnt_desc_counts;
+
+  if (present_tracked_refs_p (fbi))
+    {
+      cnt_desc_counts = XALLOCAVEC (int,
+				    ipa_get_tracked_refs_count (fbi->info));
+      memset (cnt_desc_counts, 0,
+	      sizeof (int) * ipa_get_tracked_refs_count (fbi->info));
+
+      bi->begin_agg_cnt.safe_grow (ipa_get_tracked_refs_count (fbi->info));
+      for (int i = 0; i < ipa_get_tracked_refs_count (fbi->info); ++i)
+	bi->begin_agg_cnt[i] = AGG_CONTENTS_TOP;
+    }
+  else
+    cnt_desc_counts = NULL;
+
   gimple_stmt_iterator gsi;
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
@@ -2567,11 +2906,21 @@ ipa_analyze_bb_statements (struct func_b
       if (is_gimple_debug (stmt))
 	continue;
 
-      ipa_analyze_stmt_uses (fbi, stmt);
+      if (is_gimple_call (stmt))
+	{
+	  ipa_analyze_call_uses (fbi, stmt);
+	  if (present_tracked_refs_p (fbi))
+	    copy_aggg_deltas_to_cg_edge (fbi, bi, clobbering_counter, stmt);
+	}
+
+      if (update_agg_deltas_for_stmt (fbi, bi, cnt_desc_counts,
+				      clobbering_counter, stmt))
+	clobbering_counter++;
       walk_stmt_load_store_addr_ops (stmt, fbi->info,
 				     visit_ref_mark_it_used,
 				     visit_ref_mark_it_used,
 				     visit_ref_mark_it_used);
+
     }
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     walk_stmt_load_store_addr_ops (gsi_stmt (gsi), fbi->info,
@@ -2632,6 +2981,8 @@ free_ipa_bb_info (struct ipa_bb_info *bi
 {
   bi->cg_edges.release ();
   bi->param_aa_statuses.release ();
+  bi->begin_agg_cnt.release ();
+  bi->agg_deltas.release ();
 }
 
 /* Dominator walker driving the analysis.  */
@@ -2652,6 +3003,25 @@ void
 analysis_dom_walker::before_dom_children (basic_block bb)
 {
   ipa_analyze_bb_statements (m_fbi, bb);
+}
+
+/* Dominator walker driving jump_function creation.  */
+
+class jfunc_builder_dom_walker : public dom_walker
+{
+public:
+  jfunc_builder_dom_walker (struct func_body_info *fbi)
+    : dom_walker (CDI_DOMINATORS), m_fbi (fbi) {}
+
+  virtual void before_dom_children (basic_block);
+
+private:
+  struct func_body_info *m_fbi;
+};
+
+void
+jfunc_builder_dom_walker::before_dom_children (basic_block bb)
+{
   ipa_compute_jump_functions_for_bb (m_fbi, bb);
 }
 
@@ -2685,6 +3055,7 @@ static void
 analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
 		    struct ipa_escape *esc)
 {
+  gcc_assert (fbi->info->ref_descs.is_empty ());
   esc->analyzed = true;
   if (!POINTER_TYPE_P (TREE_TYPE (ssa)))
     {
@@ -2826,7 +3197,7 @@ analyze_all_ssa_escapes (struct func_bod
 	continue;
       struct ipa_escape *esc = &fbi->escapes[SSA_NAME_VERSION (ssa)];
       if (esc->analyzed)
-	return;
+	continue;
       analyze_ssa_escape (fbi, ssa, esc);
     }
 }
@@ -2863,6 +3234,15 @@ create_escape_structures (struct func_bo
   FOR_EACH_LOCAL_DECL (fbi->func, i, var)
     if (TREE_CODE (var) == VAR_DECL && TREE_ADDRESSABLE (var))
       *fbi->decl_escapes->insert (var) = &fbi->escapes[var_idx++];
+
+  for (unsigned j = SSANAMES (fbi->func)->length (); j < var_idx; ++j)
+    fbi->escapes[j].decl_p = true;
+
+  fbi->call_agg_deltas_map
+    = new pointer_map <vec <ipa_known_agg_contents_list *> >;
+  fbi->agg_contents_pool = create_alloc_pool ("IPA-PROP agg contents pool",
+			    sizeof (struct ipa_known_agg_contents_list),32);
+  fbi->worklist = vNULL;
 }
 
 /* Free escape analysis structures in the FBI.  */
@@ -2872,6 +3252,10 @@ free_escape_structures (struct func_body
 {
   fbi->escapes.release ();
   delete fbi->decl_escapes;
+  delete fbi->call_agg_deltas_map;
+  free_alloc_pool (fbi->agg_contents_pool);
+  gcc_checking_assert (fbi->worklist.is_empty ());
+  fbi->worklist.release ();
 }
 
 /* Go over call argument of CS and if any warrants a result_index for an escape
@@ -2929,6 +3313,113 @@ gather_picked_escapes (struct func_body_
     }
 }
 
+/* Merge aggregate contents FINAL with those in *TARGET.  Return true if those
+   in *TARGET have changed.  */
+
+static bool
+merge_agg_contents (struct ipa_known_agg_contents_list *final,
+		    struct ipa_known_agg_contents_list **target)
+{
+  /* TODO: This is over-conservative but should work for Fortran descriptors.
+     It will be replaced in subsequent patches by real merging.  */
+  if (*target == AGG_CONTENTS_TOP)
+    {
+      *target = final;
+      return true;
+    }
+  else if (*target != final)
+    {
+      if (*target)
+	{
+	  *target = NULL;
+	  return true;
+	}
+      else
+	return false;
+    }
+  return false;
+}
+
+/* Apply all computed aggregate deltas for the given BB and merge results into
+   corresponding aggregate contents description at the beginning of successors.
+   Enqueue all successors with changed information.  */
+
+static void
+propagate_agg_cnts_through_bb (struct func_body_info *fbi,
+			       basic_block bb, ipa_bb_info *bi)
+{
+  for (int i = 0; i < ipa_get_tracked_refs_count (fbi->info); ++i)
+  {
+    struct ipa_known_agg_contents_list *deltas, *final;
+    if (bi->agg_deltas.is_empty ())
+      deltas = NULL;
+    else
+      deltas = bi->agg_deltas[i];
+
+    final = apply_agg_contents_deltas (bi->begin_agg_cnt[i], deltas);
+
+    edge e;
+    edge_iterator ei;
+    FOR_EACH_EDGE (e, ei, bb->succs)
+      {
+	basic_block succ = e->dest;
+
+	if (succ->index == EXIT_BLOCK)
+	  continue;
+
+	ipa_bb_info *succ_info = ipa_get_bb_info (fbi, succ);
+
+	gcc_checking_assert (succ_info->begin_agg_cnt.length ()
+			     >= (unsigned) i);
+	if (merge_agg_contents (final, &succ_info->begin_agg_cnt[i])
+	    && !succ_info->queued)
+	  {
+	    succ_info->queued = true;
+	    fbi->worklist.safe_push (succ);
+	  }
+      }
+  }
+}
+
+/* Global propagation of aggregate contents across all BBs.  */
+
+static void
+propagate_agg_contents_accross_bbs (struct func_body_info *fbi)
+{
+  if (!present_tracked_refs_p (fbi))
+    return;
+
+  basic_block first = single_succ (ENTRY_BLOCK_PTR_FOR_FN (fbi->func));
+  struct ipa_bb_info *first_bi = ipa_get_bb_info (fbi, first);
+  for (int i = 0; i < ipa_get_tracked_refs_count (fbi->info); ++i)
+    first_bi->begin_agg_cnt[i] = NULL;
+  propagate_agg_cnts_through_bb (fbi, first, first_bi);
+
+  while (!fbi->worklist.is_empty ())
+    {
+      basic_block bb = fbi->worklist.pop ();
+      struct ipa_bb_info *bi = ipa_get_bb_info (fbi, bb);
+      bi->queued = false;
+      propagate_agg_cnts_through_bb (fbi, bb, bi);
+    }
+}
+
+/* Verify that the corresponding escaped flags in ipa_escape structures and in
+   reference descriptors match.  */
+
+DEBUG_FUNCTION void
+ipa_verify_escaped_flags_match (struct func_body_info *fbi)
+{
+  int i;
+  struct ipa_escape *esc;
+  FOR_EACH_VEC_ELT (fbi->escapes, i, esc)
+    {
+      int ridx;
+      if (valid_escape_result_index (esc, &ridx))
+	gcc_assert (esc->escaped == ipa_is_ref_escaped (fbi->info, ridx));
+    }
+}
+
 /* Initialize the array describing properties of of formal parameters
    of NODE, analyze their uses and compute jump functions associated
    with actual arguments of calls from within NODE.  */
@@ -3002,6 +3493,12 @@ ipa_analyze_node (struct cgraph_node *no
 
   gather_picked_escapes (&fbi, ri);
   analysis_dom_walker (&fbi).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+  propagate_agg_contents_accross_bbs (&fbi);
+  jfunc_builder_dom_walker (&fbi).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+#ifdef ENABLE_CHECKING
+  ipa_verify_escaped_flags_match (&fbi);
+#endif
 
   free_escape_structures (&fbi);
   int i;
@@ -3211,6 +3708,38 @@ spread_escapes_down (struct escape_sprea
     }
 }
 
+/* Invalidate all aggregate jump function items in all jump functions
+   associated with CS that are marked as valid only when the reference is
+   unescaped, if the reference has actually escaped or has been found to be
+   modified by a callee.  */
+
+static void
+kill_invalid_escaped_agg_jfs (struct ipa_node_params *info,
+			      struct cgraph_edge *cs)
+{
+  struct ipa_edge_args *args = IPA_EDGE_REF (cs);
+  int count = ipa_get_cs_argument_count (args);
+
+  for (int i = 0; i < count; i++)
+    {
+      struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
+      if (!jf->agg.items)
+	continue;
+
+      int origin = escape_origin_from_jfunc (jf);
+      if (origin >= 0
+	  && !ipa_is_ref_escaped (info, origin)
+	  && !ipa_is_ref_callee_clobbered (info, origin))
+	continue;
+
+      int j;
+      struct ipa_agg_jf_item *ai;
+      FOR_EACH_VEC_ELT (*jf->agg.items, j, ai)
+	if (ai->only_unescaped)
+	    ai->value = NULL_TREE;
+    }
+}
+
+
 /* Spread escape flags through jump functions accross the call graph.  */
 
 void
@@ -3275,6 +3804,13 @@ ipa_spread_escapes ()
 	    if (!ipa_is_ref_clobbered (info, i))
 	      cgraph_set_param_noclobber (node, i);
 	  }
+
+      for (struct cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
+	kill_invalid_escaped_agg_jfs (info, cs);
+      for (struct cgraph_edge *cs = node->indirect_calls;
+	   cs;
+	   cs = cs->next_callee)
+	kill_invalid_escaped_agg_jfs (info, cs);
     }
 }
 
@@ -3626,6 +4162,8 @@ ipa_find_agg_cst_for_param (struct ipa_a
   FOR_EACH_VEC_SAFE_ELT (agg->items, i, item)
     if (item->offset == offset)
       {
+	if (!item->value)
+	  return NULL;
 	/* Currently we do not have clobber values, return NULL for them once
 	   we do.  */
 	gcc_checking_assert (is_gimple_ip_invariant (item->value));
@@ -5374,6 +5912,11 @@ ipa_write_jump_function (struct output_b
     {
       streamer_write_uhwi (ob, item->offset);
       stream_write_tree (ob, item->value, true);
+      streamer_write_uhwi (ob, item->size);
+
+      bp = bitpack_create (ob->main_stream);
+      bp_pack_value (&bp, item->only_unescaped, 1);
+      streamer_write_bitpack (&bp);
     }
 }
 
@@ -5478,6 +6021,11 @@ ipa_read_jump_function (struct lto_input
       struct ipa_agg_jf_item item;
       item.offset = streamer_read_uhwi (ib);
       item.value = stream_read_tree (ib, data_in);
+      item.size = streamer_read_uhwi (ib);
+
+      struct bitpack_d bp = streamer_read_bitpack (ib);
+      item.only_unescaped = bp_unpack_value (&bp, 1);
+
       jump_func->agg.items->quick_push (item);
     }
 }
@@ -6136,6 +6684,9 @@ ipcp_transform_function (struct cgraph_n
   fbi.aa_walked = 0;
   fbi.escapes = vNULL;
   fbi.decl_escapes = NULL;
+  fbi.call_agg_deltas_map = NULL;
+  fbi.agg_contents_pool = NULL;
+  fbi.worklist = vNULL;
 
   descriptors.safe_grow_cleared (param_count);
   ipa_populate_param_decls (node, descriptors);
Index: src/gcc/ipa-cp.c
===================================================================
--- src.orig/gcc/ipa-cp.c
+++ src/gcc/ipa-cp.c
@@ -1390,17 +1390,23 @@ propagate_aggs_accross_jump_function (st
 
       FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
 	{
-	  HOST_WIDE_INT val_size;
-
 	  if (item->offset < 0)
 	    continue;
-	  gcc_checking_assert (is_gimple_ip_invariant (item->value));
-	  val_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (item->value)));
 
-	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
+	  if (merge_agg_lats_step (dest_plats, item->offset,
+				   (HOST_WIDE_INT) item->size,
 				   &aglat, pre_existing, &ret))
 	    {
-	      ret |= add_value_to_lattice (*aglat, item->value, cs, NULL, 0, 0);
+	      if (item->value)
+		{
+		  gcc_checking_assert (is_gimple_ip_invariant (item->value));
+		  gcc_checking_assert (item->size == tree_to_uhwi
+				       (TYPE_SIZE (TREE_TYPE (item->value))));
+		  ret |= add_value_to_lattice (*aglat, item->value, cs, NULL,
+					       0, 0);
+		}
+	      else
+		ret |= set_lattice_contains_variable (*aglat);
 	      aglat = &(*aglat)->next;
 	    }
 	  else if (dest_plats->aggs_bottom)
Index: src/gcc/ipa-prop.h
===================================================================
--- src.orig/gcc/ipa-prop.h
+++ src/gcc/ipa-prop.h
@@ -178,8 +178,15 @@ struct GTY(()) ipa_agg_jf_item
   /* The offset at which the known value is located within the aggregate.  */
   HOST_WIDE_INT offset;
 
-  /* The known constant or type if this is a clobber.  */
+  /* The known constant or NULL if the entry has been invalidated after
+     creation or is a clobber.  */
   tree value;
+
+  /* Size of the constant.  */
+  unsigned size;
+
+  /* Set if the value is only valid if the referring pointer is unescaped.  */
+  unsigned only_unescaped : 1;
 };
 
 
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+volatile int g;
+
+static void __attribute__ ((noinline))
+bar (int *i)
+{
+  g = *i;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i = 8;
+
+  bar (&i);
+  bar (&i);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of bar.*for all known contexts" "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-12.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-12.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+volatile int g;
+static int *e;
+
+static void __attribute__ ((noinline))
+foo (int *i)
+{
+  e = i;
+}
+
+static void __attribute__ ((noinline))
+bar (int *i)
+{
+  g = *i;
+  foo (i);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i = 8;
+
+  bar (&i);
+  bar (&i);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump-not "Creating a specialized node" "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-13.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-13.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+volatile int g;
+
+static void __attribute__ ((noinline))
+foo (int *i)
+{
+  *i = 16;
+}
+
+static void __attribute__ ((noinline))
+bar (int *i)
+{
+  g = *i;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i = 8;
+
+  bar (&i);
+  foo (&i);
+  bar (&i);
+
+  return 0;
+}
+/* { dg-final { scan-ipa-dump-not "Creating a specialized node" "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-14.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-14.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+volatile int g;
+
+static void __attribute__ ((noinline))
+bar (int *i)
+{
+  g = *i;
+}
+
+static int __attribute__ ((noinline, noclone))
+something_unpredictable (void)
+{
+  return 1;
+}
+
+
+int
+main (int argc, char **argv)
+{
+  int i = 8;
+
+  if (something_unpredictable ())
+    g = 6;
+
+  bar (&i);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of bar.*for all known contexts" "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */
Index: src/gcc/testsuite/g++.dg/ipa/devirt-24.C
===================================================================
--- src.orig/gcc/testsuite/g++.dg/ipa/devirt-24.C
+++ src/gcc/testsuite/g++.dg/ipa/devirt-24.C
@@ -38,5 +38,5 @@ C *b = new (C);
 }
 /* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known target" 1 "inline"  } } */
 /* { dg-final { cleanup-ipa-dump "inline" } } */
-/* { dg-final { scan-ipa-dump-times "Aggregate passed by reference" 1 "cp"  } } */
+/* { dg-final { scan-ipa-dump-times "Aggregate passed by reference" 2 "cp"  } } */
 /* { dg-final { cleanup-ipa-dump "cp" } } */

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 3/7] IPA-CP escape and clobber analysis
  2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
  2014-05-21 13:31 ` [PATCH 4/7] Break up determine_known_aggregate_parts Martin Jambor
  2014-05-21 13:31 ` [PATCH 6/7] Real aggregate contents merge and application of deltas Martin Jambor
@ 2014-05-21 13:31 ` Martin Jambor
  2014-05-21 14:51   ` Richard Biener
  2014-05-21 13:31 ` [PATCH 2/7] Analyze BBs in DOM order in ipa-prop.c Martin Jambor
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Martin Jambor @ 2014-05-21 13:31 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: agg-tracking-first.diff --]
[-- Type: text/plain, Size: 91879 bytes --]

Hi,

this patch is rather big but not overly complicated.  Its goal is to
figure out whether data passed to a function by reference escapes
(somewhere, not necessarily in that particular function) and is
potentially clobbered (in that one function or its callees).

The result is stored into call graph node global structure, at least
for now, because it is supposed to live longer than IPA-CP
optimization info and be available for PTA later in the pipeline.
Before that, however, quite a lot of intermediate results are stored
in a number of places.  First of all, there is a vector describing all
SSA names and address taken local aggregates which is used to figure
out relations between them and do the local escape and clobber
analysis (I am aware that a local aggregate might incorrectly pass as
non-clobbered, that is fixed by the fifth patch, this one is big
enough as it is and it does not really matter here).

We then store the local results describing formal parameters and
so-far-presumed-unescaped aggregates and malloced data that is passed
as actual arguments to other functions into a new vector ref_descs.  I
did not store this into the existing descriptors vector because there
are often more elements.  Also, I had to extend the UNKNOWN,
KNOWN_TYPE and CONSTANT jump functions with an index into this new
vector (PASS_THROUGH and ANCESTOR reuse the index into parameters), so
there is quite a lot of new getter and setter methods.

This information is used by simple queue based interprocedural
propagation.  Eventually, the information is stored into the call
graph node, as described above.  After propagation, data in ref_descs
and in the call graph are the same, only the call graph can live much
longer.  One set of flags that is not copied to call graph nodes is the
set of callee_clobbered flags, which only IPA-CP uses in a subsequent
patch (and which would require maintenance during inlining).

There are more uses of the flags introduced by subsequent patches.  In
this one, the only one is that IPA-CP modification phase is able to
use the results instead of querying AA and is capable of doing more
replacements of aggregate values when the aggregate is unescaped and
not clobbered.

The following table summarizes what the pass can discover now.  All
compilations are with -Ofast -flto.  (I should have counted only
pointer typed parameters but well, that thought occurred to me too
late.  All non-pointer ones are automatically considered clobbered.)
Please note that in Fortran benchmarks, this information is often
already available through fnspec flags.  But we can discover a few
more (see the last patch for some more information).

 |                    |        |          |       |           |       |    Callee |       |
 | Test               | Params | Noescape |     % | Noclobber |     % | noclobber |     % |
 |                    |        |          |       |           |       |           |       |
 |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
 | FF libxul.so       | 462725 |    10422 |  2.25 |      4954 |  1.07 |      8872 |  1.92 |
 | Tramp 3D           |   6344 |     1019 | 16.06 |       985 | 15.53 |      1005 | 15.84 |
 |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
 | perlbench          |   2550 |       87 |  3.41 |        10 |  0.39 |        61 |  2.39 |
 | bzip               |    194 |       28 | 14.43 |         1 |  0.52 |        13 |  6.70 |
 | gcc                |  10725 |      179 |  1.67 |        18 |  0.17 |       147 |  1.37 |
 | mcf                |     57 |        4 |  7.02 |         0 |  0.00 |         4 |  7.02 |
 | gobmk              |   8873 |      132 |  1.49 |         3 |  0.03 |        85 |  0.96 |
 | hmmer              |    643 |       71 | 11.04 |         8 |  1.24 |        64 |  9.95 |
 | sjeng              |    161 |        5 |  3.11 |         0 |  0.00 |         5 |  3.11 |
 | libquantum         |    187 |       48 | 25.67 |         6 |  3.21 |        14 |  7.49 |
 | h264ref            |   1092 |       48 |  4.40 |         4 |  0.37 |        47 |  4.30 |
 | astar              |    217 |       28 | 12.90 |         3 |  1.38 |        15 |  6.91 |
 | xalancbmk          |  28861 |      737 |  2.55 |       536 |  1.86 |       712 |  2.47 |
 |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
 | bwaves             |     74 |       35 | 47.30 |        25 | 33.78 |        35 | 47.30 |
 | gamess             |  26059 |     3693 | 14.17 |      2796 | 10.73 |      3572 | 13.71 |
 | milc               |    429 |       22 |  5.13 |        11 |  2.56 |        22 |  5.13 |
 | zeusmp             |    284 |       31 | 10.92 |         2 |  0.70 |        31 | 10.92 |
 | gromacs            |   5514 |      230 |  4.17 |        54 |  0.98 |       202 |  3.66 |
 | cactusADM          |   2354 |       49 |  2.08 |        13 |  0.55 |        44 |  1.87 |
 | leslie3d           |     18 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
 | namd               |    163 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
 | soplex             |   2341 |       80 |  3.42 |        10 |  0.43 |        55 |  2.35 |
 | povray             |   4046 |      244 |  6.03 |        51 |  1.26 |       201 |  4.97 |
 | calculix           |   6260 |     1109 | 17.72 |       672 | 10.73 |       933 | 14.90 |
 | GemsFDTD           |    289 |       41 | 14.19 |        27 |  9.34 |        32 | 11.07 |
 | tonto              |   7255 |     1361 | 18.76 |      1178 | 16.24 |      1329 | 18.32 |
 | lbm                |     27 |        4 | 14.81 |         3 | 11.11 |         4 | 14.81 |
 | wrf                |  14212 |     4375 | 30.78 |      3358 | 23.63 |      4120 | 28.99 |
 | sphinx3            |    770 |       16 |  2.08 |         1 |  0.13 |        15 |  1.95 |
 |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
 | ac.f90             |     21 |       14 | 66.67 |         7 | 33.33 |        14 | 66.67 |
 | aermod.f90         |    600 |      134 | 22.33 |        59 |  9.83 |       124 | 20.67 |
 | air.f90            |     85 |       41 | 48.24 |        14 | 16.47 |        41 | 48.24 |
 | capacita.f90       |     42 |       18 | 42.86 |        16 | 38.10 |        18 | 42.86 |
 | channel2.f90       |     12 |        4 | 33.33 |         4 | 33.33 |         4 | 33.33 |
 | doduc.f90          |    132 |       68 | 51.52 |        39 | 29.55 |        68 | 51.52 |
 | fatigue2.f90       |     65 |       43 | 66.15 |        20 | 30.77 |        43 | 66.15 |
 | gas_dyn2.f90       |     97 |       22 | 22.68 |         6 |  6.19 |        21 | 21.65 |
 | induct2.f90        |    121 |       41 | 33.88 |        24 | 19.83 |        41 | 33.88 |
 | linpk.f90          |     42 |       10 | 23.81 |         7 | 16.67 |        10 | 23.81 |
 | mdbx.f90           |     51 |       26 | 50.98 |         9 | 17.65 |        26 | 50.98 |
 | mp_prop_design.f90 |      2 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
 | nf.f90             |     41 |        8 | 19.51 |         8 | 19.51 |         8 | 19.51 |
 | protein.f90        |    116 |       40 | 34.48 |        25 | 21.55 |        35 | 30.17 |
 | rnflow.f90         |    212 |       54 | 25.47 |        37 | 17.45 |        51 | 24.06 |
 | test_fpu2.f90      |    160 |       22 | 13.75 |        14 |  8.75 |        18 | 11.25 |
 | tfft2.f90          |      7 |        3 | 42.86 |         0 |  0.00 |         3 | 42.86 |

I hope to improve the results for example by propagating malloc
attribute to callers.

I have bootstrapped and tested this on x86_64, additionally I also
checked it passes an LTO-bootstrap and LTO-built Firefox.  I assume
there will be many comments but after I address them, I'd like to
commit this to trunk.

Thanks,

Martin


2014-04-30  Martin Jambor  <mjambor@suse.cz>

	* cgraph.h (cgraph_global_info): New fields noescape_parameters
	and noclobber_parameters.
	(cgraph_param_noescape_p): Declare.
	(cgraph_set_param_noescape): Likewise.
	(cgraph_param_noclobber_p): Likewise.
	(cgraph_set_param_noclobber): Likewise.
	* ipa-prop.h (ipa_unknown_data): New type.
	(ipa_known_type_data): New fields escape_ref_valid and
	escape_ref_index.
	(ipa_constant_data): Likewise.
	(jump_func_value): New field unknown.
	(ipa_get_jf_unknown_esc_ref_valid): New function.
	(ipa_get_jf_unknown_esc_ref_index): Likewise.
	(ipa_get_jf_known_type_esc_ref_valid): Likewise.
	(ipa_get_jf_known_type_esc_ref_index): Likewise.
	(ipa_get_jf_constant_esc_ref_valid): Likewise.
	(ipa_get_jf_constant_esc_ref_index): Likewise.
	(ipa_ref_descriptor): New type.
	(ipa_node_params): New fields ref_descs and node_up_enqueued.
	(ipa_is_ref_escaped): New function.
	(ipa_is_ref_clobbered): Likewise.
	(ipa_is_ref_callee_clobbered): Likewise.
	(ipa_is_param_ref_safely_constant): Likewise.
	(ipa_spread_escapes): Declare.
	* ipa-prop.c: Include stringpool.h, tree-ssanames.h and pointer-set.h.
	(ipa_escape): New type.
	(valid_escape_result_index): New function.
	(func_body_info): New fields func, escapes and decl_escapes.
	(ipa_print_node_jump_functions_for_edge): Dump new fields.
	(ipa_set_jf_unknown): New function.  Use it instead of directly
	setting a jump functions type elsewhere.
	(ipa_set_jf_unknown_copy): New function.
	(ipa_set_jf_unknown_ref_index): Likewise.
	(ipa_set_jf_known_type_copy): Likewise.
	(ipa_set_jf_known_type): Initialize new fields.
	(ipa_set_jf_known_type_ref_index): New function.
	(ipa_set_jf_constant): Initialize new fields.
	(ipa_set_jf_constant_ref_index): New function.
	(ipa_get_tracked_refs_count): Likewise.
	(ipa_set_ref_clobbered): Likewise.
	(ipa_get_tracked_refs_count): Likewise.
	(ipa_set_ref_escaped): Likewise.
	(ipa_set_ref_clobbered): Likewise.
	(ipa_set_ref_callee_clobbered): Likewise.
	(ipa_load_from_parm_agg_1): Use const_ref parameter flag.
	(get_escape_for_ref): New function.
	(get_escape_for_value): Likewise.
	(ipa_compute_jump_functions_for_edge): Add reference info to jump
	functions.  Wrapped comments to 80 columns, added a checking assert
	all jump functions start with no information.
	(visit_ref_for_mod_analysis): Renamed to visit_ref_mark_it_used.
	Simplified comment.
	(ipa_analyze_params_uses_in_bb): Renamed to ipa_analyze_bb_statements.
	Simplified comment.
	(analyze_phi_escapes): New function.
	(analyze_ssa_escape): Likewise.
	(analyze_all_ssa_escapes): Likewise.
	(create_escape_structures): Likewise.
	(free_escape_structures): Likewise.
	(pick_escapes_from_call): Likewise.
	(gather_picked_escapes): Likewise.
	(ipa_analyze_node): Initialize and deinitialize new fbi fields and
	escape structures, call create_escape_structures,
	analyze_all_ssa_escapes and pick_escapes_from_call, assign ref indices
	to formal parameters.
	(escape_spreading_data): New type.
	(enque_to_propagate_escapes_up): New function.
	(enque_to_propagate_escapes_down): Likewise.
	(escape_origin_from_jfunc): Likewise.
	(spread_escapes_up_from_one_alias): Likewise.
	(spread_escapes_up): Likewise.
	(spread_escapes_down): Likewise.
	(ipa_spread_escapes): Likewise.
	(make_unknown_jf_from_known_type_jf): Likewise.
	(combine_known_type_and_ancestor_jfs): Also update ref index fields.
	Switch arguments for consistency, changed the one caller.
	(update_jump_functions_after_inlining): Also update ref index fields,
	make use of unescaped info.
	(update_indirect_edges_after_inlining): Make use of unescaped info.
	(ipa_free_node_params_substructures): Free also ref_desc vector.
	(ipa_node_duplication_hook): Also copy reference descriptor vector and
	const_refs.
	(ipa_print_node_params): Also print reference flags.
	(ipa_write_jump_function): Stream new fields.
	(ipa_read_jump_function): Likewise.
	(ipa_write_node_info): Stream reference description.
	(ipa_read_node_info): Likewise, also clear new flag node_up_enqueued.
	(read_agg_replacement_chain): Whitespace fix.
	(adjust_agg_replacement_values): Also assign const_refs in descriptors
	from those in tranformation data.
	(ipcp_transform_function): Initialize new fields of fbi.
	* ipa-cp.c (agg_pass_through_permissible_p): Make use of the new
	escape information.  Accept caller_infom as a parameter, updated all
	callers.
	(propagate_aggs_accross_jump_function): Make use of the new escape
	information.
	(intersect_aggregates_with_edge): Bail out early if a pass_through
	jump function does not allow passing aggregates.  Make use of the new
	escape information.  Allow NULL values in aggregate jump functions.
	(ipcp_driver): Call spread_escapes.
	* ipa-inline.c (ipa_inline): Call spread_escapes if necessary.
	* cgraph.c (cgraph_param_noescape_p): New function.
	(cgraph_set_param_noescape): Likewise.
	(cgraph_param_noclobber_p): Likewise.
	(cgraph_set_param_noclobber): Likewise.
	* cgraphclones.c (duplicate_thunk_for_node): Assert that noclone and
	noescape bitmaps are NULL.
	(copy_noescape_noclobber_bitmaps): New function.
	(cgraph_clone_node): Copy noescpae and noclobber bitmaps.
	(cgraph_copy_node_for_versioning): Likewise.
	* lto-cgraph.c (output_param_bitmap): Likewise.
	(output_node_opt_summary): Use it to stream args_to_skip,
	combined_args_to_skip, noescape_parameters and noclobber_parameters
	bitmaps.
	(input_param_bitmap): New function.
	(input_node_opt_summary): Use it to stream args_to_skip,
	combined_args_to_skip, noescape_parameters and noclobber_parameters
	bitmaps.
	* tree-inline.c (update_noescape_noclobber_bitmaps): New function.
	(tree_function_versioning): Call it.

testsuite/
	* gcc.dg/ipa/ipcp-agg-10.c: New test.

Index: src/gcc/ipa-prop.c
===================================================================
--- src.orig/gcc/ipa-prop.c
+++ src/gcc/ipa-prop.c
@@ -43,6 +43,8 @@ along with GCC; see the file COPYING3.
 #include "gimple-ssa.h"
 #include "tree-cfg.h"
 #include "tree-phinodes.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
 #include "ssa-iterators.h"
 #include "tree-into-ssa.h"
 #include "tree-dfa.h"
@@ -60,6 +62,7 @@ along with GCC; see the file COPYING3.
 #include "stringpool.h"
 #include "tree-ssanames.h"
 #include "domwalk.h"
+#include "pointer-set.h"
 
 /* Intermediate information that we get from alias analysis about a particular
    parameter in a particular basic_block.  When a parameter or the memory it
@@ -91,11 +94,64 @@ struct ipa_bb_info
   vec<param_aa_status> param_aa_statuses;
 };
 
+/* Structure used for intra-procedural escape analysis (and associated
+   memory-write detection).  When analyzing function body, we have one for each
+   SSA name and for all address-taken local declarations.  */
+
+struct ipa_escape
+{
+  /* If target is non-NULL, this is the offset relative to the reference
+     described by target.  */
+  HOST_WIDE_INT offset;
+
+  /* If this describes (a part of) data described by other ipa_escape
+     structure, target is non-NULL.  In that case, that structure should be
+     used instead of this one and unless explicitly noted, other fields are
+     meaningless.  */
+  struct ipa_escape *target;
+
+  /* The last seen edge that had a reference to this data among its parameters.
+     Used to make sure we do not pass the same data in two different
+     arguments.  */
+  struct cgraph_edge *last_seen_cs;
+
+  /* Index of the bool slot where the analyzed flag is going to end up plus
+     one.  Zero means this structure will remain unused.  */
+  int result_index;
+
+  /* True if we have already dealt with this SSA name.  Valid even if target is
+     non-NULL.  */
+  bool analyzed;
+
+  /* Could the address of the data have escaped?  */
+  bool escaped;
+
+  /* Flag set when an SSA name has been used as a base for a memory write.
+     Only valid when the SSA name is not considered escaped, otherwise it might
+     be incorrectly cleared.  */
+  bool write_base;
+};
+
+/* If ESC has a valid (i.e. non-zero) result_index, return true and store the
+   directly usable (i.e. decremented) index to *INDEX.  */
+
+static inline bool
+valid_escape_result_index (struct ipa_escape *esc, int *index)
+{
+  if (esc->result_index == 0)
+    return false;
+  *index = esc->result_index - 1;
+  return true;
+}
+
 /* Structure with global information that is only used when looking at function
    body. */
 
 struct func_body_info
 {
+  /* Struct function of the function that is being analyzed.  */
+  struct function *func;
+
   /* The node that is being analyzed.  */
   cgraph_node *node;
 
@@ -105,6 +161,13 @@ struct func_body_info
   /* Information about individual BBs. */
   vec<ipa_bb_info> bb_infos;
 
+  /* Escape analysis information for SSA flags and local addressable
+     declarations.  */
+  vec<ipa_escape> escapes;
+
+  /* Mapping from VAR_DECLS to escape information.  */
+  pointer_map <ipa_escape *> *decl_escapes;
+
   /* Number of parameters.  */
   int param_count;
 
@@ -282,7 +345,14 @@ ipa_print_node_jump_functions_for_edge (
 
       fprintf (f, "       param %d: ", i);
       if (type == IPA_JF_UNKNOWN)
-	fprintf (f, "UNKNOWN\n");
+	{
+	  fprintf (f, "UNKNOWN");
+	  if (ipa_get_jf_unknown_esc_ref_valid (jump_func))
+	    fprintf (f, ", escape ref: %i\n",
+		     ipa_get_jf_unknown_esc_ref_index (jump_func));
+	  else
+	    fprintf (f, "\n");
+	}
       else if (type == IPA_JF_KNOWN_TYPE)
 	{
 	  fprintf (f, "KNOWN TYPE: base  ");
@@ -290,6 +360,9 @@ ipa_print_node_jump_functions_for_edge (
 	  fprintf (f, ", offset "HOST_WIDE_INT_PRINT_DEC", component ",
 		   jump_func->value.known_type.offset);
 	  print_generic_expr (f, jump_func->value.known_type.component_type, 0);
+	  if (ipa_get_jf_known_type_esc_ref_valid (jump_func))
+	    fprintf (f, ", escape ref: %i",
+		     ipa_get_jf_known_type_esc_ref_index (jump_func));
 	  fprintf (f, "\n");
 	}
       else if (type == IPA_JF_CONST)
@@ -304,6 +377,9 @@ ipa_print_node_jump_functions_for_edge (
 	      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (val, 0)),
 				  0);
 	    }
+	  if (ipa_get_jf_constant_esc_ref_valid (jump_func))
+	    fprintf (f, ", escape ref: %i",
+		     ipa_get_jf_constant_esc_ref_index (jump_func));
 	  fprintf (f, "\n");
 	}
       else if (type == IPA_JF_PASS_THROUGH)
@@ -430,6 +506,39 @@ ipa_print_all_jump_functions (FILE *f)
     }
 }
 
+/* Set JFUNC to be an unknown jump function with invalid reference index.  */
+
+static void
+ipa_set_jf_unknown (struct ipa_jump_func *jfunc)
+{
+  jfunc->type = IPA_JF_UNKNOWN;
+  jfunc->value.unknown.escape_ref_valid = false;
+}
+
+/* Set DST to be a copy of another unknown jump function SRC.  */
+
+static void
+ipa_set_jf_unknown_copy (struct ipa_jump_func *dst,
+			 struct ipa_jump_func *src)
+
+{
+  gcc_checking_assert (src->type == IPA_JF_UNKNOWN);
+  dst->type = IPA_JF_UNKNOWN;
+  dst->value.unknown = src->value.unknown;
+}
+
+/* Set reference description of unknown JFUNC to be valid and referring to
+   INDEX.  */
+
+static void
+ipa_set_jf_unknown_ref_index (struct ipa_jump_func *jfunc, int index)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
+  gcc_checking_assert (index >= 0);
+  jfunc->value.unknown.escape_ref_valid = true;
+  jfunc->value.unknown.escape_ref_index = index;
+}
+
 /* Set JFUNC to be a known type jump function.  */
 
 static void
@@ -445,11 +554,37 @@ ipa_set_jf_known_type (struct ipa_jump_f
   jfunc->value.known_type.offset = offset,
   jfunc->value.known_type.base_type = base_type;
   jfunc->value.known_type.component_type = component_type;
+  jfunc->value.known_type.escape_ref_valid = false;
+  jfunc->value.known_type.escape_ref_index = 0;
   gcc_assert (component_type);
 }
 
-/* Set JFUNC to be a copy of another jmp (to be used by jump function
-   combination code).  The two functions will share their rdesc.  */
+/* Set reference description of known_type JFUNC to be valid and referring to
+   INDEX.  */
+
+static void
+ipa_set_jf_known_type_ref_index (struct ipa_jump_func *jfunc, int index)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
+  gcc_checking_assert (index >= 0);
+  jfunc->value.known_type.escape_ref_valid = true;
+  jfunc->value.known_type.escape_ref_index = index;
+}
+
+/* Set DST to be a copy of another known type jump function SRC.  */
+
+static void
+ipa_set_jf_known_type_copy (struct ipa_jump_func *dst,
+			    struct ipa_jump_func *src)
+
+{
+  gcc_checking_assert (src->type == IPA_JF_KNOWN_TYPE);
+  dst->type = IPA_JF_KNOWN_TYPE;
+  dst->value.known_type = src->value.known_type;
+}
+
+/* Set DST to be a copy of another constant jump function SRC.  The two
+   functions will share their rdesc.  */
 
 static void
 ipa_set_jf_cst_copy (struct ipa_jump_func *dst,
@@ -472,6 +607,8 @@ ipa_set_jf_constant (struct ipa_jump_fun
     SET_EXPR_LOCATION (constant, UNKNOWN_LOCATION);
   jfunc->type = IPA_JF_CONST;
   jfunc->value.constant.value = unshare_expr_without_location (constant);
+  jfunc->value.constant.escape_ref_valid = false;
+  jfunc->value.constant.escape_ref_index = 0;
 
   if (TREE_CODE (constant) == ADDR_EXPR
       && TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL)
@@ -491,6 +628,19 @@ ipa_set_jf_constant (struct ipa_jump_fun
     jfunc->value.constant.rdesc = NULL;
 }
 
+/* Set reference description of constant JFUNC to be valid and referring to
+   INDEX.  */
+
+static void
+ipa_set_jf_constant_ref_index (struct ipa_jump_func *jfunc, int index)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
+  gcc_checking_assert (index >= 0);
+  jfunc->value.constant.escape_ref_valid = true;
+  jfunc->value.constant.escape_ref_index = index;
+}
+
+
 /* Set JFUNC to be a simple pass-through jump function.  */
 static void
 ipa_set_jf_simple_pass_through (struct ipa_jump_func *jfunc, int formal_id,
@@ -539,6 +689,41 @@ ipa_set_ancestor_jf (struct ipa_jump_fun
   jfunc->value.ancestor.type_preserved = type_preserved;
 }
 
+/* Return the number of references tracked for escape analysis in INFO.  */
+
+static inline int
+ipa_get_tracked_refs_count (struct ipa_node_params *info)
+{
+  return info->ref_descs.length ();
+}
+
+/* Set escape flag of reference number I of a function corresponding to NODE to
+   VAL.  */
+
+static inline void
+ipa_set_ref_escaped (struct ipa_node_params *info, int i, bool val)
+{
+  info->ref_descs[i].escaped = val;
+}
+
+/* Set the clobbered flag corresponding to the Ith tracked reference of the
+   function associated with INFO to VAL.  */
+
+static inline void
+ipa_set_ref_clobbered (struct ipa_node_params *info, int i, bool val)
+{
+  info->ref_descs[i].clobbered = val;
+}
+
+/* Set the callee_clobbered flag corresponding to the Ith tracked reference of
+   the function associated with INFO to VAL.  */
+
+static inline void
+ipa_set_ref_callee_clobbered (struct ipa_node_params *info, int i, bool val)
+{
+  info->ref_descs[i].callee_clobbered = val;
+}
+
 /* Extract the acual BINFO being described by JFUNC which must be a known type
    jump function.  */
 
@@ -784,7 +969,7 @@ detect_type_change (tree arg, tree base,
   if (!tci.known_current_type
       || tci.multiple_types_encountered
       || offset != 0)
-    jfunc->type = IPA_JF_UNKNOWN;
+    ipa_set_jf_unknown (jfunc);
   else
     ipa_set_jf_known_type (jfunc, 0, tci.known_current_type, comp_type);
 
@@ -1090,7 +1275,8 @@ ipa_load_from_parm_agg_1 (struct func_bo
     }
 
   if (index >= 0
-      && parm_ref_data_preserved_p (fbi, index, stmt, op))
+      && ((fbi && cgraph_param_noclobber_p (fbi->node, index))
+	  || parm_ref_data_preserved_p (fbi, index, stmt, op)))
     {
       *index_p = index;
       *by_ref_p = true;
@@ -1725,6 +1911,86 @@ ipa_get_callee_param_type (struct cgraph
   return NULL;
 }
 
+static void
+analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
+		    struct ipa_escape *esc);
+
+/* Return the ipa_escape structure suitable for REFERENCE, if it is a
+   declaration or a MEM_REF.  Return NULL if there is no structure describing
+   REFERENCE.  If a non-NULL result is returned, put the offset of the
+   REFERENCE relative to the start of data described by the result into
+   *OFFSET, and size and max_size as returned by get_ref_base_and_extent to
+   *SIZE and *MAX_SIZE respectively.  */
+
+static struct ipa_escape *
+get_escape_for_ref (struct func_body_info *fbi, tree reference,
+		    HOST_WIDE_INT *offset, HOST_WIDE_INT *size,
+		    HOST_WIDE_INT *max_size)
+{
+  struct ipa_escape *res;
+  tree base = get_ref_base_and_extent (reference, offset, size, max_size);
+
+  if (DECL_P (base))
+    {
+      ipa_escape **d_esc = fbi->decl_escapes->contains (base);
+      if (!d_esc)
+	return NULL;
+      res = *d_esc;
+    }
+  else if (TREE_CODE (base) == MEM_REF
+	   && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
+    {
+      tree ssa = TREE_OPERAND (base, 0);
+      res = &fbi->escapes[SSA_NAME_VERSION (ssa)];
+      if (!res->analyzed)
+	analyze_ssa_escape (fbi, ssa, res);
+    }
+  else
+    return NULL;
+
+  if (res->target)
+    {
+      *offset += res->offset;
+      res = res->target;
+    }
+  return res;
+}
+
+/* Return the ipa_escape structure suitable for T, if it is an ssa_name or an
+   ADDR_EXPR.  Return NULL if there is no structure for T.  If a non-NULL
+   result is returned, put the offset of the value T relative to the start of
+   data described by the result into *OFFSET.  */
+
+static struct ipa_escape *
+get_escape_for_value (struct func_body_info *fbi, tree t,
+		      HOST_WIDE_INT *offset)
+{
+  if (TREE_CODE (t) == SSA_NAME)
+    {
+      struct ipa_escape *res;
+      *offset = 0;
+      res = &fbi->escapes[SSA_NAME_VERSION (t)];
+      if (!res->analyzed)
+	analyze_ssa_escape (fbi, t, res);
+
+      if (res->target)
+	{
+	  *offset += res->offset;
+	  res = res->target;
+	}
+
+      return res;
+    }
+  else if (TREE_CODE (t) == ADDR_EXPR)
+    {
+      HOST_WIDE_INT dummy_size, dummy_max_size;
+      return get_escape_for_ref (fbi, TREE_OPERAND (t, 0), offset, &dummy_size,
+				 &dummy_max_size);
+    }
+  else
+    return NULL;
+}
+
 /* Compute jump function for all arguments of callsite CS and insert the
    information in the jump_functions array in the ipa_edge_args corresponding
    to this callsite.  */
@@ -1753,6 +2019,8 @@ ipa_compute_jump_functions_for_edge (str
       tree arg = gimple_call_arg (call, n);
       tree param_type = ipa_get_callee_param_type (cs, n);
 
+      gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN
+			   && !ipa_get_jf_unknown_esc_ref_valid (jfunc));
       if (is_gimple_ip_invariant (arg))
 	ipa_set_jf_constant (jfunc, arg, cs);
       else if (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -1807,19 +2075,42 @@ ipa_compute_jump_functions_for_edge (str
 				      ? TREE_TYPE (param_type)
 				      : NULL);
 
-      /* If ARG is pointer, we can not use its type to determine the type of aggregate
-	 passed (because type conversions are ignored in gimple).  Usually we can
-	 safely get type from function declaration, but in case of K&R prototypes or
-	 variadic functions we can try our luck with type of the pointer passed.
-	 TODO: Since we look for actual initialization of the memory object, we may better
-	 work out the type based on the memory stores we find.  */
+      /* If ARG is pointer, we can not use its type to determine the type of
+	 aggregate passed (because type conversions are ignored in gimple).
+	 Usually we can safely get type from function declaration, but in case
+	 of K&R prototypes or variadic functions we can try our luck with type
+	 of the pointer passed.
+	 TODO: Since we look for actual initialization of the memory object, we
+	 may better work out the type based on the memory stores we find.  */
       if (!param_type)
 	param_type = TREE_TYPE (arg);
 
-      if ((jfunc->type != IPA_JF_PASS_THROUGH
-	      || !ipa_get_jf_pass_through_agg_preserved (jfunc))
-	  && (jfunc->type != IPA_JF_ANCESTOR
-	      || !ipa_get_jf_ancestor_agg_preserved (jfunc))
+      HOST_WIDE_INT dummy_offset;
+      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &dummy_offset);
+      int ref_index;
+      if (esc && valid_escape_result_index (esc, &ref_index))
+	{
+	  if (jfunc->type == IPA_JF_UNKNOWN)
+	    ipa_set_jf_unknown_ref_index (jfunc, ref_index);
+	  else if (jfunc->type == IPA_JF_KNOWN_TYPE)
+	    ipa_set_jf_known_type_ref_index (jfunc, ref_index);
+	  else if (jfunc->type == IPA_JF_CONST)
+	    ipa_set_jf_constant_ref_index (jfunc, ref_index);
+	  else
+	    {
+	      gcc_checking_assert
+		(jfunc->type != IPA_JF_PASS_THROUGH
+		 || ipa_get_jf_pass_through_formal_id (jfunc) == ref_index);
+	      gcc_checking_assert
+		(jfunc->type != IPA_JF_ANCESTOR
+		 || ipa_get_jf_ancestor_formal_id (jfunc) == ref_index);
+	    }
+	}
+
+      /* TODO: We should allow aggregate jump functions even for these types of
+	 jump functions but we need to be able to combine them first.  */
+      if (jfunc->type != IPA_JF_PASS_THROUGH
+	  && jfunc->type != IPA_JF_ANCESTOR
 	  && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
 	      || POINTER_TYPE_P (param_type)))
 	determine_known_aggregate_parts (call, arg, param_type, jfunc);
@@ -2223,12 +2514,11 @@ ipa_analyze_stmt_uses (struct func_body_
     ipa_analyze_call_uses (fbi, stmt);
 }
 
-/* Callback of walk_stmt_load_store_addr_ops for the visit_load.
-   If OP is a parameter declaration, mark it as used in the info structure
-   passed in DATA.  */
+/* Callback of walk_stmt_load_store_addr_ops.  If OP is a parameter
+   declaration, mark it as used in the info structure passed in DATA.  */
 
 static bool
-visit_ref_for_mod_analysis (gimple, tree op, tree, void *data)
+visit_ref_mark_it_used (gimple, tree op, tree, void *data)
 {
   struct ipa_node_params *info = (struct ipa_node_params *) data;
 
@@ -2244,13 +2534,12 @@ visit_ref_for_mod_analysis (gimple, tree
   return false;
 }
 
-/* Scan the statements in BB and inspect the uses of formal parameters.  Store
-   the findings in various structures of the associated ipa_node_params
-   structure, such as parameter flags, notes etc.  FBI holds various data about
-   the function being analyzed.  */
+/* Scan the statements in BB and inspect the uses of formal parameters, escape
+   analysis and so on.  FBI holds various data about the function being
+   analyzed.  */
 
 static void
-ipa_analyze_params_uses_in_bb (struct func_body_info *fbi, basic_block bb)
+ipa_analyze_bb_statements (struct func_body_info *fbi, basic_block bb)
 {
   gimple_stmt_iterator gsi;
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -2262,15 +2551,15 @@ ipa_analyze_params_uses_in_bb (struct fu
 
       ipa_analyze_stmt_uses (fbi, stmt);
       walk_stmt_load_store_addr_ops (stmt, fbi->info,
-				     visit_ref_for_mod_analysis,
-				     visit_ref_for_mod_analysis,
-				     visit_ref_for_mod_analysis);
+				     visit_ref_mark_it_used,
+				     visit_ref_mark_it_used,
+				     visit_ref_mark_it_used);
     }
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     walk_stmt_load_store_addr_ops (gsi_stmt (gsi), fbi->info,
-				   visit_ref_for_mod_analysis,
-				   visit_ref_for_mod_analysis,
-				   visit_ref_for_mod_analysis);
+				   visit_ref_mark_it_used,
+				   visit_ref_mark_it_used,
+				   visit_ref_mark_it_used);
 }
 
 /* Calculate controlled uses of parameters of NODE.  */
@@ -2344,10 +2633,284 @@ private:
 void
 analysis_dom_walker::before_dom_children (basic_block bb)
 {
-  ipa_analyze_params_uses_in_bb (m_fbi, bb);
+  ipa_analyze_bb_statements (m_fbi, bb);
   ipa_compute_jump_functions_for_bb (m_fbi, bb);
 }
 
+/* Look at operands of PHI and if any of them is an address of a declaration,
+   mark that declaration escaped.  */
+
+void
+analyze_phi_escapes (gimple phi, struct func_body_info *fbi)
+{
+  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+    {
+      tree op = gimple_phi_arg_def (phi, i);
+      if (TREE_CODE (op) != ADDR_EXPR)
+	continue;
+
+      tree base = get_base_address (TREE_OPERAND (op, 0));
+      if (!DECL_P (base))
+	continue;
+
+      ipa_escape **d_esc = fbi->decl_escapes->contains (base);
+      if (!d_esc)
+	continue;
+      (*d_esc)->escaped = true;
+    }
+}
+
+/* Check definition and uses of SSA and update ESC (and potentially escape
+   structures associated with other SSA names) accordingly.  */
+
+static void
+analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
+		    struct ipa_escape *esc)
+{
+  esc->analyzed = true;
+  if (!POINTER_TYPE_P (TREE_TYPE (ssa)))
+    {
+      esc->escaped = true;
+      return;
+    }
+
+  /* First we need to check the definition and figure out whether we can work
+     with it or whether this name actually refers to data described by another
+     structure.  */
+  if (!SSA_NAME_IS_DEFAULT_DEF (ssa))
+    {
+      gimple def = SSA_NAME_DEF_STMT (ssa);
+
+      if (gimple_assign_single_p (def))
+	{
+	  tree rhs = gimple_assign_rhs1 (def);
+	  HOST_WIDE_INT offset;
+	  struct ipa_escape *r_esc = get_escape_for_value (fbi, rhs, &offset);
+	  if (r_esc)
+	    {
+	      esc->offset = offset;
+	      esc->target = r_esc;
+	    }
+	  else
+	    {
+	      esc->escaped = true;
+	      return;
+	    }
+	}
+      else if (is_gimple_call (def))
+	{
+	  /* TODO: If only C++ new had malloc attribute.  */
+	  int flags = gimple_call_flags (def);
+	  if ((flags & ECF_MALLOC) == 0)
+	    {
+	      esc->escaped = true;
+	      return;
+	    }
+	}
+      else
+	{
+	  if (gimple_code (def) == GIMPLE_PHI)
+	    /* Any SSA defined by a PHI is doomed but it is a convenient place
+	       to check every pointer PHI.  */
+	    analyze_phi_escapes (def, fbi);
+
+	  esc->escaped = true;
+	  return;
+	}
+    }
+
+  if (esc->target)
+    esc = esc->target;
+  if (esc->escaped)
+    return;
+
+  /* If the definition is fine, we need to check the uses.  */
+
+  imm_use_iterator imm_iter;
+  use_operand_p use;
+  FOR_EACH_IMM_USE_FAST (use, imm_iter, ssa)
+    {
+      gimple stmt = USE_STMT (use);
+      if (is_gimple_debug (stmt))
+	continue;
+
+      switch (gimple_code (stmt))
+	{
+	case GIMPLE_ASSIGN:
+	  {
+	    if (!gimple_assign_single_p (stmt))
+	      {
+		esc->escaped = true;
+		return;
+	      }
+
+	      tree lhs = gimple_assign_lhs (stmt);
+	      /* Statements assigning to another SSA are OK, we check all of
+		 them.  */
+	      if (TREE_CODE (lhs) != SSA_NAME
+		  /* If LHS is not an SSA_NAME, RHS cannot be an ADDR_EXPR, and
+		     must be either a naked SSA_NAME or a load or an invariant.
+		     We only care if it is the SSA name we are after.  It can
+		     be a different SSA name if the use was on the LHS in a
+		     MEM_REF.  */
+		  && gimple_assign_rhs1 (stmt) == ssa)
+		{
+		  esc->escaped = true;
+		  return;
+		}
+
+	      while (handled_component_p (lhs))
+		lhs = TREE_OPERAND (lhs, 0);
+	      if (TREE_CODE (lhs) == MEM_REF
+		  && TREE_OPERAND (lhs, 0) == ssa)
+		esc->write_base = true;
+	    }
+	  break;
+
+	case GIMPLE_CALL:
+	  /* Calls will be dealt with when constructing jump functions.
+	     However, indirect calls mean that all values escape (we do IPA
+	     escape propagation before any devirtualization) and when not in
+	     LTO, even calls to functions in other compilation units are dark
+	     holes.  On the other hand, builtin free is whitelisted.  */
+	  if (!gimple_call_builtin_p (stmt, BUILT_IN_FREE))
+	    {
+	      struct cgraph_edge *cs = cgraph_edge (fbi->node, stmt);
+	      if (!cs || !cs->callee || (!cs->callee->definition && !flag_lto))
+		{
+		  esc->escaped = true;
+		  return;
+		}
+	    }
+	  break;
+
+	case GIMPLE_SWITCH:
+	case GIMPLE_COND:
+	  /* These are harmless.  */
+	  break;
+
+	default:
+	  esc->escaped = true;
+	  return;
+	}
+    }
+}
+
+/* Examine escapes of all SSA names.   */
+
+static void
+analyze_all_ssa_escapes (struct func_body_info *fbi)
+{
+  for (unsigned i = 1; i < fbi->func->gimple_df->ssa_names->length (); ++i)
+    {
+      tree ssa = ssa_name (i);
+      if (!ssa)
+	continue;
+      struct ipa_escape *esc = &fbi->escapes[SSA_NAME_VERSION (ssa)];
+      if (esc->analyzed)
+	continue;
+      analyze_ssa_escape (fbi, ssa, esc);
+    }
+}
+
+/* Initialize escape analysis structures in the FBI corresponding to FUNC.  */
+
+static void
+create_escape_structures (struct func_body_info *fbi)
+{
+  tree var, parm;
+  unsigned int i, var_idx, var_count = 0;
+
+  for (parm = DECL_ARGUMENTS (fbi->node->decl);
+       parm;
+       parm = DECL_CHAIN (parm))
+    if (TREE_ADDRESSABLE (parm))
+      var_count++;
+
+  FOR_EACH_LOCAL_DECL (fbi->func, i, var)
+    if (TREE_CODE (var) == VAR_DECL && TREE_ADDRESSABLE (var))
+      var_count++;
+
+  fbi->escapes = vNULL;
+  fbi->escapes.safe_grow_cleared (SSANAMES (fbi->func)->length () + var_count);
+  fbi->decl_escapes = new pointer_map <ipa_escape *>;
+
+  var_idx = SSANAMES (fbi->func)->length ();
+  for (parm = DECL_ARGUMENTS (fbi->node->decl);
+       parm;
+       parm = DECL_CHAIN (parm))
+    if (TREE_ADDRESSABLE (parm))
+      *fbi->decl_escapes->insert (parm) = &fbi->escapes[var_idx++];
+
+  FOR_EACH_LOCAL_DECL (fbi->func, i, var)
+    if (TREE_CODE (var) == VAR_DECL && TREE_ADDRESSABLE (var))
+      *fbi->decl_escapes->insert (var) = &fbi->escapes[var_idx++];
+}
+
+/* Free escape analysis structures in the FBI.  */
+
+static void
+free_escape_structures (struct func_body_info *fbi)
+{
+  fbi->escapes.release ();
+  delete fbi->decl_escapes;
+}
+
+/* Go over call arguments of CS and if any warrants a result_index for an escape
+   structure, assign to it *RI and increment it.  */
+
+void
+pick_escapes_from_call (struct func_body_info *fbi, struct cgraph_edge *cs,
+			int *ri)
+{
+  int arg_num = gimple_call_num_args (cs->call_stmt);
+
+  for (int i = 0; i < arg_num; ++i)
+    {
+      HOST_WIDE_INT offset;
+      tree arg = gimple_call_arg (cs->call_stmt, i);
+      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &offset);
+
+      if (!esc || esc->escaped)
+	continue;
+
+      if (esc->last_seen_cs == cs)
+	{
+	  esc->escaped = true;
+	  continue;
+	}
+      esc->last_seen_cs = cs;
+
+      if (!esc->result_index)
+	{
+	  *ri = *ri + 1;
+	  esc->result_index = *ri;
+	}
+    }
+}
+
+/* Copy result escape flags to node info.  There must be exactly COUNT result
+   escapes.  */
+
+void
+gather_picked_escapes (struct func_body_info *fbi, int count)
+{
+  if (count == 0)
+    return;
+  fbi->info->ref_descs.safe_grow_cleared (count);
+
+  for (unsigned i = 0; i < fbi->escapes.length (); ++i)
+    {
+      struct ipa_escape *esc = &fbi->escapes[i];
+      int idx;
+      if (valid_escape_result_index (esc, &idx))
+	{
+	  ipa_set_ref_escaped (fbi->info, idx, esc->escaped);
+	  ipa_set_ref_clobbered (fbi->info, idx, esc->write_base);
+	}
+    }
+}
+
 /* Initialize the array describing properties of of formal parameters
    of NODE, analyze their uses and compute jump functions associated
    with actual arguments of calls from within NODE.  */
@@ -2381,28 +2944,48 @@ ipa_analyze_node (struct cgraph_node *no
   calculate_dominance_info (CDI_DOMINATORS);
   ipa_initialize_node_params (node);
   ipa_analyze_controlled_uses (node);
+  info->ref_descs = vNULL;
 
+  fbi.func = func;
   fbi.node = node;
   fbi.info = IPA_NODE_REF (node);
   fbi.bb_infos = vNULL;
   fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
   fbi.param_count = ipa_get_param_count (info);
   fbi.aa_walked = 0;
+  create_escape_structures (&fbi);
+  analyze_all_ssa_escapes (&fbi);
 
+  for (int i = 0; i < fbi.param_count; ++i)
+    {
+      tree ddef, parm = fbi.info->descriptors[i].decl;
+      if (is_gimple_reg (parm)
+	  && (ddef = ssa_default_def (cfun, parm)))
+	{
+	  struct ipa_escape *esc = &fbi.escapes[SSA_NAME_VERSION (ddef)];
+	  esc->result_index = i + 1;
+	}
+    }
+
+  int ri = fbi.param_count;
   for (struct cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
     {
       ipa_bb_info *bi = ipa_get_bb_info (&fbi, gimple_bb (cs->call_stmt));
       bi->cg_edges.safe_push (cs);
+      pick_escapes_from_call (&fbi, cs, &ri);
     }
 
   for (struct cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
     {
       ipa_bb_info *bi = ipa_get_bb_info (&fbi, gimple_bb (cs->call_stmt));
       bi->cg_edges.safe_push (cs);
+      pick_escapes_from_call (&fbi, cs, &ri);
     }
 
+  gather_picked_escapes (&fbi, ri);
   analysis_dom_walker (&fbi).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 
+  free_escape_structures (&fbi);
   int i;
   struct ipa_bb_info *bi;
   FOR_EACH_VEC_ELT (fbi.bb_infos, i, bi)
@@ -2412,6 +2995,271 @@ ipa_analyze_node (struct cgraph_node *no
   pop_cfun ();
 }
 
+/* Data about the current status of escape propagation. */
+
+struct escape_spreading_data
+{
+  /* To-do lists for escape spreading.  */
+  vec<cgraph_node *> up_stack;
+  vec<cgraph_node *> down_stack;
+
+  /* The current info corresponding to the node from which we are spreading
+     escaped flags.  */
+  struct ipa_node_params *info;
+};
+
+/* Put the NODE into the upward propagation work list in ESD, unless it is
+   already there.  */
+
+static void
+enque_to_propagate_escapes_up (struct escape_spreading_data *esd,
+			       struct cgraph_node *node)
+{
+  struct ipa_node_params *info = IPA_NODE_REF (node);
+  if (info->node_up_enqueued)
+    return;
+  info->node_up_enqueued = true;
+  esd->up_stack.safe_push (node);
+}
+
+/* Put the NODE into the downward propagation work list in ESD, unless it is
+   already there.  */
+
+static void
+enque_to_propagate_escapes_down (struct escape_spreading_data *esd,
+				 struct cgraph_node *node)
+{
+  struct ipa_node_params *info = IPA_NODE_REF (node);
+  if (info->node_enqueued)
+    return;
+  info->node_enqueued = true;
+  esd->down_stack.safe_push (node);
+}
+
+/* Return the escape origin from a JFUNC regardless of its type, or -1 if there
+   is none.  */
+
+static int
+escape_origin_from_jfunc (struct ipa_jump_func *jfunc)
+{
+  if (jfunc->type == IPA_JF_PASS_THROUGH)
+    return ipa_get_jf_pass_through_formal_id (jfunc);
+  else if (jfunc->type == IPA_JF_ANCESTOR)
+    return ipa_get_jf_ancestor_formal_id (jfunc);
+  else if (jfunc->type == IPA_JF_UNKNOWN
+	   && ipa_get_jf_unknown_esc_ref_valid (jfunc))
+    return ipa_get_jf_unknown_esc_ref_index (jfunc);
+  else if (jfunc->type == IPA_JF_KNOWN_TYPE
+	   && ipa_get_jf_known_type_esc_ref_valid (jfunc))
+    return ipa_get_jf_known_type_esc_ref_index (jfunc);
+  else if (jfunc->type == IPA_JF_CONST
+	   && ipa_get_jf_constant_esc_ref_valid (jfunc))
+    return ipa_get_jf_constant_esc_ref_index (jfunc);
+  else
+    return -1;
+}
+
+/* Callback of cgraph_for_node_and_aliases, spread escape flags to callers.  */
+
+static bool
+spread_escapes_up_from_one_alias (struct cgraph_node *node, void *data)
+{
+  struct escape_spreading_data *esd = (struct escape_spreading_data *) data;
+  struct cgraph_edge *cs;
+
+  for (cs = node->callers; cs; cs = cs->next_caller)
+    {
+      if (cs->caller->thunk.thunk_p)
+	{
+	  cgraph_for_node_and_aliases (cs->caller,
+				       spread_escapes_up_from_one_alias,
+				       esd, true);
+	  continue;
+	}
+      enum availability avail;
+      cgraph_function_or_thunk_node (node, &avail);
+
+      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      struct ipa_edge_args *args = IPA_EDGE_REF (cs);
+      int args_count = ipa_get_cs_argument_count (args);
+      int param_count = ipa_get_param_count (esd->info);
+
+      for (int i = 0; i < args_count; ++i)
+	if (i >= param_count
+	    || ipa_is_ref_escaped (esd->info, i)
+	    || avail == AVAIL_OVERWRITABLE)
+	  {
+	    struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
+	    int origin = escape_origin_from_jfunc (jfunc);
+	    if (origin < 0)
+	      continue;
+
+	    if (!ipa_is_ref_escaped (caller_info, origin))
+	      {
+		if (dump_file && (dump_flags & TDF_DETAILS))
+		  fprintf (dump_file, "escape propagated up (%i) from %s/%i to "
+			   "%s/%i ref %i, from arg %i\n", __LINE__,
+			   node->name (), node->order, cs->caller->name (),
+			   cs->caller->order, origin, i);
+
+		ipa_set_ref_escaped (caller_info, origin, true);
+		enque_to_propagate_escapes_up (esd, cs->caller);
+		enque_to_propagate_escapes_down (esd, cs->caller);
+	      }
+	  }
+	else if (ipa_is_ref_clobbered (esd->info, i))
+	  {
+	    struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
+	    int origin = escape_origin_from_jfunc (jfunc);
+	    if (origin < 0)
+	      continue;
+
+	    ipa_set_ref_callee_clobbered (caller_info, origin, true);
+	    if (!ipa_is_ref_clobbered (caller_info, origin))
+	      {
+		if (dump_file && (dump_flags & TDF_DETAILS))
+		  fprintf (dump_file, "clobbered propagated up (%i) from "
+			   "%s/%i to %s/%i ref %i, from arg %i\n", __LINE__,
+			   node->name (), node->order, cs->caller->name (),
+			   cs->caller->order, origin, i);
+
+		ipa_set_ref_clobbered (caller_info, origin, true);
+		enque_to_propagate_escapes_up (esd, cs->caller);
+	      }
+	  }
+    }
+  return false;
+}
+
+/* Spread set escape flags from ESD->node and all its aliases and thunks to
+   their callers.  */
+
+static void
+spread_escapes_up (struct escape_spreading_data *esd, cgraph_node *node)
+{
+  cgraph_for_node_and_aliases (node, spread_escapes_up_from_one_alias,
+			       esd, true);
+}
+
+/* Spread set escape flags from ESD->node to all its callees.   */
+
+static void
+spread_escapes_down (struct escape_spreading_data *esd, cgraph_node *node)
+{
+  struct cgraph_edge *cs;
+  for (cs = node->callees; cs; cs = cs->next_callee)
+    {
+      enum availability availability;
+      cgraph_node *callee = cgraph_function_node (cs->callee, &availability);
+
+      struct ipa_node_params *callee_info = IPA_NODE_REF (callee);
+      struct ipa_edge_args *args = IPA_EDGE_REF (cs);
+      int args_count = ipa_get_cs_argument_count (args);
+      int parms_count = ipa_get_param_count (callee_info);
+
+      for (int i = 0; i < parms_count; ++i)
+	if (i >= args_count)
+	  {
+	    if (!ipa_is_ref_escaped (callee_info, i))
+	      {
+		if (dump_file && (dump_flags & TDF_DETAILS))
+		  fprintf (dump_file, "escape propagated down (%i) from %s/%i "
+			   " to %s/%i ref %i\n", __LINE__, node->name (),
+			   node->order, callee->name (), callee->order, i);
+
+		ipa_set_ref_escaped (callee_info, i, true);
+		enque_to_propagate_escapes_down (esd, callee);
+	      }
+	  }
+	else
+	  {
+	    struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
+	    int origin = escape_origin_from_jfunc (jfunc);
+
+	    if ((origin < 0
+		 || ipa_is_ref_escaped (esd->info, origin))
+		&& !ipa_is_ref_escaped (callee_info, i))
+	      {
+		if (dump_file && (dump_flags & TDF_DETAILS))
+		  fprintf (dump_file, "escape propagated down (%i) from %s/%i "
+			   " to %s/%i ref %i, origin %i\n", __LINE__,
+			   node->name (), node->order, callee->name (),
+			   callee->order, i, origin);
+
+		ipa_set_ref_escaped (callee_info, i, true);
+		enque_to_propagate_escapes_down (esd, callee);
+	      }
+	  }
+    }
+}
+
+/* Spread escape flags through jump functions across the call graph.  */
+
+void
+ipa_spread_escapes ()
+{
+  struct cgraph_node *node;
+  struct escape_spreading_data esd;
+  esd.up_stack = vNULL;
+  esd.down_stack = vNULL;
+
+  if (dump_file)
+    fprintf (dump_file, "\nPropagating escape flags\n");
+
+  ipa_check_create_node_params ();
+  ipa_check_create_edge_args ();
+  FOR_EACH_FUNCTION (node)
+    {
+      struct ipa_node_params *info = IPA_NODE_REF (node);
+      esd.info = info;
+      /* FIXME: This test is copied from IPA-CP but I wonder whether we
+	 should check it for all aliases too?  */
+      if (!node->local.local)
+	{
+	  /* Set escape flags corresponding to formal parameters.  */
+	  int param_count = ipa_get_param_count (esd.info);
+	  for (int i = 0; i < param_count; ++i)
+	    ipa_set_ref_escaped (info, i, true);
+	}
+
+      spread_escapes_up (&esd, node);
+      spread_escapes_down (&esd, node);
+    }
+
+  while (!esd.up_stack.is_empty ())
+    {
+      node = esd.up_stack.pop ();
+      esd.info = IPA_NODE_REF (node);
+      esd.info->node_up_enqueued = false;
+      spread_escapes_up (&esd, node);
+    }
+
+  while (!esd.down_stack.is_empty ())
+    {
+      node = esd.down_stack.pop ();
+      esd.info = IPA_NODE_REF (node);
+      esd.info->node_enqueued = false;
+      spread_escapes_down (&esd, node);
+    }
+
+  esd.up_stack.release ();
+  esd.down_stack.release ();
+
+  FOR_EACH_FUNCTION (node)
+    {
+      struct ipa_node_params *info = IPA_NODE_REF (node);
+      int param_count = ipa_get_param_count (info);
+
+      for (int i = 0; i < param_count; i++)
+	if (!ipa_is_ref_escaped (info, i))
+	  {
+	    cgraph_set_param_noescape (node, i);
+	    if (!ipa_is_ref_clobbered (info, i))
+	      cgraph_set_param_noclobber (node, i);
+	  }
+    }
+}
+
 /* Given a statement CALL which must be a GIMPLE_CALL calling an OBJ_TYPE_REF
    attempt a type-based devirtualization.  If successful, return the
    target function declaration, otherwise return NULL.  */
@@ -2423,7 +3271,7 @@ ipa_intraprocedural_devirtualization (gi
   struct ipa_jump_func jfunc;
   tree otr = gimple_call_fn (call);
 
-  jfunc.type = IPA_JF_UNKNOWN;
+  ipa_set_jf_unknown (&jfunc);
   compute_known_type_jump_func (OBJ_TYPE_REF_OBJECT (otr), &jfunc,
 				call, obj_type_ref_class (otr));
   if (jfunc.type != IPA_JF_KNOWN_TYPE)
@@ -2442,30 +3290,53 @@ ipa_intraprocedural_devirtualization (gi
   return fndecl;
 }
 
+/* Set DST to be unknown jump function.  If SRC, which must be known type jump
+   function, has a valid reference index, copy that index to DST, otherwise
+   keep DST's ref index invalid.  */
+
+static void
+make_unknown_jf_from_known_type_jf (struct ipa_jump_func *dst,
+				    struct ipa_jump_func *src)
+{
+  ipa_set_jf_unknown (dst);
+  if (ipa_get_jf_known_type_esc_ref_valid (src))
+    ipa_set_jf_unknown_ref_index (dst,
+				  ipa_get_jf_known_type_esc_ref_index (src));
+}
+
 /* Update the jump function DST when the call graph edge corresponding to SRC is
    is being inlined, knowing that DST is of type ancestor and src of known
    type.  */
 
 static void
-combine_known_type_and_ancestor_jfs (struct ipa_jump_func *src,
-				     struct ipa_jump_func *dst)
+combine_known_type_and_ancestor_jfs (struct ipa_jump_func *dst,
+				     struct ipa_jump_func *src)
 {
-  HOST_WIDE_INT combined_offset;
-  tree combined_type;
-
   if (!ipa_get_jf_ancestor_type_preserved (dst))
     {
-      dst->type = IPA_JF_UNKNOWN;
+      make_unknown_jf_from_known_type_jf (dst, src);
       return;
     }
 
-  combined_offset = ipa_get_jf_known_type_offset (src)
+  bool esc_ref_valid;
+  int  esc_ref_index = -1;
+  if (ipa_get_jf_known_type_esc_ref_valid (src))
+    {
+      esc_ref_valid = true;
+      esc_ref_index = ipa_get_jf_known_type_esc_ref_index (src);
+    }
+  else
+    esc_ref_valid = false;
+
+  HOST_WIDE_INT combined_offset = ipa_get_jf_known_type_offset (src)
     + ipa_get_jf_ancestor_offset (dst);
-  combined_type = ipa_get_jf_ancestor_type (dst);
+  tree combined_type = ipa_get_jf_ancestor_type (dst);
 
   ipa_set_jf_known_type (dst, combined_offset,
 			 ipa_get_jf_known_type_base_type (src),
 			 combined_type);
+  if (esc_ref_valid)
+    ipa_set_jf_known_type_ref_index (dst, esc_ref_index);
 }
 
 /* Update the jump functions associated with call graph edge E when the call
@@ -2478,6 +3349,7 @@ update_jump_functions_after_inlining (st
 {
   struct ipa_edge_args *top = IPA_EDGE_REF (cs);
   struct ipa_edge_args *args = IPA_EDGE_REF (e);
+  struct ipa_node_params *old_info = IPA_NODE_REF (cs->callee);
   int count = ipa_get_cs_argument_count (args);
   int i;
 
@@ -2495,14 +3367,16 @@ update_jump_functions_after_inlining (st
 	     don't.  */
 	  if (dst_fid >= ipa_get_cs_argument_count (top))
 	    {
-	      dst->type = IPA_JF_UNKNOWN;
+	      ipa_set_jf_unknown (dst);
 	      continue;
 	    }
 
 	  src = ipa_get_ith_jump_func (top, dst_fid);
 
 	  if (src->agg.items
-	      && (dst->value.ancestor.agg_preserved || !src->agg.by_ref))
+	      && (dst->value.ancestor.agg_preserved
+		  || !src->agg.by_ref
+		  || ipa_is_param_ref_safely_constant (old_info, dst_fid)))
 	    {
 	      struct ipa_agg_jf_item *item;
 	      int j;
@@ -2518,7 +3392,7 @@ update_jump_functions_after_inlining (st
 	    }
 
 	  if (src->type == IPA_JF_KNOWN_TYPE)
-	    combine_known_type_and_ancestor_jfs (src, dst);
+	    combine_known_type_and_ancestor_jfs (dst, src);
 	  else if (src->type == IPA_JF_PASS_THROUGH
 		   && src->value.pass_through.operation == NOP_EXPR)
 	    {
@@ -2538,7 +3412,7 @@ update_jump_functions_after_inlining (st
 		src->value.ancestor.type_preserved;
 	    }
 	  else
-	    dst->type = IPA_JF_UNKNOWN;
+	    ipa_set_jf_unknown (dst);
 	}
       else if (dst->type == IPA_JF_PASS_THROUGH)
 	{
@@ -2552,20 +3426,19 @@ update_jump_functions_after_inlining (st
 	      int dst_fid = dst->value.pass_through.formal_id;
 	      src = ipa_get_ith_jump_func (top, dst_fid);
 	      bool dst_agg_p = ipa_get_jf_pass_through_agg_preserved (dst);
+	      bool pass_aggs_by_ref = dst_agg_p
+		|| ipa_is_param_ref_safely_constant (old_info, dst_fid);
 
 	      switch (src->type)
 		{
 		case IPA_JF_UNKNOWN:
-		  dst->type = IPA_JF_UNKNOWN;
+		  ipa_set_jf_unknown_copy (dst, src);
 		  break;
 		case IPA_JF_KNOWN_TYPE:
 		  if (ipa_get_jf_pass_through_type_preserved (dst))
-		    ipa_set_jf_known_type (dst,
-					   ipa_get_jf_known_type_offset (src),
-					   ipa_get_jf_known_type_base_type (src),
-					   ipa_get_jf_known_type_component_type (src));
+		    ipa_set_jf_known_type_copy (dst, src);
 		  else
-		    dst->type = IPA_JF_UNKNOWN;
+		    make_unknown_jf_from_known_type_jf (dst, src);
 		  break;
 		case IPA_JF_CONST:
 		  ipa_set_jf_cst_copy (dst, src);
@@ -2614,7 +3487,7 @@ update_jump_functions_after_inlining (st
 		}
 
 	      if (src->agg.items
-		  && (dst_agg_p || !src->agg.by_ref))
+		  && (pass_aggs_by_ref || !src->agg.by_ref))
 		{
 		  /* Currently we do not produce clobber aggregate jump
 		     functions, replace with merging when we do.  */
@@ -2625,7 +3498,7 @@ update_jump_functions_after_inlining (st
 		}
 	    }
 	  else
-	    dst->type = IPA_JF_UNKNOWN;
+	    ipa_set_jf_unknown (dst);
 	}
     }
 }
@@ -2975,11 +3848,12 @@ update_indirect_edges_after_inlining (st
 {
   struct ipa_edge_args *top;
   struct cgraph_edge *ie, *next_ie, *new_direct_edge;
-  struct ipa_node_params *new_root_info;
+  struct ipa_node_params *new_root_info, *old_root_info;
   bool res = false;
 
   ipa_check_create_edge_args ();
   top = IPA_EDGE_REF (cs);
+  old_root_info = IPA_NODE_REF (cs->callee);
   new_root_info = IPA_NODE_REF (cs->caller->global.inlined_to
 				? cs->caller->global.inlined_to
 				: cs->caller);
@@ -3039,6 +3913,7 @@ update_indirect_edges_after_inlining (st
 	       && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
 	{
 	  if ((ici->agg_contents
+	       && !ipa_is_param_ref_safely_constant (old_root_info, param_index)
 	       && !ipa_get_jf_pass_through_agg_preserved (jfunc))
 	      || (ici->polymorphic
 		  && !ipa_get_jf_pass_through_type_preserved (jfunc)))
@@ -3049,6 +3924,7 @@ update_indirect_edges_after_inlining (st
       else if (jfunc->type == IPA_JF_ANCESTOR)
 	{
 	  if ((ici->agg_contents
+	       && !ipa_is_param_ref_safely_constant (old_root_info, param_index)
 	       && !ipa_get_jf_ancestor_agg_preserved (jfunc))
 	      || (ici->polymorphic
 		  && !ipa_get_jf_ancestor_type_preserved (jfunc)))
@@ -3286,6 +4162,7 @@ void
 ipa_free_node_params_substructures (struct ipa_node_params *info)
 {
   info->descriptors.release ();
+  info->ref_descs.release ();
   free (info->lattices);
   /* Lattice values and their sources are deallocated with their alocation
      pool.  */
@@ -3461,6 +4338,7 @@ ipa_node_duplication_hook (struct cgraph
   new_info = IPA_NODE_REF (dst);
 
   new_info->descriptors = old_info->descriptors.copy ();
+  new_info->ref_descs = old_info->ref_descs.copy ();
   new_info->lattices = NULL;
   new_info->ipcp_orig_node = old_info->ipcp_orig_node;
 
@@ -3577,7 +4455,7 @@ ipa_free_all_structures_after_iinln (voi
 void
 ipa_print_node_params (FILE *f, struct cgraph_node *node)
 {
-  int i, count;
+  unsigned count;
   struct ipa_node_params *info;
 
   if (!node->definition)
@@ -3586,7 +4464,7 @@ ipa_print_node_params (FILE *f, struct c
   fprintf (f, "  function  %s/%i parameter descriptors:\n",
 	   node->name (), node->order);
   count = ipa_get_param_count (info);
-  for (i = 0; i < count; i++)
+  for (unsigned i = 0; i < count; i++)
     {
       int c;
 
@@ -3598,7 +4476,38 @@ ipa_print_node_params (FILE *f, struct c
       if (c == IPA_UNDESCRIBED_USE)
 	fprintf (f, " undescribed_use");
       else
-	fprintf (f, "  controlled_uses=%i", c);
+	fprintf (f, " controlled_uses=%i", c);
+      if (ipa_get_tracked_refs_count (info) > 0)
+	{
+	  if (ipa_is_ref_escaped (info, i))
+	    fprintf (f, " escaped");
+	  else
+	    fprintf (f, " not_esc %s %s",
+		     ipa_is_ref_clobbered (info, i) ? "clobber" : "not_clobber",
+		     ipa_is_ref_callee_clobbered (info, i) ? "call_clobber"
+		     : "not_call_clobber");
+	}
+      fprintf (f, "\n");
+    }
+
+  if ((unsigned) ipa_get_tracked_refs_count (info) > count)
+    {
+      fprintf (f, "   The rest of reference escaped flags: ");
+      bool first = true;
+      for (int i = count; i < ipa_get_tracked_refs_count (info); ++i)
+	{
+	  if (!first)
+	    fprintf (f, ", ");
+	  else
+	    first = false;
+	  if (ipa_is_ref_escaped (info, i))
+	    fprintf (f, "%i: esc", i);
+	  else
+	    fprintf (f, "%i: not_esc %s %s", i,
+		     ipa_is_ref_clobbered (info, i) ? "clobber" : "not_clobber",
+		     ipa_is_ref_callee_clobbered (info, i) ? "call_clobber"
+		     : "not_call_clobber");
+	}
       fprintf (f, "\n");
     }
 }
@@ -4378,16 +5287,34 @@ ipa_write_jump_function (struct output_b
   switch (jump_func->type)
     {
     case IPA_JF_UNKNOWN:
+      bp = bitpack_create (ob->main_stream);
+      bp_pack_value (&bp, ipa_get_jf_unknown_esc_ref_valid (jump_func), 1);
+      streamer_write_bitpack (&bp);
+      if (ipa_get_jf_unknown_esc_ref_valid (jump_func))
+	streamer_write_uhwi (ob, ipa_get_jf_unknown_esc_ref_index (jump_func));
       break;
     case IPA_JF_KNOWN_TYPE:
       streamer_write_uhwi (ob, jump_func->value.known_type.offset);
       stream_write_tree (ob, jump_func->value.known_type.base_type, true);
       stream_write_tree (ob, jump_func->value.known_type.component_type, true);
+      bp = bitpack_create (ob->main_stream);
+      bp_pack_value (&bp, ipa_get_jf_known_type_esc_ref_valid (jump_func), 1);
+      streamer_write_bitpack (&bp);
+      if (ipa_get_jf_known_type_esc_ref_valid (jump_func))
+	streamer_write_uhwi (ob,
+			     ipa_get_jf_known_type_esc_ref_index (jump_func));
       break;
     case IPA_JF_CONST:
       gcc_assert (
 	  EXPR_LOCATION (jump_func->value.constant.value) == UNKNOWN_LOCATION);
       stream_write_tree (ob, jump_func->value.constant.value, true);
+
+      bp = bitpack_create (ob->main_stream);
+      bp_pack_value (&bp, ipa_get_jf_constant_esc_ref_valid (jump_func), 1);
+      streamer_write_bitpack (&bp);
+      if (ipa_get_jf_constant_esc_ref_valid (jump_func))
+	streamer_write_uhwi (ob,
+			     ipa_get_jf_constant_esc_ref_index (jump_func));
       break;
     case IPA_JF_PASS_THROUGH:
       streamer_write_uhwi (ob, jump_func->value.pass_through.operation);
@@ -4448,19 +5375,44 @@ ipa_read_jump_function (struct lto_input
   switch (jftype)
     {
     case IPA_JF_UNKNOWN:
-      jump_func->type = IPA_JF_UNKNOWN;
+      {
+	ipa_set_jf_unknown (jump_func);
+	struct bitpack_d bp = streamer_read_bitpack (ib);
+	bool esc_ref_valid = bp_unpack_value (&bp, 1);
+	if (esc_ref_valid)
+	  {
+	    unsigned esc_ref_idx = streamer_read_uhwi (ib);
+	    ipa_set_jf_unknown_ref_index (jump_func, esc_ref_idx);
+	  }
+      }
       break;
     case IPA_JF_KNOWN_TYPE:
       {
 	HOST_WIDE_INT offset = streamer_read_uhwi (ib);
 	tree base_type = stream_read_tree (ib, data_in);
 	tree component_type = stream_read_tree (ib, data_in);
+	struct bitpack_d bp = streamer_read_bitpack (ib);
+	bool esc_ref_valid = bp_unpack_value (&bp, 1);
 
 	ipa_set_jf_known_type (jump_func, offset, base_type, component_type);
+	if (esc_ref_valid)
+	  {
+	    unsigned esc_ref_idx = streamer_read_uhwi (ib);
+	    ipa_set_jf_known_type_ref_index (jump_func, esc_ref_idx);
+	  }
 	break;
       }
     case IPA_JF_CONST:
-      ipa_set_jf_constant (jump_func, stream_read_tree (ib, data_in), cs);
+      {
+	ipa_set_jf_constant (jump_func, stream_read_tree (ib, data_in), cs);
+	struct bitpack_d bp = streamer_read_bitpack (ib);
+	bool esc_ref_valid = bp_unpack_value (&bp, 1);
+	if (esc_ref_valid)
+	  {
+	    unsigned esc_ref_idx = streamer_read_uhwi (ib);
+	    ipa_set_jf_constant_ref_index (jump_func, esc_ref_idx);
+	  }
+      }
       break;
     case IPA_JF_PASS_THROUGH:
       operation = (enum tree_code) streamer_read_uhwi (ib);
@@ -4592,12 +5544,27 @@ ipa_write_node_info (struct output_block
   gcc_assert (info->analysis_done
 	      || ipa_get_param_count (info) == 0);
   gcc_assert (!info->node_enqueued);
+  gcc_assert (!info->node_up_enqueued);
   gcc_assert (!info->ipcp_orig_node);
   for (j = 0; j < ipa_get_param_count (info); j++)
     bp_pack_value (&bp, ipa_is_param_used (info, j), 1);
   streamer_write_bitpack (&bp);
   for (j = 0; j < ipa_get_param_count (info); j++)
     streamer_write_hwi (ob, ipa_get_controlled_uses (info, j));
+
+  streamer_write_uhwi (ob, ipa_get_tracked_refs_count (info));
+  if (ipa_get_tracked_refs_count (info) > 0)
+    {
+      bp = bitpack_create (ob->main_stream);
+      for (int i = 0; i < ipa_get_tracked_refs_count (info); ++i)
+	{
+	  bp_pack_value (&bp, ipa_is_ref_escaped (info, i), 1);
+	  bp_pack_value (&bp, ipa_is_ref_clobbered (info, i), 1);
+	  bp_pack_value (&bp, ipa_is_ref_callee_clobbered (info, i), 1);
+	}
+      streamer_write_bitpack (&bp);
+    }
+
   for (e = node->callees; e; e = e->next_callee)
     {
       struct ipa_edge_args *args = IPA_EDGE_REF (e);
@@ -4632,15 +5599,30 @@ ipa_read_node_info (struct lto_input_blo
 
   for (k = 0; k < ipa_get_param_count (info); k++)
     info->descriptors[k].move_cost = streamer_read_uhwi (ib);
-    
+
   bp = streamer_read_bitpack (ib);
   if (ipa_get_param_count (info) != 0)
     info->analysis_done = true;
   info->node_enqueued = false;
+  info->node_up_enqueued = false;
   for (k = 0; k < ipa_get_param_count (info); k++)
     ipa_set_param_used (info, k, bp_unpack_value (&bp, 1));
   for (k = 0; k < ipa_get_param_count (info); k++)
     ipa_set_controlled_uses (info, k, streamer_read_hwi (ib));
+
+  unsigned ref_count = streamer_read_uhwi (ib);
+  if (ref_count > 0)
+    {
+      bp = streamer_read_bitpack (ib);
+      info->ref_descs.safe_grow_cleared (ref_count);
+      for (unsigned i = 0; i < ref_count; ++i)
+	{
+	  ipa_set_ref_escaped (info, i, bp_unpack_value (&bp, 1));
+	  ipa_set_ref_clobbered (info, i, bp_unpack_value (&bp, 1));
+	  ipa_set_ref_callee_clobbered (info, i, bp_unpack_value (&bp, 1));
+	}
+    }
+
   for (e = node->callees; e; e = e->next_callee)
     {
       struct ipa_edge_args *args = IPA_EDGE_REF (e);
@@ -4830,7 +5812,7 @@ read_agg_replacement_chain (struct lto_i
   unsigned int count, i;
 
   count = streamer_read_uhwi (ib);
-  for (i = 0; i <count; i++)
+  for (i = 0; i < count; i++)
     {
       struct ipa_agg_replacement_value *av;
       struct bitpack_d bp;
@@ -5134,6 +6116,8 @@ ipcp_transform_function (struct cgraph_n
   fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
   fbi.param_count = param_count;
   fbi.aa_walked = 0;
+  fbi.escapes = vNULL;
+  fbi.decl_escapes = NULL;
 
   descriptors.safe_grow_cleared (param_count);
   ipa_populate_param_decls (node, descriptors);
Index: src/gcc/ipa-prop.h
===================================================================
--- src.orig/gcc/ipa-prop.h
+++ src/gcc/ipa-prop.h
@@ -73,6 +73,17 @@ enum jump_func_type
   IPA_JF_ANCESTOR	    /* represented by field ancestor */
 };
 
+
+/* Structure describing data which are generally unknown at compile time, yet
+   may have some useful properties.  */
+struct GTY (()) ipa_unknown_data
+{
+  /* If True, the next field contains valid index.  */
+  unsigned escape_ref_valid : 1;
+  /* Index into escaped_ref flags that describe data this refers to.  */
+  unsigned escape_ref_index : 31;
+};
+
 /* Structure holding data required to describe a known type jump function.  */
 struct GTY(()) ipa_known_type_data
 {
@@ -82,6 +93,10 @@ struct GTY(()) ipa_known_type_data
   tree base_type;
   /* Type of the component of the object that is being described.  */
   tree component_type;
+  /* If True, the next field contains valid index.  */
+  unsigned escape_ref_valid : 1;
+  /* Index into escaped_ref flags that describe data this refers to.  */
+  unsigned escape_ref_index : 31;
 };
 
 struct ipa_cst_ref_desc;
@@ -93,6 +108,10 @@ struct GTY(()) ipa_constant_data
   tree value;
   /* Pointer to the structure that describes the reference.  */
   struct ipa_cst_ref_desc GTY((skip)) *rdesc;
+  /* If True, the next field contains valid index.  */
+  unsigned escape_ref_valid : 1;
+  /* Index into escaped_ref flags that describe data this refers to.  */
+  unsigned escape_ref_index : 31;
 };
 
 /* Structure holding data required to describe a pass-through jump function.  */
@@ -187,11 +206,10 @@ struct GTY (()) ipa_jump_func
   struct ipa_agg_jump_function agg;
 
   enum jump_func_type type;
-  /* Represents a value of a jump function.  pass_through is used only in jump
-     function context.  constant represents the actual constant in constant jump
-     functions and member_cst holds constant c++ member functions.  */
+  /* Represents a value of a jump function.  */
   union jump_func_value
   {
+    struct ipa_unknown_data GTY ((tag ("IPA_JF_UNKNOWN"))) unknown;
     struct ipa_known_type_data GTY ((tag ("IPA_JF_KNOWN_TYPE"))) known_type;
     struct ipa_constant_data GTY ((tag ("IPA_JF_CONST"))) constant;
     struct ipa_pass_through_data GTY ((tag ("IPA_JF_PASS_THROUGH"))) pass_through;
@@ -199,6 +217,26 @@ struct GTY (()) ipa_jump_func
   } GTY ((desc ("%1.type"))) value;
 };
 
+/* Return whether the unknown jump function JFUNC has an associated valid index
+   into callers escaped_ref flags.  */
+
+static inline bool
+ipa_get_jf_unknown_esc_ref_valid (struct ipa_jump_func *jfunc)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
+  return jfunc->value.unknown.escape_ref_valid;
+}
+
+/* Return the index into escaped ref flags of the caller that corresponds to
+   data described by an unknown jump function JFUNC.  */
+
+static inline int
+ipa_get_jf_unknown_esc_ref_index (struct ipa_jump_func *jfunc)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
+  gcc_checking_assert (ipa_get_jf_unknown_esc_ref_valid (jfunc));
+  return jfunc->value.unknown.escape_ref_index;
+}
 
 /* Return the offset of the component that is described by a known type jump
    function JFUNC.  */
@@ -228,6 +266,27 @@ ipa_get_jf_known_type_component_type (st
   return jfunc->value.known_type.component_type;
 }
 
+/* Return whether the described known type jump function JFUNC has a valid
+   index into callers escaped_ref flags.  */
+
+static inline bool
+ipa_get_jf_known_type_esc_ref_valid (struct ipa_jump_func *jfunc)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
+  return jfunc->value.known_type.escape_ref_valid;
+}
+
+/* Return the index into escaped ref flags of the caller that corresponds to
+   data described by a known type jump function.  */
+
+static inline int
+ipa_get_jf_known_type_esc_ref_index (struct ipa_jump_func *jfunc)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
+  gcc_checking_assert (ipa_get_jf_known_type_esc_ref_valid (jfunc));
+  return jfunc->value.known_type.escape_ref_index;
+}
+
 /* Return the constant stored in a constant jump functin JFUNC.  */
 
 static inline tree
@@ -237,6 +296,8 @@ ipa_get_jf_constant (struct ipa_jump_fun
   return jfunc->value.constant.value;
 }
 
+/* Return the reference description stored in constant jump function JFUNC.  */
+
 static inline struct ipa_cst_ref_desc *
 ipa_get_jf_constant_rdesc (struct ipa_jump_func *jfunc)
 {
@@ -244,6 +305,27 @@ ipa_get_jf_constant_rdesc (struct ipa_ju
   return jfunc->value.constant.rdesc;
 }
 
+/* Return whether the described constant jump function JFUNC has a valid
+   index into callers escaped_ref flags.  */
+
+static inline bool
+ipa_get_jf_constant_esc_ref_valid (struct ipa_jump_func *jfunc)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
+  return jfunc->value.constant.escape_ref_valid;
+}
+
+/* Return the index into escaped ref flags of the caller that corresponds to
+   data described by a constant jump function.  */
+
+static inline int
+ipa_get_jf_constant_esc_ref_index (struct ipa_jump_func *jfunc)
+{
+  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
+  gcc_checking_assert (ipa_get_jf_constant_esc_ref_valid (jfunc));
+  return jfunc->value.constant.escape_ref_index;
+}
+
 /* Return the operand of a pass through jmp function JFUNC.  */
 
 static inline tree
@@ -346,13 +428,41 @@ struct ipa_param_descriptor
      says how many there are.  If any use could not be described by means of
      ipa-prop structures, this is IPA_UNDESCRIBED_USE.  */
   int controlled_uses;
-  unsigned int move_cost : 31;
+  unsigned int move_cost : 30;
   /* The parameter is used.  */
   unsigned used : 1;
 };
 
 struct ipcp_lattice;
 
+/* Interprocedural information about references that we try to prove have not
+   escaped, among other properties.  We keep this information for all formal
+   parameters, even when they are not in fact references, so that indices into
+   param descriptors match those to reference descriptors, however we also keep
+   it for some other references that we pass as actual arguments to callees,
+   their indices must be derived from jump functions.
+
+   These flags hold results of intraprocedural summary gathering and
+   intermediate values during interprocedural propagation, as opposed to
+   corresponding bitmaps in cgraph_node which hold final results.  After
+   ipa_spread_escapes finishes, the corresponding bits in both structures are
+   the same, however ipa_ref_descriptor is freed at the end of the IPA
+   analysis stage.  */
+
+struct ipa_ref_descriptor
+{
+  /* Set if the reference could have escaped.  */
+  unsigned int escaped : 1;
+  /* Valid only if escaped is false.  Set when the memory the reference refers
+     to could have been written to in this function or in any of the
+     callees.  */
+  unsigned int clobbered : 1;
+  /* Valid only if escaped is false.  Set when the memory the reference refers
+     to could have been written to in any of the callees this function has
+     (i.e. disregarding any modifications in this particular function).  */
+  unsigned int callee_clobbered : 1;
+};
+
 /* ipa_node_params stores information related to formal parameters of functions
    and some other information for interprocedural passes that operate on
    parameters (such as ipa-cp).  */
@@ -362,6 +472,11 @@ struct ipa_node_params
   /* Information about individual formal parameters that are gathered when
      summaries are generated. */
   vec<ipa_param_descriptor> descriptors;
+
+  /* Escape and other information about formal parameters and also some
+     references passed as actual parameters to callees. */
+  vec<ipa_ref_descriptor> ref_descs;
+
   /* Pointer to an array of structures describing individual formal
      parameters.  */
   struct ipcp_param_lattices *lattices;
@@ -374,8 +489,12 @@ struct ipa_node_params
   /* Whether the param uses analysis and jump function computation has already
      been performed.  */
   unsigned analysis_done : 1;
-  /* Whether the function is enqueued in ipa-cp propagation stack.  */
+  /* Whether the function is enqueued in ipa-cp propagation stack or when
+     propagating escape flags "downwards" (i.e. from callers to callees).  */
   unsigned node_enqueued : 1;
+  /* Whether the function is enqueued in a to-do list of "upwards" escape flag
+     propagation (i.e. from callees to callers).  */
+  unsigned node_up_enqueued : 1;
   /* Whether we should create a specialized version based on values that are
      known to be constant in all contexts.  */
   unsigned do_clone_for_all_contexts : 1;
@@ -452,6 +571,45 @@ ipa_is_param_used (struct ipa_node_param
   return info->descriptors[i].used;
 }
 
+/* Return if reference number I (there are more of them than parameters, we
+   also have this information for some actual arguments passed to callees) of
+   the function associated with INFO has uncontrollably escaped.  */
+
+static inline bool
+ipa_is_ref_escaped (struct ipa_node_params *info, int i)
+{
+  return info->ref_descs[i].escaped;
+}
+
+/* Return if the reference number I tracked in function corresponding to INFO
+   is clobbered in any way during the run of the function.  */
+
+static inline bool
+ipa_is_ref_clobbered (struct ipa_node_params *info, int i)
+{
+  return info->ref_descs[i].clobbered;
+}
+
+/* Return if the reference number I tracked in function corresponding to INFO
+   is clobbered in any of its callees (disregarding the function itself).  */
+
+static inline bool
+ipa_is_ref_callee_clobbered (struct ipa_node_params *info, int i)
+{
+  return info->ref_descs[i].callee_clobbered;
+}
+
+/* Return true iff we know that the Ith parameter of function described by
+   INFO does not escape and that it or any pointers derived from it are not
+   used as a base for a memory write in the node described by INFO and all its
+   (even indirect) callees.  */
+
+static inline bool
+ipa_is_param_ref_safely_constant (struct ipa_node_params *info, int i)
+{
+  return !ipa_is_ref_escaped (info, i) && !ipa_is_ref_clobbered (info, i);
+}
+
 /* Information about replacements done in aggregates for a given node (each
    node has its linked list).  */
 struct GTY(()) ipa_agg_replacement_value
@@ -589,6 +747,7 @@ tree ipa_intraprocedural_devirtualizatio
 
 /* Functions related to both.  */
 void ipa_analyze_node (struct cgraph_node *);
+void ipa_spread_escapes ();
 
 /* Aggregate jump function related functions.  */
 tree ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *, HOST_WIDE_INT,
Index: src/gcc/ipa-cp.c
===================================================================
--- src.orig/gcc/ipa-cp.c
+++ src/gcc/ipa-cp.c
@@ -1311,12 +1311,17 @@ merge_aggregate_lattices (struct cgraph_
    rules about propagating values passed by reference.  */
 
 static bool
-agg_pass_through_permissible_p (struct ipcp_param_lattices *src_plats,
+agg_pass_through_permissible_p (struct ipa_node_params *caller_info,
+				struct ipcp_param_lattices *src_plats,
 				struct ipa_jump_func *jfunc)
 {
-  return src_plats->aggs
-    && (!src_plats->aggs_by_ref
-	|| ipa_get_jf_pass_through_agg_preserved (jfunc));
+  if (!src_plats->aggs)
+    return false;
+
+  return !src_plats->aggs_by_ref
+    || ipa_is_param_ref_safely_constant (caller_info,
+				 ipa_get_jf_pass_through_formal_id (jfunc))
+    || ipa_get_jf_pass_through_agg_preserved (jfunc);
 }
 
 /* Propagate scalar values across jump function JFUNC that is associated with
@@ -1340,7 +1345,7 @@ propagate_aggs_accross_jump_function (st
       struct ipcp_param_lattices *src_plats;
 
       src_plats = ipa_get_parm_lattices (caller_info, src_idx);
-      if (agg_pass_through_permissible_p (src_plats, jfunc))
+      if (agg_pass_through_permissible_p (caller_info, src_plats, jfunc))
 	{
 	  /* Currently we do not produce clobber aggregate jump
 	     functions, replace with merging when we do.  */
@@ -1351,15 +1356,16 @@ propagate_aggs_accross_jump_function (st
       else
 	ret |= set_agg_lats_contain_variable (dest_plats);
     }
-  else if (jfunc->type == IPA_JF_ANCESTOR
-	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
+  else if (jfunc->type == IPA_JF_ANCESTOR)
     {
       struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
       int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
       struct ipcp_param_lattices *src_plats;
 
       src_plats = ipa_get_parm_lattices (caller_info, src_idx);
-      if (src_plats->aggs && src_plats->aggs_by_ref)
+      if (src_plats->aggs && src_plats->aggs_by_ref
+	  && (ipa_is_param_ref_safely_constant (caller_info, src_idx)
+	      || ipa_get_jf_ancestor_agg_preserved (jfunc)))
 	{
 	  /* Currently we do not produce clobber aggregate jump
 	     functions, replace with merging when we do.  */
@@ -1367,7 +1373,7 @@ propagate_aggs_accross_jump_function (st
 	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
 					   ipa_get_jf_ancestor_offset (jfunc));
 	}
-      else if (!src_plats->aggs_by_ref)
+      else if (src_plats->aggs && !src_plats->aggs_by_ref)
 	ret |= set_agg_lats_to_bottom (dest_plats);
       else
 	ret |= set_agg_lats_contain_variable (dest_plats);
@@ -3037,39 +3043,49 @@ intersect_aggregates_with_edge (struct c
 	  struct ipcp_param_lattices *orig_plats;
 	  orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node),
 					      src_idx);
-	  if (agg_pass_through_permissible_p (orig_plats, jfunc))
+	  if (!agg_pass_through_permissible_p (caller_info, orig_plats, jfunc))
 	    {
-	      if (!inter.exists ())
-		inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
-	      else
-		intersect_with_agg_replacements (cs->caller, src_idx,
-						 &inter, 0);
+	      inter.release ();
+	      return vNULL;
 	    }
+	  if (!inter.exists ())
+	    inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
+	  else
+	    intersect_with_agg_replacements (cs->caller, src_idx,
+					     &inter, 0);
 	}
       else
 	{
 	  struct ipcp_param_lattices *src_plats;
 	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
-	  if (agg_pass_through_permissible_p (src_plats, jfunc))
+	  if (!agg_pass_through_permissible_p (caller_info, src_plats, jfunc))
 	    {
-	      /* Currently we do not produce clobber aggregate jump
-		 functions, adjust when we do.  */
-	      gcc_checking_assert (!jfunc->agg.items);
-	      if (!inter.exists ())
-		inter = copy_plats_to_inter (src_plats, 0);
-	      else
-		intersect_with_plats (src_plats, &inter, 0);
+	      inter.release ();
+	      return vNULL;
 	    }
+	  /* Currently we do not produce clobber aggregate jump functions,
+	     adjust when we do.  */
+	  gcc_checking_assert (!jfunc->agg.items);
+	  if (!inter.exists ())
+	    inter = copy_plats_to_inter (src_plats, 0);
+	  else
+	    intersect_with_plats (src_plats, &inter, 0);
 	}
     }
-  else if (jfunc->type == IPA_JF_ANCESTOR
-	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
+  else if (jfunc->type == IPA_JF_ANCESTOR)
     {
       struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
       int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
       struct ipcp_param_lattices *src_plats;
       HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
 
+      if (!ipa_is_param_ref_safely_constant (caller_info, src_idx)
+	  && !ipa_get_jf_ancestor_agg_preserved (jfunc))
+	{
+	  inter.release ();
+	  return vNULL;
+	}
+
       if (caller_info->ipcp_orig_node)
 	{
 	  if (!inter.exists ())
@@ -3115,9 +3131,8 @@ intersect_aggregates_with_edge (struct c
 		  break;
 		if (ti->offset == item->offset)
 		  {
-		    gcc_checking_assert (ti->value);
-		    if (values_equal_for_ipcp_p (item->value,
-						 ti->value))
+		    if (ti->value
+			&& values_equal_for_ipcp_p (item->value, ti->value))
 		      found = true;
 		    break;
 		  }
@@ -3686,6 +3701,9 @@ ipcp_driver (void)
 
   ipa_check_create_node_params ();
   ipa_check_create_edge_args ();
+
+  ipa_spread_escapes ();
+
   grow_edge_clone_vectors ();
   edge_duplication_hook_holder =
     cgraph_add_edge_duplication_hook (&ipcp_edge_duplication_hook, NULL);
Index: src/gcc/ipa-inline.c
===================================================================
--- src.orig/gcc/ipa-inline.c
+++ src/gcc/ipa-inline.c
@@ -2143,6 +2143,9 @@ ipa_inline (void)
   if (!optimize)
     return 0;
 
+  if (!flag_ipa_cp)
+    ipa_spread_escapes ();
+
   order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
 
   if (in_lto_p && optimize)
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+volatile int g1, g2;
+
+static void __attribute__ ((noinline))
+bar (int *i)
+{
+  g1 = *i;
+}
+
+static void __attribute__ ((noinline))
+foo (int *i)
+{
+  bar (i);
+  bar (i);
+
+  g2 = *i;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i = 8;
+
+  foo (&i);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of foo.*for all known contexts" "cp" } } */
+/* { dg-final { scan-ipa-dump "Creating a specialized node of bar.*for all known contexts" "cp" } } */
+/* { dg-final { scan-ipa-dump-times "= 8" 6 "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */
Index: src/gcc/cgraph.c
===================================================================
--- src.orig/gcc/cgraph.c
+++ src/gcc/cgraph.c
@@ -3174,4 +3174,47 @@ gimple_check_call_matching_types (gimple
   return true;
 }
 
+/* Return true if parameter number I of NODE is marked as known not to
+   escape.  */
+
+bool
+cgraph_param_noescape_p (cgraph_node *node, int i)
+{
+  return node->global.noescape_parameters
+    && bitmap_bit_p (node->global.noescape_parameters, i);
+}
+
+/* Mark parameter number I of NODE as known not to escape.  */
+
+void
+cgraph_set_param_noescape (cgraph_node *node, int i)
+{
+  if (!node->global.noescape_parameters)
+    node->global.noescape_parameters = BITMAP_GGC_ALLOC ();
+  bitmap_set_bit (node->global.noescape_parameters, i);
+}
+
+/* Return true if memory accessible through parameter number I of NODE is
+   marked as known not to be clobbered.  */
+
+bool
+cgraph_param_noclobber_p (cgraph_node *node, int i)
+{
+  return node->global.noclobber_parameters
+    && bitmap_bit_p (node->global.noclobber_parameters, i);
+}
+
+/* Mark memory reachable by parameter number I of NODE as known not to be
+   clobbered.  */
+
+void
+cgraph_set_param_noclobber (cgraph_node *node, int i)
+{
+  if (!node->global.noclobber_parameters)
+    node->global.noclobber_parameters = BITMAP_GGC_ALLOC ();
+  bitmap_set_bit (node->global.noclobber_parameters, i);
+}
+
+
+
 #include "gt-cgraph.h"
Index: src/gcc/cgraph.h
===================================================================
--- src.orig/gcc/cgraph.h
+++ src/gcc/cgraph.h
@@ -227,6 +227,13 @@ struct GTY(()) cgraph_global_info {
   /* For inline clones this points to the function they will be
      inlined into.  */
   struct cgraph_node *inlined_to;
+
+  /* Parameters that are known not to escape from this function.  */
+  bitmap noescape_parameters;
+
+  /* Parameters for which the memory reached by them is known not to be
+     clobbered.  */
+  bitmap noclobber_parameters;
 };
 
 /* Information about the function that is propagated by the RTL backend.
@@ -870,6 +877,11 @@ void cgraph_speculative_call_info (struc
 				   struct ipa_ref *&);
 extern bool gimple_check_call_matching_types (gimple, tree, bool);
 
+extern bool cgraph_param_noescape_p (cgraph_node *node, int i);
+extern void cgraph_set_param_noescape (cgraph_node *node, int i);
+extern bool cgraph_param_noclobber_p (cgraph_node *node, int i);
+extern void cgraph_set_param_noclobber (cgraph_node *node, int i);
+
 /* In cgraphunit.c  */
 struct asm_node *add_asm_node (tree);
 extern FILE *cgraph_dump_file;
Index: src/gcc/cgraphclones.c
===================================================================
--- src.orig/gcc/cgraphclones.c
+++ src/gcc/cgraphclones.c
@@ -338,6 +338,8 @@ duplicate_thunk_for_node (cgraph_node *t
   gcc_checking_assert (!DECL_INITIAL (new_decl));
   gcc_checking_assert (!DECL_RESULT (new_decl));
   gcc_checking_assert (!DECL_RTL_SET_P (new_decl));
+  gcc_checking_assert (!thunk->global.noescape_parameters
+		       && !thunk->global.noclobber_parameters);
 
   DECL_NAME (new_decl) = clone_function_name (thunk->decl, "artificial_thunk");
   SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
@@ -375,6 +377,26 @@ redirect_edge_duplicating_thunks (struct
   cgraph_redirect_edge_callee (e, n);
 }
 
+/* Copy global.noescape_parameters and global.noclobber_parameters of SRC to
+   DST.  */
+
+static void
+copy_noescape_noclobber_bitmaps (cgraph_node *dst, cgraph_node *src)
+{
+  if (src->global.noescape_parameters)
+    {
+      dst->global.noescape_parameters = BITMAP_GGC_ALLOC ();
+      bitmap_copy (dst->global.noescape_parameters,
+		   src->global.noescape_parameters);
+    }
+  if (src->global.noclobber_parameters)
+    {
+      dst->global.noclobber_parameters = BITMAP_GGC_ALLOC ();
+      bitmap_copy (dst->global.noclobber_parameters,
+		   src->global.noclobber_parameters);
+    }
+}
+
 /* Create node representing clone of N executed COUNT times.  Decrease
    the execution counts from original node too.
    The new clone will have decl set to DECL that may or may not be the same
@@ -418,8 +440,8 @@ cgraph_clone_node (struct cgraph_node *n
   new_node->local = n->local;
   new_node->externally_visible = false;
   new_node->local.local = true;
-  new_node->global = n->global;
   new_node->global.inlined_to = new_inlined_to;
+  copy_noescape_noclobber_bitmaps (new_node, n);
   new_node->rtl = n->rtl;
   new_node->count = count;
   new_node->frequency = n->frequency;
@@ -883,7 +905,8 @@ cgraph_copy_node_for_versioning (struct
    new_version->local = old_version->local;
    new_version->externally_visible = false;
    new_version->local.local = new_version->definition;
-   new_version->global = old_version->global;
+   new_version->global.inlined_to = old_version->global.inlined_to;
+   copy_noescape_noclobber_bitmaps (new_version, old_version);
    new_version->rtl = old_version->rtl;
    new_version->count = old_version->count;
 
Index: src/gcc/lto-cgraph.c
===================================================================
--- src.orig/gcc/lto-cgraph.c
+++ src/gcc/lto-cgraph.c
@@ -1626,6 +1626,24 @@ output_edge_opt_summary (struct output_b
 {
 }
 
+/* Output a bitmap BMP.  Aimed primarily at bitmaps describing parameters in
+   cgraph_node.  */
+
+static void
+output_param_bitmap (struct output_block *ob, bitmap bmp)
+{
+  if (bmp)
+    {
+      unsigned int index;
+      bitmap_iterator bi;
+      streamer_write_uhwi (ob, bitmap_count_bits (bmp));
+      EXECUTE_IF_SET_IN_BITMAP (bmp, 0, index, bi)
+	streamer_write_uhwi (ob, index);
+    }
+  else
+    streamer_write_uhwi (ob, 0);
+}
+
 /* Output optimization summary for NODE to OB.  */
 
 static void
@@ -1633,29 +1651,15 @@ output_node_opt_summary (struct output_b
 			 struct cgraph_node *node,
 			 lto_symtab_encoder_t encoder)
 {
-  unsigned int index;
-  bitmap_iterator bi;
   struct ipa_replace_map *map;
   struct bitpack_d bp;
   int i;
   struct cgraph_edge *e;
 
-  if (node->clone.args_to_skip)
-    {
-      streamer_write_uhwi (ob, bitmap_count_bits (node->clone.args_to_skip));
-      EXECUTE_IF_SET_IN_BITMAP (node->clone.args_to_skip, 0, index, bi)
-	streamer_write_uhwi (ob, index);
-    }
-  else
-    streamer_write_uhwi (ob, 0);
-  if (node->clone.combined_args_to_skip)
-    {
-      streamer_write_uhwi (ob, bitmap_count_bits (node->clone.combined_args_to_skip));
-      EXECUTE_IF_SET_IN_BITMAP (node->clone.combined_args_to_skip, 0, index, bi)
-	streamer_write_uhwi (ob, index);
-    }
-  else
-    streamer_write_uhwi (ob, 0);
+  output_param_bitmap (ob, node->clone.args_to_skip);
+  output_param_bitmap (ob, node->clone.combined_args_to_skip);
+  output_param_bitmap (ob, node->global.noescape_parameters);
+  output_param_bitmap (ob, node->global.noclobber_parameters);
   streamer_write_uhwi (ob, vec_safe_length (node->clone.tree_map));
   FOR_EACH_VEC_SAFE_ELT (node->clone.tree_map, i, map)
     {
@@ -1724,6 +1728,25 @@ input_edge_opt_summary (struct cgraph_ed
 {
 }
 
+/* Input and return a bitmap that was output by output_param_bitmap.  */
+
+static bitmap
+input_param_bitmap (struct lto_input_block *ib_main)
+{
+  int count;
+
+  count = streamer_read_uhwi (ib_main);
+  if (!count)
+    return NULL;
+  bitmap res = BITMAP_GGC_ALLOC ();
+  for (int i = 0; i < count; i++)
+    {
+      int bit = streamer_read_uhwi (ib_main);
+      bitmap_set_bit (res, bit);
+    }
+  return res;
+}
+
 /* Input optimisation summary of NODE.  */
 
 static void
@@ -1731,28 +1754,14 @@ input_node_opt_summary (struct cgraph_no
 			struct lto_input_block *ib_main,
 			struct data_in *data_in)
 {
-  int i;
   int count;
-  int bit;
+  int i;
   struct bitpack_d bp;
   struct cgraph_edge *e;
-
-  count = streamer_read_uhwi (ib_main);
-  if (count)
-    node->clone.args_to_skip = BITMAP_GGC_ALLOC ();
-  for (i = 0; i < count; i++)
-    {
-      bit = streamer_read_uhwi (ib_main);
-      bitmap_set_bit (node->clone.args_to_skip, bit);
-    }
-  count = streamer_read_uhwi (ib_main);
-  if (count)
-    node->clone.combined_args_to_skip = BITMAP_GGC_ALLOC ();
-  for (i = 0; i < count; i++)
-    {
-      bit = streamer_read_uhwi (ib_main);
-      bitmap_set_bit (node->clone.combined_args_to_skip, bit);
-    }
+  node->clone.args_to_skip = input_param_bitmap (ib_main);
+  node->clone.combined_args_to_skip = input_param_bitmap (ib_main);
+  node->global.noescape_parameters = input_param_bitmap (ib_main);
+  node->global.noclobber_parameters = input_param_bitmap (ib_main);
   count = streamer_read_uhwi (ib_main);
   for (i = 0; i < count; i++)
     {
Index: src/gcc/tree-inline.c
===================================================================
--- src.orig/gcc/tree-inline.c
+++ src/gcc/tree-inline.c
@@ -5248,6 +5248,55 @@ update_clone_info (copy_body_data * id)
     }
 }
 
+/* Update global.noescape_parameters and global.noclobber_parameters of NODE to
+   reflect parameters about to be skipped as indicated in ARGS_TO_SKIP.
+   ORIG_PARM is the chain of parameters of the original node.  */
+
+void
+update_noescape_noclobber_bitmaps (cgraph_node *node, tree orig_parm,
+				   bitmap args_to_skip)
+{
+  if (!args_to_skip || bitmap_empty_p (args_to_skip)
+      || !orig_parm
+      || (!node->global.noescape_parameters
+	  && !node->global.noclobber_parameters))
+    return;
+
+  int count = 0;
+  while (orig_parm)
+    {
+      count++;
+      orig_parm = DECL_CHAIN (orig_parm);
+    }
+
+  bitmap new_noescape = NULL;
+  bitmap new_noclobber = NULL;
+
+  int ni = 0;
+  for (int i = 0; i < count; i++)
+    if (!bitmap_bit_p (args_to_skip, i))
+      {
+	if (node->global.noescape_parameters
+	    && bitmap_bit_p (node->global.noescape_parameters, i))
+	  {
+	    if (!new_noescape)
+	      new_noescape = BITMAP_GGC_ALLOC ();
+	    bitmap_set_bit (new_noescape, ni);
+	  }
+	if (node->global.noclobber_parameters
+	    && bitmap_bit_p (node->global.noclobber_parameters, i))
+	  {
+	    if (!new_noclobber)
+	      new_noclobber = BITMAP_GGC_ALLOC ();
+	    bitmap_set_bit (new_noclobber, ni);
+	  }
+	ni++;
+      }
+  node->global.noescape_parameters = new_noescape;
+  node->global.noclobber_parameters = new_noclobber;
+}
+
+
 /* Create a copy of a function's tree.
    OLD_DECL and NEW_DECL are FUNCTION_DECL tree nodes
    of the original function and the new copied function
@@ -5405,6 +5454,8 @@ tree_function_versioning (tree old_decl,
 	      }
 	  }
       }
+  update_noescape_noclobber_bitmaps (new_version_node,
+				     DECL_ARGUMENTS (old_decl), args_to_skip);
   /* Copy the function's arguments.  */
   if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
     DECL_ARGUMENTS (new_decl) =

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-21 13:31 ` [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags Martin Jambor
@ 2014-05-21 14:27   ` Richard Biener
  2014-05-22 12:49     ` Martin Jambor
  0 siblings, 1 reply; 29+ messages in thread
From: Richard Biener @ 2014-05-21 14:27 UTC (permalink / raw)
  To: Martin Jambor; +Cc: GCC Patches, Jan Hubicka

On Wed, May 21, 2014 at 3:16 PM, Martin Jambor <mjambor@suse.cz> wrote:
> Hi,
>
> this demonstrates how results of ipa-prop escape analysis from
> previous patches can be used at a later stage of compilation by
> directly returning them from gimple_call_arg_flags which currently
> relies on fnspec annotations.
>
> Bootstrapped and tested on x86_64-linux and also passes LTO bootstrap.
> I have only had a brief look at behavior of this in SPEC 2006 and for
> example in astar 1.19% of invocations of gimple_call_arg_flags return
> noescape where we previously never did and in calculix this increases
> from 15.62% (from annotations) to 18.14%.  Noclobber flag is reported
> far less often still but for example in gamess that number raises from
> 5.21% to 7.66%.
>
> Thanks,
>
> Martin
>
>
> 2014-04-30  Martin Jambor  <mjambor@suse.cz>
>
>         * gimple.c: Include cgraph.h.
>         (gimple_call_arg_flags): Also query bitmaps in cgraph_node.
>
> Index: src/gcc/gimple.c
> ===================================================================
> --- src.orig/gcc/gimple.c
> +++ src/gcc/gimple.c
> @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.
>  #include "demangle.h"
>  #include "langhooks.h"
>  #include "bitmap.h"
> -
> +#include "cgraph.h"
>
>  /* All the tuples have their operand vector (if present) at the very bottom
>     of the structure.  Therefore, the offset required to find the
> @@ -1349,32 +1349,50 @@ int
>  gimple_call_arg_flags (const_gimple stmt, unsigned arg)
>  {
>    tree attr = gimple_call_fnspec (stmt);
> +  int ret;
>
> -  if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
> -    return 0;
> -
> -  switch (TREE_STRING_POINTER (attr)[1 + arg])
> +  if (attr && 1 + arg < (unsigned) TREE_STRING_LENGTH (attr))
>      {
> -    case 'x':
> -    case 'X':
> -      return EAF_UNUSED;
> -
> -    case 'R':
> -      return EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
> -
> -    case 'r':
> -      return EAF_NOCLOBBER | EAF_NOESCAPE;
> -
> -    case 'W':
> -      return EAF_DIRECT | EAF_NOESCAPE;
> -
> -    case 'w':
> -      return EAF_NOESCAPE;
> +      switch (TREE_STRING_POINTER (attr)[1 + arg])
> +       {
> +       case 'x':
> +       case 'X':
> +         ret = EAF_UNUSED;
> +         break;
> +       case 'R':
> +         ret = EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
> +         break;
> +       case 'r':
> +         ret = EAF_NOCLOBBER | EAF_NOESCAPE;
> +         break;
> +       case 'W':
> +         ret = EAF_DIRECT | EAF_NOESCAPE;
> +         break;
> +       case 'w':
> +         ret = EAF_NOESCAPE;
> +         break;
> +       case '.':
> +       default:
> +         ret = 0;
> +       }
> +    }
> +  else
> +    ret = 0;
>
> -    case '.':
> -    default:
> -      return 0;
> +  tree callee_decl = gimple_call_fndecl (stmt);
> +  if (callee_decl)
> +    {
> +      cgraph_node *callee_node = cgraph_get_node (callee_decl);
> +      if (callee_node)
> +       {
> +         if (cgraph_param_noescape_p (callee_node, arg))
> +           ret |= EAF_NOESCAPE;
> +         if (cgraph_param_noclobber_p (callee_node, arg))
> +           ret |= EAF_NOCLOBBER;

That's quite expensive.  I guess we need a better way to store
those?

> +       }
>      }
> +
> +  return ret;
>  }
>
>  /* Detects return flags for the call STMT.  */
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] IPA-CP escape and clobber analysis
  2014-05-21 13:31 ` [PATCH 3/7] IPA-CP escape and clobber analysis Martin Jambor
@ 2014-05-21 14:51   ` Richard Biener
  2014-05-23 14:50     ` Martin Jambor
  0 siblings, 1 reply; 29+ messages in thread
From: Richard Biener @ 2014-05-21 14:51 UTC (permalink / raw)
  To: Martin Jambor; +Cc: GCC Patches, Jan Hubicka

On Wed, May 21, 2014 at 3:16 PM, Martin Jambor <mjambor@suse.cz> wrote:
> Hi,
>
> this patch is rather big but not overly complicated.  Its goal is to
> figure out whether data passed to a function by reference escapes
> (somewhere, not necessarily in that particular function) and is
> potentially clobbered (in that one function or its callees).
>
> The result is stored into call graph node global structure, at least
> for now, because it is supposed to live longer than IPA-CP
> optimization info and be available for PTA later in the pipeline.
> Before that, however, quite a lot of intermediate results are stored
> in a number of places.  First of all, there is a vector describing all
> SSA names and address taken local aggregates which is used to figure
> out relations between them and do the local escape and clobber
> analysis (I am aware that a local aggregate might incorrectly pass as
> non-clobbered, that is fixed by the fifth patch, this one is big
> enough as it is and it does not really matter here).
>
> We then store the local results describing formal parameters and
> so-far-presumed-unescaped aggregates and malloced data that is passed
> as actual arguments to other functions into a new vector ref_descs.  I
> did not store this into the existing descriptors vector because there
> are often more elements.  Also, I had to extend the UNKNOWN,
> KNOWN_TYPE and CONSTANT jump functions with an index into this new
> vector (PASS_THROUGH and ANCESTOR reuse the index into parameters), so
> there is quite a lot of new getter and setter methods.
>
> This information is used by simple queue based interprocedural
> propagation.  Eventually, the information is stored into the call
> graph node, as described above.  After propagation, data in ref_descs
> and in the call graph are the same, only the call graph can live much
> longer.  One set of flags that is not copied to call graph nodes are
> callee_clobbered flags, which only IPA-CP uses it in a subsequent
> patch (and which would require maintenance during inlining).
>
> There are more uses of the flags introduced by subsequent patches.  In
> this one, the only one is that IPA-CP modification phase is able to
> use the results instead of querying AA and is capable of doing more
> replacements of aggregate values when the aggregate is unescaped and
> not clobbered.
>
> The following table summarizes what the pass can discover now.  All
> compilations are with -Ofast -flto.  (I should have counted only
> pointer typed parameters but well, that thought occurred to me too
> late.  All non-pointer ones are automatically considered clobbered.)
> Please note that in Fortran benchmarks, this information is often
> already available through fnspec flags.  But we can discover a few
> more (see the last patch for some more information).
>
>  |                    |        |          |       |           |       |    Callee |       |
>  | Test               | Params | Noescape |     % | Noclobber |     % | noclobber |     % |
>  |                    |        |          |       |           |       |           |       |
>  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
>  | FF libxul.so       | 462725 |    10422 |  2.25 |      4954 |  1.07 |      8872 |  1.92 |
>  | Tramp 3D           |   6344 |     1019 | 16.06 |       985 | 15.53 |      1005 | 15.84 |
>  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
>  | perlbench          |   2550 |       87 |  3.41 |        10 |  0.39 |        61 |  2.39 |
>  | bzip               |    194 |       28 | 14.43 |         1 |  0.52 |        13 |  6.70 |
>  | gcc                |  10725 |      179 |  1.67 |        18 |  0.17 |       147 |  1.37 |
>  | mcf                |     57 |        4 |  7.02 |         0 |  0.00 |         4 |  7.02 |
>  | gobmk              |   8873 |      132 |  1.49 |         3 |  0.03 |        85 |  0.96 |
>  | hmmer              |    643 |       71 | 11.04 |         8 |  1.24 |        64 |  9.95 |
>  | sjeng              |    161 |        5 |  3.11 |         0 |  0.00 |         5 |  3.11 |
>  | libquantum         |    187 |       48 | 25.67 |         6 |  3.21 |        14 |  7.49 |
>  | h264ref            |   1092 |       48 |  4.40 |         4 |  0.37 |        47 |  4.30 |
>  | astar              |    217 |       28 | 12.90 |         3 |  1.38 |        15 |  6.91 |
>  | xalancbmk          |  28861 |      737 |  2.55 |       536 |  1.86 |       712 |  2.47 |
>  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
>  | bwaves             |     74 |       35 | 47.30 |        25 | 33.78 |        35 | 47.30 |
>  | gamess             |  26059 |     3693 | 14.17 |      2796 | 10.73 |      3572 | 13.71 |
>  | milc               |    429 |       22 |  5.13 |        11 |  2.56 |        22 |  5.13 |
>  | zeusmp             |    284 |       31 | 10.92 |         2 |  0.70 |        31 | 10.92 |
>  | gromacs            |   5514 |      230 |  4.17 |        54 |  0.98 |       202 |  3.66 |
>  | cactusADM          |   2354 |       49 |  2.08 |        13 |  0.55 |        44 |  1.87 |
>  | leslie3d           |     18 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
>  | namd               |    163 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
>  | soplex             |   2341 |       80 |  3.42 |        10 |  0.43 |        55 |  2.35 |
>  | povray             |   4046 |      244 |  6.03 |        51 |  1.26 |       201 |  4.97 |
>  | calculix           |   6260 |     1109 | 17.72 |       672 | 10.73 |       933 | 14.90 |
>  | GemsFDTD           |    289 |       41 | 14.19 |        27 |  9.34 |        32 | 11.07 |
>  | tonto              |   7255 |     1361 | 18.76 |      1178 | 16.24 |      1329 | 18.32 |
>  | lbm                |     27 |        4 | 14.81 |         3 | 11.11 |         4 | 14.81 |
>  | wrf                |  14212 |     4375 | 30.78 |      3358 | 23.63 |      4120 | 28.99 |
>  | sphinx3            |    770 |       16 |  2.08 |         1 |  0.13 |        15 |  1.95 |
>  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
>  | ac.f90             |     21 |       14 | 66.67 |         7 | 33.33 |        14 | 66.67 |
>  | aermod.f90         |    600 |      134 | 22.33 |        59 |  9.83 |       124 | 20.67 |
>  | air.f90            |     85 |       41 | 48.24 |        14 | 16.47 |        41 | 48.24 |
>  | capacita.f90       |     42 |       18 | 42.86 |        16 | 38.10 |        18 | 42.86 |
>  | channel2.f90       |     12 |        4 | 33.33 |         4 | 33.33 |         4 | 33.33 |
>  | doduc.f90          |    132 |       68 | 51.52 |        39 | 29.55 |        68 | 51.52 |
>  | fatigue2.f90       |     65 |       43 | 66.15 |        20 | 30.77 |        43 | 66.15 |
>  | gas_dyn2.f90       |     97 |       22 | 22.68 |         6 |  6.19 |        21 | 21.65 |
>  | induct2.f90        |    121 |       41 | 33.88 |        24 | 19.83 |        41 | 33.88 |
>  | linpk.f90          |     42 |       10 | 23.81 |         7 | 16.67 |        10 | 23.81 |
>  | mdbx.f90           |     51 |       26 | 50.98 |         9 | 17.65 |        26 | 50.98 |
>  | mp_prop_design.f90 |      2 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
>  | nf.f90             |     41 |        8 | 19.51 |         8 | 19.51 |         8 | 19.51 |
>  | protein.f90        |    116 |       40 | 34.48 |        25 | 21.55 |        35 | 30.17 |
>  | rnflow.f90         |    212 |       54 | 25.47 |        37 | 17.45 |        51 | 24.06 |
>  | test_fpu2.f90      |    160 |       22 | 13.75 |        14 |  8.75 |        18 | 11.25 |
>  | tfft2.f90          |      7 |        3 | 42.86 |         0 |  0.00 |         3 | 42.86 |
>
> I hope to improve the results for example by propagating malloc
> attribute to callers.
>
> I have bootstrapped and tested this on x86_64, additionally I also
> checked it passes an LTO-bootstrap and LTO-built Firefox.  I assume
> there will be many comments but after I address them, I'd like to
> commit this to trunk.
>
> Thanks,
>
> Martin
>
>
> 2014-04-30  Martin Jambor  <mjambor@suse.cz>
>
>         * cgraph.h (cgraph_global_info): New fields noescape_parameters
>         and noclobber_parameters.
>         (cgraph_param_noescape_p): Declare.
>         (cgraph_set_param_noescape): Likewise.
>         (cgraph_param_noclobber_p): Likewise.
>         (cgraph_set_param_noclobber): Likewise.
>         * ipa-prop.h (ipa_unknown_data): New type.
>         (ipa_known_type_data): New fields escape_ref_valid and
>         escape_ref_index.
>         (ipa_constant_data): Likewise.
>         (jump_func_value): New field unknown.
>         (ipa_get_jf_unknown_esc_ref_valid): New function.
>         (ipa_get_jf_unknown_esc_ref_index): Likewise.
>         (ipa_get_jf_known_type_esc_ref_valid): Likewise.
>         (ipa_get_jf_known_type_esc_ref_index): Likewise.
>         (ipa_get_jf_constant_esc_ref_valid): Likewise.
>         (ipa_get_jf_constant_esc_ref_index): Likewise.
>         (ipa_ref_descriptor): New type.
>         (ipa_node_params): New fields ref_descs and node_up_enqueued.
>         (ipa_is_ref_escaped): New function.
>         (ipa_is_ref_clobbered): Likewise.
>         (ipa_is_ref_callee_clobbered): Likewise.
>         (ipa_is_param_ref_safely_constant): Likewise.
>         (ipa_spread_escapes): Declare.
>         * ipa-prop.c: Include stringpool.h, tree-ssanames.h and pointer-set.h.
>         (ipa_escape): New type.
>         (valid_escape_result_index): New function.
>         (func_body_info): New fields func, escapes and decl_escapes.
>         (ipa_print_node_jump_functions_for_edge): Dump new fields.
>         (ipa_set_jf_unknown): New function.  Use it instead of directly
>         setting a jump functions type elsewhere.
>         (ipa_set_jf_unknown_copy): New function.
>         (ipa_set_jf_unknown_ref_index): Likewise.
>         (ipa_set_jf_known_type_copy): Likewise.
>         (ipa_set_jf_known_type): Initialize new fields.
>         (ipa_set_jf_known_type_ref_index): New function.
>         (ipa_set_jf_constant): Initialize new fields.
>         (ipa_set_jf_constant_ref_index): New function.
>         (ipa_get_tracked_refs_count): Likewise.
>         (ipa_set_ref_clobbered): Likewise.
>         (ipa_get_tracked_refs_count): Likewise.
>         (ipa_set_ref_escaped): Likewise.
>         (ipa_set_ref_clobbered): Likewise.
>         (ipa_set_ref_callee_clobbered): Likewise.
>         (ipa_load_from_parm_agg_1): Use const_ref parameter flag.
>         (get_escape_for_ref): New function.
>         (get_escape_for_value): Likewise.
>         (ipa_compute_jump_functions_for_edge): Add reference info to jump
>         functions.  Wrapped comments to 80 columns, added a checking assert
>         all jump functions start with no information.
>         (visit_ref_for_mod_analysis): Renamed to visit_ref_mark_it_used.
>         Simplified comment.
>         (ipa_analyze_params_uses_in_bb): Renamed to ipa_analyze_bb_statements.
>         Simplified comment.
>         (analyze_phi_escapes): New function.
>         (analyze_ssa_escape): Likewise.
>         (analyze_all_ssa_escapes): Likewise.
>         (create_escape_structures): Likewise.
>         (free_escape_structures): Likewise.
>         (pick_escapes_from_call): Likewise.
>         (gather_picked_escapes): Likewise.
>         (ipa_analyze_node): Initialize and deinitialize new fbi fields and
>         escape structures, call create_escape_structures,
>         analyze_all_ssa_escapes and pick_escapes_from_call, assign ref indices
>         to formal parameters.
>         (escape_spreading_data): New type.
>         (enque_to_propagate_escapes_up): New function.
>         (enque_to_propagate_escapes_down): Likewise.
>         (escape_origin_from_jfunc): Likewise.
>         (spread_escapes_up_from_one_alias): Likewise.
>         (spread_escapes_up): Likewise.
>         (spread_escapes_down): Likewise.
>         (ipa_spread_escapes): Likewise.
>         (make_unknown_jf_from_known_type_jf): Likewise.
>         (combine_known_type_and_ancestor_jfs): Also update ref index fields.
>         Switch arguments for consistency, changed the one caller.
>         (update_jump_functions_after_inlining): Also update ref index fields,
>         make use of unescaped info.
>         (update_indirect_edges_after_inlining): Make use of unescaped info.
>         (ipa_free_node_params_substructures): Free also ref_desc vector.
>         (ipa_node_duplication_hook): Also copy reference descriptor vector and
>         const_refs.
>         (ipa_print_node_params): Also print reference flags.
>         (ipa_write_jump_function): Stream new fields.
>         (ipa_read_jump_function): Likewise.
>         (ipa_write_node_info): Stream reference description.
>         (ipa_read_node_info): Likewise, also clear new flag node_up_enqueued.
>         (read_agg_replacement_chain): Whitespace fix.
>         (adjust_agg_replacement_values): Also assign const_refs in descriptors
>         from those in tranformation data.
>         (ipcp_transform_function): Initialize new fields of fbi.
>         * ipa-cp.c (agg_pass_through_permissible_p): Make use of the new
>         escape information.  Accept caller_infom as a parameter, updated all
>         callers.
>         (propagate_aggs_accross_jump_function): Make use of the new escape
>         information.
>         (intersect_aggregates_with_edge): Bail out early if a pass_through
>         jump function does not allow passing aggregates.  Make use of the new
>         escape information.  Allow NULL values in aggregate jump functions.
>         (ipcp_driver): Call spread_escapes.
>         * ipa-inline.c (ipa_inline): Call spread_escapes if necessary.
>         * cgraph.c (cgraph_param_noescape_p): New function.
>         (cgraph_set_param_noescape): Likewise.
>         (cgraph_param_noclobber_p): Likewise.
>         (cgraph_set_param_noclobber): Likewise.
>         * cgraphclones.c (duplicate_thunk_for_node): Assert that noclobber and
>         noescape bitmaps are NULL.
>         (copy_noescape_noclobber_bitmaps): New function.
>         (cgraph_clone_node): Copy noescape and noclobber bitmaps.
>         (cgraph_copy_node_for_versioning): Likewise.
>         * lto-cgraph.c (output_param_bitmap): Likewise.
>         (output_node_opt_summary): Use it to stream args_to_skip,
>         combined_args_to_skip, noescape_parameters and noclobber_parameters
>         bitmaps.
>         (input_param_bitmap): New function.
>         (input_node_opt_summary): Use it to stream args_to_skip,
>         combined_args_to_skip, noescape_parameters and noclobber_parameters
>         bitmaps.
>         * tree-inline.c (update_noescape_noclobber_bitmaps): New function.
>         (tree_function_versioning): Call it.
>
> testsuite/
>         * gcc.dg/ipa/ipcp-agg-10.c: New test.
>
> Index: src/gcc/ipa-prop.c
> ===================================================================
> --- src.orig/gcc/ipa-prop.c
> +++ src/gcc/ipa-prop.c
> @@ -43,6 +43,8 @@ along with GCC; see the file COPYING3.
>  #include "gimple-ssa.h"
>  #include "tree-cfg.h"
>  #include "tree-phinodes.h"
> +#include "stringpool.h"
> +#include "tree-ssanames.h"
>  #include "ssa-iterators.h"
>  #include "tree-into-ssa.h"
>  #include "tree-dfa.h"
> @@ -60,6 +62,7 @@ along with GCC; see the file COPYING3.
>  #include "stringpool.h"
>  #include "tree-ssanames.h"
>  #include "domwalk.h"
> +#include "pointer-set.h"
>
>  /* Intermediate information that we get from alias analysis about a particular
>     parameter in a particular basic_block.  When a parameter or the memory it
> @@ -91,11 +94,64 @@ struct ipa_bb_info
>    vec<param_aa_status> param_aa_statuses;
>  };
>
> +/* Structure used for intra-procedural escape analysis (and associated
> +   memory-write detection).  When analyzing function body, we have one for each
> +   SSA name and for all address-taken local declarations.  */

And for all functions at the same time?  It asks to space optimize
this ...

> +struct ipa_escape
> +{
> +  /* If target is non-NULL, this is the offset relative to the reference
> +     described by target.  */
> +  HOST_WIDE_INT offset;
> +
> +  /* If this describes (a part of) data described by other ipa_escape
> +     structure, target is non-NULL.  In that case, that structure should be
> +     used instead of this one and unless explicitely noted, other fields are
> +     meaningless.  */
> +  struct ipa_escape *target;
> +
> +  /* The last seen edge that had a reference to this data among its parameters.
> +     Used to make sure we do not pass the same data in two different
> +     arguments.  */
> +  struct cgraph_edge *last_seen_cs;
> +
> +  /* Index of the bool slot where the analyzed flag is going to end up plus
> +     one.  Zero means this structure will remain unused.  */
> +  int result_index;
> +
> +  /* True if we have already dealt with this SSA name.  Valid even if target is
> +     non-NULL.  */
> +  bool analyzed;
> +
> +  /* Could the address of the data have escaped?  */
> +  bool escaped;
> +
> +  /* Flag set when an SSA name has been used as a base for a memory write.
> +     Only valid when the SSA name is not considered escaped, otherwise it might
> +     be incorrectly clear.  */
> +  bool write_base;
> +};
> +
> +/* If ESC has a valid (i.e. non-zero) result_index, return true and store the
> +   directly usable (i.e. decremented) index to *INDEX.  */
> +
> +static inline bool
> +valid_escape_result_index (struct ipa_escape *esc, int *index)
> +{
> +  if (esc->result_index == 0)
> +    return false;
> +  *index = esc->result_index - 1;
> +  return true;
> +}
> +
>  /* Structure with global information that is only used when looking at function
>     body. */
>
>  struct func_body_info
>  {
> +  /* Struct function of the function that is being analyzed.  */
> +  struct function *func;
> +
>    /* The node that is being analyzed.  */
>    cgraph_node *node;

DECL_STRUCT_FUNCTION (node->decl) == func?

> @@ -105,6 +161,13 @@ struct func_body_info
>    /* Information about individual BBs. */
>    vec<ipa_bb_info> bb_infos;
>
> +  /* Escape analysis information for SSA flags and local addressable
> +     declarations.  */
> +  vec<ipa_escape> escapes;
> +
> +  /* Mapping from VAR_DECLS to escape information.  */
> +  pointer_map <ipa_escape *> *decl_escapes;
> +

You could map from DECL_UID to an index into 'escapes' instead, which
would be more space efficient?

>    /* Number of parameters.  */
>    int param_count;
>
> @@ -282,7 +345,14 @@ ipa_print_node_jump_functions_for_edge (
>
>        fprintf (f, "       param %d: ", i);
>        if (type == IPA_JF_UNKNOWN)
> -       fprintf (f, "UNKNOWN\n");
> +       {
> +         fprintf (f, "UNKNOWN");
> +         if (ipa_get_jf_unknown_esc_ref_valid (jump_func))
> +           fprintf (f, ", escape ref: %i\n",
> +                    ipa_get_jf_unknown_esc_ref_index (jump_func));
> +         else
> +           fprintf (f, "\n");
> +       }
>        else if (type == IPA_JF_KNOWN_TYPE)
>         {
>           fprintf (f, "KNOWN TYPE: base  ");
> @@ -290,6 +360,9 @@ ipa_print_node_jump_functions_for_edge (
>           fprintf (f, ", offset "HOST_WIDE_INT_PRINT_DEC", component ",
>                    jump_func->value.known_type.offset);
>           print_generic_expr (f, jump_func->value.known_type.component_type, 0);
> +         if (ipa_get_jf_known_type_esc_ref_valid (jump_func))
> +           fprintf (f, ", escape ref: %i",
> +                    ipa_get_jf_known_type_esc_ref_index (jump_func));
>           fprintf (f, "\n");
>         }
>        else if (type == IPA_JF_CONST)
> @@ -304,6 +377,9 @@ ipa_print_node_jump_functions_for_edge (
>               print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (val, 0)),
>                                   0);
>             }
> +         if (ipa_get_jf_constant_esc_ref_valid (jump_func))
> +           fprintf (f, ", escape ref: %i",
> +                    ipa_get_jf_constant_esc_ref_index (jump_func));
>           fprintf (f, "\n");
>         }
>        else if (type == IPA_JF_PASS_THROUGH)
> @@ -430,6 +506,39 @@ ipa_print_all_jump_functions (FILE *f)
>      }
>  }
>
> +/* Set jfunc to be a jump function with invalid reference index.  */
> +
> +static void
> +ipa_set_jf_unknown (struct ipa_jump_func *jfunc)
> +{
> +  jfunc->type = IPA_JF_UNKNOWN;
> +  jfunc->value.unknown.escape_ref_valid = false;
> +}
> +
> +/* Set JFUNC to be a copy of another unknown jump function SRC. */
> +
> +static void
> +ipa_set_jf_unknown_copy (struct ipa_jump_func *dst,
> +                        struct ipa_jump_func *src)
> +
> +{
> +  gcc_checking_assert (src->type == IPA_JF_UNKNOWN);
> +  dst->type = IPA_JF_UNKNOWN;
> +  dst->value.unknown = src->value.unknown;
> +}
> +
> +/* Set reference description of unknown JFUNC to be valid and referring to
> +   INDEX.  */
> +
> +static void
> +ipa_set_jf_unknown_ref_index (struct ipa_jump_func *jfunc, int index)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
> +  gcc_checking_assert (index >= 0);
> +  jfunc->value.unknown.escape_ref_valid = true;
> +  jfunc->value.unknown.escape_ref_index = index;
> +}
> +
>  /* Set JFUNC to be a known type jump function.  */
>
>  static void
> @@ -445,11 +554,37 @@ ipa_set_jf_known_type (struct ipa_jump_f
>    jfunc->value.known_type.offset = offset,
>    jfunc->value.known_type.base_type = base_type;
>    jfunc->value.known_type.component_type = component_type;
> +  jfunc->value.known_type.escape_ref_valid = false;
> +  jfunc->value.known_type.escape_ref_index = 0;
>    gcc_assert (component_type);
>  }
>
> -/* Set JFUNC to be a copy of another jmp (to be used by jump function
> -   combination code).  The two functions will share their rdesc.  */
> +/* Set reference description of known_type JFUNC to be valid and referring to
> +   INDEX.  */
> +
> +static void
> +ipa_set_jf_known_type_ref_index (struct ipa_jump_func *jfunc, int index)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
> +  gcc_checking_assert (index >= 0);
> +  jfunc->value.known_type.escape_ref_valid = true;
> +  jfunc->value.known_type.escape_ref_index = index;
> +}
> +
> +/* Set DST to be a copy of another known type jump function SRC.  */
> +
> +static void
> +ipa_set_jf_known_type_copy (struct ipa_jump_func *dst,
> +                           struct ipa_jump_func *src)
> +
> +{
> +  gcc_checking_assert (src->type == IPA_JF_KNOWN_TYPE);
> +  dst->type = IPA_JF_KNOWN_TYPE;
> +  dst->value.known_type = src->value.known_type;
> +}
> +
> +/* Set DST to be a copy of another constant jump function SRC.  The two
> +   functions will share their rdesc.  */
>
>  static void
>  ipa_set_jf_cst_copy (struct ipa_jump_func *dst,
> @@ -472,6 +607,8 @@ ipa_set_jf_constant (struct ipa_jump_fun
>      SET_EXPR_LOCATION (constant, UNKNOWN_LOCATION);
>    jfunc->type = IPA_JF_CONST;
>    jfunc->value.constant.value = unshare_expr_without_location (constant);
> +  jfunc->value.constant.escape_ref_valid = false;
> +  jfunc->value.constant.escape_ref_index = 0;
>
>    if (TREE_CODE (constant) == ADDR_EXPR
>        && TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL)
> @@ -491,6 +628,19 @@ ipa_set_jf_constant (struct ipa_jump_fun
>      jfunc->value.constant.rdesc = NULL;
>  }
>
> +/* Set reference description of constant JFUNC to be valid and referring to
> +   INDEX.  */
> +
> +static void
> +ipa_set_jf_constant_ref_index (struct ipa_jump_func *jfunc, int index)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
> +  gcc_checking_assert (index >= 0);
> +  jfunc->value.constant.escape_ref_valid = true;
> +  jfunc->value.constant.escape_ref_index = index;
> +}
> +
> +
>  /* Set JFUNC to be a simple pass-through jump function.  */
>  static void
>  ipa_set_jf_simple_pass_through (struct ipa_jump_func *jfunc, int formal_id,
> @@ -539,6 +689,41 @@ ipa_set_ancestor_jf (struct ipa_jump_fun
>    jfunc->value.ancestor.type_preserved = type_preserved;
>  }
>
> +/* Return the number of references tracked for escape analysis in INFO.  */
> +
> +static inline int
> +ipa_get_tracked_refs_count (struct ipa_node_params *info)
> +{
> +  return info->ref_descs.length ();
> +}
> +
> +/* Set escape flag of reference number I of a function corresponding to NODE to
> +   VAL.  */
> +
> +static inline void
> +ipa_set_ref_escaped (struct ipa_node_params *info, int i, bool val)
> +{
> +  info->ref_descs[i].escaped = val;
> +}
> +
> +/* Set the clobbered flag corresponding to the Ith tracked reference of the
> +   function associated with INFO to VAL.  */
> +
> +static inline void
> +ipa_set_ref_clobbered (struct ipa_node_params *info, int i, bool val)
> +{
> +  info->ref_descs[i].clobbered = val;
> +}
> +
> +/* Set the callee_clobbered flag corresponding to the Ith tracked reference of
> +   the function associated with INFO to VAL.  */
> +
> +static inline void
> +ipa_set_ref_callee_clobbered (struct ipa_node_params *info, int i, bool val)
> +{
> +  info->ref_descs[i].callee_clobbered = val;
> +}
> +
>  /* Extract the acual BINFO being described by JFUNC which must be a known type
>     jump function.  */
>
> @@ -784,7 +969,7 @@ detect_type_change (tree arg, tree base,
>    if (!tci.known_current_type
>        || tci.multiple_types_encountered
>        || offset != 0)
> -    jfunc->type = IPA_JF_UNKNOWN;
> +    ipa_set_jf_unknown (jfunc);
>    else
>      ipa_set_jf_known_type (jfunc, 0, tci.known_current_type, comp_type);
>
> @@ -1090,7 +1275,8 @@ ipa_load_from_parm_agg_1 (struct func_bo
>      }
>
>    if (index >= 0
> -      && parm_ref_data_preserved_p (fbi, index, stmt, op))
> +      && ((fbi && cgraph_param_noclobber_p (fbi->node, index))
> +         || parm_ref_data_preserved_p (fbi, index, stmt, op)))
>      {
>        *index_p = index;
>        *by_ref_p = true;
> @@ -1725,6 +1911,86 @@ ipa_get_callee_param_type (struct cgraph
>    return NULL;
>  }
>
> +static void
> +analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
> +                   struct ipa_escape *esc);
> +
> +/* Return the ipa_escape structure suitable for REFERENCE, if it is a
> +   declaration or a MEM_REF.  Return NULL if there is no structure describing
> +   REFERENCE.  If a non-NULL result is returned, put the offset of the
> +   REFERENCE relative to the start of data described by the result into
> +   *OFFSET, and size and max_size as returned by get_ref_base_and_extent to
> +   *SIZE and *MAX_SIZE respectively.  */
> +
> +static struct ipa_escape *
> +get_escape_for_ref (struct func_body_info *fbi, tree reference,
> +                   HOST_WIDE_INT *offset, HOST_WIDE_INT *size,
> +                   HOST_WIDE_INT *max_size)
> +{
> +  struct ipa_escape *res;
> +  tree base = get_ref_base_and_extent (reference, offset, size, max_size);
> +
> +  if (DECL_P (base))
> +    {
> +      ipa_escape **d_esc = fbi->decl_escapes->contains (base);
> +      if (!d_esc)
> +       return NULL;
> +      res = *d_esc;
> +    }
> +  else if (TREE_CODE (base) == MEM_REF
> +          && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
> +    {
> +      tree ssa = TREE_OPERAND (base, 0);
> +      res = &fbi->escapes[SSA_NAME_VERSION (ssa)];
> +      if (!res->analyzed)
> +       analyze_ssa_escape (fbi, ssa, res);
> +    }
> +  else
> +    return NULL;
> +
> +  if (res->target)
> +    {
> +      *offset += res->offset;
> +      res = res->target;
> +    }
> +  return res;
> +}
> +
> +/* Return the ipa_escape structure suitable for T, if it is an ssa_name or an
> +   ADDR_EXPR.  Return NULL if there is not structure for T.  If a non-NULL
> +   result is returned, put the offset of the value T relative to the start of
> +   data described by the result into *OFFSET.  */
> +
> +static struct ipa_escape *
> +get_escape_for_value (struct func_body_info *fbi, tree t,
> +                     HOST_WIDE_INT *offset)
> +{
> +  if (TREE_CODE (t) == SSA_NAME)
> +    {
> +      struct ipa_escape *res;
> +      *offset = 0;
> +      res = &fbi->escapes[SSA_NAME_VERSION (t)];
> +      if (!res->analyzed)
> +       analyze_ssa_escape (fbi, t, res);
> +
> +      if (res->target)
> +       {
> +         *offset += res->offset;
> +         res = res->target;
> +       }
> +
> +      return res;
> +    }
> +  else if (TREE_CODE (t) == ADDR_EXPR)
> +    {
> +      HOST_WIDE_INT dummy_size, dummy_max_size;
> +      return get_escape_for_ref (fbi, TREE_OPERAND (t, 0), offset, &dummy_size,
> +                                &dummy_max_size);
> +    }
> +  else
> +    return NULL;
> +}
> +
>  /* Compute jump function for all arguments of callsite CS and insert the
>     information in the jump_functions array in the ipa_edge_args corresponding
>     to this callsite.  */
> @@ -1753,6 +2019,8 @@ ipa_compute_jump_functions_for_edge (str
>        tree arg = gimple_call_arg (call, n);
>        tree param_type = ipa_get_callee_param_type (cs, n);
>
> +      gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN
> +                          && !ipa_get_jf_unknown_esc_ref_valid (jfunc));
>        if (is_gimple_ip_invariant (arg))
>         ipa_set_jf_constant (jfunc, arg, cs);
>        else if (!is_gimple_reg_type (TREE_TYPE (arg))
> @@ -1807,19 +2075,42 @@ ipa_compute_jump_functions_for_edge (str
>                                       ? TREE_TYPE (param_type)
>                                       : NULL);
>
> -      /* If ARG is pointer, we can not use its type to determine the type of aggregate
> -        passed (because type conversions are ignored in gimple).  Usually we can
> -        safely get type from function declaration, but in case of K&R prototypes or
> -        variadic functions we can try our luck with type of the pointer passed.
> -        TODO: Since we look for actual initialization of the memory object, we may better
> -        work out the type based on the memory stores we find.  */
> +      /* If ARG is pointer, we can not use its type to determine the type of
> +        aggregate passed (because type conversions are ignored in gimple).
> +        Usually we can safely get type from function declaration, but in case
> +        of K&R prototypes or variadic functions we can try our luck with type
> +        of the pointer passed.
> +        TODO: Since we look for actual initialization of the memory object, we
> +        may better work out the type based on the memory stores we find.  */
>        if (!param_type)
>         param_type = TREE_TYPE (arg);
>
> -      if ((jfunc->type != IPA_JF_PASS_THROUGH
> -             || !ipa_get_jf_pass_through_agg_preserved (jfunc))
> -         && (jfunc->type != IPA_JF_ANCESTOR
> -             || !ipa_get_jf_ancestor_agg_preserved (jfunc))
> +      HOST_WIDE_INT dummy_offset;
> +      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &dummy_offset);
> +      int ref_index;
> +      if (esc && valid_escape_result_index (esc, &ref_index))
> +       {
> +         if (jfunc->type == IPA_JF_UNKNOWN)
> +           ipa_set_jf_unknown_ref_index (jfunc, ref_index);
> +         else if (jfunc->type == IPA_JF_KNOWN_TYPE)
> +           ipa_set_jf_known_type_ref_index (jfunc, ref_index);
> +         else if (jfunc->type == IPA_JF_CONST)
> +           ipa_set_jf_constant_ref_index (jfunc, ref_index);
> +         else
> +           {
> +             gcc_checking_assert
> +               (jfunc->type != IPA_JF_PASS_THROUGH
> +                || ipa_get_jf_pass_through_formal_id (jfunc) == ref_index);
> +             gcc_checking_assert
> +               (jfunc->type != IPA_JF_ANCESTOR
> +                || ipa_get_jf_ancestor_formal_id (jfunc) == ref_index);
> +           }
> +       }
> +
> +      /* TODO: We should allow aggregate jump functions even for these types of
> +        jump functions but we need to be able to combine them first.  */
> +      if (jfunc->type != IPA_JF_PASS_THROUGH
> +         && jfunc->type != IPA_JF_ANCESTOR
>           && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
>               || POINTER_TYPE_P (param_type)))
>         determine_known_aggregate_parts (call, arg, param_type, jfunc);
> @@ -2223,12 +2514,11 @@ ipa_analyze_stmt_uses (struct func_body_
>      ipa_analyze_call_uses (fbi, stmt);
>  }
>
> -/* Callback of walk_stmt_load_store_addr_ops for the visit_load.
> -   If OP is a parameter declaration, mark it as used in the info structure
> -   passed in DATA.  */
> +/* Callback of walk_stmt_load_store_addr_ops.  If OP is a parameter
> +   declaration, mark it as used in the info structure passed in DATA.  */
>
>  static bool
> -visit_ref_for_mod_analysis (gimple, tree op, tree, void *data)
> +visit_ref_mark_it_used (gimple, tree op, tree, void *data)
>  {
>    struct ipa_node_params *info = (struct ipa_node_params *) data;
>
> @@ -2244,13 +2534,12 @@ visit_ref_for_mod_analysis (gimple, tree
>    return false;
>  }
>
> -/* Scan the statements in BB and inspect the uses of formal parameters.  Store
> -   the findings in various structures of the associated ipa_node_params
> -   structure, such as parameter flags, notes etc.  FBI holds various data about
> -   the function being analyzed.  */
> +/* Scan the statements in BB and inspect the uses of formal parameters, escape
> +   analysis and so on.  FBI holds various data about the function being
> +   analyzed.  */
>
>  static void
> -ipa_analyze_params_uses_in_bb (struct func_body_info *fbi, basic_block bb)
> +ipa_analyze_bb_statements (struct func_body_info *fbi, basic_block bb)
>  {
>    gimple_stmt_iterator gsi;
>    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> @@ -2262,15 +2551,15 @@ ipa_analyze_params_uses_in_bb (struct fu
>
>        ipa_analyze_stmt_uses (fbi, stmt);
>        walk_stmt_load_store_addr_ops (stmt, fbi->info,
> -                                    visit_ref_for_mod_analysis,
> -                                    visit_ref_for_mod_analysis,
> -                                    visit_ref_for_mod_analysis);
> +                                    visit_ref_mark_it_used,
> +                                    visit_ref_mark_it_used,
> +                                    visit_ref_mark_it_used);
>      }
>    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>      walk_stmt_load_store_addr_ops (gsi_stmt (gsi), fbi->info,
> -                                  visit_ref_for_mod_analysis,
> -                                  visit_ref_for_mod_analysis,
> -                                  visit_ref_for_mod_analysis);
> +                                  visit_ref_mark_it_used,
> +                                  visit_ref_mark_it_used,
> +                                  visit_ref_mark_it_used);
>  }
>
>  /* Calculate controlled uses of parameters of NODE.  */
> @@ -2344,10 +2633,284 @@ private:
>  void
>  analysis_dom_walker::before_dom_children (basic_block bb)
>  {
> -  ipa_analyze_params_uses_in_bb (m_fbi, bb);
> +  ipa_analyze_bb_statements (m_fbi, bb);
>    ipa_compute_jump_functions_for_bb (m_fbi, bb);
>  }
>
> +/* Look at operands of PHI and if any of them is an address of a declaration,
> +   mark that declaration escaped.  */
> +
> +void
> +analyze_phi_escapes (gimple phi, struct func_body_info *fbi)
> +{
> +  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> +    {
> +      tree op = gimple_phi_arg_def (phi, i);
> +      if (TREE_CODE (op) != ADDR_EXPR)
> +       continue;
> +
> +      tree base = get_base_address (TREE_OPERAND (op, 0));
> +      if (!DECL_P (base))
> +       continue;

So this means that 'a' escapes in

  tem_1 = &a[i_2];

?

> +      ipa_escape **d_esc = fbi->decl_escapes->contains (base);
> +      if (!d_esc)
> +       continue;
> +      (*d_esc)->escaped = true;
> +    }
> +}
> +
> +/* Check definition and uses of SSA and update ESC (and potentially escape
> +   structures associated with other SSA names) accordingly.  */
> +
> +static void
> +analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
> +                   struct ipa_escape *esc)
> +{
> +  esc->analyzed = true;
> +  if (!POINTER_TYPE_P (TREE_TYPE (ssa)))
> +    {
> +      esc->escaped = true;
> +      return;
> +    }
> +
> +  /* First we need to check the definition and figure out whether we can work
> +     with it or whether this name actually refers to data described by another
> +     structure.  */
> +  if (!SSA_NAME_IS_DEFAULT_DEF (ssa))
> +    {
> +      gimple def = SSA_NAME_DEF_STMT (ssa);
> +
> +      if (gimple_assign_single_p (def))
> +       {
> +         tree rhs = gimple_assign_rhs1 (def);
> +         HOST_WIDE_INT offset;
> +         struct ipa_escape *r_esc = get_escape_for_value (fbi, rhs, &offset);
> +         if (r_esc)
> +           {
> +             esc->offset = offset;
> +             esc->target = r_esc;
> +           }
> +         else
> +           {
> +             esc->escaped = true;
> +             return;
> +           }
> +       }
> +      else if (is_gimple_call (def))
> +       {
> +         /* TODO: If only C++ new had malloc attribute.  */
> +         int flags = gimple_call_flags (def);

How does ECF_MALLOC make something not escape?!  And why
does the definition "escape" in any stmt?!

> +         if ((flags & ECF_MALLOC) == 0)
> +           {
> +             esc->escaped = true;
> +             return;
> +           }
> +       }
> +      else
> +       {
> +         if (gimple_code (def) == GIMPLE_PHI)
> +           /* Any SSA defined by a PHI is doomed but it is a convenient place
> +              to check every pointer phi . */
> +           analyze_phi_escapes (def, fbi);
> +
> +         esc->escaped = true;
> +         return;
> +       }
> +    }
> +
> +  if (esc->target)
> +    esc = esc->target;
> +  if (esc->escaped)
> +    return;
> +
> +  /* If the definition is fine, we need to check the uses.  */
> +
> +  imm_use_iterator imm_iter;
> +  use_operand_p use;
> +  FOR_EACH_IMM_USE_FAST (use, imm_iter, ssa)
> +    {
> +      gimple stmt = USE_STMT (use);
> +      if (is_gimple_debug (stmt))
> +       continue;
> +
> +      switch (gimple_code (stmt))
> +       {
> +       case GIMPLE_ASSIGN:
> +         {
> +           if (!gimple_assign_single_p (stmt))
> +             {
> +               esc->escaped = true;

Does that make SSA escape in

   tem_1 = ssa p+ 1;

?

> +               return;
> +             }
> +
> +             tree lhs = gimple_assign_lhs (stmt);
> +             /* Statements assigning to another SSA are OK, we check all of
> +                them.  */
> +             if (TREE_CODE (lhs) != SSA_NAME
> +                 /* If LHS is not an SSA_NAME, RHS cannot be an ADDR_EXPR, and
> +                    must be either a naked SSA_NAME or a load or an invariant.
> +                    We only care if it is the SSA name we are after.  It can
> +                    be a different SSA name if the use was on the LHS in a
> +                    MEM_REF.  */
> +                 && gimple_assign_rhs1 (stmt) == ssa)
> +               {
> +                 esc->escaped = true;
> +                 return;
> +               }
> +
> +             while (handled_component_p (lhs))
> +               lhs = TREE_OPERAND (lhs, 0);
> +             if (TREE_CODE (lhs) == MEM_REF
> +                 && TREE_OPERAND (lhs, 0) == ssa)
> +               esc->write_base = true;
> +           }
> +         break;
> +
> +       case GIMPLE_CALL:
> +         /* Calls will be dealt with when constructing jump functions.
> +            However, indirect calls mean that all values escape (we do IPA
> +            escape propagation before any devirtualization) and when not in
> +            LTO, even calls to functions in other compilation units are dark
> +            holes.  On the other hand, builtin free is whitelisted.  */
> +         if (!gimple_call_builtin_p (stmt, BUILT_IN_FREE))
> +           {
> +             struct cgraph_edge *cs = cgraph_edge (fbi->node, stmt);
> +             if (!cs || !cs->callee || (!cs->callee->definition && !flag_lto))
> +               {
> +                 esc->escaped = true;
> +                 return;
> +               }
> +           }
> +         break;
> +
> +       case GIMPLE_SWITCH:
> +       case GIMPLE_COND:
> +         /* These are harmless.  */
> +         break;
> +
> +       default:
> +         esc->escaped = true;
> +         return;
> +       }
> +    }
> +}
> +
> +/* Examine escapes of all SSA names.   */
> +
> +static void
> +analyze_all_ssa_escapes (struct func_body_info *fbi)
> +{
> +  for (unsigned i = 1; i < fbi->func->gimple_df->ssa_names->length (); ++i)

SSANAMES (fbi->func)->length ();

> +    {
> +      tree ssa = ssa_name (i);
> +      if (!ssa)
> +       continue;
> +      struct ipa_escape *esc = &fbi->escapes[SSA_NAME_VERSION (ssa)];
> +      if (esc->analyzed)
> +       return;
> +      analyze_ssa_escape (fbi, ssa, esc);

I think it's more cache friendly to walk all stmts instead of all
SSA names and their immediate uses.  But maybe not.

> +    }
> +}
> +
> +/* Initialize escape analysis structures in the FBI corresponding to FUNC.  */
> +
> +static void
> +create_escape_structures (struct func_body_info *fbi)
> +{
> +  tree var, parm;
> +  unsigned int i, var_idx, var_count = 0;
> +
> +  for (parm = DECL_ARGUMENTS (fbi->node->decl);
> +       parm;
> +       parm = DECL_CHAIN (parm))
> +    if (TREE_ADDRESSABLE (parm))
> +      var_count++;
> +
> +  FOR_EACH_LOCAL_DECL (fbi->func, i, var)
> +    if (TREE_CODE (var) == VAR_DECL && TREE_ADDRESSABLE (var))
> +      var_count++;
> +
> +  fbi->escapes = vNULL;
> +  fbi->escapes.safe_grow_cleared (SSANAMES (fbi->func)->length () + var_count);

you want to use reserve_exact first and then grow.

I miss an overall comment about the used algorithm and its caveats.
I see the following
 1) it's not flow or context sensitive
 2) it doesn't handle local memory as non-escape sites
 3) it doesn't handle escapes through return (it handles them pessimistically)

IPA-PTA does 2) and 3), of course not 1).

For what it does this seems to be quite heavy-weight?

What parts are actually carried out at WPA time?  Not escape sites
it seems but only the actual objects (not) escaping, right?

How would your algorithm compare to one running at local_pure_const
time looking at a similar simple set of escapes of its function arguments
(but using computed fnspec attributes to handle callees already processed)?

What would be the complication of handling escapes through function returns?

Maybe I missed them so far, but do you have testcases that show
actual optimizations that are made possible by this?  Do you have
numbers for SPEC showing which optimizations are triggered?

At least it seems that all this convolutes the existing (already
convoluted) IPA-PROP engine even more.

Thanks,
Richard.

> +  fbi->decl_escapes = new pointer_map <ipa_escape *>;
> +
> +  var_idx = SSANAMES (fbi->func)->length ();
> +  for (parm = DECL_ARGUMENTS (fbi->node->decl);
> +       parm;
> +       parm = DECL_CHAIN (parm))
> +    if (TREE_ADDRESSABLE (parm))
> +      *fbi->decl_escapes->insert (parm) = &fbi->escapes[var_idx++];
> +
> +  FOR_EACH_LOCAL_DECL (fbi->func, i, var)
> +    if (TREE_CODE (var) == VAR_DECL && TREE_ADDRESSABLE (var))
> +      *fbi->decl_escapes->insert (var) = &fbi->escapes[var_idx++];
> +}
> +
> +/* Free escape analysis structures in the FBI.  */
> +
> +static void
> +free_escape_structures (struct func_body_info *fbi)
> +{
> +  fbi->escapes.release ();
> +  delete fbi->decl_escapes;
> +}
> +
> +/* Go over call argument of CS and if any warrants a result_index for an escape
> +   structure, assign to it *RI and increment it.  */
> +
> +void
> +pick_escapes_from_call (struct func_body_info *fbi, struct cgraph_edge *cs,
> +                       int *ri)
> +{
> +  int arg_num = gimple_call_num_args (cs->call_stmt);
> +
> +  for (int i = 0; i < arg_num; ++i)
> +    {
> +      HOST_WIDE_INT offset;
> +      tree arg = gimple_call_arg (cs->call_stmt, i);
> +      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &offset);
> +
> +      if (!esc || esc->escaped)
> +       continue;
> +
> +      if (esc->last_seen_cs == cs)
> +       {
> +         esc->escaped = true;
> +         continue;
> +       }
> +      esc->last_seen_cs = cs;
> +
> +      if (!esc->result_index)
> +       {
> +         *ri = *ri + 1;
> +         esc->result_index = *ri;
> +       }
> +    }
> +}
> +
> +/* Copy result escape flags to node info.  There must be exactly COUNT result
> +   escapes.  */
> +
> +void
> +gather_picked_escapes (struct func_body_info *fbi, int count)
> +{
> +  if (count == 0)
> +    return;
> +  fbi->info->ref_descs.safe_grow_cleared (count);
> +
> +  for (unsigned i = 0; i < fbi->escapes.length (); ++i)
> +    {
> +      struct ipa_escape *esc = &fbi->escapes[i];
> +      int idx;
> +      if (valid_escape_result_index (esc, &idx))
> +       {
> +         ipa_set_ref_escaped (fbi->info, idx, esc->escaped);
> +         ipa_set_ref_clobbered (fbi->info, idx, esc->write_base);
> +       }
> +    }
> +}
> +
>  /* Initialize the array describing properties of of formal parameters
>     of NODE, analyze their uses and compute jump functions associated
>     with actual arguments of calls from within NODE.  */
> @@ -2381,28 +2944,48 @@ ipa_analyze_node (struct cgraph_node *no
>    calculate_dominance_info (CDI_DOMINATORS);
>    ipa_initialize_node_params (node);
>    ipa_analyze_controlled_uses (node);
> +  info->ref_descs = vNULL;
>
> +  fbi.func = func;
>    fbi.node = node;
>    fbi.info = IPA_NODE_REF (node);
>    fbi.bb_infos = vNULL;
>    fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
>    fbi.param_count = ipa_get_param_count (info);
>    fbi.aa_walked = 0;
> +  create_escape_structures (&fbi);
> +  analyze_all_ssa_escapes (&fbi);
>
> +  for (int i = 0; i < fbi.param_count; ++i)
> +    {
> +      tree ddef, parm = fbi.info->descriptors[i].decl;
> +      if (is_gimple_reg (parm)
> +         && (ddef = ssa_default_def (cfun, parm)))
> +       {
> +         struct ipa_escape *esc = &fbi.escapes[SSA_NAME_VERSION (ddef)];
> +         esc->result_index = i + 1;
> +       }
> +    }
> +
> +  int ri = fbi.param_count;
>    for (struct cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
>      {
>        ipa_bb_info *bi = ipa_get_bb_info (&fbi, gimple_bb (cs->call_stmt));
>        bi->cg_edges.safe_push (cs);
> +      pick_escapes_from_call (&fbi, cs, &ri);
>      }
>
>    for (struct cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
>      {
>        ipa_bb_info *bi = ipa_get_bb_info (&fbi, gimple_bb (cs->call_stmt));
>        bi->cg_edges.safe_push (cs);
> +      pick_escapes_from_call (&fbi, cs, &ri);
>      }
>
> +  gather_picked_escapes (&fbi, ri);
>    analysis_dom_walker (&fbi).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>
> +  free_escape_structures (&fbi);
>    int i;
>    struct ipa_bb_info *bi;
>    FOR_EACH_VEC_ELT (fbi.bb_infos, i, bi)
> @@ -2412,6 +2995,271 @@ ipa_analyze_node (struct cgraph_node *no
>    pop_cfun ();
>  }
>
> +/* Data about the current status of escape propagation. */
> +
> +struct escape_spreading_data
> +{
> +  /* To-do lists for escape spreading.  */
> +  vec<cgraph_node *> up_stack;
> +  vec<cgraph_node *> down_stack;
> +
> +  /* The current info coresponding to the node from which we are spreading
> +     escaped flags.  */
> +  struct ipa_node_params *info;
> +};
> +
> +/* Put the NODE into the upward propagation work list in ESD, unless it is
> +   already there.  */
> +
> +static void
> +enque_to_propagate_escapes_up (struct escape_spreading_data *esd,
> +                              struct cgraph_node *node)
> +{
> +  struct ipa_node_params *info = IPA_NODE_REF (node);
> +  if (info->node_up_enqueued)
> +    return;
> +  info->node_up_enqueued = true;
> +  esd->up_stack.safe_push (node);
> +}
> +
> +/* Put the NODE into the downward propagation work list in ESD, unless it is
> +   already there.  */
> +
> +static void
> +enque_to_propagate_escapes_down (struct escape_spreading_data *esd,
> +                                struct cgraph_node *node)
> +{
> +  struct ipa_node_params *info = IPA_NODE_REF (node);
> +  if (info->node_enqueued)
> +    return;
> +  info->node_enqueued = true;
> +  esd->down_stack.safe_push (node);
> +}
> +
> +/* Return the escape origin from a JFUNC regardless of its type, or -1 if there
> +   is none.  */
> +
> +static int
> +escape_origin_from_jfunc (struct ipa_jump_func *jfunc)
> +{
> +  if (jfunc->type == IPA_JF_PASS_THROUGH)
> +    return ipa_get_jf_pass_through_formal_id (jfunc);
> +  else if (jfunc->type == IPA_JF_ANCESTOR)
> +    return ipa_get_jf_ancestor_formal_id (jfunc);
> +  else if (jfunc->type == IPA_JF_UNKNOWN
> +          && ipa_get_jf_unknown_esc_ref_valid (jfunc))
> +    return ipa_get_jf_unknown_esc_ref_index (jfunc);
> +  else if (jfunc->type == IPA_JF_KNOWN_TYPE
> +          && ipa_get_jf_known_type_esc_ref_valid (jfunc))
> +    return ipa_get_jf_known_type_esc_ref_index (jfunc);
> +  else if (jfunc->type == IPA_JF_CONST
> +          && ipa_get_jf_constant_esc_ref_valid (jfunc))
> +    return ipa_get_jf_constant_esc_ref_index (jfunc);
> +  else
> +    return -1;
> +}
> +
> +/* Callback of cgraph_for_node_and_aliases, spread escpe flags to callers.  */
> +
> +static bool
> +spread_escapes_up_from_one_alias (struct cgraph_node *node, void *data)
> +{
> +  struct escape_spreading_data *esd = (struct escape_spreading_data *) data;
> +  struct cgraph_edge *cs;
> +
> +  for (cs = node->callers; cs; cs = cs->next_caller)
> +    {
> +      if (cs->caller->thunk.thunk_p)
> +       {
> +         cgraph_for_node_and_aliases (cs->caller,
> +                                      spread_escapes_up_from_one_alias,
> +                                      esd, true);
> +         continue;
> +       }
> +      enum availability avail;
> +      cgraph_function_or_thunk_node (node, &avail);
> +
> +      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
> +      struct ipa_edge_args *args = IPA_EDGE_REF (cs);
> +      int args_count = ipa_get_cs_argument_count (args);
> +      int param_count = ipa_get_param_count (esd->info);
> +
> +      for (int i = 0; i < args_count; ++i)
> +       if (i >= param_count
> +           || ipa_is_ref_escaped (esd->info, i)
> +           || avail == AVAIL_OVERWRITABLE)
> +         {
> +           struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
> +           int origin = escape_origin_from_jfunc (jfunc);
> +           if (origin < 0)
> +             continue;
> +
> +           if (!ipa_is_ref_escaped (caller_info, origin))
> +             {
> +               if (dump_file && (dump_flags & TDF_DETAILS))
> +                 fprintf (dump_file, "escape propagated up (%i) from %s/%i to "
> +                          "%s/%i ref %i, from arg %i\n", __LINE__,
> +                          node->name (), node->order, cs->caller->name (),
> +                          cs->caller->order, origin, i);
> +
> +               ipa_set_ref_escaped (caller_info, origin, true);
> +               enque_to_propagate_escapes_up (esd, cs->caller);
> +               enque_to_propagate_escapes_down (esd, cs->caller);
> +             }
> +         }
> +       else if (ipa_is_ref_clobbered (esd->info, i))
> +         {
> +           struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
> +           int origin = escape_origin_from_jfunc (jfunc);
> +           if (origin < 0)
> +             continue;
> +
> +           ipa_set_ref_callee_clobbered (caller_info, origin, true);
> +           if (!ipa_is_ref_clobbered (caller_info, origin))
> +             {
> +               if (dump_file && (dump_flags & TDF_DETAILS))
> +                 fprintf (dump_file, "clobbered propagated up (%i) from "
> +                          "%s/%i to %s/%i ref %i, from arg %i\n", __LINE__,
> +                          node->name (), node->order, cs->caller->name (),
> +                          cs->caller->order, origin, i);
> +
> +               ipa_set_ref_clobbered (caller_info, origin, true);
> +               enque_to_propagate_escapes_up (esd, cs->caller);
> +             }
> +         }
> +    }
> +  return false;
> +}
> +
> +/* Spread set escape flags from ESD->node and all its aliases and thunks to
> +   their callers.  */
> +
> +static void
> +spread_escapes_up (struct escape_spreading_data *esd, cgraph_node *node)
> +{
> +  cgraph_for_node_and_aliases (node, spread_escapes_up_from_one_alias,
> +                              esd, true);
> +}
> +
> +/* Spread set escape flags from ESD->node to all its callees.   */
> +
> +static void
> +spread_escapes_down (struct escape_spreading_data *esd, cgraph_node *node)
> +{
> +  struct cgraph_edge *cs;
> +  for (cs = node->callees; cs; cs = cs->next_callee)
> +    {
> +      enum availability availability;
> +      cgraph_node *callee = cgraph_function_node (cs->callee, &availability);
> +
> +      struct ipa_node_params *callee_info = IPA_NODE_REF (callee);
> +      struct ipa_edge_args *args = IPA_EDGE_REF (cs);
> +      int args_count = ipa_get_cs_argument_count (args);
> +      int parms_count = ipa_get_param_count (callee_info);
> +
> +      for (int i = 0; i < parms_count; ++i)
> +       if (i >= args_count)
> +         {
> +           if (!ipa_is_ref_escaped (callee_info, i))
> +             {
> +               if (dump_file && (dump_flags & TDF_DETAILS))
> +                 fprintf (dump_file, "escape propagated down (%i) from %s/%i "
> +                          " to %s/%i ref %i\n", __LINE__, node->name (),
> +                          node->order, callee->name (), callee->order, i);
> +
> +               ipa_set_ref_escaped (callee_info, i, true);
> +               enque_to_propagate_escapes_down (esd, callee);
> +             }
> +         }
> +       else
> +         {
> +           struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
> +           int origin = escape_origin_from_jfunc (jfunc);
> +
> +           if ((origin < 0
> +                || ipa_is_ref_escaped (esd->info, origin))
> +               && !ipa_is_ref_escaped (callee_info, i))
> +             {
> +               if (dump_file && (dump_flags & TDF_DETAILS))
> +                 fprintf (dump_file, "escape propagated down (%i) from %s/%i "
> +                          " to %s/%i ref %i, origin %i\n", __LINE__,
> +                          node->name (), node->order, callee->name (),
> +                          callee->order, i, origin);
> +
> +               ipa_set_ref_escaped (callee_info, i, true);
> +               enque_to_propagate_escapes_down (esd, callee);
> +             }
> +         }
> +    }
> +}
> +
> +/* Spread escape flags through jump functions across the call graph.  */
> +
> +void
> +ipa_spread_escapes ()
> +{
> +  struct cgraph_node *node;
> +  struct escape_spreading_data esd;
> +  esd.up_stack = vNULL;
> +  esd.down_stack = vNULL;
> +
> +  if (dump_file)
> +    fprintf (dump_file, "\nPropagating escape flags\n");
> +
> +  ipa_check_create_node_params ();
> +  ipa_check_create_edge_args ();
> +  FOR_EACH_FUNCTION (node)
> +    {
> +      struct ipa_node_params *info = IPA_NODE_REF (node);
> +      esd.info = info;
> +      /* FIXME: This test is copied from IPA-CP but I wonder whether we
> +        should check it for all aliases too?  */
> +      if (!node->local.local)
> +       {
> +         /* Set escape flags corresponding to formal parameters.  */
> +         int param_count = ipa_get_param_count (esd.info);
> +         for (int i = 0; i < param_count; ++i)
> +           ipa_set_ref_escaped (info, i, true);
> +       }
> +
> +      spread_escapes_up (&esd, node);
> +      spread_escapes_down (&esd, node);
> +    }
> +
> +  while (!esd.up_stack.is_empty ())
> +    {
> +      node = esd.up_stack.pop ();
> +      esd.info = IPA_NODE_REF (node);
> +      esd.info->node_up_enqueued = false;
> +      spread_escapes_up (&esd, node);
> +    }
> +
> +  while (!esd.down_stack.is_empty ())
> +    {
> +      node = esd.down_stack.pop ();
> +      esd.info = IPA_NODE_REF (node);
> +      esd.info->node_enqueued = false;
> +      spread_escapes_down (&esd, node);
> +    }
> +
> +  esd.up_stack.release ();
> +  esd.down_stack.release ();
> +
> +  FOR_EACH_FUNCTION (node)
> +    {
> +      struct ipa_node_params *info = IPA_NODE_REF (node);
> +      int param_count = ipa_get_param_count (info);
> +
> +      for (int i = 0; i < param_count; i++)
> +       if (!ipa_is_ref_escaped (info, i))
> +         {
> +           cgraph_set_param_noescape (node, i);
> +           if (!ipa_is_ref_clobbered (info, i))
> +             cgraph_set_param_noclobber (node, i);
> +         }
> +    }
> +}
> +
>  /* Given a statement CALL which must be a GIMPLE_CALL calling an OBJ_TYPE_REF
>     attempt a type-based devirtualization.  If successful, return the
>     target function declaration, otherwise return NULL.  */
> @@ -2423,7 +3271,7 @@ ipa_intraprocedural_devirtualization (gi
>    struct ipa_jump_func jfunc;
>    tree otr = gimple_call_fn (call);
>
> -  jfunc.type = IPA_JF_UNKNOWN;
> +  ipa_set_jf_unknown (&jfunc);
>    compute_known_type_jump_func (OBJ_TYPE_REF_OBJECT (otr), &jfunc,
>                                 call, obj_type_ref_class (otr));
>    if (jfunc.type != IPA_JF_KNOWN_TYPE)
> @@ -2442,30 +3290,53 @@ ipa_intraprocedural_devirtualization (gi
>    return fndecl;
>  }
>
> +/* Set DST to be unknown jump function.  If SRC, which must be known type jump
> +   function, has a valid reference index, copy that index to DST, otherwise
> +   keep DST's ref index invalid.  */
> +
> +static void
> +make_unknown_jf_from_known_type_jf (struct ipa_jump_func *dst,
> +                                   struct ipa_jump_func *src)
> +{
> +  ipa_set_jf_unknown (dst);
> +  if (ipa_get_jf_known_type_esc_ref_valid (src))
> +    ipa_set_jf_unknown_ref_index (dst,
> +                                 ipa_get_jf_known_type_esc_ref_index (src));
> +}
> +
>  /* Update the jump function DST when the call graph edge corresponding to SRC is
>     is being inlined, knowing that DST is of type ancestor and src of known
>     type.  */
>
>  static void
> -combine_known_type_and_ancestor_jfs (struct ipa_jump_func *src,
> -                                    struct ipa_jump_func *dst)
> +combine_known_type_and_ancestor_jfs (struct ipa_jump_func *dst,
> +                                    struct ipa_jump_func *src)
>  {
> -  HOST_WIDE_INT combined_offset;
> -  tree combined_type;
> -
>    if (!ipa_get_jf_ancestor_type_preserved (dst))
>      {
> -      dst->type = IPA_JF_UNKNOWN;
> +      make_unknown_jf_from_known_type_jf (dst, src);
>        return;
>      }
>
> -  combined_offset = ipa_get_jf_known_type_offset (src)
> +  bool esc_ref_valid;
> +  int  esc_ref_index = -1;
> +  if (ipa_get_jf_known_type_esc_ref_valid (src))
> +    {
> +      esc_ref_valid = true;
> +      esc_ref_index = ipa_get_jf_known_type_esc_ref_index (src);
> +    }
> +  else
> +    esc_ref_valid = false;
> +
> +  HOST_WIDE_INT combined_offset = ipa_get_jf_known_type_offset (src)
>      + ipa_get_jf_ancestor_offset (dst);
> -  combined_type = ipa_get_jf_ancestor_type (dst);
> +  tree combined_type = ipa_get_jf_ancestor_type (dst);
>
>    ipa_set_jf_known_type (dst, combined_offset,
>                          ipa_get_jf_known_type_base_type (src),
>                          combined_type);
> +  if (esc_ref_valid)
> +    ipa_set_jf_known_type_ref_index (dst, esc_ref_index);
>  }
>
>  /* Update the jump functions associated with call graph edge E when the call
> @@ -2478,6 +3349,7 @@ update_jump_functions_after_inlining (st
>  {
>    struct ipa_edge_args *top = IPA_EDGE_REF (cs);
>    struct ipa_edge_args *args = IPA_EDGE_REF (e);
> +  struct ipa_node_params *old_info = IPA_NODE_REF (cs->callee);
>    int count = ipa_get_cs_argument_count (args);
>    int i;
>
> @@ -2495,14 +3367,16 @@ update_jump_functions_after_inlining (st
>              don't.  */
>           if (dst_fid >= ipa_get_cs_argument_count (top))
>             {
> -             dst->type = IPA_JF_UNKNOWN;
> +             ipa_set_jf_unknown (dst);
>               continue;
>             }
>
>           src = ipa_get_ith_jump_func (top, dst_fid);
>
>           if (src->agg.items
> -             && (dst->value.ancestor.agg_preserved || !src->agg.by_ref))
> +             && (dst->value.ancestor.agg_preserved
> +                 || !src->agg.by_ref
> +                 || ipa_is_param_ref_safely_constant (old_info, dst_fid)))
>             {
>               struct ipa_agg_jf_item *item;
>               int j;
> @@ -2518,7 +3392,7 @@ update_jump_functions_after_inlining (st
>             }
>
>           if (src->type == IPA_JF_KNOWN_TYPE)
> -           combine_known_type_and_ancestor_jfs (src, dst);
> +           combine_known_type_and_ancestor_jfs (dst, src);
>           else if (src->type == IPA_JF_PASS_THROUGH
>                    && src->value.pass_through.operation == NOP_EXPR)
>             {
> @@ -2538,7 +3412,7 @@ update_jump_functions_after_inlining (st
>                 src->value.ancestor.type_preserved;
>             }
>           else
> -           dst->type = IPA_JF_UNKNOWN;
> +           ipa_set_jf_unknown (dst);
>         }
>        else if (dst->type == IPA_JF_PASS_THROUGH)
>         {
> @@ -2552,20 +3426,19 @@ update_jump_functions_after_inlining (st
>               int dst_fid = dst->value.pass_through.formal_id;
>               src = ipa_get_ith_jump_func (top, dst_fid);
>               bool dst_agg_p = ipa_get_jf_pass_through_agg_preserved (dst);
> +             bool pass_aggs_by_ref = dst_agg_p
> +               || ipa_is_param_ref_safely_constant (old_info, dst_fid);
>
>               switch (src->type)
>                 {
>                 case IPA_JF_UNKNOWN:
> -                 dst->type = IPA_JF_UNKNOWN;
> +                 ipa_set_jf_unknown_copy (dst, src);
>                   break;
>                 case IPA_JF_KNOWN_TYPE:
>                   if (ipa_get_jf_pass_through_type_preserved (dst))
> -                   ipa_set_jf_known_type (dst,
> -                                          ipa_get_jf_known_type_offset (src),
> -                                          ipa_get_jf_known_type_base_type (src),
> -                                          ipa_get_jf_known_type_component_type (src));
> +                   ipa_set_jf_known_type_copy (dst, src);
>                   else
> -                   dst->type = IPA_JF_UNKNOWN;
> +                   make_unknown_jf_from_known_type_jf (dst, src);
>                   break;
>                 case IPA_JF_CONST:
>                   ipa_set_jf_cst_copy (dst, src);
> @@ -2614,7 +3487,7 @@ update_jump_functions_after_inlining (st
>                 }
>
>               if (src->agg.items
> -                 && (dst_agg_p || !src->agg.by_ref))
> +                 && (pass_aggs_by_ref || !src->agg.by_ref))
>                 {
>                   /* Currently we do not produce clobber aggregate jump
>                      functions, replace with merging when we do.  */
> @@ -2625,7 +3498,7 @@ update_jump_functions_after_inlining (st
>                 }
>             }
>           else
> -           dst->type = IPA_JF_UNKNOWN;
> +           ipa_set_jf_unknown (dst);
>         }
>      }
>  }
> @@ -2975,11 +3848,12 @@ update_indirect_edges_after_inlining (st
>  {
>    struct ipa_edge_args *top;
>    struct cgraph_edge *ie, *next_ie, *new_direct_edge;
> -  struct ipa_node_params *new_root_info;
> +  struct ipa_node_params *new_root_info, *old_root_info;
>    bool res = false;
>
>    ipa_check_create_edge_args ();
>    top = IPA_EDGE_REF (cs);
> +  old_root_info = IPA_NODE_REF (cs->callee);
>    new_root_info = IPA_NODE_REF (cs->caller->global.inlined_to
>                                 ? cs->caller->global.inlined_to
>                                 : cs->caller);
> @@ -3039,6 +3913,7 @@ update_indirect_edges_after_inlining (st
>                && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
>         {
>           if ((ici->agg_contents
> +              && !ipa_is_param_ref_safely_constant (old_root_info, param_index)
>                && !ipa_get_jf_pass_through_agg_preserved (jfunc))
>               || (ici->polymorphic
>                   && !ipa_get_jf_pass_through_type_preserved (jfunc)))
> @@ -3049,6 +3924,7 @@ update_indirect_edges_after_inlining (st
>        else if (jfunc->type == IPA_JF_ANCESTOR)
>         {
>           if ((ici->agg_contents
> +              && !ipa_is_param_ref_safely_constant (old_root_info, param_index)
>                && !ipa_get_jf_ancestor_agg_preserved (jfunc))
>               || (ici->polymorphic
>                   && !ipa_get_jf_ancestor_type_preserved (jfunc)))
> @@ -3286,6 +4162,7 @@ void
>  ipa_free_node_params_substructures (struct ipa_node_params *info)
>  {
>    info->descriptors.release ();
> +  info->ref_descs.release ();
>    free (info->lattices);
>    /* Lattice values and their sources are deallocated with their alocation
>       pool.  */
> @@ -3461,6 +4338,7 @@ ipa_node_duplication_hook (struct cgraph
>    new_info = IPA_NODE_REF (dst);
>
>    new_info->descriptors = old_info->descriptors.copy ();
> +  new_info->ref_descs = old_info->ref_descs.copy ();
>    new_info->lattices = NULL;
>    new_info->ipcp_orig_node = old_info->ipcp_orig_node;
>
> @@ -3577,7 +4455,7 @@ ipa_free_all_structures_after_iinln (voi
>  void
>  ipa_print_node_params (FILE *f, struct cgraph_node *node)
>  {
> -  int i, count;
> +  unsigned count;
>    struct ipa_node_params *info;
>
>    if (!node->definition)
> @@ -3586,7 +4464,7 @@ ipa_print_node_params (FILE *f, struct c
>    fprintf (f, "  function  %s/%i parameter descriptors:\n",
>            node->name (), node->order);
>    count = ipa_get_param_count (info);
> -  for (i = 0; i < count; i++)
> +  for (unsigned i = 0; i < count; i++)
>      {
>        int c;
>
> @@ -3598,7 +4476,38 @@ ipa_print_node_params (FILE *f, struct c
>        if (c == IPA_UNDESCRIBED_USE)
>         fprintf (f, " undescribed_use");
>        else
> -       fprintf (f, "  controlled_uses=%i", c);
> +       fprintf (f, " controlled_uses=%i", c);
> +      if (ipa_get_tracked_refs_count (info) > 0)
> +       {
> +         if (ipa_is_ref_escaped (info, i))
> +           fprintf (f, " escaped");
> +         else
> +           fprintf (f, " not_esc %s %s",
> +                    ipa_is_ref_clobbered (info, i) ? "clobber" : "not_clobber",
> +                    ipa_is_ref_callee_clobbered (info, i) ? "call_clobber"
> +                    : "not_call_clobber");
> +       }
> +      fprintf (f, "\n");
> +    }
> +
> +  if ((unsigned) ipa_get_tracked_refs_count (info) > count)
> +    {
> +      fprintf (f, "   The rest of reference escaped flags: ");
> +      bool first = true;
> +      for (int i = count; i < ipa_get_tracked_refs_count (info); ++i)
> +       {
> +         if (!first)
> +           fprintf (f, ", ");
> +         else
> +           first = false;
> +         if (ipa_is_ref_escaped (info, i))
> +           fprintf (f, "%i: esc", i);
> +         else
> +           fprintf (f, "%i: not_esc %s %s", i,
> +                    ipa_is_ref_clobbered (info, i) ? "clobber" : "not_clobber",
> +                    ipa_is_ref_callee_clobbered (info, i) ? "call_clobber"
> +                    : "not_call_clobber");
> +       }
>        fprintf (f, "\n");
>      }
>  }
> @@ -4378,16 +5287,34 @@ ipa_write_jump_function (struct output_b
>    switch (jump_func->type)
>      {
>      case IPA_JF_UNKNOWN:
> +      bp = bitpack_create (ob->main_stream);
> +      bp_pack_value (&bp, ipa_get_jf_unknown_esc_ref_valid (jump_func), 1);
> +      streamer_write_bitpack (&bp);
> +      if (ipa_get_jf_unknown_esc_ref_valid (jump_func))
> +       streamer_write_uhwi (ob, ipa_get_jf_unknown_esc_ref_index (jump_func));
>        break;
>      case IPA_JF_KNOWN_TYPE:
>        streamer_write_uhwi (ob, jump_func->value.known_type.offset);
>        stream_write_tree (ob, jump_func->value.known_type.base_type, true);
>        stream_write_tree (ob, jump_func->value.known_type.component_type, true);
> +      bp = bitpack_create (ob->main_stream);
> +      bp_pack_value (&bp, ipa_get_jf_known_type_esc_ref_valid (jump_func), 1);
> +      streamer_write_bitpack (&bp);
> +      if (ipa_get_jf_known_type_esc_ref_valid (jump_func))
> +       streamer_write_uhwi (ob,
> +                            ipa_get_jf_known_type_esc_ref_index (jump_func));
>        break;
>      case IPA_JF_CONST:
>        gcc_assert (
>           EXPR_LOCATION (jump_func->value.constant.value) == UNKNOWN_LOCATION);
>        stream_write_tree (ob, jump_func->value.constant.value, true);
> +
> +      bp = bitpack_create (ob->main_stream);
> +      bp_pack_value (&bp, ipa_get_jf_constant_esc_ref_valid (jump_func), 1);
> +      streamer_write_bitpack (&bp);
> +      if (ipa_get_jf_constant_esc_ref_valid (jump_func))
> +       streamer_write_uhwi (ob,
> +                            ipa_get_jf_constant_esc_ref_index (jump_func));
>        break;
>      case IPA_JF_PASS_THROUGH:
>        streamer_write_uhwi (ob, jump_func->value.pass_through.operation);
> @@ -4448,19 +5375,44 @@ ipa_read_jump_function (struct lto_input
>    switch (jftype)
>      {
>      case IPA_JF_UNKNOWN:
> -      jump_func->type = IPA_JF_UNKNOWN;
> +      {
> +       ipa_set_jf_unknown (jump_func);
> +       struct bitpack_d bp = streamer_read_bitpack (ib);
> +       bool esc_ref_valid = bp_unpack_value (&bp, 1);
> +       if (esc_ref_valid)
> +         {
> +           unsigned esc_ref_idx = streamer_read_uhwi (ib);
> +           ipa_set_jf_unknown_ref_index (jump_func, esc_ref_idx);
> +         }
> +      }
>        break;
>      case IPA_JF_KNOWN_TYPE:
>        {
>         HOST_WIDE_INT offset = streamer_read_uhwi (ib);
>         tree base_type = stream_read_tree (ib, data_in);
>         tree component_type = stream_read_tree (ib, data_in);
> +       struct bitpack_d bp = streamer_read_bitpack (ib);
> +       bool esc_ref_valid = bp_unpack_value (&bp, 1);
>
>         ipa_set_jf_known_type (jump_func, offset, base_type, component_type);
> +       if (esc_ref_valid)
> +         {
> +           unsigned esc_ref_idx = streamer_read_uhwi (ib);
> +           ipa_set_jf_known_type_ref_index (jump_func, esc_ref_idx);
> +         }
>         break;
>        }
>      case IPA_JF_CONST:
> -      ipa_set_jf_constant (jump_func, stream_read_tree (ib, data_in), cs);
> +      {
> +       ipa_set_jf_constant (jump_func, stream_read_tree (ib, data_in), cs);
> +       struct bitpack_d bp = streamer_read_bitpack (ib);
> +       bool esc_ref_valid = bp_unpack_value (&bp, 1);
> +       if (esc_ref_valid)
> +         {
> +           unsigned esc_ref_idx = streamer_read_uhwi (ib);
> +           ipa_set_jf_constant_ref_index (jump_func, esc_ref_idx);
> +         }
> +      }
>        break;
>      case IPA_JF_PASS_THROUGH:
>        operation = (enum tree_code) streamer_read_uhwi (ib);
> @@ -4592,12 +5544,27 @@ ipa_write_node_info (struct output_block
>    gcc_assert (info->analysis_done
>               || ipa_get_param_count (info) == 0);
>    gcc_assert (!info->node_enqueued);
> +  gcc_assert (!info->node_up_enqueued);
>    gcc_assert (!info->ipcp_orig_node);
>    for (j = 0; j < ipa_get_param_count (info); j++)
>      bp_pack_value (&bp, ipa_is_param_used (info, j), 1);
>    streamer_write_bitpack (&bp);
>    for (j = 0; j < ipa_get_param_count (info); j++)
>      streamer_write_hwi (ob, ipa_get_controlled_uses (info, j));
> +
> +  streamer_write_uhwi (ob, ipa_get_tracked_refs_count (info));
> +  if (ipa_get_tracked_refs_count (info) > 0)
> +    {
> +      bp = bitpack_create (ob->main_stream);
> +      for (int i = 0; i < ipa_get_tracked_refs_count (info); ++i)
> +       {
> +         bp_pack_value (&bp, ipa_is_ref_escaped (info, i), 1);
> +         bp_pack_value (&bp, ipa_is_ref_clobbered (info, i), 1);
> +         bp_pack_value (&bp, ipa_is_ref_callee_clobbered (info, i), 1);
> +       }
> +      streamer_write_bitpack (&bp);
> +    }
> +
>    for (e = node->callees; e; e = e->next_callee)
>      {
>        struct ipa_edge_args *args = IPA_EDGE_REF (e);
> @@ -4632,15 +5599,30 @@ ipa_read_node_info (struct lto_input_blo
>
>    for (k = 0; k < ipa_get_param_count (info); k++)
>      info->descriptors[k].move_cost = streamer_read_uhwi (ib);
> -
> +
>    bp = streamer_read_bitpack (ib);
>    if (ipa_get_param_count (info) != 0)
>      info->analysis_done = true;
>    info->node_enqueued = false;
> +  info->node_up_enqueued = false;
>    for (k = 0; k < ipa_get_param_count (info); k++)
>      ipa_set_param_used (info, k, bp_unpack_value (&bp, 1));
>    for (k = 0; k < ipa_get_param_count (info); k++)
>      ipa_set_controlled_uses (info, k, streamer_read_hwi (ib));
> +
> +  unsigned ref_count = streamer_read_uhwi (ib);
> +  if (ref_count > 0)
> +    {
> +      bp = streamer_read_bitpack (ib);
> +      info->ref_descs.safe_grow_cleared (ref_count);
> +      for (unsigned i = 0; i < ref_count; ++i)
> +       {
> +         ipa_set_ref_escaped (info, i, bp_unpack_value (&bp, 1));
> +         ipa_set_ref_clobbered (info, i, bp_unpack_value (&bp, 1));
> +         ipa_set_ref_callee_clobbered (info, i, bp_unpack_value (&bp, 1));
> +       }
> +    }
> +
>    for (e = node->callees; e; e = e->next_callee)
>      {
>        struct ipa_edge_args *args = IPA_EDGE_REF (e);
> @@ -4830,7 +5812,7 @@ read_agg_replacement_chain (struct lto_i
>    unsigned int count, i;
>
>    count = streamer_read_uhwi (ib);
> -  for (i = 0; i <count; i++)
> +  for (i = 0; i < count; i++)
>      {
>        struct ipa_agg_replacement_value *av;
>        struct bitpack_d bp;
> @@ -5134,6 +6116,8 @@ ipcp_transform_function (struct cgraph_n
>    fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
>    fbi.param_count = param_count;
>    fbi.aa_walked = 0;
> +  fbi.escapes = vNULL;
> +  fbi.decl_escapes = NULL;
>
>    descriptors.safe_grow_cleared (param_count);
>    ipa_populate_param_decls (node, descriptors);
> Index: src/gcc/ipa-prop.h
> ===================================================================
> --- src.orig/gcc/ipa-prop.h
> +++ src/gcc/ipa-prop.h
> @@ -73,6 +73,17 @@ enum jump_func_type
>    IPA_JF_ANCESTOR          /* represented by field ancestor */
>  };
>
> +
> +/* Structure describing data which are generally unknown at compile time, yet
> +   may have some useful properties.  */
> +struct GTY (()) ipa_unknown_data
> +{
> +  /* If True, the next field contains valid index.  */
> +  unsigned escape_ref_valid : 1;
> +  /* Index into escaped_ref flags that describe data this refers to.  */
> +  unsigned escape_ref_index : 31;
> +};
> +
>  /* Structure holding data required to describe a known type jump function.  */
>  struct GTY(()) ipa_known_type_data
>  {
> @@ -82,6 +93,10 @@ struct GTY(()) ipa_known_type_data
>    tree base_type;
>    /* Type of the component of the object that is being described.  */
>    tree component_type;
> +  /* If True, the next field contains valid index.  */
> +  unsigned escape_ref_valid : 1;
> +  /* Index into escaped_ref flags that describe data this refers to.  */
> +  unsigned escape_ref_index : 31;
>  };
>
>  struct ipa_cst_ref_desc;
> @@ -93,6 +108,10 @@ struct GTY(()) ipa_constant_data
>    tree value;
>    /* Pointer to the structure that describes the reference.  */
>    struct ipa_cst_ref_desc GTY((skip)) *rdesc;
> +  /* If True, the next field contains valid index.  */
> +  unsigned escape_ref_valid : 1;
> +  /* Index into escaped_ref flags that describe data this refers to.  */
> +  unsigned escape_ref_index : 31;
>  };
>
>  /* Structure holding data required to describe a pass-through jump function.  */
> @@ -187,11 +206,10 @@ struct GTY (()) ipa_jump_func
>    struct ipa_agg_jump_function agg;
>
>    enum jump_func_type type;
> -  /* Represents a value of a jump function.  pass_through is used only in jump
> -     function context.  constant represents the actual constant in constant jump
> -     functions and member_cst holds constant c++ member functions.  */
> +  /* Represents a value of a jump function.  */
>    union jump_func_value
>    {
> +    struct ipa_unknown_data GTY ((tag ("IPA_JF_UNKNOWN"))) unknown;
>      struct ipa_known_type_data GTY ((tag ("IPA_JF_KNOWN_TYPE"))) known_type;
>      struct ipa_constant_data GTY ((tag ("IPA_JF_CONST"))) constant;
>      struct ipa_pass_through_data GTY ((tag ("IPA_JF_PASS_THROUGH"))) pass_through;
> @@ -199,6 +217,26 @@ struct GTY (()) ipa_jump_func
>    } GTY ((desc ("%1.type"))) value;
>  };
>
> +/* Return whether the unknown jump function JFUNC has an associated valid index
> +   into callers escaped_ref flags.  */
> +
> +static inline bool
> +ipa_get_jf_unknown_esc_ref_valid (struct ipa_jump_func *jfunc)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
> +  return jfunc->value.unknown.escape_ref_valid;
> +}
> +
> +/* Return the index into escaped ref flags of the caller that corresponds to
> +   data described by an unknown jump function JFUNC.  */
> +
> +static inline int
> +ipa_get_jf_unknown_esc_ref_index (struct ipa_jump_func *jfunc)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
> +  gcc_checking_assert (ipa_get_jf_unknown_esc_ref_valid (jfunc));
> +  return jfunc->value.unknown.escape_ref_index;
> +}
>
>  /* Return the offset of the component that is described by a known type jump
>     function JFUNC.  */
> @@ -228,6 +266,27 @@ ipa_get_jf_known_type_component_type (st
>    return jfunc->value.known_type.component_type;
>  }
>
> +/* Return whether the described known type jump function JFUNC has a valid
> +   index into callers escaped_ref flags.  */
> +
> +static inline bool
> +ipa_get_jf_known_type_esc_ref_valid (struct ipa_jump_func *jfunc)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
> +  return jfunc->value.known_type.escape_ref_valid;
> +}
> +
> +/* Return the index into escaped ref flags of the caller that corresponds to
> +   data described by a known type jump function.  */
> +
> +static inline int
> +ipa_get_jf_known_type_esc_ref_index (struct ipa_jump_func *jfunc)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
> +  gcc_checking_assert (ipa_get_jf_known_type_esc_ref_valid (jfunc));
> +  return jfunc->value.known_type.escape_ref_index;
> +}
> +
> +/* Return the constant stored in a constant jump function JFUNC.  */
>
>  static inline tree
> @@ -237,6 +296,8 @@ ipa_get_jf_constant (struct ipa_jump_fun
>    return jfunc->value.constant.value;
>  }
>
> +/* Return the reference description stored in constant jump function JFUNC.  */
> +
>  static inline struct ipa_cst_ref_desc *
>  ipa_get_jf_constant_rdesc (struct ipa_jump_func *jfunc)
>  {
> @@ -244,6 +305,27 @@ ipa_get_jf_constant_rdesc (struct ipa_ju
>    return jfunc->value.constant.rdesc;
>  }
>
> +/* Return whether the described constant jump function JFUNC has a valid
> +   index into callers escaped_ref flags.  */
> +
> +static inline bool
> +ipa_get_jf_constant_esc_ref_valid (struct ipa_jump_func *jfunc)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
> +  return jfunc->value.constant.escape_ref_valid;
> +}
> +
> +/* Return the index into escaped ref flags of the caller that corresponds to
> +   data described by a constant jump function.  */
> +
> +static inline int
> +ipa_get_jf_constant_esc_ref_index (struct ipa_jump_func *jfunc)
> +{
> +  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
> +  gcc_checking_assert (ipa_get_jf_constant_esc_ref_valid (jfunc));
> +  return jfunc->value.constant.escape_ref_index;
> +}
> +
> +/* Return the operand of a pass through jump function JFUNC.  */
>
>  static inline tree
> @@ -346,13 +428,41 @@ struct ipa_param_descriptor
>       says how many there are.  If any use could not be described by means of
>       ipa-prop structures, this is IPA_UNDESCRIBED_USE.  */
>    int controlled_uses;
> -  unsigned int move_cost : 31;
> +  unsigned int move_cost : 30;
>    /* The parameter is used.  */
>    unsigned used : 1;
>  };
>
>  struct ipcp_lattice;
>
> +/* Interprocedural information about references that we try to prove have not
> +   escaped, among other properties.  We keep this information for all formal
> +   parameters, even when they are not in fact references, so that indices into
> +   param descriptors match those to reference descriptors, however we also keep
> +   it for some other references that we pass as actual arguments to callees,
> +   their indices must be derived from jump functions.
> +
> +   These flags hold results of intraprocedural summary gathering and
> +   intermediate values during interprocedural propagation, as opposed to
> +   corresponding bitmaps in cgraph_node which hold final results.  After
> +   ipa_spread_escapes finishes, the corresponding bits in both structures are
> +   the same, however ipa_ref_descriptor is freed at the end of the IPA
> +   analysis stage.  */
> +
> +struct ipa_ref_descriptor
> +{
> +  /* Set if the reference could have escaped.  */
> +  unsigned int escaped : 1;
> +  /* Valid only if escaped is false.  Set when the memory the reference refers
> +     to could have been written to in this function or in any of the
> +     callees.  */
> +  unsigned int clobbered : 1;
> +  /* Valid only if escaped is false.  Set when the memory the reference refers
> +     to could have been written to in any of the callees this function has
> +     (i.e. disregarding any modifications in this particular function).  */
> +  unsigned int callee_clobbered : 1;
> +};
> +
>  /* ipa_node_params stores information related to formal parameters of functions
>     and some other information for interprocedural passes that operate on
>     parameters (such as ipa-cp).  */
> @@ -362,6 +472,11 @@ struct ipa_node_params
>    /* Information about individual formal parameters that are gathered when
>       summaries are generated. */
>    vec<ipa_param_descriptor> descriptors;
> +
> +  /* Escape and other information about formal parameters and also some
> +     references passed as actual parameters to callees. */
> +  vec<ipa_ref_descriptor> ref_descs;
> +
>    /* Pointer to an array of structures describing individual formal
>       parameters.  */
>    struct ipcp_param_lattices *lattices;
> @@ -374,8 +489,12 @@ struct ipa_node_params
>    /* Whether the param uses analysis and jump function computation has already
>       been performed.  */
>    unsigned analysis_done : 1;
> -  /* Whether the function is enqueued in ipa-cp propagation stack.  */
> +  /* Whether the function is enqueued in ipa-cp propagation stack or when
> +     propagating escape flags "downwards" (i.e. from callers to callees).  */
>    unsigned node_enqueued : 1;
> +  /* Whether the function is enqueued in a to-do list of "upwards" escape flag
> +     propagation (i.e. from callees to callers).  */
> +  unsigned node_up_enqueued : 1;
>    /* Whether we should create a specialized version based on values that are
>       known to be constant in all contexts.  */
>    unsigned do_clone_for_all_contexts : 1;
> @@ -452,6 +571,45 @@ ipa_is_param_used (struct ipa_node_param
>    return info->descriptors[i].used;
>  }
>
> +/* Return if reference number I (there are more of them than parameters, we
> +   also have this information for some actual arguments passed to callees) of
> +   the function associated with INFO has uncontrollably escaped.  */
> +
> +static inline bool
> +ipa_is_ref_escaped (struct ipa_node_params *info, int i)
> +{
> +  return info->ref_descs[i].escaped;
> +}
> +
> +/* Return if the reference number I tracked in function corresponding to INFO
> +   is clobbered in any way during the run of the function.  */
> +
> +static inline bool
> +ipa_is_ref_clobbered (struct ipa_node_params *info, int i)
> +{
> +  return info->ref_descs[i].clobbered;
> +}
> +
> +/* Return if the reference number I tracked in function corresponding to INFO
> +   is clobbered in any of the callees of the function, disregarding any
> +   modifications in the function itself.  */
> +
> +static inline bool
> +ipa_is_ref_callee_clobbered (struct ipa_node_params *info, int i)
> +{
> +  return info->ref_descs[i].callee_clobbered;
> +}
> +
> +/* Return true iff we know that the Ith parameter of function described by
> +   INFO does not escape and that it or any pointers derived from it are not
> +   used as a base for a memory write in the node described by INFO and all its
> +   (even indirect) callees.  */
> +
> +static inline bool
> +ipa_is_param_ref_safely_constant (struct ipa_node_params *info, int i)
> +{
> +  return !ipa_is_ref_escaped (info, i) && !ipa_is_ref_clobbered (info, i);
> +}
> +
>  /* Information about replacements done in aggregates for a given node (each
>     node has its linked list).  */
>  struct GTY(()) ipa_agg_replacement_value
> @@ -589,6 +747,7 @@ tree ipa_intraprocedural_devirtualizatio
>
>  /* Functions related to both.  */
>  void ipa_analyze_node (struct cgraph_node *);
> +void ipa_spread_escapes ();
>
>  /* Aggregate jump function related functions.  */
>  tree ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *, HOST_WIDE_INT,
> Index: src/gcc/ipa-cp.c
> ===================================================================
> --- src.orig/gcc/ipa-cp.c
> +++ src/gcc/ipa-cp.c
> @@ -1311,12 +1311,17 @@ merge_aggregate_lattices (struct cgraph_
>     rules about propagating values passed by reference.  */
>
>  static bool
> -agg_pass_through_permissible_p (struct ipcp_param_lattices *src_plats,
> +agg_pass_through_permissible_p (struct ipa_node_params *caller_info,
> +                               struct ipcp_param_lattices *src_plats,
>                                 struct ipa_jump_func *jfunc)
>  {
> -  return src_plats->aggs
> -    && (!src_plats->aggs_by_ref
> -       || ipa_get_jf_pass_through_agg_preserved (jfunc));
> +  if (!src_plats->aggs)
> +    return false;
> +
> +  return !src_plats->aggs_by_ref
> +    || ipa_is_param_ref_safely_constant (caller_info,
> +                                ipa_get_jf_pass_through_formal_id (jfunc))
> +    || ipa_get_jf_pass_through_agg_preserved (jfunc);
>  }
>
>  /* Propagate scalar values across jump function JFUNC that is associated with
> @@ -1340,7 +1345,7 @@ propagate_aggs_accross_jump_function (st
>        struct ipcp_param_lattices *src_plats;
>
>        src_plats = ipa_get_parm_lattices (caller_info, src_idx);
> -      if (agg_pass_through_permissible_p (src_plats, jfunc))
> +      if (agg_pass_through_permissible_p (caller_info, src_plats, jfunc))
>         {
>           /* Currently we do not produce clobber aggregate jump
>              functions, replace with merging when we do.  */
> @@ -1351,15 +1356,16 @@ propagate_aggs_accross_jump_function (st
>        else
>         ret |= set_agg_lats_contain_variable (dest_plats);
>      }
> -  else if (jfunc->type == IPA_JF_ANCESTOR
> -          && ipa_get_jf_ancestor_agg_preserved (jfunc))
> +  else if (jfunc->type == IPA_JF_ANCESTOR)
>      {
>        struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
>        int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
>        struct ipcp_param_lattices *src_plats;
>
>        src_plats = ipa_get_parm_lattices (caller_info, src_idx);
> -      if (src_plats->aggs && src_plats->aggs_by_ref)
> +      if (src_plats->aggs && src_plats->aggs_by_ref
> +         && (ipa_is_param_ref_safely_constant (caller_info, src_idx)
> +             || ipa_get_jf_ancestor_agg_preserved (jfunc)))
>         {
>           /* Currently we do not produce clobber aggregate jump
>              functions, replace with merging when we do.  */
> @@ -1367,7 +1373,7 @@ propagate_aggs_accross_jump_function (st
>           ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
>                                            ipa_get_jf_ancestor_offset (jfunc));
>         }
> -      else if (!src_plats->aggs_by_ref)
> +      else if (src_plats->aggs && !src_plats->aggs_by_ref)
>         ret |= set_agg_lats_to_bottom (dest_plats);
>        else
>         ret |= set_agg_lats_contain_variable (dest_plats);
> @@ -3037,39 +3043,49 @@ intersect_aggregates_with_edge (struct c
>           struct ipcp_param_lattices *orig_plats;
>           orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node),
>                                               src_idx);
> -         if (agg_pass_through_permissible_p (orig_plats, jfunc))
> +         if (!agg_pass_through_permissible_p (caller_info, orig_plats, jfunc))
>             {
> -             if (!inter.exists ())
> -               inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
> -             else
> -               intersect_with_agg_replacements (cs->caller, src_idx,
> -                                                &inter, 0);
> +             inter.release ();
> +             return vNULL;
>             }
> +         if (!inter.exists ())
> +           inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
> +         else
> +           intersect_with_agg_replacements (cs->caller, src_idx,
> +                                            &inter, 0);
>         }
>        else
>         {
>           struct ipcp_param_lattices *src_plats;
>           src_plats = ipa_get_parm_lattices (caller_info, src_idx);
> -         if (agg_pass_through_permissible_p (src_plats, jfunc))
> +         if (!agg_pass_through_permissible_p (caller_info, src_plats, jfunc))
>             {
> -             /* Currently we do not produce clobber aggregate jump
> -                functions, adjust when we do.  */
> -             gcc_checking_assert (!jfunc->agg.items);
> -             if (!inter.exists ())
> -               inter = copy_plats_to_inter (src_plats, 0);
> -             else
> -               intersect_with_plats (src_plats, &inter, 0);
> +             inter.release ();
> +             return vNULL;
>             }
> +         /* Currently we do not produce clobber aggregate jump functions,
> +            adjust when we do.  */
> +         gcc_checking_assert (!jfunc->agg.items);
> +         if (!inter.exists ())
> +           inter = copy_plats_to_inter (src_plats, 0);
> +         else
> +           intersect_with_plats (src_plats, &inter, 0);
>         }
>      }
> -  else if (jfunc->type == IPA_JF_ANCESTOR
> -          && ipa_get_jf_ancestor_agg_preserved (jfunc))
> +  else if (jfunc->type == IPA_JF_ANCESTOR)
>      {
>        struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
>        int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
>        struct ipcp_param_lattices *src_plats;
>        HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
>
> +      if (!ipa_is_param_ref_safely_constant (caller_info, src_idx)
> +         && !ipa_get_jf_ancestor_agg_preserved (jfunc))
> +       {
> +         inter.release ();
> +         return vNULL;
> +       }
> +
>        if (caller_info->ipcp_orig_node)
>         {
>           if (!inter.exists ())
> @@ -3115,9 +3131,8 @@ intersect_aggregates_with_edge (struct c
>                   break;
>                 if (ti->offset == item->offset)
>                   {
> -                   gcc_checking_assert (ti->value);
> -                   if (values_equal_for_ipcp_p (item->value,
> -                                                ti->value))
> +                   if (ti->value
> +                       && values_equal_for_ipcp_p (item->value, ti->value))
>                       found = true;
>                     break;
>                   }
> @@ -3686,6 +3701,9 @@ ipcp_driver (void)
>
>    ipa_check_create_node_params ();
>    ipa_check_create_edge_args ();
> +
> +  ipa_spread_escapes ();
> +
>    grow_edge_clone_vectors ();
>    edge_duplication_hook_holder =
>      cgraph_add_edge_duplication_hook (&ipcp_edge_duplication_hook, NULL);
> Index: src/gcc/ipa-inline.c
> ===================================================================
> --- src.orig/gcc/ipa-inline.c
> +++ src/gcc/ipa-inline.c
> @@ -2143,6 +2143,9 @@ ipa_inline (void)
>    if (!optimize)
>      return 0;
>
> +  if (!flag_ipa_cp)
> +    ipa_spread_escapes ();
> +
>    order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
>
>    if (in_lto_p && optimize)
> Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> ===================================================================
> --- /dev/null
> +++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-ipa-sra -fdump-ipa-cp-details"  } */
> +/* { dg-add-options bind_pic_locally } */
> +
> +volatile int g1, g2;
> +
> +static void __attribute__ ((noinline))
> +bar (int *i)
> +{
> +  g1 = *i;
> +}
> +
> +static void __attribute__ ((noinline))
> +foo (int *i)
> +{
> +  bar (i);
> +  bar (i);
> +
> +  g2 = *i;
> +}
> +
> +int
> +main (int argc, char **argv)
> +{
> +  int i = 8;
> +
> +  foo (&i);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-ipa-dump "Creating a specialized node of foo.*for all known contexts" "cp" } } */
> +/* { dg-final { scan-ipa-dump "Creating a specialized node of bar.*for all known contexts" "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "= 8" 6 "cp" } } */
> +/* { dg-final { cleanup-ipa-dump "cp" } } */
> Index: src/gcc/cgraph.c
> ===================================================================
> --- src.orig/gcc/cgraph.c
> +++ src/gcc/cgraph.c
> @@ -3174,4 +3174,47 @@ gimple_check_call_matching_types (gimple
>    return true;
>  }
>
> +/* Return true if parameter number I of NODE is marked as known not to
> +   escape.  */
> +
> +bool
> +cgraph_param_noescape_p (cgraph_node *node, int i)
> +{
> +  return node->global.noescape_parameters
> +    && bitmap_bit_p (node->global.noescape_parameters, i);
> +}
> +
> +/* Mark parameter of NODE number I as known not to escape.  */
> +
> +void
> +cgraph_set_param_noescape (cgraph_node *node, int i)
> +{
> +  if (!node->global.noescape_parameters)
> +    node->global.noescape_parameters = BITMAP_GGC_ALLOC ();
> +  bitmap_set_bit (node->global.noescape_parameters, i);
> +}
> +
> +/* Return true if memory accessible through parameter number I of NODE is
> +   marked as known not to be clobbered.  */
> +
> +bool
> +cgraph_param_noclobber_p (cgraph_node *node, int i)
> +{
> +  return node->global.noclobber_parameters
> +    && bitmap_bit_p (node->global.noclobber_parameters, i);
> +}
> +
> +/* Mark memory reachable by parameter number I of NODE as known not to be
> +   clobbered.  */
> +
> +void
> +cgraph_set_param_noclobber (cgraph_node *node, int i)
> +{
> +  if (!node->global.noclobber_parameters)
> +    node->global.noclobber_parameters = BITMAP_GGC_ALLOC ();
> +  bitmap_set_bit (node->global.noclobber_parameters, i);
> +}
> +
> +
> +
>  #include "gt-cgraph.h"
> Index: src/gcc/cgraph.h
> ===================================================================
> --- src.orig/gcc/cgraph.h
> +++ src/gcc/cgraph.h
> @@ -227,6 +227,13 @@ struct GTY(()) cgraph_global_info {
>    /* For inline clones this points to the function they will be
>       inlined into.  */
>    struct cgraph_node *inlined_to;
> +
> +  /* Parameters that are known not to escape from this function.  */
> +  bitmap noescape_parameters;
> +
> +  /* Parameters for which the memory reached by them is known not to be
> +     clobbered.  */
> +  bitmap noclobber_parameters;
>  };
>
>  /* Information about the function that is propagated by the RTL backend.
> @@ -870,6 +877,11 @@ void cgraph_speculative_call_info (struc
>                                    struct ipa_ref *&);
>  extern bool gimple_check_call_matching_types (gimple, tree, bool);
>
> +extern bool cgraph_param_noescape_p (cgraph_node *node, int i);
> +extern void cgraph_set_param_noescape (cgraph_node *node, int i);
> +extern bool cgraph_param_noclobber_p (cgraph_node *node, int i);
> +extern void cgraph_set_param_noclobber (cgraph_node *node, int i);
> +
>  /* In cgraphunit.c  */
>  struct asm_node *add_asm_node (tree);
>  extern FILE *cgraph_dump_file;
> Index: src/gcc/cgraphclones.c
> ===================================================================
> --- src.orig/gcc/cgraphclones.c
> +++ src/gcc/cgraphclones.c
> @@ -338,6 +338,8 @@ duplicate_thunk_for_node (cgraph_node *t
>    gcc_checking_assert (!DECL_INITIAL (new_decl));
>    gcc_checking_assert (!DECL_RESULT (new_decl));
>    gcc_checking_assert (!DECL_RTL_SET_P (new_decl));
> +  gcc_checking_assert (!thunk->global.noescape_parameters
> +                      && !thunk->global.noclobber_parameters);
>
>    DECL_NAME (new_decl) = clone_function_name (thunk->decl, "artificial_thunk");
>    SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
> @@ -375,6 +377,26 @@ redirect_edge_duplicating_thunks (struct
>    cgraph_redirect_edge_callee (e, n);
>  }
>
> +/* Copy global.noescape_parameters and global.noclobber_parameters of SRC to
> +   DEST.  */
> +
> +static void
> +copy_noescape_noclobber_bitmaps (cgraph_node *dst, cgraph_node *src)
> +{
> +  if (src->global.noescape_parameters)
> +    {
> +      dst->global.noescape_parameters = BITMAP_GGC_ALLOC ();
> +      bitmap_copy (dst->global.noescape_parameters,
> +                  src->global.noescape_parameters);
> +    }
> +  if (src->global.noclobber_parameters)
> +    {
> +      dst->global.noclobber_parameters = BITMAP_GGC_ALLOC ();
> +      bitmap_copy (dst->global.noclobber_parameters,
> +                  src->global.noclobber_parameters);
> +    }
> +}
> +
>  /* Create node representing clone of N executed COUNT times.  Decrease
>     the execution counts from original node too.
>     The new clone will have decl set to DECL that may or may not be the same
> @@ -418,8 +440,8 @@ cgraph_clone_node (struct cgraph_node *n
>    new_node->local = n->local;
>    new_node->externally_visible = false;
>    new_node->local.local = true;
> -  new_node->global = n->global;
>    new_node->global.inlined_to = new_inlined_to;
> +  copy_noescape_noclobber_bitmaps (new_node, n);
>    new_node->rtl = n->rtl;
>    new_node->count = count;
>    new_node->frequency = n->frequency;
> @@ -883,7 +905,8 @@ cgraph_copy_node_for_versioning (struct
>     new_version->local = old_version->local;
>     new_version->externally_visible = false;
>     new_version->local.local = new_version->definition;
> -   new_version->global = old_version->global;
> +   new_version->global.inlined_to = old_version->global.inlined_to;
> +   copy_noescape_noclobber_bitmaps (new_version, old_version);
>     new_version->rtl = old_version->rtl;
>     new_version->count = old_version->count;
>
> Index: src/gcc/lto-cgraph.c
> ===================================================================
> --- src.orig/gcc/lto-cgraph.c
> +++ src/gcc/lto-cgraph.c
> @@ -1626,6 +1626,24 @@ output_edge_opt_summary (struct output_b
>  {
>  }
>
> +/* Output a bitmap BMP.  Aimed primarily at bitmaps describing parameters in
> +   cgraph_node.  */
> +
> +static void
> +output_param_bitmap (struct output_block *ob, bitmap bmp)
> +{
> +  if (bmp)
> +    {
> +      unsigned int index;
> +      bitmap_iterator bi;
> +      streamer_write_uhwi (ob, bitmap_count_bits (bmp));
> +      EXECUTE_IF_SET_IN_BITMAP (bmp, 0, index, bi)
> +       streamer_write_uhwi (ob, index);
> +    }
> +  else
> +    streamer_write_uhwi (ob, 0);
> +}
> +
>  /* Output optimization summary for NODE to OB.  */
>
>  static void
> @@ -1633,29 +1651,15 @@ output_node_opt_summary (struct output_b
>                          struct cgraph_node *node,
>                          lto_symtab_encoder_t encoder)
>  {
> -  unsigned int index;
> -  bitmap_iterator bi;
>    struct ipa_replace_map *map;
>    struct bitpack_d bp;
>    int i;
>    struct cgraph_edge *e;
>
> -  if (node->clone.args_to_skip)
> -    {
> -      streamer_write_uhwi (ob, bitmap_count_bits (node->clone.args_to_skip));
> -      EXECUTE_IF_SET_IN_BITMAP (node->clone.args_to_skip, 0, index, bi)
> -       streamer_write_uhwi (ob, index);
> -    }
> -  else
> -    streamer_write_uhwi (ob, 0);
> -  if (node->clone.combined_args_to_skip)
> -    {
> -      streamer_write_uhwi (ob, bitmap_count_bits (node->clone.combined_args_to_skip));
> -      EXECUTE_IF_SET_IN_BITMAP (node->clone.combined_args_to_skip, 0, index, bi)
> -       streamer_write_uhwi (ob, index);
> -    }
> -  else
> -    streamer_write_uhwi (ob, 0);
> +  output_param_bitmap (ob, node->clone.args_to_skip);
> +  output_param_bitmap (ob, node->clone.combined_args_to_skip);
> +  output_param_bitmap (ob, node->global.noescape_parameters);
> +  output_param_bitmap (ob, node->global.noclobber_parameters);
>    streamer_write_uhwi (ob, vec_safe_length (node->clone.tree_map));
>    FOR_EACH_VEC_SAFE_ELT (node->clone.tree_map, i, map)
>      {
> @@ -1724,6 +1728,25 @@ input_edge_opt_summary (struct cgraph_ed
>  {
>  }
>
> +/* Input and return a bitmap that was output by output_param_bitmap. */
> +
> +static bitmap
> +input_param_bitmap (struct lto_input_block *ib_main)
> +{
> +  int count;
> +
> +  count = streamer_read_uhwi (ib_main);
> +  if (!count)
> +    return NULL;
> +  bitmap res = BITMAP_GGC_ALLOC ();
> +  for (int i = 0; i < count; i++)
> +    {
> +      int bit = streamer_read_uhwi (ib_main);
> +      bitmap_set_bit (res, bit);
> +    }
> +  return res;
> +}
> +
>  /* Input optimisation summary of NODE.  */
>
>  static void
> @@ -1731,28 +1754,14 @@ input_node_opt_summary (struct cgraph_no
>                         struct lto_input_block *ib_main,
>                         struct data_in *data_in)
>  {
> -  int i;
>    int count;
> -  int bit;
> +  int i;
>    struct bitpack_d bp;
>    struct cgraph_edge *e;
> -
> -  count = streamer_read_uhwi (ib_main);
> -  if (count)
> -    node->clone.args_to_skip = BITMAP_GGC_ALLOC ();
> -  for (i = 0; i < count; i++)
> -    {
> -      bit = streamer_read_uhwi (ib_main);
> -      bitmap_set_bit (node->clone.args_to_skip, bit);
> -    }
> -  count = streamer_read_uhwi (ib_main);
> -  if (count)
> -    node->clone.combined_args_to_skip = BITMAP_GGC_ALLOC ();
> -  for (i = 0; i < count; i++)
> -    {
> -      bit = streamer_read_uhwi (ib_main);
> -      bitmap_set_bit (node->clone.combined_args_to_skip, bit);
> -    }
> +  node->clone.args_to_skip = input_param_bitmap (ib_main);
> +  node->clone.combined_args_to_skip = input_param_bitmap (ib_main);
> +  node->global.noescape_parameters = input_param_bitmap (ib_main);
> +  node->global.noclobber_parameters = input_param_bitmap (ib_main);
>    count = streamer_read_uhwi (ib_main);
>    for (i = 0; i < count; i++)
>      {
> Index: src/gcc/tree-inline.c
> ===================================================================
> --- src.orig/gcc/tree-inline.c
> +++ src/gcc/tree-inline.c
> @@ -5248,6 +5248,55 @@ update_clone_info (copy_body_data * id)
>      }
>  }
>
> +/* Update global.noescape_parameters and global.noclobber_parameters of NODE to
> +   reflect parameters about to be skipped as indicated in ARGS_TO_SKIP.
> +   ORIG_PARM is the chain of parameters of the original node.  */
> +
> +void
> +update_noescape_noclobber_bitmaps (cgraph_node *node, tree orig_parm,
> +                                  bitmap args_to_skip)
> +{
> +  if (!args_to_skip || bitmap_empty_p (args_to_skip)
> +      || !orig_parm
> +      || (!node->global.noescape_parameters
> +         && !node->global.noclobber_parameters))
> +    return;
> +
> +  int count = 0;
> +  while (orig_parm)
> +    {
> +      count++;
> +      orig_parm = DECL_CHAIN (orig_parm);
> +    }
> +
> +  bitmap new_noescape = NULL;
> +  bitmap new_noclobber = NULL;
> +
> +  int ni = 0;
> +  for (int i = 0; i < count; i++)
> +    if (!bitmap_bit_p (args_to_skip, i))
> +      {
> +       if (node->global.noescape_parameters
> +           && bitmap_bit_p (node->global.noescape_parameters, i))
> +         {
> +           if (!new_noescape)
> +             new_noescape = BITMAP_GGC_ALLOC ();
> +           bitmap_set_bit (new_noescape, ni);
> +         }
> +       if (node->global.noclobber_parameters
> +           && bitmap_bit_p (node->global.noclobber_parameters, i))
> +         {
> +           if (!new_noclobber)
> +             new_noclobber = BITMAP_GGC_ALLOC ();
> +           bitmap_set_bit (new_noclobber, ni);
> +         }
> +       ni++;
> +      }
> +  node->global.noescape_parameters = new_noescape;
> +  node->global.noclobber_parameters = new_noclobber;
> +}
> +
> +
>  /* Create a copy of a function's tree.
>     OLD_DECL and NEW_DECL are FUNCTION_DECL tree nodes
>     of the original function and the new copied function
> @@ -5405,6 +5454,8 @@ tree_function_versioning (tree old_decl,
>               }
>           }
>        }
> +  update_noescape_noclobber_bitmaps (new_version_node,
> +                                    DECL_ARGUMENTS (old_decl), args_to_skip);
>    /* Copy the function's arguments.  */
>    if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
>      DECL_ARGUMENTS (new_decl) =
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/7] Add missing documentation of four IPA-CP params
  2014-05-21 13:31 ` [PATCH 1/7] Add missing documentation of four IPA-CP params Martin Jambor
@ 2014-05-21 15:58   ` Jeff Law
  2014-06-10 12:13   ` Gerald Pfeifer
  1 sibling, 0 replies; 29+ messages in thread
From: Jeff Law @ 2014-05-21 15:58 UTC (permalink / raw)
  To: Martin Jambor, GCC Patches; +Cc: Jan Hubicka

On 05/21/14 07:16, Martin Jambor wrote:
> 2014-05-16  Martin Jambor<mjambor@suse.cz>
>
> 	* doc/invoke.texi (Optimize Options): Document parameters
> 	ipa-cp-eval-threshold, ipa-max-agg-items, ipa-cp-loop-hint-bonus and
> 	ipa-cp-array-index-hint-bonus.
OK.
jeff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-21 14:27   ` Richard Biener
@ 2014-05-22 12:49     ` Martin Jambor
  2014-05-22 13:34       ` Richard Biener
  0 siblings, 1 reply; 29+ messages in thread
From: Martin Jambor @ 2014-05-22 12:49 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jan Hubicka

Hi,

On Wed, May 21, 2014 at 04:27:32PM +0200, Richard Biener wrote:
> On Wed, May 21, 2014 at 3:16 PM, Martin Jambor <mjambor@suse.cz> wrote:
> > Hi,
> >
> > this demonstrates how results of ipa-prop escape analysis from
> > previous patches can be used at a later stage of compilation by
> > directly returning them from gimple_call_arg_flags which currently
> > relies on fnspec annotations.
> >
> > Bootstrapped and tested on x86_64-linux and also passes LTO bootstrap.
> > I have only had a brief look at behavior of this in SPEC 2006 and for
> > example in astar 1.19% of invocations of gimple_call_arg_flags return
> > noescape where we previously never did and in calculix this increases
> > from 15.62% (from annotations) to 18.14%.  Noclobber flag is reported
> > far less often still but for example in gamess that number raises from
> > 5.21% to 7.66%.
> >
> > Thanks,
> >
> > Martin
> >
> >
> > 2014-04-30  Martin Jambor  <mjambor@suse.cz>
> >
> >         * gimple.c: Include cgraph.h.
> >         (gimple_call_arg_flags): Also query bitmaps in cgraph_node.
> >
> > Index: src/gcc/gimple.c
> > ===================================================================
> > --- src.orig/gcc/gimple.c
> > +++ src/gcc/gimple.c
> > @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.
> >  #include "demangle.h"
> >  #include "langhooks.h"
> >  #include "bitmap.h"
> > -
> > +#include "cgraph.h"
> >
> >  /* All the tuples have their operand vector (if present) at the very bottom
> >     of the structure.  Therefore, the offset required to find the
> > @@ -1349,32 +1349,50 @@ int
> >  gimple_call_arg_flags (const_gimple stmt, unsigned arg)
> >  {
> >    tree attr = gimple_call_fnspec (stmt);
> > +  int ret;
> >
> > -  if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
> > -    return 0;
> > -
> > -  switch (TREE_STRING_POINTER (attr)[1 + arg])
> > +  if (attr && 1 + arg < (unsigned) TREE_STRING_LENGTH (attr))
> >      {
> > -    case 'x':
> > -    case 'X':
> > -      return EAF_UNUSED;
> > -
> > -    case 'R':
> > -      return EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
> > -
> > -    case 'r':
> > -      return EAF_NOCLOBBER | EAF_NOESCAPE;
> > -
> > -    case 'W':
> > -      return EAF_DIRECT | EAF_NOESCAPE;
> > -
> > -    case 'w':
> > -      return EAF_NOESCAPE;
> > +      switch (TREE_STRING_POINTER (attr)[1 + arg])
> > +       {
> > +       case 'x':
> > +       case 'X':
> > +         ret = EAF_UNUSED;
> > +         break;
> > +       case 'R':
> > +         ret = EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
> > +         break;
> > +       case 'r':
> > +         ret = EAF_NOCLOBBER | EAF_NOESCAPE;
> > +         break;
> > +       case 'W':
> > +         ret = EAF_DIRECT | EAF_NOESCAPE;
> > +         break;
> > +       case 'w':
> > +         ret = EAF_NOESCAPE;
> > +         break;
> > +       case '.':
> > +       default:
> > +         ret = 0;
> > +       }
> > +    }
> > +  else
> > +    ret = 0;
> >
> > -    case '.':
> > -    default:
> > -      return 0;
> > +  tree callee_decl = gimple_call_fndecl (stmt);
> > +  if (callee_decl)
> > +    {
> > +      cgraph_node *callee_node = cgraph_get_node (callee_decl);
> > +      if (callee_node)
> > +       {
> > +         if (cgraph_param_noescape_p (callee_node, arg))
> > +           ret |= EAF_NOESCAPE;
> > +         if (cgraph_param_noclobber_p (callee_node, arg))
> > +           ret |= EAF_NOCLOBBER;
> 
> That's quite expensive.  I guess we need a better way to store
> those?

if we want to avoid the cgraph_node lookup, then I think we need to
store this information in the decl or struct function.  That is
certainly possible and might even be more appropriate.

Thanks,

Martin

> 
> > +       }
> >      }
> > +
> > +  return ret;
> >  }
> >
> >  /* Detects return flags for the call STMT.  */
> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-22 12:49     ` Martin Jambor
@ 2014-05-22 13:34       ` Richard Biener
  2014-05-22 15:24         ` Jan Hubicka
  2014-05-26  1:01         ` Jan Hubicka
  0 siblings, 2 replies; 29+ messages in thread
From: Richard Biener @ 2014-05-22 13:34 UTC (permalink / raw)
  To: Richard Biener, GCC Patches, Jan Hubicka

On Thu, May 22, 2014 at 2:49 PM, Martin Jambor <mjambor@suse.cz> wrote:
> Hi,
>
> On Wed, May 21, 2014 at 04:27:32PM +0200, Richard Biener wrote:
>> On Wed, May 21, 2014 at 3:16 PM, Martin Jambor <mjambor@suse.cz> wrote:
>> > Hi,
>> >
>> > this demonstrates how results of ipa-prop escape analysis from
>> > previous patches can be used at a later stage of compilation by
>> > directly returning them from gimple_call_arg_flags which currently
>> > relies on fnspec annotations.
>> >
>> > Bootstrapped and tested on x86_64-linux and also passes LTO bootstrap.
>> > I have only had a brief look at behavior of this in SPEC 2006 and for
>> > example in astar 1.19% of invocations of gimple_call_arg_flags return
>> > noescape where we previously never did and in calculix this increases
>> > from 15.62% (from annotations) to 18.14%.  Noclobber flag is reported
>> > far less often still but for example in gamess that number raises from
>> > 5.21% to 7.66%.
>> >
>> > Thanks,
>> >
>> > Martin
>> >
>> >
>> > 2014-04-30  Martin Jambor  <mjambor@suse.cz>
>> >
>> >         * gimple.c: Include cgraph.h.
>> >         (gimple_call_arg_flags): Also query bitmaps in cgraph_node.
>> >
>> > Index: src/gcc/gimple.c
>> > ===================================================================
>> > --- src.orig/gcc/gimple.c
>> > +++ src/gcc/gimple.c
>> > @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.
>> >  #include "demangle.h"
>> >  #include "langhooks.h"
>> >  #include "bitmap.h"
>> > -
>> > +#include "cgraph.h"
>> >
>> >  /* All the tuples have their operand vector (if present) at the very bottom
>> >     of the structure.  Therefore, the offset required to find the
>> > @@ -1349,32 +1349,50 @@ int
>> >  gimple_call_arg_flags (const_gimple stmt, unsigned arg)
>> >  {
>> >    tree attr = gimple_call_fnspec (stmt);
>> > +  int ret;
>> >
>> > -  if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
>> > -    return 0;
>> > -
>> > -  switch (TREE_STRING_POINTER (attr)[1 + arg])
>> > +  if (attr && 1 + arg < (unsigned) TREE_STRING_LENGTH (attr))
>> >      {
>> > -    case 'x':
>> > -    case 'X':
>> > -      return EAF_UNUSED;
>> > -
>> > -    case 'R':
>> > -      return EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
>> > -
>> > -    case 'r':
>> > -      return EAF_NOCLOBBER | EAF_NOESCAPE;
>> > -
>> > -    case 'W':
>> > -      return EAF_DIRECT | EAF_NOESCAPE;
>> > -
>> > -    case 'w':
>> > -      return EAF_NOESCAPE;
>> > +      switch (TREE_STRING_POINTER (attr)[1 + arg])
>> > +       {
>> > +       case 'x':
>> > +       case 'X':
>> > +         ret = EAF_UNUSED;
>> > +         break;
>> > +       case 'R':
>> > +         ret = EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
>> > +         break;
>> > +       case 'r':
>> > +         ret = EAF_NOCLOBBER | EAF_NOESCAPE;
>> > +         break;
>> > +       case 'W':
>> > +         ret = EAF_DIRECT | EAF_NOESCAPE;
>> > +         break;
>> > +       case 'w':
>> > +         ret = EAF_NOESCAPE;
>> > +         break;
>> > +       case '.':
>> > +       default:
>> > +         ret = 0;
>> > +       }
>> > +    }
>> > +  else
>> > +    ret = 0;
>> >
>> > -    case '.':
>> > -    default:
>> > -      return 0;
>> > +  tree callee_decl = gimple_call_fndecl (stmt);
>> > +  if (callee_decl)
>> > +    {
>> > +      cgraph_node *callee_node = cgraph_get_node (callee_decl);
>> > +      if (callee_node)
>> > +       {
>> > +         if (cgraph_param_noescape_p (callee_node, arg))
>> > +           ret |= EAF_NOESCAPE;
>> > +         if (cgraph_param_noclobber_p (callee_node, arg))
>> > +           ret |= EAF_NOCLOBBER;
>>
>> That's quite expensive.  I guess we need a better way to store
>> those?
>
> if we want to avoid the cgraph_node lookup, then I think we need to
> store this information in the decl or struct function.  That is
> certainly possible and might even be more appropriate.

Can we?  If the body is not readily available we only have decl and
cgraph-node, not struct function.

I suppose we could exchange the struct function pointer in
tree_function_decl for a cgraph_node pointer and put
the struct function pointer into the cgraph_node.

Of course that may have impacts on FEs who might create
struct function before creating a cgraph node.  But at least
it would avoid enlarging FUNCTION_DECL.

In the end most of the tree_decl_with_vis stuff should move over to symtab
and var-decls should get a varpool_node pointer as well.

Back to the call flags stuff - I also meant the representation of the
"fn spec" attribute.  Rather than parsing that again and again move
it to a better place (which you seem to invent?) and better unified
representation.

Can you try if removing the cgraph hash is possible with the
struct function pointer idea?

Thanks,
Richard.

> Thanks,
>
> Martin
>
>>
>> > +       }
>> >      }
>> > +
>> > +  return ret;
>> >  }
>> >
>> >  /* Detects return flags for the call STMT.  */
>> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-22 13:34       ` Richard Biener
@ 2014-05-22 15:24         ` Jan Hubicka
  2014-05-22 15:36           ` Richard Biener
  2014-05-26  1:01         ` Jan Hubicka
  1 sibling, 1 reply; 29+ messages in thread
From: Jan Hubicka @ 2014-05-22 15:24 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jan Hubicka

> 
> Can we?  If the body is not readily available we only have decl and
> cgraph-node, not struct function.
> 
> I suppose we could exchange the struct function pointer in
> tree_function_decl for a cgraph_node pointer and put
> the struct function pointer into the cgraph_node.
> 
> Of course that may have impacts on FEs who might create
> struct function before creating a cgraph node.  But at least
> it would avoid enlarging FUNCTION_DECL.
> 
> In the end most of the tree_decl_with_vis stuff should move over to symtab
> and var-decls should get a varpool_node pointer as well.
> 
> Back to the call flags stuff - I also meant the representation of the
> "fn spec" attribute.  Rather than parsing that again and again move
> it to a better place (which you seem to invent?) and better unified
> representation.
> 
> Can you try if removing the cgraph hash is possible with the
> struct function pointer idea?

It won't be so easy, because struct function is really built at relatively convoluted
places within the frontend before a cgraph node is assigned to them (I tried that a few
years back).
I think we may be on better track moving DECL_ASSEMBLER_NAME that is calculated later,
but then we have problem with DECL_ASSEMBLER_NAME being set for assembler names and
const decls, too that still go around symtab.
Given that decl_assembler_name is a function, I suppose we could go with an extra conditional
in there.

Getting struct function out of frontend busyness would be nice indeed, too, but probably
should be independent of Martin's work here.

Honza
> 
> Thanks,
> Richard.
> 
> > Thanks,
> >
> > Martin
> >
> >>
> >> > +       }
> >> >      }
> >> > +
> >> > +  return ret;
> >> >  }
> >> >
> >> >  /* Detects return flags for the call STMT.  */
> >> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-22 15:24         ` Jan Hubicka
@ 2014-05-22 15:36           ` Richard Biener
  2014-05-22 18:11             ` Jan Hubicka
  0 siblings, 1 reply; 29+ messages in thread
From: Richard Biener @ 2014-05-22 15:36 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: GCC Patches

On May 22, 2014 5:24:33 PM CEST, Jan Hubicka <hubicka@ucw.cz> wrote:
>> 
>> Can we?  If the body is not readily available we only have decl and
>> cgraph-node, not struct function.
>> 
>> I suppose we could exchange the struct function pointer in
>> tree_function_decl for a cgraph_node pointer and put
>> the struct function pointer into the cgraph_node.
>> 
>> Of course that may have impacts on FEs who might create
>> struct function before creating a cgraph node.  But at least
>> it would avoid enlarging FUNCTION_DECL.
>> 
>> In the end most of the tree_decl_with_vis stuff should move over to
>symtab
>> and var-decls should get a varpool_node pointer as well.
>> 
>> Back to the call flags stuff - I also meant the representation of the
>> "fn spec" attribute.  Rather than parsing that again and again move
>> it to a better place (which you seem to invent?) and better unified
>> representation.
>> 
>> Can you try if removing the cgraph hash is possible with the
>> struct function pointer idea?
>
>It won't be so easy, because struct function is really built at
>relatively convoluted
>places within frontend before cgraph node is assigned to them (I tried
>that few years
>back).

Well, just call cgraph create node from struct function allocation.

Richard.

>I think we may be on better track moving DECL_ASSEMBLER_NAME that is
>calculated later,
>but then we have problem with DECL_ASSEMBLER_NAME being set for
>assembler names and
>const decls, too that still go around symtab.
>Given that decl_assembler_name is a function, I suppose we could go
>with extra conditoinal
>in there.
>
>Getting struct function out of frontend busyness would be nice indeed,
>too, but probably
>should be independent of Martin's work here.
>
>Honza
>> 
>> Thanks,
>> Richard.
>> 
>> > Thanks,
>> >
>> > Martin
>> >
>> >>
>> >> > +       }
>> >> >      }
>> >> > +
>> >> > +  return ret;
>> >> >  }
>> >> >
>> >> >  /* Detects return flags for the call STMT.  */
>> >> >


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-22 15:36           ` Richard Biener
@ 2014-05-22 18:11             ` Jan Hubicka
  2014-05-23 10:03               ` Richard Biener
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Hubicka @ 2014-05-22 18:11 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jan Hubicka, GCC Patches

> >It won't be so easy, because struct function is really built at
> >relatively convoluted
> >places within frontend before cgraph node is assigned to them (I tried
> >that few years
> >back).
> 
> Well, just call cgraph create node from struct Funktion allocation.

That will make uninstantiated templates land in the symbol table (and if you have
aliases, also do the assembler name mangling), which is not that cool either :(

Honza
> 
> Richard.
> 
> >I think we may be on better track moving DECL_ASSEMBLER_NAME that is
> >calculated later,
> >but then we have problem with DECL_ASSEMBLER_NAME being set for
> >assembler names and
> >const decls, too that still go around symtab.
> >Given that decl_assembler_name is a function, I suppose we could go
> >with extra conditoinal
> >in there.
> >
> >Getting struct function out of frontend busyness would be nice indeed,
> >too, but probably
> >should be independent of Martin's work here.
> >
> >Honza
> >> 
> >> Thanks,
> >> Richard.
> >> 
> >> > Thanks,
> >> >
> >> > Martin
> >> >
> >> >>
> >> >> > +       }
> >> >> >      }
> >> >> > +
> >> >> > +  return ret;
> >> >> >  }
> >> >> >
> >> >> >  /* Detects return flags for the call STMT.  */
> >> >> >
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-22 18:11             ` Jan Hubicka
@ 2014-05-23 10:03               ` Richard Biener
  2014-05-23 22:29                 ` Jan Hubicka
  2014-05-24  7:39                 ` Jan Hubicka
  0 siblings, 2 replies; 29+ messages in thread
From: Richard Biener @ 2014-05-23 10:03 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: GCC Patches

On Thu, May 22, 2014 at 8:11 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> >It won't be so easy, because struct function is really built at
>> >relatively convoluted
>> >places within frontend before cgraph node is assigned to them (I tried
>> >that few years
>> >back).
>>
>> Well, just call cgraph create node from struct Funktion allocation.
>
> That will make uninstantiated templates to land symbol table (and if you have
> aliases, also do the assembler name mangling) that is not that cool either :(

Well, allocate_struct_function has a abstract_p argument for that.  But
yes, a simple patch like

Index: gcc/function.c
===================================================================
--- gcc/function.c      (revision 210845)
+++ gcc/function.c      (working copy)
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.
 #include "params.h"
 #include "bb-reorder.h"
 #include "shrink-wrap.h"
+#include "cgraph.h"

 /* So we can assign to cfun in this file.  */
 #undef cfun
@@ -4512,6 +4513,8 @@ allocate_struct_function (tree fndecl, b

   if (fndecl != NULL_TREE)
     {
+      if (!abstract_p)
+       cgraph_get_create_node (fndecl);
       DECL_STRUCT_FUNCTION (fndecl) = cfun;
       cfun->decl = fndecl;
       current_function_funcdef_no = get_next_funcdef_no ();

ICEs during bootstrap with (at least)

/space/rguenther/src/svn/trunk/libgcc/config/i386/cpuinfo.c:405:1:
error: node differs from symtab decl hashtable
 }
 ^
__get_cpuid_max.constprop.0/42 (__get_cpuid_max.constprop) @0x7ff486232290
  Type: function definition analyzed
  Visibility: artificial
  previous sharing asm name: 43
  References:
  Referring:
  Function __get_cpuid_max.constprop/42 is inline copy in __get_cpuid_output/40
  Availability: local
  First run: 0
  Function flags: local only_called_at_startup
  Called by: __get_cpuid_output/40 (1.00 per call) (inlined)
  Calls:
/space/rguenther/src/svn/trunk/libgcc/config/i386/cpuinfo.c:405:1:
internal compiler error: verify_cgraph_node failed

so I guess we would need to have a way to create a "dummy" cgraph
node first and later populate it properly.

But as we currently have a back-pointer from struct function to fndecl
it would be nice to hook the cgraph node in there - that way we get
away without any extra pointer (we could even save symtab decl
pointer and create a cyclic fndecl -> cgraph -> function -> fndecl
chain ...).

I'm fine with enlarging tree_function_decl for now - ideally we'd push
stuff from it elsewhere (like target and optimization option tree nodes,
or most of the visibility and symbol related stuff).  Not sure why
tree_type_decl inherits from tree_decl_non_common (and thus
tree_decl_with_vis).  Probably because of the non-common parts
being (ab-)used by FEs.  Otherwise I'd say simply put a symtab
node pointer into tree_decl_with_vis ... (can we move
section_name and comdat_group more easily than assembler_name?)

Richard.

> Honza
>>
>> Richard.
>>
>> >I think we may be on better track moving DECL_ASSEMBLER_NAME that is
>> >calculated later,
>> >but then we have problem with DECL_ASSEMBLER_NAME being set for
>> >assembler names and
>> >const decls, too that still go around symtab.
>> >Given that decl_assembler_name is a function, I suppose we could go
>> >with extra conditoinal
>> >in there.
>> >
>> >Getting struct function out of frontend busyness would be nice indeed,
>> >too, but probably
>> >should be independent of Martin's work here.
>> >
>> >Honza
>> >>
>> >> Thanks,
>> >> Richard.
>> >>
>> >> > Thanks,
>> >> >
>> >> > Martin
>> >> >
>> >> >>
>> >> >> > +       }
>> >> >> >      }
>> >> >> > +
>> >> >> > +  return ret;
>> >> >> >  }
>> >> >> >
>> >> >> >  /* Detects return flags for the call STMT.  */
>> >> >> >
>>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] IPA-CP escape and clobber analysis
  2014-05-21 14:51   ` Richard Biener
@ 2014-05-23 14:50     ` Martin Jambor
  0 siblings, 0 replies; 29+ messages in thread
From: Martin Jambor @ 2014-05-23 14:50 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jan Hubicka

Hi,

sorry, I should have added a better description of the overall
algorithm, I will try to do that now, I hope I will at least clarify
what stage does what.

At summary generation time, we process one function after another,
looking at their bodies.  There are three new things in the generated
summaries:

1) How do actual arguments of calls relate to the formal parameters
   and how they relate to each other.  If an argument in one call
   refers to the same object as an argument in another call or as a
   formal parameter, we want to know they are the same memory object.
   This is captured in jump functions; jump functions which did not
   have one get an integer index.

2) Which pointer formal parameters escape in this function and which
   pointer actual arguments of all calls escape in this function.
   This is stored as a bit per each formal parameter and each other
   (locally unescaped) memory object passed as an actual argument in
   one or more calls.
   
3) Whether we modify the memory pointed to by the formal and actual
   arguments in this function.  Also just a bit per memory object.

When doing the local analysis, we allocate an ipa_escape structure for
each SSA name and each addressable local declaration and after
examining SSA definitions, store how they relate to each other (ie
which point to the same thing and to what offsets which is useful for
subsequent patches).  We also look at uses of SSA names to see what
objects escape locally.  

These structures are also used when building jump functions.  When we
know that passed data escapes locally, we mark that directly in the
jump function.  If it does not, we store an index into the jump
function which identifies the memory object - it is an index into a
vector of ipa_ref_descriptor structures, which is allocated to have
one element for each formal parameter - locally escaped and
non-pointer ones are marked as escaped - and one element for every
other locally unescaped memory object which is passed to a called
function.  The ipa_escapes and associated data are then deallocated
and we move on to another function.

During WPA, we basically propagate escape and clobber flags across the
call graph.  Escape flags are propagated more or less in both
directions, it is perhaps best described by figure 4 of
http://sysrun.haifa.il.ibm.com/hrl/greps2007/papers/ipa-agg-no_copyright.pdf
(I called them unusable flags in that paper some seven years ago)
Modified flags are propagated only from callees to callers, of course.


On Wed, May 21, 2014 at 04:50:33PM +0200, Richard Biener wrote:
> On Wed, May 21, 2014 at 3:16 PM, Martin Jambor <mjambor@suse.cz> wrote:
> > Hi,
> >
> > this patch is rather big but not overly complicated.  Its goal is to
> > figure out whether data passed to a function by reference escapes
> > (somewhere, not necessarily in that particular function) and is
> > potentially clobbered (in that one function or its callees).
> >
> > The result is stored into call graph node global structure, at least
> > for now, because it is supposed to live longer than IPA-CP
> > optimization info and be available for PTA later in the pipeline.
> > Before that, however, quite a lot of intermediate results are stored
> > in a number of places.  First of all, there is a vector describing all
> > SSA names and address taken local aggregates which is used to figure
> > out relations between them and do the local escape and clobber
> > analysis (I am aware that a local aggregate might incorrectly pass as
> > non-clobbered, that is fixed by the fifth patch, this one is big
> > enough as it is and it does not really matter here).
> >
> > We then store the local results describing formal parameters and
> > so-far-presumed-unescaped aggregates and malloced data that is passed
> > as actual arguments to other functions into a new vector ref_descs.  I
> > did not store this into the existing descriptors vector because there
> > are often more elements.  Also, I had to extend the UNKNOWN,
> > KNOWN_TYPE and CONSTANT jump functions with an index into this new
> > vector (PASS_THROUGH and ANCESTOR reuse the index into parameters), so
> > there is quite a lot of new getter and setter methods.
> >
> > This information is used by simple queue based interprocedural
> > propagation.  Eventually, the information is stored into the call
> > graph node, as described above.  After propagation, data in ref_descs
> > and in the call graph are the same, only the call graph can live much
> > longer.  One set of flags that is not copied to call graph nodes are
> > callee_clobbered flags, which only IPA-CP uses it in a subsequent
> > patch (and which would require maintenance during inlining).
> >
> > There are more uses of the flags introduced by subsequent patches.  In
> > this one, the only one is that IPA-CP modification phase is able to
> > use the results instead of querying AA and is capable of doing more
> > replacements of aggregate values when the aggregate is unescaped and
> > not clobbered.
> >
> > The following table summarizes what the pass can discover now.  All
> > compilations are with -Ofast -flto.  (I should have counted only
> > pointer typed parameters but well, that thought occurred to me too
> > late.  All non-pointer ones are automatically considered clobbered.)
> > Please note that in Fortran benchmarks, this information is often
> > already available through fnspec flags.  But we can discover a few
> > more (see the last patch for some more information).
> >
> >  |                    |        |          |       |           |       |    Callee |       |
> >  | Test               | Params | Noescape |     % | Noclobber |     % | noclobber |     % |
> >  |                    |        |          |       |           |       |           |       |
> >  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
> >  | FF libxul.so       | 462725 |    10422 |  2.25 |      4954 |  1.07 |      8872 |  1.92 |
> >  | Tramp 3D           |   6344 |     1019 | 16.06 |       985 | 15.53 |      1005 | 15.84 |
> >  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
> >  | perlbench          |   2550 |       87 |  3.41 |        10 |  0.39 |        61 |  2.39 |
> >  | bzip               |    194 |       28 | 14.43 |         1 |  0.52 |        13 |  6.70 |
> >  | gcc                |  10725 |      179 |  1.67 |        18 |  0.17 |       147 |  1.37 |
> >  | mcf                |     57 |        4 |  7.02 |         0 |  0.00 |         4 |  7.02 |
> >  | gobmk              |   8873 |      132 |  1.49 |         3 |  0.03 |        85 |  0.96 |
> >  | hmmer              |    643 |       71 | 11.04 |         8 |  1.24 |        64 |  9.95 |
> >  | sjeng              |    161 |        5 |  3.11 |         0 |  0.00 |         5 |  3.11 |
> >  | libquantum         |    187 |       48 | 25.67 |         6 |  3.21 |        14 |  7.49 |
> >  | h264ref            |   1092 |       48 |  4.40 |         4 |  0.37 |        47 |  4.30 |
> >  | astar              |    217 |       28 | 12.90 |         3 |  1.38 |        15 |  6.91 |
> >  | xalancbmk          |  28861 |      737 |  2.55 |       536 |  1.86 |       712 |  2.47 |
> >  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
> >  | bwaves             |     74 |       35 | 47.30 |        25 | 33.78 |        35 | 47.30 |
> >  | gamess             |  26059 |     3693 | 14.17 |      2796 | 10.73 |      3572 | 13.71 |
> >  | milc               |    429 |       22 |  5.13 |        11 |  2.56 |        22 |  5.13 |
> >  | zeusmp             |    284 |       31 | 10.92 |         2 |  0.70 |        31 | 10.92 |
> >  | gromacs            |   5514 |      230 |  4.17 |        54 |  0.98 |       202 |  3.66 |
> >  | cactusADM          |   2354 |       49 |  2.08 |        13 |  0.55 |        44 |  1.87 |
> >  | leslie3d           |     18 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
> >  | namd               |    163 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
> >  | soplex             |   2341 |       80 |  3.42 |        10 |  0.43 |        55 |  2.35 |
> >  | povray             |   4046 |      244 |  6.03 |        51 |  1.26 |       201 |  4.97 |
> >  | calculix           |   6260 |     1109 | 17.72 |       672 | 10.73 |       933 | 14.90 |
> >  | GemsFDTD           |    289 |       41 | 14.19 |        27 |  9.34 |        32 | 11.07 |
> >  | tonto              |   7255 |     1361 | 18.76 |      1178 | 16.24 |      1329 | 18.32 |
> >  | lbm                |     27 |        4 | 14.81 |         3 | 11.11 |         4 | 14.81 |
> >  | wrf                |  14212 |     4375 | 30.78 |      3358 | 23.63 |      4120 | 28.99 |
> >  | sphinx3            |    770 |       16 |  2.08 |         1 |  0.13 |        15 |  1.95 |
> >  |--------------------+--------+----------+-------+-----------+-------+-----------+-------+
> >  | ac.f90             |     21 |       14 | 66.67 |         7 | 33.33 |        14 | 66.67 |
> >  | aermod.f90         |    600 |      134 | 22.33 |        59 |  9.83 |       124 | 20.67 |
> >  | air.f90            |     85 |       41 | 48.24 |        14 | 16.47 |        41 | 48.24 |
> >  | capacita.f90       |     42 |       18 | 42.86 |        16 | 38.10 |        18 | 42.86 |
> >  | channel2.f90       |     12 |        4 | 33.33 |         4 | 33.33 |         4 | 33.33 |
> >  | doduc.f90          |    132 |       68 | 51.52 |        39 | 29.55 |        68 | 51.52 |
> >  | fatigue2.f90       |     65 |       43 | 66.15 |        20 | 30.77 |        43 | 66.15 |
> >  | gas_dyn2.f90       |     97 |       22 | 22.68 |         6 |  6.19 |        21 | 21.65 |
> >  | induct2.f90        |    121 |       41 | 33.88 |        24 | 19.83 |        41 | 33.88 |
> >  | linpk.f90          |     42 |       10 | 23.81 |         7 | 16.67 |        10 | 23.81 |
> >  | mdbx.f90           |     51 |       26 | 50.98 |         9 | 17.65 |        26 | 50.98 |
> >  | mp_prop_design.f90 |      2 |        0 |  0.00 |         0 |  0.00 |         0 |  0.00 |
> >  | nf.f90             |     41 |        8 | 19.51 |         8 | 19.51 |         8 | 19.51 |
> >  | protein.f90        |    116 |       40 | 34.48 |        25 | 21.55 |        35 | 30.17 |
> >  | rnflow.f90         |    212 |       54 | 25.47 |        37 | 17.45 |        51 | 24.06 |
> >  | test_fpu2.f90      |    160 |       22 | 13.75 |        14 |  8.75 |        18 | 11.25 |
> >  | tfft2.f90          |      7 |        3 | 42.86 |         0 |  0.00 |         3 | 42.86 |
> >
> > I hope to improve the results for example by propagating malloc
> > attribute to callers.
> >
> > I have bootstrapped and tested this on x86_64, additionally I also
> > checked it passes an LTO-bootstrap and LTO-built Firefox.  I assume
> > there will be many comments but after I address them, I'd like to
> > commit this to trunk.
> >
> > Thanks,
> >
> > Martin
> >
> >
> > 2014-04-30  Martin Jambor  <mjambor@suse.cz>
> >
> >         * cgraph.h (cgraph_global_info): New fields noescape_parameters
> >         and noclobber_parameters.
> >         (cgraph_param_noescape_p): Declare.
> >         (cgraph_set_param_noescape): Likewise.
> >         (cgraph_param_noclobber_p): Likewise.
> >         (cgraph_set_param_noclobber): Likewise.
> >         * ipa-prop.h (ipa_unknown_data): New type.
> >         (ipa_known_type_data): New fields escape_ref_valid and
> >         escape_ref_index.
> >         (ipa_constant_data): Likewise.
> >         (jump_func_value): New field unknown.
> >         (ipa_get_jf_unknown_esc_ref_valid): New function.
> >         (ipa_get_jf_unknown_esc_ref_index): Likewise.
> >         (ipa_get_jf_known_type_esc_ref_valid): Likewise.
> >         (ipa_get_jf_known_type_esc_ref_index): Likewise.
> >         (ipa_get_jf_constant_esc_ref_valid): Likewise.
> >         (ipa_get_jf_constant_esc_ref_index): Likewise.
> >         (ipa_ref_descriptor): New type.
> >         (ipa_node_params): New fields ref_descs and node_up_enqueued.
> >         (ipa_is_ref_escaped): New function.
> >         (ipa_is_ref_clobbered): Likewise.
> >         (ipa_is_ref_callee_clobbered): Likewise.
> >         (ipa_is_param_ref_safely_constant): Likewise.
> >         (ipa_spread_escapes): Declare.
> >         * ipa-prop.c: Include stringpool.h, tree-ssaname.h and pointer-set.h.
> >         (ipa_escape): New type.
> >         (valid_escape_result_index): New function.
> >         (func_body_info): New fields func, escapes and decl_escapes.
> >         (ipa_print_node_jump_functions_for_edge): Dump new fields.
> >         (ipa_set_jf_unknown): New function.  Use it instead of directly
> >         setting a jump functions type elsewhere.
> >         (ipa_set_jf_unknown_copy): New function.
> >         (ipa_set_jf_unknown_ref_index): Likewise.
> >         (ipa_set_jf_known_type_copy): Likewise.
> >         (ipa_set_jf_known_type): Initialize new fields.
> >         (ipa_set_jf_known_type_ref_index): New function.
> >         (ipa_set_jf_constant): Initialize new fields.
> >         (ipa_set_jf_constant_ref_index): New function.
> >         (ipa_get_tracked_refs_count): Likewise.
> >         (ipa_set_ref_clobbered): Likewise.
> >         (ipa_get_tracked_refs_count): Likewise.
> >         (ipa_set_ref_escaped): Likewise.
> >         (ipa_set_ref_clobbered): Likewise.
> >         (ipa_set_ref_callee_clobbered): Likewise.
> >         (ipa_load_from_parm_agg_1): Use const_ref parameter flag.
> >         (get_escape_for_ref): New function.
> >         (get_escape_for_value): Likewise.
> >         (ipa_compute_jump_functions_for_edge): Add reference info to jump
> >         functions.  Wrapped comments to 80 columns, added a checking assert
> >         all jump functions start with no information.
> >         (visit_ref_for_mod_analysis): Renamed to visit_ref_mark_it_used.
> >         Simplified comment.
> >         (ipa_analyze_params_uses_in_bb): Renamed to ipa_analyze_bb_statements.
> >         Simplified comment.
> >         (analyze_phi_escapes): New function.
> >         (analyze_ssa_escape): Likewise.
> >         (analyze_all_ssa_escapes): Likewise.
> >         (create_escape_structures): Likewise.
> >         (free_escape_structures): Likewise.
> >         (pick_escapes_from_call): Likewise.
> >         (gather_picked_escapes): Likewise.
> >         (ipa_analyze_node): Initialize and deinitialize new fbi fields and
> >         escape structures, call create_escape_structures,
> >         analyze_all_ssa_escapes and pick_escapes_from_call, assign ref indices
> >         to formal parameters.
> >         (escape_spreading_data): New type.
> >         (enque_to_propagate_escapes_up): New function.
> >         (enque_to_propagate_escapes_down): Likewise.
> >         (escape_origin_from_jfunc): Likewise.
> >         (spread_escapes_up_from_one_alias): Likewise.
> >         (spread_escapes_up): Likewise.
> >         (spread_escapes_down): Likewise.
> >         (ipa_spread_escapes): Likewise.
> >         (make_unknown_jf_from_known_type_jf): Likewise.
> >         (combine_known_type_and_ancestor_jfs): Also update ref index fields.
> >         Switch arguments for consistency, changed the one caller.
> >         (update_jump_functions_after_inlining): Also update ref index fields,
> >         make use of unescaped info.
> >         (update_indirect_edges_after_inlining): Make use of unescaped info.
> >         (ipa_free_node_params_substructures): Free also ref_desc vector.
> >         (ipa_node_duplication_hook): Also copy reference descriptor vector and
> >         const_refs.
> >         (ipa_print_node_params): Also print reference flags.
> >         (ipa_write_jump_function): Stream new fields.
> >         (ipa_read_jump_function): Likewise.
> >         (ipa_write_node_info): Stream reference description.
> >         (ipa_read_node_info): Likewise, also clear new flag node_up_enqueued.
> >         (read_agg_replacement_chain): Whitespace fix.
> >         (adjust_agg_replacement_values): Also assign const_refs in descriptors
> >         from those in tranformation data.
> >         (ipcp_transform_function): Initialize new fields of fbi.
> >         * ipa-cp.c (agg_pass_through_permissible_p): Make use of the new
> >         escape information.  Accept caller_infom as a parameter, updated all
> >         callers.
> >         (propagate_aggs_accross_jump_function): Make use of the new escape
> >         information.
> >         (intersect_aggregates_with_edge): Bail out early if a pass_through
> >         jump function does not allow passing aggregates.  Make use of the new
> >         escape information.  Allow NULL values in aggregate jump functions.
> >         (ipcp_driver): Call spread_escapes.
> >         * ipa-inline.c (ipa_inline): Call spread_escapes if necessary.
> >         * cgraph.c (cgraph_param_noescape_p): New function.
> >         (cgraph_set_param_noescape): Likewise.
> >         (cgraph_param_noclobber_p): Likewise.
> >         (cgraph_set_param_noclobber): Likewise.
> >         * cgraphclones.c (duplicate_thunk_for_node): Assert that noclone and
> >         noescape bitmaps are NULL.
> >         (copy_noescape_noclobber_bitmaps): New function.
> >         (cgraph_clone_node): Copy noescpae and noclobber bitmaps.
> >         (cgraph_copy_node_for_versioning): Likewise.
> >         * lto-cgraph.c (output_param_bitmap): Likewise.
> >         (output_node_opt_summary): Use it to stream args_to_skip,
> >         combined_args_to_skip, noescape_parameters and noclobber_parameters
> >         bitmaps.
> >         (input_param_bitmap): New function.
> >         (input_node_opt_summary): Use it to stream args_to_skip,
> >         combined_args_to_skip, noescape_parameters and noclobber_parameters
> >         bitmaps.
> >         * tree-inline.c (update_noescape_noclobber_bitmaps): New function.
> >         (tree_function_versioning): Call it.
> >
> > testsuite/
> >         * gcc.dg/ipa/ipcp-agg-10.c: New test.
> >
> > Index: src/gcc/ipa-prop.c
> > ===================================================================
> > --- src.orig/gcc/ipa-prop.c
> > +++ src/gcc/ipa-prop.c
> > @@ -43,6 +43,8 @@ along with GCC; see the file COPYING3.
> >  #include "gimple-ssa.h"
> >  #include "tree-cfg.h"
> >  #include "tree-phinodes.h"
> > +#include "stringpool.h"
> > +#include "tree-ssanames.h"
> >  #include "ssa-iterators.h"
> >  #include "tree-into-ssa.h"
> >  #include "tree-dfa.h"
> > @@ -60,6 +62,7 @@ along with GCC; see the file COPYING3.
> >  #include "stringpool.h"
> >  #include "tree-ssanames.h"
> >  #include "domwalk.h"
> > +#include "pointer-set.h"
> >
> >  /* Intermediate information that we get from alias analysis about a particular
> >     parameter in a particular basic_block.  When a parameter or the memory it
> > @@ -91,11 +94,64 @@ struct ipa_bb_info
> >    vec<param_aa_status> param_aa_statuses;
> >  };
> >
> > +/* Structure used for intra-procedural escape analysis (and associated
> > +   memory-write detection).  When analyzing function body, we have one for each
> > +   SSA name and for all address-taken local declarations.  */
> 
> And for all functions at the same time?  It asks to space optimize
> this ...

No, after analyzing one function, the (so far overly optimistic)
results are copied to a vector of ipa_ref_descriptor structures which is
kept for all functions, and everything accessible only from function_body_info
is deallocated.  I try to be very careful with data I keep across the
whole analysis.  I was not so strict with the data that live only
while looking at one function, allowed for some convenience and it
certainly might be compacted a bit.

> 
> > +struct ipa_escape
> > +{
> > +  /* If target is non-NULL, this is the offset relative to the reference
> > +     described by target.  */
> > +  HOST_WIDE_INT offset;
> > +
> > +  /* If this describes (a part of) data described by other ipa_escape
> > +     structure, target is non-NULL.  In that case, that structure should be
> > +     used instead of this one and unless explicitely noted, other fields are
> > +     meaningless.  */
> > +  struct ipa_escape *target;
> > +
> > +  /* The last seen edge that had a reference to this data among its parameters.
> > +     Used to make sure we do not pass the same data in two different
> > +     arguments.  */
> > +  struct cgraph_edge *last_seen_cs;
> > +
> > +  /* Index of the bool slot where the analyzed flag is going to end up plus
> > +     one.  Zero means this structure will remain unused.  */
> > +  int result_index;
> > +
> > +  /* True if we have already dealt with this SSA name.  Valid even if target is
> > +     non-NULL.  */
> > +  bool analyzed;
> > +
> > +  /* Could the address of the data have escaped?  */
> > +  bool escaped;
> > +
> > +  /* Flag set when an SSA name has been used as a base for a memory write.
> > +     Only valid when the SSA name is not considered escaped, otherwise it might
> > +     be incorrectly clear.  */
> > +  bool write_base;
> > +};
> > +
> > +/* If ESC has a valid (i.e. non-zero) result_index, return true and store the
> > +   directly usable (i.e. decremented) index to *INDEX.  */
> > +
> > +static inline bool
> > +valid_escape_result_index (struct ipa_escape *esc, int *index)
> > +{
> > +  if (esc->result_index == 0)
> > +    return false;
> > +  *index = esc->result_index - 1;
> > +  return true;
> > +}
> > +
> >  /* Structure with global information that is only used when looking at function
> >     body. */
> >
> >  struct func_body_info
> >  {
> > +  /* Struct function of the function that is being analyzed.  */
> > +  struct function *func;
> > +
> >    /* The node that is being analyzed.  */
> >    cgraph_node *node;
> 
> DECL_STRUCT_FUNCTION (node->decl) == func?

Yes, this is the convenience I was writing about above.  I can remove it but
it is really only one pointer.

> 
> > @@ -105,6 +161,13 @@ struct func_body_info
> >    /* Information about individual BBs. */
> >    vec<ipa_bb_info> bb_infos;
> >
> > +  /* Escape analysis information for SSA flags and local addressable
> > +     declarations.  */
> > +  vec<ipa_escape> escapes;
> > +
> > +  /* Mapping from VAR_DECLS to escape information.  */
> > +  pointer_map <ipa_escape *> *decl_escapes;
> > +
> 
> You can map from DECL_UID to an index in escapes which would
> be more space efficient?

OK, I will make the change.

> 
> >    /* Number of parameters.  */
> >    int param_count;
> >
> > @@ -282,7 +345,14 @@ ipa_print_node_jump_functions_for_edge (
> >
> >        fprintf (f, "       param %d: ", i);
> >        if (type == IPA_JF_UNKNOWN)
> > -       fprintf (f, "UNKNOWN\n");
> > +       {
> > +         fprintf (f, "UNKNOWN");
> > +         if (ipa_get_jf_unknown_esc_ref_valid (jump_func))
> > +           fprintf (f, ", escape ref: %i\n",
> > +                    ipa_get_jf_unknown_esc_ref_index (jump_func));
> > +         else
> > +           fprintf (f, "\n");
> > +       }
> >        else if (type == IPA_JF_KNOWN_TYPE)
> >         {
> >           fprintf (f, "KNOWN TYPE: base  ");
> > @@ -290,6 +360,9 @@ ipa_print_node_jump_functions_for_edge (
> >           fprintf (f, ", offset "HOST_WIDE_INT_PRINT_DEC", component ",
> >                    jump_func->value.known_type.offset);
> >           print_generic_expr (f, jump_func->value.known_type.component_type, 0);
> > +         if (ipa_get_jf_known_type_esc_ref_valid (jump_func))
> > +           fprintf (f, ", escape ref: %i",
> > +                    ipa_get_jf_known_type_esc_ref_index (jump_func));
> >           fprintf (f, "\n");
> >         }
> >        else if (type == IPA_JF_CONST)
> > @@ -304,6 +377,9 @@ ipa_print_node_jump_functions_for_edge (
> >               print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (val, 0)),
> >                                   0);
> >             }
> > +         if (ipa_get_jf_constant_esc_ref_valid (jump_func))
> > +           fprintf (f, ", escape ref: %i",
> > +                    ipa_get_jf_constant_esc_ref_index (jump_func));
> >           fprintf (f, "\n");
> >         }
> >        else if (type == IPA_JF_PASS_THROUGH)
> > @@ -430,6 +506,39 @@ ipa_print_all_jump_functions (FILE *f)
> >      }
> >  }
> >
> > +/* Set jfunc to be a jump function with invalid reference index.  */
> > +
> > +static void
> > +ipa_set_jf_unknown (struct ipa_jump_func *jfunc)
> > +{
> > +  jfunc->type = IPA_JF_UNKNOWN;
> > +  jfunc->value.unknown.escape_ref_valid = false;
> > +}
> > +
> > +/* Set JFUNC to be a copy of another unknown jump function SRC. */
> > +
> > +static void
> > +ipa_set_jf_unknown_copy (struct ipa_jump_func *dst,
> > +                        struct ipa_jump_func *src)
> > +
> > +{
> > +  gcc_checking_assert (src->type == IPA_JF_UNKNOWN);
> > +  dst->type = IPA_JF_UNKNOWN;
> > +  dst->value.unknown = src->value.unknown;
> > +}
> > +
> > +/* Set reference description of unknown JFUNC to be valid and referring to
> > +   INDEX.  */
> > +
> > +static void
> > +ipa_set_jf_unknown_ref_index (struct ipa_jump_func *jfunc, int index)
> > +{
> > +  gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN);
> > +  gcc_checking_assert (index >= 0);
> > +  jfunc->value.unknown.escape_ref_valid = true;
> > +  jfunc->value.unknown.escape_ref_index = index;
> > +}
> > +
> >  /* Set JFUNC to be a known type jump function.  */
> >
> >  static void
> > @@ -445,11 +554,37 @@ ipa_set_jf_known_type (struct ipa_jump_f
> >    jfunc->value.known_type.offset = offset,
> >    jfunc->value.known_type.base_type = base_type;
> >    jfunc->value.known_type.component_type = component_type;
> > +  jfunc->value.known_type.escape_ref_valid = false;
> > +  jfunc->value.known_type.escape_ref_index = 0;
> >    gcc_assert (component_type);
> >  }
> >
> > -/* Set JFUNC to be a copy of another jmp (to be used by jump function
> > -   combination code).  The two functions will share their rdesc.  */
> > +/* Set reference description of known_type JFUNC to be valid and referring to
> > +   INDEX.  */
> > +
> > +static void
> > +ipa_set_jf_known_type_ref_index (struct ipa_jump_func *jfunc, int index)
> > +{
> > +  gcc_checking_assert (jfunc->type == IPA_JF_KNOWN_TYPE);
> > +  gcc_checking_assert (index >= 0);
> > +  jfunc->value.known_type.escape_ref_valid = true;
> > +  jfunc->value.known_type.escape_ref_index = index;
> > +}
> > +
> > +/* Set DST to be a copy of another known type jump function SRC.  */
> > +
> > +static void
> > +ipa_set_jf_known_type_copy (struct ipa_jump_func *dst,
> > +                           struct ipa_jump_func *src)
> > +
> > +{
> > +  gcc_checking_assert (src->type == IPA_JF_KNOWN_TYPE);
> > +  dst->type = IPA_JF_KNOWN_TYPE;
> > +  dst->value.known_type = src->value.known_type;
> > +}
> > +
> > +/* Set DST to be a copy of another constant jump function SRC.  The two
> > +   functions will share their rdesc.  */
> >
> >  static void
> >  ipa_set_jf_cst_copy (struct ipa_jump_func *dst,
> > @@ -472,6 +607,8 @@ ipa_set_jf_constant (struct ipa_jump_fun
> >      SET_EXPR_LOCATION (constant, UNKNOWN_LOCATION);
> >    jfunc->type = IPA_JF_CONST;
> >    jfunc->value.constant.value = unshare_expr_without_location (constant);
> > +  jfunc->value.constant.escape_ref_valid = false;
> > +  jfunc->value.constant.escape_ref_index = 0;
> >
> >    if (TREE_CODE (constant) == ADDR_EXPR
> >        && TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL)
> > @@ -491,6 +628,19 @@ ipa_set_jf_constant (struct ipa_jump_fun
> >      jfunc->value.constant.rdesc = NULL;
> >  }
> >
> > +/* Set reference description of constant JFUNC to be valid and referring to
> > +   INDEX.  */
> > +
> > +static void
> > +ipa_set_jf_constant_ref_index (struct ipa_jump_func *jfunc, int index)
> > +{
> > +  gcc_checking_assert (jfunc->type == IPA_JF_CONST);
> > +  gcc_checking_assert (index >= 0);
> > +  jfunc->value.constant.escape_ref_valid = true;
> > +  jfunc->value.constant.escape_ref_index = index;
> > +}
> > +
> > +
> >  /* Set JFUNC to be a simple pass-through jump function.  */
> >  static void
> >  ipa_set_jf_simple_pass_through (struct ipa_jump_func *jfunc, int formal_id,
> > @@ -539,6 +689,41 @@ ipa_set_ancestor_jf (struct ipa_jump_fun
> >    jfunc->value.ancestor.type_preserved = type_preserved;
> >  }
> >
> > +/* Return the number of references tracked for escape analysis in INFO.  */
> > +
> > +static inline int
> > +ipa_get_tracked_refs_count (struct ipa_node_params *info)
> > +{
> > +  return info->ref_descs.length ();
> > +}
> > +
> > +/* Set escape flag of reference number I of a function corresponding to NODE to
> > +   VAL.  */
> > +
> > +static inline void
> > +ipa_set_ref_escaped (struct ipa_node_params *info, int i, bool val)
> > +{
> > +  info->ref_descs[i].escaped = val;
> > +}
> > +
> > +/* Set the clobbered flag corresponding to the Ith tracked reference of the
> > +   function associated with INFO to VAL.  */
> > +
> > +static inline void
> > +ipa_set_ref_clobbered (struct ipa_node_params *info, int i, bool val)
> > +{
> > +  info->ref_descs[i].clobbered = val;
> > +}
> > +
> > +/* Set the callee_clobbered flag corresponding to the Ith tracked reference of
> > +   the function associated with INFO to VAL.  */
> > +
> > +static inline void
> > +ipa_set_ref_callee_clobbered (struct ipa_node_params *info, int i, bool val)
> > +{
> > +  info->ref_descs[i].callee_clobbered = val;
> > +}
> > +
> >  /* Extract the acual BINFO being described by JFUNC which must be a known type
> >     jump function.  */
> >
> > @@ -784,7 +969,7 @@ detect_type_change (tree arg, tree base,
> >    if (!tci.known_current_type
> >        || tci.multiple_types_encountered
> >        || offset != 0)
> > -    jfunc->type = IPA_JF_UNKNOWN;
> > +    ipa_set_jf_unknown (jfunc);
> >    else
> >      ipa_set_jf_known_type (jfunc, 0, tci.known_current_type, comp_type);
> >
> > @@ -1090,7 +1275,8 @@ ipa_load_from_parm_agg_1 (struct func_bo
> >      }
> >
> >    if (index >= 0
> > -      && parm_ref_data_preserved_p (fbi, index, stmt, op))
> > +      && ((fbi && cgraph_param_noclobber_p (fbi->node, index))
> > +         || parm_ref_data_preserved_p (fbi, index, stmt, op)))
> >      {
> >        *index_p = index;
> >        *by_ref_p = true;
> > @@ -1725,6 +1911,86 @@ ipa_get_callee_param_type (struct cgraph
> >    return NULL;
> >  }
> >
> > +static void
> > +analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
> > +                   struct ipa_escape *esc);
> > +
> > +/* Return the ipa_escape structure suitable for REFERENCE, if it is a
> > +   declaration or a MEM_REF.  Return NULL if there is no structure describing
> > +   REFERENCE.  If a non-NULL result is returned, put the offset of the
> > +   REFERENCE relative to the start of data described by the result into
> > +   *OFFSET, and size and max_size as returned by get_ref_base_and_extent to
> > +   *SIZE and *MAX_SIZE respectively.  */
> > +
> > +static struct ipa_escape *
> > +get_escape_for_ref (struct func_body_info *fbi, tree reference,
> > +                   HOST_WIDE_INT *offset, HOST_WIDE_INT *size,
> > +                   HOST_WIDE_INT *max_size)
> > +{
> > +  struct ipa_escape *res;
> > +  tree base = get_ref_base_and_extent (reference, offset, size, max_size);
> > +
> > +  if (DECL_P (base))
> > +    {
> > +      ipa_escape **d_esc = fbi->decl_escapes->contains (base);
> > +      if (!d_esc)
> > +       return NULL;
> > +      res = *d_esc;
> > +    }
> > +  else if (TREE_CODE (base) == MEM_REF
> > +          && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
> > +    {
> > +      tree ssa = TREE_OPERAND (base, 0);
> > +      res = &fbi->escapes[SSA_NAME_VERSION (ssa)];
> > +      if (!res->analyzed)
> > +       analyze_ssa_escape (fbi, ssa, res);
> > +    }
> > +  else
> > +    return NULL;
> > +
> > +  if (res->target)
> > +    {
> > +      *offset += res->offset;
> > +      res = res->target;
> > +    }
> > +  return res;
> > +}
> > +
> > +/* Return the ipa_escape structure suitable for T, if it is an ssa_name or an
> > +   ADDR_EXPR.  Return NULL if there is not structure for T.  If a non-NULL
> > +   result is returned, put the offset of the value T relative to the start of
> > +   data described by the result into *OFFSET.  */
> > +
> > +static struct ipa_escape *
> > +get_escape_for_value (struct func_body_info *fbi, tree t,
> > +                     HOST_WIDE_INT *offset)
> > +{
> > +  if (TREE_CODE (t) == SSA_NAME)
> > +    {
> > +      struct ipa_escape *res;
> > +      *offset = 0;
> > +      res = &fbi->escapes[SSA_NAME_VERSION (t)];
> > +      if (!res->analyzed)
> > +       analyze_ssa_escape (fbi, t, res);
> > +
> > +      if (res->target)
> > +       {
> > +         *offset += res->offset;
> > +         res = res->target;
> > +       }
> > +
> > +      return res;
> > +    }
> > +  else if (TREE_CODE (t) == ADDR_EXPR)
> > +    {
> > +      HOST_WIDE_INT dummy_size, dummy_max_size;
> > +      return get_escape_for_ref (fbi, TREE_OPERAND (t, 0), offset, &dummy_size,
> > +                                &dummy_max_size);
> > +    }
> > +  else
> > +    return NULL;
> > +}
> > +
> >  /* Compute jump function for all arguments of callsite CS and insert the
> >     information in the jump_functions array in the ipa_edge_args corresponding
> >     to this callsite.  */
> > @@ -1753,6 +2019,8 @@ ipa_compute_jump_functions_for_edge (str
> >        tree arg = gimple_call_arg (call, n);
> >        tree param_type = ipa_get_callee_param_type (cs, n);
> >
> > +      gcc_checking_assert (jfunc->type == IPA_JF_UNKNOWN
> > +                          && !ipa_get_jf_unknown_esc_ref_valid (jfunc));
> >        if (is_gimple_ip_invariant (arg))
> >         ipa_set_jf_constant (jfunc, arg, cs);
> >        else if (!is_gimple_reg_type (TREE_TYPE (arg))
> > @@ -1807,19 +2075,42 @@ ipa_compute_jump_functions_for_edge (str
> >                                       ? TREE_TYPE (param_type)
> >                                       : NULL);
> >
> > -      /* If ARG is pointer, we can not use its type to determine the type of aggregate
> > -        passed (because type conversions are ignored in gimple).  Usually we can
> > -        safely get type from function declaration, but in case of K&R prototypes or
> > -        variadic functions we can try our luck with type of the pointer passed.
> > -        TODO: Since we look for actual initialization of the memory object, we may better
> > -        work out the type based on the memory stores we find.  */
> > +      /* If ARG is pointer, we can not use its type to determine the type of
> > +        aggregate passed (because type conversions are ignored in gimple).
> > +        Usually we can safely get type from function declaration, but in case
> > +        of K&R prototypes or variadic functions we can try our luck with type
> > +        of the pointer passed.
> > +        TODO: Since we look for actual initialization of the memory object, we
> > +        may better work out the type based on the memory stores we find.  */
> >        if (!param_type)
> >         param_type = TREE_TYPE (arg);
> >
> > -      if ((jfunc->type != IPA_JF_PASS_THROUGH
> > -             || !ipa_get_jf_pass_through_agg_preserved (jfunc))
> > -         && (jfunc->type != IPA_JF_ANCESTOR
> > -             || !ipa_get_jf_ancestor_agg_preserved (jfunc))
> > +      HOST_WIDE_INT dummy_offset;
> > +      struct ipa_escape *esc = get_escape_for_value (fbi, arg, &dummy_offset);
> > +      int ref_index;
> > +      if (esc && valid_escape_result_index (esc, &ref_index))
> > +       {
> > +         if (jfunc->type == IPA_JF_UNKNOWN)
> > +           ipa_set_jf_unknown_ref_index (jfunc, ref_index);
> > +         else if (jfunc->type == IPA_JF_KNOWN_TYPE)
> > +           ipa_set_jf_known_type_ref_index (jfunc, ref_index);
> > +         else if (jfunc->type == IPA_JF_CONST)
> > +           ipa_set_jf_constant_ref_index (jfunc, ref_index);
> > +         else
> > +           {
> > +             gcc_checking_assert
> > +               (jfunc->type != IPA_JF_PASS_THROUGH
> > +                || ipa_get_jf_pass_through_formal_id (jfunc) == ref_index);
> > +             gcc_checking_assert
> > +               (jfunc->type != IPA_JF_ANCESTOR
> > +                || ipa_get_jf_ancestor_formal_id (jfunc) == ref_index);
> > +           }
> > +       }
> > +
> > +      /* TODO: We should allow aggregate jump functions even for these types of
> > +        jump functions but we need to be able to combine them first.  */
> > +      if (jfunc->type != IPA_JF_PASS_THROUGH
> > +         && jfunc->type != IPA_JF_ANCESTOR
> >           && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
> >               || POINTER_TYPE_P (param_type)))
> >         determine_known_aggregate_parts (call, arg, param_type, jfunc);
> > @@ -2223,12 +2514,11 @@ ipa_analyze_stmt_uses (struct func_body_
> >      ipa_analyze_call_uses (fbi, stmt);
> >  }
> >
> > -/* Callback of walk_stmt_load_store_addr_ops for the visit_load.
> > -   If OP is a parameter declaration, mark it as used in the info structure
> > -   passed in DATA.  */
> > +/* Callback of walk_stmt_load_store_addr_ops.  If OP is a parameter
> > +   declaration, mark it as used in the info structure passed in DATA.  */
> >
> >  static bool
> > -visit_ref_for_mod_analysis (gimple, tree op, tree, void *data)
> > +visit_ref_mark_it_used (gimple, tree op, tree, void *data)
> >  {
> >    struct ipa_node_params *info = (struct ipa_node_params *) data;
> >
> > @@ -2244,13 +2534,12 @@ visit_ref_for_mod_analysis (gimple, tree
> >    return false;
> >  }
> >
> > -/* Scan the statements in BB and inspect the uses of formal parameters.  Store
> > -   the findings in various structures of the associated ipa_node_params
> > -   structure, such as parameter flags, notes etc.  FBI holds various data about
> > -   the function being analyzed.  */
> > +/* Scan the statements in BB and inspect the uses of formal parameters, escape
> > +   analysis and so on.  FBI holds various data about the function being
> > +   analyzed.  */
> >
> >  static void
> > -ipa_analyze_params_uses_in_bb (struct func_body_info *fbi, basic_block bb)
> > +ipa_analyze_bb_statements (struct func_body_info *fbi, basic_block bb)
> >  {
> >    gimple_stmt_iterator gsi;
> >    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > @@ -2262,15 +2551,15 @@ ipa_analyze_params_uses_in_bb (struct fu
> >
> >        ipa_analyze_stmt_uses (fbi, stmt);
> >        walk_stmt_load_store_addr_ops (stmt, fbi->info,
> > -                                    visit_ref_for_mod_analysis,
> > -                                    visit_ref_for_mod_analysis,
> > -                                    visit_ref_for_mod_analysis);
> > +                                    visit_ref_mark_it_used,
> > +                                    visit_ref_mark_it_used,
> > +                                    visit_ref_mark_it_used);
> >      }
> >    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> >      walk_stmt_load_store_addr_ops (gsi_stmt (gsi), fbi->info,
> > -                                  visit_ref_for_mod_analysis,
> > -                                  visit_ref_for_mod_analysis,
> > -                                  visit_ref_for_mod_analysis);
> > +                                  visit_ref_mark_it_used,
> > +                                  visit_ref_mark_it_used,
> > +                                  visit_ref_mark_it_used);
> >  }
> >
> >  /* Calculate controlled uses of parameters of NODE.  */
> > @@ -2344,10 +2633,284 @@ private:
> >  void
> >  analysis_dom_walker::before_dom_children (basic_block bb)
> >  {
> > -  ipa_analyze_params_uses_in_bb (m_fbi, bb);
> > +  ipa_analyze_bb_statements (m_fbi, bb);
> >    ipa_compute_jump_functions_for_bb (m_fbi, bb);
> >  }
> >
> > +/* Look at operands of PHI and if any of them is an address of a declaration,
> > +   mark that declaration escaped.  */
> > +
> > +void
> > +analyze_phi_escapes (gimple phi, struct func_body_info *fbi)
> > +{
> > +  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> > +    {
> > +      tree op = gimple_phi_arg_def (phi, i);
> > +      if (TREE_CODE (op) != ADDR_EXPR)
> > +       continue;
> > +
> > +      tree base = get_base_address (TREE_OPERAND (op, 0));
> > +      if (!DECL_P (base))
> > +       continue;
> 
> So this means that 'a' escapes in
> 
>   tem_1 = &a[i_2];
> 
> ?

Only if that appears in a phi node (my understanding is that with a
constant index it can).  I do treat phi nodes very pessimistically.

> 
> > +      ipa_escape **d_esc = fbi->decl_escapes->contains (base);
> > +      if (!d_esc)
> > +       continue;
> > +      (*d_esc)->escaped = true;
> > +    }
> > +}
> > +
> > +/* Check definition and uses of SSA and update ESC (and potentially escape
> > +   structures associated with other SSA names) accordingly.  */
> > +
> > +static void
> > +analyze_ssa_escape (struct func_body_info *fbi, tree ssa,
> > +                   struct ipa_escape *esc)
> > +{
> > +  esc->analyzed = true;
> > +  if (!POINTER_TYPE_P (TREE_TYPE (ssa)))
> > +    {
> > +      esc->escaped = true;
> > +      return;
> > +    }
> > +
> > +  /* First we need to check the definition and figure out whether we can work
> > +     with it or whether this name actually refers to data described by another
> > +     structure.  */
> > +  if (!SSA_NAME_IS_DEFAULT_DEF (ssa))
> > +    {
> > +      gimple def = SSA_NAME_DEF_STMT (ssa);
> > +
> > +      if (gimple_assign_single_p (def))
> > +       {
> > +         tree rhs = gimple_assign_rhs1 (def);
> > +         HOST_WIDE_INT offset;
> > +         struct ipa_escape *r_esc = get_escape_for_value (fbi, rhs, &offset);
> > +         if (r_esc)
> > +           {
> > +             esc->offset = offset;
> > +             esc->target = r_esc;
> > +           }
> > +         else
> > +           {
> > +             esc->escaped = true;
> > +             return;
> > +           }
> > +       }
> > +      else if (is_gimple_call (def))
> > +       {
> > +         /* TODO: If only C++ new had malloc attribute.  */
> > +         int flags = gimple_call_flags (def);
> 
> How does ECF_MALLOC make something not escape?!

I am not sure I understand.  It is the other way round, a result of a
non-ECF_MALLOC function is considered escaped.  Of course a result of
a malloc can escape because of other statements or even in another
function.

>  And why
> does the definition "escape" in any stmt?!

If this is overly pessimistic (and I see that I should handle at least
pointer arithmetic), I can address it as a follow-up.

> 
> > +         if ((flags & ECF_MALLOC) == 0)
> > +           {
> > +             esc->escaped = true;
> > +             return;
> > +           }
> > +       }
> > +      else
> > +       {
> > +         if (gimple_code (def) == GIMPLE_PHI)
> > +           /* Any SSA defined by a PHI is doomed but it is a convenient place
> > +              to check every pointer phi . */
> > +           analyze_phi_escapes (def, fbi);
> > +
> > +         esc->escaped = true;
> > +         return;
> > +       }
> > +    }
> > +
> > +  if (esc->target)
> > +    esc = esc->target;
> > +  if (esc->escaped)
> > +    return;
> > +
> > +  /* If the definition is fine, we need to check the uses.  */
> > +
> > +  imm_use_iterator imm_iter;
> > +  use_operand_p use;
> > +  FOR_EACH_IMM_USE_FAST (use, imm_iter, ssa)
> > +    {
> > +      gimple stmt = USE_STMT (use);
> > +      if (is_gimple_debug (stmt))
> > +       continue;
> > +
> > +      switch (gimple_code (stmt))
> > +       {
> > +       case GIMPLE_ASSIGN:
> > +         {
> > +           if (!gimple_assign_single_p (stmt))
> > +             {
> > +               esc->escaped = true;
> 
> Does that make SSA escape in
> 
>    tem_1 = ssa p+ 1;
> 
> ?

Yes, and this is an omission that I forgot to come back to and fix.
I will do that.

> 
> > +               return;
> > +             }
> > +
> > +             tree lhs = gimple_assign_lhs (stmt);
> > +             /* Statements assigning to another SSA are OK, we check all of
> > +                them.  */
> > +             if (TREE_CODE (lhs) != SSA_NAME
> > +                 /* If LHS is not an SSA_NAME, RHS cannot be an ADDR_EXPR, and
> > +                    must be either a naked SSA_NAME or a load or an invariant.
> > +                    We only care if it is the SSA name we are after.  It can
> > +                    be a different SSA name if the use was on the LHS in a
> > +                    MEM_REF.  */
> > +                 && gimple_assign_rhs1 (stmt) == ssa)
> > +               {
> > +                 esc->escaped = true;
> > +                 return;
> > +               }
> > +
> > +             while (handled_component_p (lhs))
> > +               lhs = TREE_OPERAND (lhs, 0);
> > +             if (TREE_CODE (lhs) == MEM_REF
> > +                 && TREE_OPERAND (lhs, 0) == ssa)
> > +               esc->write_base = true;
> > +           }
> > +         break;
> > +
> > +       case GIMPLE_CALL:
> > +         /* Calls will be dealt with when constructing jump functions.
> > +            However, indirect calls mean that all values escape (we do IPA
> > +            escape propagation before any devirtualization) and when not in
> > +            LTO, even calls to functions in other compilation units are dark
> > +            holes.  On the other hand, builtin free is whitelisted.  */
> > +         if (!gimple_call_builtin_p (stmt, BUILT_IN_FREE))
> > +           {
> > +             struct cgraph_edge *cs = cgraph_edge (fbi->node, stmt);
> > +             if (!cs || !cs->callee || (!cs->callee->definition && !flag_lto))
> > +               {
> > +                 esc->escaped = true;
> > +                 return;
> > +               }
> > +           }
> > +         break;
> > +
> > +       case GIMPLE_SWITCH:
> > +       case GIMPLE_COND:
> > +         /* These are harmless.  */
> > +         break;
> > +
> > +       default:
> > +         esc->escaped = true;
> > +         return;
> > +       }
> > +    }
> > +}
> > +
> > +/* Examine escapes of all SSA names.   */
> > +
> > +static void
> > +analyze_all_ssa_escapes (struct func_body_info *fbi)
> > +{
> > +  for (unsigned i = 1; i < fbi->func->gimple_df->ssa_names->length (); ++i)
> 
> SSANAMES (fbi->func)->length ();

Changed.

> 
> > +    {
> > +      tree ssa = ssa_name (i);
> > +      if (!ssa)
> > +       continue;
> > +      struct ipa_escape *esc = &fbi->escapes[SSA_NAME_VERSION (ssa)];
> > +      if (esc->analyzed)
> > +       return;
> > +      analyze_ssa_escape (fbi, ssa, esc);
> 
> I think it's more cache friendly to walk all stmts instead of all
> SSA names and their immediate uses.  But maybe not.

This is just more convenient.  However, it can be done.

> 
> > +    }
> > +}
> > +
> > +/* Initialize escape analysis structures in the FBI corresponding to FUNC.  */
> > +
> > +static void
> > +create_escape_structures (struct func_body_info *fbi)
> > +{
> > +  tree var, parm;
> > +  unsigned int i, var_idx, var_count = 0;
> > +
> > +  for (parm = DECL_ARGUMENTS (fbi->node->decl);
> > +       parm;
> > +       parm = DECL_CHAIN (parm))
> > +    if (TREE_ADDRESSABLE (parm))
> > +      var_count++;
> > +
> > +  FOR_EACH_LOCAL_DECL (fbi->func, i, var)
> > +    if (TREE_CODE (var) == VAR_DECL && TREE_ADDRESSABLE (var))
> > +      var_count++;
> > +
> > +  fbi->escapes = vNULL;
> > +  fbi->escapes.safe_grow_cleared (SSANAMES (fbi->func)->length () + var_count);
> 
> you want to use reserve_exact first and then grow.

OK

> 
> I miss an overall comment about the used algorithm and its caveats.

I hope the few paragraphs at the top of this email will help to
clarify the algorithm some more.  I will try to add an explanatory
comment to the top of the source code file as well.

> I see the following
>  1) it's not flow or context sensitive
>  2) it doesn't handle local memory as non-escape sites
>  3) it doesn't handle escapes through return (it handles them pessimistically)

Handling returns for the sake of computation of the escape flag would
be probably easy.  But it would cause problems for jump function
building that is based on this patch.  We would discover that what we
optimistically considered independent pointers are actually aliases
only at WPA stage and would have to discard all the associated jump
functions anyway.  The jump function building can however have its own
flag (in ipa-cp structures, living only until WPA end).  The situation
can be similar when it comes to one reference passed in two arguments
to the same call.

Thinking about it more, perhaps the analysis should not even be called
escape analysis, it really is an automatic detection of restrict
pointer parameters.  Code building and using jump functions wants to
know that the data can only be accessed through pointers it has under
control and knows about.  Using that result in PTA as escape is
possible but the calculated property is more strict than escape.

> 
> IPA-PTA does 2) and 3), of course not 1).
> 
> For what it does this seems to be quite heavy-weight?

I would not say it is heavy-weight.  It does add the scan of SSA
definitions and uses; the latter can be made part of the body sweep.

> 
> What parts are actually carried out at WPA time?  Not escape sites
> it seems but only the actual objects (not) escaping, right?

If I understand correctly then yes, only the escape propagation is
performed at WPA, but it is also performed downwards so...

> 
> How would your algorithm compare to one running at local_pure_const
> time looking at a similar simple set of escapes of its function arguments
> (but using computed fnspec attributes to handle callees already processed)?

...this would not propagate escapes to callees.

> 
> What would be the complication of handling return functions?
> 
> Maybe I missed them sofar, but do you have testcases that show
> actual optimizations that are possible with this?  Do you have
> numbers for SPEC for optimizations that are triggered?

Testcases are mostly in the subsequent patches, this patch has only
one.  I will have a look at the benchmarks some more.

> 
> At least it seems that all this convolutes the existing (already
> convoluted) IPA-PROP engine even more.

I will try to make the ipa-prop.c file more comprehensible and better
divided and documented.

Thanks,

Martin

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-23 10:03               ` Richard Biener
@ 2014-05-23 22:29                 ` Jan Hubicka
  2014-05-24  7:39                 ` Jan Hubicka
  1 sibling, 0 replies; 29+ messages in thread
From: Jan Hubicka @ 2014-05-23 22:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jan Hubicka, GCC Patches

> Well, allocate_struct_function has a abstract_p argument for that.  But
> yes, a simple patch like
> 
> Index: gcc/function.c
> ===================================================================
> --- gcc/function.c      (revision 210845)
> +++ gcc/function.c      (working copy)
> @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.
>  #include "params.h"
>  #include "bb-reorder.h"
>  #include "shrink-wrap.h"
> +#include "cgraph.h"
> 
>  /* So we can assign to cfun in this file.  */
>  #undef cfun
> @@ -4512,6 +4513,8 @@ allocate_struct_function (tree fndecl, b
> 
>    if (fndecl != NULL_TREE)
>      {
> +      if (!abstract_p)
> +       cgraph_get_create_node (fndecl);
>        DECL_STRUCT_FUNCTION (fndecl) = cfun;
>        cfun->decl = fndecl;
>        current_function_funcdef_no = get_next_funcdef_no ();
> 
> ICEs during bootstrap with (at least)
> 
> /space/rguenther/src/svn/trunk/libgcc/config/i386/cpuinfo.c:405:1:
> error: node differs from symtab decl hashtable
>  }
>  ^
> __get_cpuid_max.constprop.0/42 (__get_cpuid_max.constprop) @0x7ff486232290
>   Type: function definition analyzed
>   Visibility: artificial
>   previous sharing asm name: 43
>   References:
>   Referring:
>   Function __get_cpuid_max.constprop/42 is inline copy in __get_cpuid_output/40
>   Availability: local
>   First run: 0
>   Function flags: local only_called_at_startup
>   Called by: __get_cpuid_output/40 (1.00 per call) (inlined)
>   Calls:
> /space/rguenther/src/svn/trunk/libgcc/config/i386/cpuinfo.c:405:1:
> internal compiler error: verify_cgraph_node failed
> 
> so I guess we would need to have a way to create a "dummy" cgraph
> node first and later populate it properly.

Yep, I was wondering about following:

struct GTY(()) tree_decl_with_vis {
 struct tree_decl_with_rtl common;
 tree assembler_name;
 tree section_name;
 tree comdat_group;

for the majority of decl_with_vis nodes we create, we won't have a section
name/assembler name or comdat group.

We already do memory optimization for INIT_PRIORITY in the following way:
#define DECL_HAS_INIT_PRIORITY_P(NODE) \
  (VAR_DECL_CHECK (NODE)->decl_with_vis.init_priority_p)
#define DECL_INIT_PRIORITY(NODE) \
  (decl_init_priority_lookup (NODE))

where init priority sits on separate hashtab.

I think at the moment we can't move assembler_name/section_name/comat_group
all to symbol table, because we do need the for off-symbol table things
(CONST_DECL, LABEL_DECL, off symtab VAR_DECL).
But I would suggest dropping them to off-tree hashtables, too, and have
pointer in symtab.
The accessor would be then something like

tree get_comdat_group (tree node)
{
  if (!node->decl_with_vis.comdat_group_p)
    return NULL;
  if (node->decl_with_vis.symtab_node)
    return node->decl_with_vis.symtab_node->comdat_group;
  decl_comdat_group_loop (node);
}
> 
> But as we currently have a back-pointer from struct function to fndecl
> it would be nice to hook the cgraph node in there - that way we get
> away without any extra pointer (we could even save symtab decl
> pointer and create a cyclic fndecl -> cgraph -> function -> fndecl
> chain ...).
> 
> I'm fine with enlarging tree_function_decl for now - ideally we'd push
> stuff from it elsewhere (like target and optimization option tree nodes,
> or most of the visibility and symbol related stuff).  Not sure why
> tree_type_decl inherits from tree_decl_non_common (and thus
> tree_decl_with_vis).  Probably because of the non-common parts
> being (ab-)used by FEs.  Otherwise I'd say simply put a symtab
> node pointer into tree_decl_with_vis ... (can we move
> section_name and comdat_group more easily than assembler_name?)

Let's see, I suppose putting it on the side first is a good incremental step.
Then we need to revisit the frontends to avoid defining those too early;
that might be relatively simple to do (at least for the C++ FE, I think it
is all defined at the time we call import/export decl.)

Honza
> 
> Richard.
> 
> > Honza
> >>
> >> Richard.
> >>
> >> >I think we may be on better track moving DECL_ASSEMBLER_NAME that is
> >> >calculated later,
> >> >but then we have problem with DECL_ASSEMBLER_NAME being set for
> >> >assembler names and
> >> >const decls, too that still go around symtab.
> >> >Given that decl_assembler_name is a function, I suppose we could go
> >> >with extra conditoinal
> >> >in there.
> >> >
> >> >Getting struct function out of frontend busyness would be nice indeed,
> >> >too, but probably
> >> >should be independent of Martin's work here.
> >> >
> >> >Honza
> >> >>
> >> >> Thanks,
> >> >> Richard.
> >> >>
> >> >> > Thanks,
> >> >> >
> >> >> > Martin
> >> >> >
> >> >> >>
> >> >> >> > +       }
> >> >> >> >      }
> >> >> >> > +
> >> >> >> > +  return ret;
> >> >> >> >  }
> >> >> >> >
> >> >> >> >  /* Detects return flags for the call STMT.  */
> >> >> >> >
> >>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-23 10:03               ` Richard Biener
  2014-05-23 22:29                 ` Jan Hubicka
@ 2014-05-24  7:39                 ` Jan Hubicka
  2014-05-26 13:03                   ` Rainer Orth
  1 sibling, 1 reply; 29+ messages in thread
From: Jan Hubicka @ 2014-05-24  7:39 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jan Hubicka, GCC Patches

> I'm fine with enlarging tree_function_decl for now - ideally we'd push
> stuff from it elsewhere (like target and optimization option tree nodes,
> or most of the visibility and symbol related stuff).  Not sure why
> tree_type_decl inherits from tree_decl_non_common (and thus
> tree_decl_with_vis).  Probably because of the non-common parts
> being (ab-)used by FEs.  Otherwise I'd say simply put a symtab
> node pointer into tree_decl_with_vis ... (can we move
> section_name and comdat_group more easily than assembler_name?)

Hi,
this patch removes the comdat_group pointer and adds a direct symtab pointer.  As
expected, the change is not completely easy.  The main ugliness is in C++'s version
of duplicate_decls, which now creates a duplicated decl with a duplicated symtab
node and needs to remove it.  Another problem is copy_node and C's duplicate_decls,
which do a memcpy on a node and thus also copy the symtab pointer, which is not
the right thing to do.

On the other hand on middle-end side several things simplify, so I think overall
the approach works relatively well.

I have bootstrapped/regtested x86_64-linux and I plan to give it more testing
tomorrow and commit if there are no complains.  Incrementally I would like then
to cleanup way the decl_with_vis.symtab_node pointer is maintained.  I do not
want to allow users to tamper with it, so I did not make accessor macro for
it, however there are more direct uses than I would like: I will need to figure
out how to reduce those.

I can also incrementally move DECL_SECTION and some other stuff I think now.

Honza

	* tree-core.h (tree_decl_with_vis): Replace comdat_group by
	symtab_node pointer.
	* tree.c (copy_node_stat): Be sure not to copy the
	symtab_node pointer.
	(find_decls_types_r): Do not walk COMDAT_GROUP.
	* tree.h (DECL_COMDAT_GROUP): Revamp to use decl_comdat_group.
	* varasm.c (make_decl_one_only): Use set_comdat_group;
	create node if needed.
	* ipa-inline-transform.c (save_inline_function_body): Update
	the way we maintain the decl->symtab mapping.
	* symtab.c (symtab_hash, hash_node, eq_node,
	symtab_insert_node_to_hashtable): Remove.
	(symtab_register_node): Update.
	(symtab_unregister_node): Update.
	(symtab_get_node): Reimplement as inline function.
	(symtab_add_to_same_comdat_group): Update.
	(symtab_dissolve_same_comdat_group_list): Update.
	(dump_symtab_base): Update.
	(verify_symtab_base): Update.
	(symtab_make_decl_local): Update.
	(fixup_same_cpp_alias_visibility): Update.
	(symtab_nonoverwritable_alias): Update.
	* cgraphclones.c (set_new_clone_decl_and_node_flags): Update.
	* ipa.c (update_visibility_by_resolution_info): Update.
	* bb-reorder.c: Include cgraph.h
	* lto-streamer-out.c (DFS_write_tree_body, hash_tree): Do not deal
	with comdat groups.
	* ipa-comdats.c (set_comdat_group, ipa_comdats): Update.
	* cgraph.c (cgraph_get_create_node): Update.
	* cgraph.h (struct symtab_node): Add get_comdat_group, set_comdat_group
	and comdat_group_.
	(symtab_get_node): Make inline.
	(symtab_insert_node_to_hashtable): Remove.
	(symtab_can_be_discarded): Update.
	(decl_comdat_group): New function.
	* tree-streamer-in.c (lto_input_ts_decl_with_vis_tree_pointers): Update.
	* lto-cgraph.c (lto_output_node, lto_output_varpool_node): Stream out
	comdat group name.
	(read_comdat_group): New function.
	(input_node, input_varpool_node): Use it.
	* trans-mem.c (ipa_tm_create_version_alias): Update code creating
	comdat groups.
	* config/mips/mips.c (mips_start_unique_function): Likewise.
	* config/i386/i386.c (ix86_code_end): Likewise.
	* config/rs6000/rs6000.c (rs6000_code_end): Likewise.
	* tree-streamer-out.c (DECL_COMDAT_GROUP): Do not stream
	comdat group.

	* lto-symtab.c (lto_symtab_merge_symbols): Update code setting
	symtab pointer.
	* lto.c (compare_tree_sccs_1): Do not compare comdat groups.

	* optmize.c (maybe_thunk_body): Use set_comdat_group.
	(maybe_clone_body): Likewise.
	* decl.c (duplicate_decls): Update code duplicating comdat group;
	do not copy symtab pointer; before freeing newdecl remove it
	from symtab.
	* decl2.c (constrain_visibility): Use set_comdat_group.

	* c-decl.c (merge_decls): Preserve symtab node pointers.
	(duplicate_decls): Free new decl.
Index: tree-core.h
===================================================================
--- tree-core.h	(revision 210887)
+++ tree-core.h	(working copy)
@@ -1442,7 +1442,7 @@ struct GTY(()) tree_decl_with_vis {
  struct tree_decl_with_rtl common;
  tree assembler_name;
  tree section_name;
- tree comdat_group;
+ struct symtab_node *symtab_node;
 
  /* Belong to VAR_DECL exclusively.  */
  unsigned defer_output : 1;
Index: tree.c
===================================================================
--- tree.c	(revision 210887)
+++ tree.c	(working copy)
@@ -972,14 +972,20 @@ copy_node_stat (tree node MEM_STAT_DECL)
 	}
       /* DECL_DEBUG_EXPR is copied explicitely by callers.  */
       if (TREE_CODE (node) == VAR_DECL)
-	DECL_HAS_DEBUG_EXPR_P (t) = 0;
+	{
+	  DECL_HAS_DEBUG_EXPR_P (t) = 0;
+	  t->decl_with_vis.symtab_node = NULL;
+	}
       if (TREE_CODE (node) == VAR_DECL && DECL_HAS_INIT_PRIORITY_P (node))
 	{
 	  SET_DECL_INIT_PRIORITY (t, DECL_INIT_PRIORITY (node));
 	  DECL_HAS_INIT_PRIORITY_P (t) = 1;
 	}
       if (TREE_CODE (node) == FUNCTION_DECL)
-	DECL_STRUCT_FUNCTION (t) = NULL;
+	{
+	  DECL_STRUCT_FUNCTION (t) = NULL;
+	  t->decl_with_vis.symtab_node = NULL;
+	}
     }
   else if (TREE_CODE_CLASS (code) == tcc_type)
     {
@@ -5238,7 +5244,6 @@ find_decls_types_r (tree *tp, int *ws, v
       else if (TREE_CODE (t) == VAR_DECL)
 	{
 	  fld_worklist_push (DECL_SECTION_NAME (t), fld);
-	  fld_worklist_push (DECL_COMDAT_GROUP (t), fld);
 	}
 
       if ((TREE_CODE (t) == VAR_DECL || TREE_CODE (t) == PARM_DECL)
Index: tree.h
===================================================================
--- tree.h	(revision 210887)
+++ tree.h	(working copy)
@@ -2323,7 +2323,7 @@ extern void decl_value_expr_insert (tree
   (DECL_WITH_VIS_CHECK (NODE)->decl_with_vis.comdat_flag)
 
 #define DECL_COMDAT_GROUP(NODE) \
-  (DECL_WITH_VIS_CHECK (NODE)->decl_with_vis.comdat_group)
+  decl_comdat_group (NODE)
 
 /* Used in TREE_PUBLIC decls to indicate that copies of this DECL in
    multiple translation units should be merged.  */
Index: varasm.c
===================================================================
--- varasm.c	(revision 210887)
+++ varasm.c	(working copy)
@@ -5919,17 +5919,23 @@ supports_one_only (void)
 void
 make_decl_one_only (tree decl, tree comdat_group)
 {
+  struct symtab_node *symbol;
   gcc_assert (TREE_CODE (decl) == VAR_DECL
 	      || TREE_CODE (decl) == FUNCTION_DECL);
 
   TREE_PUBLIC (decl) = 1;
 
+  if (TREE_CODE (decl) == VAR_DECL)
+    symbol = varpool_node_for_decl (decl);
+  else
+    symbol = cgraph_get_create_node (decl);
+
   if (SUPPORTS_ONE_ONLY)
     {
 #ifdef MAKE_DECL_ONE_ONLY
       MAKE_DECL_ONE_ONLY (decl);
 #endif
-      DECL_COMDAT_GROUP (decl) = comdat_group;
+      symbol->set_comdat_group (comdat_group);
     }
   else if (TREE_CODE (decl) == VAR_DECL
       && (DECL_INITIAL (decl) == 0 || DECL_INITIAL (decl) == error_mark_node))
Index: ipa-inline-transform.c
===================================================================
--- ipa-inline-transform.c	(revision 210887)
+++ ipa-inline-transform.c	(working copy)
@@ -341,7 +341,7 @@ save_inline_function_body (struct cgraph
   /* first_clone will be turned into real function.  */
   first_clone = node->clones;
   first_clone->decl = copy_node (node->decl);
-  symtab_insert_node_to_hashtable (first_clone);
+  first_clone->decl->decl_with_vis.symtab_node = first_clone;
   gcc_assert (first_clone == cgraph_get_node (first_clone->decl));
 
   /* Now reshape the clone tree, so all other clones descends from
Index: cp/optimize.c
===================================================================
--- cp/optimize.c	(revision 210887)
+++ cp/optimize.c	(working copy)
@@ -285,7 +285,7 @@ maybe_thunk_body (tree fn, bool force)
   else if (HAVE_COMDAT_GROUP)
     {
       tree comdat_group = cdtor_comdat_group (fns[1], fns[0]);
-      DECL_COMDAT_GROUP (fns[0]) = comdat_group;
+      cgraph_get_create_node (fns[0])->set_comdat_group (comdat_group);
       symtab_add_to_same_comdat_group (cgraph_get_create_node (fns[1]),
 				       cgraph_get_create_node (fns[0]));
       symtab_add_to_same_comdat_group (symtab_get_node (fn),
@@ -473,7 +473,7 @@ maybe_clone_body (tree fn)
 	 name of fn was corrupted by write_mangled_name by adding *INTERNAL*
 	 to it. By doing so, it also corrupted the comdat group. */
       if (DECL_ONE_ONLY (fn))
-	DECL_COMDAT_GROUP (clone) = cxx_comdat_group (clone);
+	cgraph_get_create_node (clone)->set_comdat_group (cxx_comdat_group (clone));
       DECL_SECTION_NAME (clone) = DECL_SECTION_NAME (fn);
       DECL_USE_TEMPLATE (clone) = DECL_USE_TEMPLATE (fn);
       DECL_EXTERNAL (clone) = DECL_EXTERNAL (fn);
@@ -550,7 +550,7 @@ maybe_clone_body (tree fn)
 		 into the same, *[CD]5* comdat group instead of
 		 *[CD][12]*.  */
 	      comdat_group = cdtor_comdat_group (fns[1], fns[0]);
-	      DECL_COMDAT_GROUP (fns[0]) = comdat_group;
+	      cgraph_get_create_node (fns[0])->set_comdat_group (comdat_group);
 	      symtab_add_to_same_comdat_group (symtab_get_node (clone),
 					       symtab_get_node (fns[0]));
 	    }
Index: cp/decl.c
===================================================================
--- cp/decl.c	(revision 210887)
+++ cp/decl.c	(working copy)
@@ -2065,8 +2065,17 @@ duplicate_decls (tree newdecl, tree oldd
   /* Merge the storage class information.  */
   merge_weak (newdecl, olddecl);
 
-  if (DECL_ONE_ONLY (olddecl))
-    DECL_COMDAT_GROUP (newdecl) = DECL_COMDAT_GROUP (olddecl);
+  if ((TREE_CODE (olddecl) == FUNCTION_DECL || TREE_CODE (olddecl) == VAR_DECL)
+      && (DECL_EXTERNAL (olddecl) || TREE_PUBLIC (olddecl) || TREE_STATIC (olddecl))
+      && DECL_ONE_ONLY (olddecl))
+    {
+      struct symtab_node *symbol;
+      if (TREE_CODE (olddecl) == FUNCTION_DECL)
+	symbol = cgraph_get_create_node (newdecl);
+      else
+	symbol = varpool_node_for_decl (newdecl);
+      symbol->set_comdat_group (symtab_get_node (olddecl)->get_comdat_group ());
+    }
 
   DECL_DEFER_OUTPUT (newdecl) |= DECL_DEFER_OUTPUT (olddecl);
   TREE_PUBLIC (newdecl) = TREE_PUBLIC (olddecl);
@@ -2376,6 +2385,7 @@ duplicate_decls (tree newdecl, tree oldd
   if (TREE_CODE (newdecl) == FUNCTION_DECL)
     {
       int function_size;
+      struct symtab_node *snode = symtab_get_node (olddecl);
 
       function_size = sizeof (struct tree_decl_common);
 
@@ -2386,6 +2396,10 @@ duplicate_decls (tree newdecl, tree oldd
       memcpy ((char *) olddecl + sizeof (struct tree_decl_common),
 	      (char *) newdecl + sizeof (struct tree_decl_common),
 	      sizeof (struct tree_function_decl) - sizeof (struct tree_decl_common));
+
+      /* Preserve symtab node mapping.  */
+      olddecl->decl_with_vis.symtab_node = snode;
+
       if (new_template_info)
 	/* If newdecl is a template instantiation, it is possible that
 	   the following sequence of events has occurred:
@@ -2415,6 +2429,7 @@ duplicate_decls (tree newdecl, tree oldd
   else
     {
       size_t size = tree_code_size (TREE_CODE (olddecl));
+
       memcpy ((char *) olddecl + sizeof (struct tree_common),
 	      (char *) newdecl + sizeof (struct tree_common),
 	      sizeof (struct tree_decl_common) - sizeof (struct tree_common));
@@ -2428,10 +2443,17 @@ duplicate_decls (tree newdecl, tree oldd
 	case TYPE_DECL:
 	case CONST_DECL:
 	  {
+            struct symtab_node *snode = NULL;
+
+            if (TREE_CODE (olddecl) == VAR_DECL
+		&& (TREE_STATIC (olddecl) || TREE_PUBLIC (olddecl) || DECL_EXTERNAL (olddecl)))
+	      snode = symtab_get_node (olddecl);
 	    memcpy ((char *) olddecl + sizeof (struct tree_decl_common),
 		    (char *) newdecl + sizeof (struct tree_decl_common),
 		    size - sizeof (struct tree_decl_common)
 		    + TREE_CODE_LENGTH (TREE_CODE (newdecl)) * sizeof (char *));
+            if (TREE_CODE (olddecl) == VAR_DECL)
+	      olddecl->decl_with_vis.symtab_node = snode;
 	  }
 	  break;
 	default:
@@ -2466,7 +2488,21 @@ duplicate_decls (tree newdecl, tree oldd
 
   /* The NEWDECL will no longer be needed.  Because every out-of-class
      declaration of a member results in a call to duplicate_decls,
-     freeing these nodes represents in a significant savings.  */
+     freeing these nodes represents a significant savings.
+
+     Before releasing the node, be sure to remove the function from the
+     symbol table that might have been inserted there to record the comdat
+     group.  Be sure, however, not to free DECL_STRUCT_FUNCTION, because
+     this structure is shared between newdecl and olddecl.  */
+  if (TREE_CODE (newdecl) == FUNCTION_DECL)
+    DECL_STRUCT_FUNCTION (newdecl) = NULL;
+  if (TREE_CODE (newdecl) == FUNCTION_DECL
+      || TREE_CODE (newdecl) == VAR_DECL)
+    {
+      struct symtab_node *snode = symtab_get_node (newdecl);
+      if (snode)
+	symtab_remove_node (snode);
+    }
   ggc_free (newdecl);
 
   return olddecl;
Index: cp/decl2.c
===================================================================
--- cp/decl2.c	(revision 210887)
+++ cp/decl2.c	(working copy)
@@ -2093,7 +2093,14 @@ constrain_visibility (tree decl, int vis
 	  TREE_PUBLIC (decl) = 0;
 	  DECL_WEAK (decl) = 0;
 	  DECL_COMMON (decl) = 0;
-	  DECL_COMDAT_GROUP (decl) = NULL_TREE;
+	  if (TREE_CODE (decl) == FUNCTION_DECL
+	      || TREE_CODE (decl) == VAR_DECL)
+	    {
+	      struct symtab_node *snode = symtab_get_node (decl);
+
+	      if (snode)
+	        snode->set_comdat_group (NULL);
+	    }
 	  DECL_INTERFACE_KNOWN (decl) = 1;
 	  if (DECL_LANG_SPECIFIC (decl))
 	    DECL_NOT_REALLY_EXTERN (decl) = 1;
Index: symtab.c
===================================================================
--- symtab.c	(revision 210887)
+++ symtab.c	(working copy)
@@ -57,8 +57,6 @@ const char * const ld_plugin_symbol_reso
   "prevailing_def_ironly_exp"
 };
 
-/* Hash table used to convert declarations into nodes.  */
-static GTY((param_is (symtab_node))) htab_t symtab_hash;
 /* Hash table used to convert assembler names into nodes.  */
 static GTY((param_is (symtab_node))) htab_t assembler_name_hash;
 
@@ -70,26 +68,6 @@ symtab_node *symtab_nodes;
    them, to support -fno-toplevel-reorder.  */
 int symtab_order;
 
-/* Returns a hash code for P.  */
-
-static hashval_t
-hash_node (const void *p)
-{
-  const symtab_node *n = (const symtab_node *) p;
-  return (hashval_t) DECL_UID (n->decl);
-}
-
-
-/* Returns nonzero if P1 and P2 are equal.  */
-
-static int
-eq_node (const void *p1, const void *p2)
-{
-  const symtab_node *n1 = (const symtab_node *) p1;
-  const symtab_node *n2 = (const symtab_node *) p2;
-  return DECL_UID (n1->decl) == DECL_UID (n2->decl);
-}
-
 /* Hash asmnames ignoring the user specified marks.  */
 
 static hashval_t
@@ -282,21 +260,14 @@ symtab_prevail_in_asm_name_hash (symtab_
 void
 symtab_register_node (symtab_node *node)
 {
-  struct symtab_node key;
-  symtab_node **slot;
-
   node->next = symtab_nodes;
   node->previous = NULL;
   if (symtab_nodes)
     symtab_nodes->previous = node;
   symtab_nodes = node;
 
-  if (!symtab_hash)
-    symtab_hash = htab_create_ggc (10, hash_node, eq_node, NULL);
-  key.decl = node->decl;
-  slot = (symtab_node **) htab_find_slot (symtab_hash, &key, INSERT);
-  if (*slot == NULL)
-    *slot = node;
+  if (!node->decl->decl_with_vis.symtab_node)
+    node->decl->decl_with_vis.symtab_node = node;
 
   ipa_empty_ref_list (&node->ref_list);
 
@@ -307,22 +278,6 @@ symtab_register_node (symtab_node *node)
   insert_to_assembler_name_hash (node, false);
 }
 
-/* Make NODE to be the one symtab hash is pointing to.  Used when reshaping tree
-   of inline clones.  */
-
-void
-symtab_insert_node_to_hashtable (symtab_node *node)
-{
-  struct symtab_node key;
-  symtab_node **slot;
-
-  if (!symtab_hash)
-    symtab_hash = htab_create_ggc (10, hash_node, eq_node, NULL);
-  key.decl = node->decl;
-  slot = (symtab_node **) htab_find_slot (symtab_hash, &key, INSERT);
-  *slot = node;
-}
-
 /* Remove NODE from same comdat group.   */
 
 void
@@ -349,7 +304,6 @@ symtab_remove_from_same_comdat_group (sy
 void
 symtab_unregister_node (symtab_node *node)
 {
-  void **slot;
   ipa_remove_all_references (&node->ref_list);
   ipa_remove_all_referring (&node->ref_list);
 
@@ -364,55 +318,20 @@ symtab_unregister_node (symtab_node *nod
   node->next = NULL;
   node->previous = NULL;
 
-  slot = htab_find_slot (symtab_hash, node, NO_INSERT);
-
   /* During LTO symtab merging we temporarily corrupt decl to symtab node
      hash.  */
-  gcc_assert ((slot && *slot) || in_lto_p);
-  if (slot && *slot && *slot == node)
+  gcc_assert (node->decl->decl_with_vis.symtab_node || in_lto_p);
+  if (node->decl->decl_with_vis.symtab_node == node)
     {
       symtab_node *replacement_node = NULL;
       if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
 	replacement_node = cgraph_find_replacement_node (cnode);
-      if (!replacement_node)
-	htab_clear_slot (symtab_hash, slot);
-      else
-	*slot = replacement_node;
+      node->decl->decl_with_vis.symtab_node = replacement_node;
     }
   if (!is_a <varpool_node *> (node) || !DECL_HARD_REGISTER (node->decl))
     unlink_from_assembler_name_hash (node, false);
 }
 
-/* Return symbol table node associated with DECL, if any,
-   and NULL otherwise.  */
-
-symtab_node *
-symtab_get_node (const_tree decl)
-{
-  symtab_node **slot;
-  struct symtab_node key;
-
-#ifdef ENABLE_CHECKING
-  /* Check that we are called for sane type of object - functions
-     and static or external variables.  */
-  gcc_checking_assert (TREE_CODE (decl) == FUNCTION_DECL
-		       || (TREE_CODE (decl) == VAR_DECL
-			   && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)
-			       || in_lto_p)));
-#endif
-
-  if (!symtab_hash)
-    return NULL;
-
-  key.decl = CONST_CAST2 (tree, const_tree, decl);
-
-  slot = (symtab_node **) htab_find_slot (symtab_hash, &key,
-					 NO_INSERT);
-
-  if (slot)
-    return *slot;
-  return NULL;
-}
 
 /* Remove symtab NODE from the symbol table.  */
 
@@ -513,11 +432,11 @@ void
 symtab_add_to_same_comdat_group (symtab_node *new_node,
 				 symtab_node *old_node)
 {
-  gcc_assert (DECL_COMDAT_GROUP (old_node->decl));
+  gcc_assert (old_node->get_comdat_group ());
   gcc_assert (!new_node->same_comdat_group);
   gcc_assert (new_node != old_node);
 
-  DECL_COMDAT_GROUP (new_node->decl) = DECL_COMDAT_GROUP (old_node->decl);
+  new_node->set_comdat_group (old_node->get_comdat_group ());
   new_node->same_comdat_group = old_node;
   if (!old_node->same_comdat_group)
     old_node->same_comdat_group = new_node;
@@ -546,10 +465,10 @@ symtab_dissolve_same_comdat_group_list (
     {
       next = n->same_comdat_group;
       n->same_comdat_group = NULL;
-      /* Clear DECL_COMDAT_GROUP for comdat locals, since
+      /* Clear comdat_group for comdat locals, since
          make_decl_local doesn't.  */
       if (!TREE_PUBLIC (n->decl))
-	DECL_COMDAT_GROUP (n->decl) = NULL_TREE;
+	n->set_comdat_group (NULL);
       n = next;
     }
   while (n != node);
@@ -639,9 +558,9 @@ dump_symtab_base (FILE *f, symtab_node *
     fprintf (f, " dll_import");
   if (DECL_COMDAT (node->decl))
     fprintf (f, " comdat");
-  if (DECL_COMDAT_GROUP (node->decl))
+  if (node->get_comdat_group ())
     fprintf (f, " comdat_group:%s",
-	     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (node->decl)));
+	     IDENTIFIER_POINTER (node->get_comdat_group ()));
   if (DECL_ONE_ONLY (node->decl))
     fprintf (f, " one_only");
   if (DECL_SECTION_NAME (node->decl))
@@ -766,7 +685,7 @@ verify_symtab_base (symtab_node *node)
       hashed_node = symtab_get_node (node->decl);
       if (!hashed_node)
 	{
-	  error ("node not found in symtab decl hashtable");
+	  error ("node not found node->decl->decl_with_vis.symtab_node");
 	  error_found = true;
 	}
       if (hashed_node != node
@@ -775,7 +694,7 @@ verify_symtab_base (symtab_node *node)
 	      || dyn_cast <cgraph_node *> (node)->clone_of->decl
 		 != node->decl))
 	{
-	  error ("node differs from symtab decl hashtable");
+	  error ("node differs from node->decl->decl_with_vis.symtab_node");
 	  error_found = true;
 	}
     }
@@ -832,12 +751,12 @@ verify_symtab_base (symtab_node *node)
     {
       symtab_node *n = node->same_comdat_group;
 
-      if (!DECL_COMDAT_GROUP (n->decl))
+      if (!n->get_comdat_group ())
 	{
-	  error ("node is in same_comdat_group list but has no DECL_COMDAT_GROUP");
+	  error ("node is in same_comdat_group list but has no comdat_group");
 	  error_found = true;
 	}
-      if (DECL_COMDAT_GROUP (n->decl) != DECL_COMDAT_GROUP (node->same_comdat_group->decl))
+      if (n->get_comdat_group () != node->get_comdat_group ())
 	{
 	  error ("same_comdat_group list across different groups");
 	  error_found = true;
@@ -950,7 +869,7 @@ symtab_make_decl_local (tree decl)
 {
   rtx rtl, symbol;
 
-  /* Avoid clearing DECL_COMDAT_GROUP on comdat-local decls.  */
+  /* Avoid clearing comdat_groups on comdat-local decls.  */
   if (TREE_PUBLIC (decl) == 0)
     return;
 
@@ -958,12 +877,11 @@ symtab_make_decl_local (tree decl)
     DECL_COMMON (decl) = 0;
   else gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
 
-  if (DECL_COMDAT_GROUP (decl) || DECL_COMDAT (decl))
+  if (DECL_COMDAT (decl))
     {
       DECL_SECTION_NAME (decl) = 0;
       DECL_COMDAT (decl) = 0;
     }
-  DECL_COMDAT_GROUP (decl) = 0;
   DECL_WEAK (decl) = 0;
   DECL_EXTERNAL (decl) = 0;
   DECL_VISIBILITY_SPECIFIED (decl) = 0;
@@ -1097,11 +1015,13 @@ fixup_same_cpp_alias_visibility (symtab_
   DECL_VIRTUAL_P (node->decl) = DECL_VIRTUAL_P (target->decl);
   if (TREE_PUBLIC (node->decl))
     {
+      tree group;
+
       DECL_EXTERNAL (node->decl) = DECL_EXTERNAL (target->decl);
       DECL_COMDAT (node->decl) = DECL_COMDAT (target->decl);
-      DECL_COMDAT_GROUP (node->decl)
-	 = DECL_COMDAT_GROUP (target->decl);
-      if (DECL_COMDAT_GROUP (target->decl)
+      group = target->get_comdat_group ();
+      node->set_comdat_group (group);
+      if (group
 	  && !node->same_comdat_group)
 	symtab_add_to_same_comdat_group (node, target);
     }
@@ -1231,9 +1151,6 @@ symtab_nonoverwritable_alias (symtab_nod
 
   /* Update the properties.  */
   DECL_EXTERNAL (new_decl) = 0;
-  if (DECL_COMDAT_GROUP (node->decl))
-    DECL_SECTION_NAME (new_decl) = NULL;
-  DECL_COMDAT_GROUP (new_decl) = 0;
   TREE_PUBLIC (new_decl) = 0;
   DECL_COMDAT (new_decl) = 0;
   DECL_WEAK (new_decl) = 0;
@@ -1246,8 +1163,7 @@ symtab_nonoverwritable_alias (symtab_nod
 				 (new_decl, node->decl);
     }
   else
-    new_node = varpool_create_variable_alias (new_decl,
-							    node->decl);
+    new_node = varpool_create_variable_alias (new_decl, node->decl);
   symtab_resolve_alias (new_node, node);  
   gcc_assert (decl_binds_to_current_def_p (new_decl));
   return new_node;
Index: cgraphclones.c
===================================================================
--- cgraphclones.c	(revision 210887)
+++ cgraphclones.c	(working copy)
@@ -283,7 +283,6 @@ static void
 set_new_clone_decl_and_node_flags (cgraph_node *new_node)
 {
   DECL_EXTERNAL (new_node->decl) = 0;
-  DECL_COMDAT_GROUP (new_node->decl) = 0;
   TREE_PUBLIC (new_node->decl) = 0;
   DECL_COMDAT (new_node->decl) = 0;
   DECL_WEAK (new_node->decl) = 0;
@@ -558,7 +557,7 @@ cgraph_create_virtual_clone (struct cgra
      that is not weak also.
      ??? We cannot use COMDAT linkage because there is no
      ABI support for this.  */
-  if (DECL_COMDAT_GROUP (old_decl))
+  if (old_node->get_comdat_group ())
     DECL_SECTION_NAME (new_node->decl) = NULL;
   set_new_clone_decl_and_node_flags (new_node);
   new_node->clone.tree_map = tree_map;
Index: ipa.c
===================================================================
--- ipa.c	(revision 210887)
+++ ipa.c	(working copy)
@@ -1021,13 +1021,13 @@ update_visibility_by_resolution_info (sy
     for (symtab_node *next = node->same_comdat_group;
 	 next != node; next = next->same_comdat_group)
       {
-	DECL_COMDAT_GROUP (next->decl) = NULL;
+	next->set_comdat_group (NULL);
 	DECL_WEAK (next->decl) = false;
 	if (next->externally_visible
 	    && !define)
 	  DECL_EXTERNAL (next->decl) = true;
       }
-  DECL_COMDAT_GROUP (node->decl) = NULL;
+  node->set_comdat_group (NULL);
   DECL_WEAK (node->decl) = false;
   if (!define)
     DECL_EXTERNAL (node->decl) = true;
@@ -1163,8 +1163,7 @@ function_and_variable_visibility (bool w
 	    {
 	      gcc_checking_assert (DECL_COMDAT (node->decl)
 				   == DECL_COMDAT (decl_node->decl));
-	      gcc_checking_assert (DECL_COMDAT_GROUP (node->decl)
-				   == DECL_COMDAT_GROUP (decl_node->decl));
+	      gcc_checking_assert (symtab_in_same_comdat_p (node, decl_node));
 	      gcc_checking_assert (node->same_comdat_group);
 	    }
 	  node->forced_by_abi = decl_node->forced_by_abi;
Index: bb-reorder.c
===================================================================
--- bb-reorder.c	(revision 210887)
+++ bb-reorder.c	(working copy)
@@ -99,6 +99,7 @@
 #include "tree-pass.h"
 #include "df.h"
 #include "bb-reorder.h"
+#include "cgraph.h"
 #include "except.h"
 
 /* The number of rounds.  In most cases there will only be 4 rounds, but
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 210887)
+++ lto-streamer-out.c	(working copy)
@@ -535,7 +535,6 @@ DFS_write_tree_body (struct output_block
       if (DECL_ASSEMBLER_NAME_SET_P (expr))
 	DFS_follow_tree_edge (DECL_ASSEMBLER_NAME (expr));
       DFS_follow_tree_edge (DECL_SECTION_NAME (expr));
-      DFS_follow_tree_edge (DECL_COMDAT_GROUP (expr));
     }
 
   if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL))
@@ -974,7 +973,6 @@ hash_tree (struct streamer_tree_cache_d
       if (DECL_ASSEMBLER_NAME_SET_P (t))
 	visit (DECL_ASSEMBLER_NAME (t));
       visit (DECL_SECTION_NAME (t));
-      visit (DECL_COMDAT_GROUP (t));
     }
 
   if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL))
Index: ipa-comdats.c
===================================================================
--- ipa-comdats.c	(revision 210887)
+++ ipa-comdats.c	(working copy)
@@ -202,8 +202,8 @@ set_comdat_group (symtab_node *symbol,
 {
   symtab_node *head = (symtab_node *)head_p;
 
-  gcc_assert (!DECL_COMDAT_GROUP (symbol->decl));
-  DECL_COMDAT_GROUP (symbol->decl) = DECL_COMDAT_GROUP (head->decl);
+  gcc_assert (!symbol->get_comdat_group ());
+  symbol->set_comdat_group (head->get_comdat_group ());
   symtab_add_to_same_comdat_group (symbol, head);
   return false;
 }
@@ -218,6 +218,7 @@ ipa_comdats (void)
   symtab_node *symbol;
   bool comdat_group_seen = false;
   symtab_node *first = (symtab_node *) (void *) 1;
+  tree group;
 
   /* Start the dataflow by assigning comdat group to symbols that are in comdat
      groups already.  All other externally visible symbols must stay, we use
@@ -226,10 +227,10 @@ ipa_comdats (void)
   FOR_EACH_DEFINED_SYMBOL (symbol)
     if (!symtab_real_symbol_p (symbol))
       ;
-    else if (DECL_COMDAT_GROUP (symbol->decl))
+    else if ((group = symbol->get_comdat_group ()) != NULL)
       {
-        *map.insert (symbol) = DECL_COMDAT_GROUP (symbol->decl);
-        *comdat_head_map.insert (DECL_COMDAT_GROUP (symbol->decl)) = symbol;
+        *map.insert (symbol) = group;
+        *comdat_head_map.insert (group) = symbol;
 	comdat_group_seen = true;
 
 	/* Mark the symbol so we won't waste time visiting it for dataflow.  */
@@ -313,7 +314,7 @@ ipa_comdats (void)
   FOR_EACH_DEFINED_SYMBOL (symbol)
     {
       symbol->aux = NULL; 
-      if (!DECL_COMDAT_GROUP (symbol->decl)
+      if (!symbol->get_comdat_group ()
 	  && !symbol->alias
 	  && symtab_real_symbol_p (symbol))
 	{
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 210887)
+++ cgraph.c	(working copy)
@@ -565,7 +565,7 @@ cgraph_get_create_node (tree decl)
       first_clone->clone_of = node;
       node->clones = first_clone;
       symtab_prevail_in_asm_name_hash (node);
-      symtab_insert_node_to_hashtable (node);
+      node->decl->decl_with_vis.symtab_node = node;
       if (dump_file)
 	fprintf (dump_file, "Introduced new external node "
 		 "(%s/%i) and turned into root of the clone tree.\n",
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 210887)
+++ cgraph.h	(working copy)
@@ -141,6 +141,18 @@ public:
   /* Circular list of nodes in the same comdat group if non-NULL.  */
   symtab_node *same_comdat_group;
 
+  /* Return comdat group.  */
+  tree get_comdat_group ()
+    {
+      return comdat_group_;
+    }
+
+  /* Set comdat group.  */
+  void set_comdat_group (tree group)
+    {
+      comdat_group_ = group;
+    }
+
   /* Vectors of referring and referenced entities.  */
   struct ipa_ref_list ref_list;
 
@@ -153,6 +165,9 @@ public:
   struct lto_file_decl_data * lto_file_data;
 
   PTR GTY ((skip)) aux;
+
+  /* Comdat group the symbol is in.  Can be private if GGC allowed that.  */
+  tree comdat_group_;
 };
 
 enum availability
@@ -727,9 +742,7 @@ void symtab_register_node (symtab_node *
 void symtab_unregister_node (symtab_node *);
 void symtab_remove_from_same_comdat_group (symtab_node *);
 void symtab_remove_node (symtab_node *);
-symtab_node *symtab_get_node (const_tree);
 symtab_node *symtab_node_for_asm (const_tree asmname);
-void symtab_insert_node_to_hashtable (symtab_node *);
 void symtab_add_to_same_comdat_group (symtab_node *, symtab_node *);
 void symtab_dissolve_same_comdat_group_list (symtab_node *node);
 void dump_symtab (FILE *);
@@ -989,6 +1002,28 @@ void varpool_remove_initializer (varpool
 /* In cgraph.c */
 extern void change_decl_assembler_name (tree, tree);
 
+/* Return symbol table node associated with DECL, if any,
+   and NULL otherwise.  */
+
+static inline symtab_node *
+symtab_get_node (const_tree decl)
+{
+#ifdef ENABLE_CHECKING
+  /* Check that we are called for sane type of object - functions
+     and static or external variables.  */
+  gcc_checking_assert (TREE_CODE (decl) == FUNCTION_DECL
+		       || (TREE_CODE (decl) == VAR_DECL
+			   && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)
+			       || in_lto_p)));
+  /* Check that the mapping is sane - perhaps this check can go away,
+     but at the moment frontends tends to corrupt the mapping by calling
+     memcpy/memset on the tree nodes.  */
+  gcc_checking_assert (!decl->decl_with_vis.symtab_node
+		       || decl->decl_with_vis.symtab_node->decl == decl);
+#endif
+  return decl->decl_with_vis.symtab_node;
+}
+
 /* Return callgraph node for given symbol and check it is a function. */
 static inline struct cgraph_node *
 cgraph (symtab_node *node)
@@ -1548,7 +1583,7 @@ static inline bool
 symtab_can_be_discarded (symtab_node *node)
 {
   return (DECL_EXTERNAL (node->decl)
-	  || (DECL_ONE_ONLY (node->decl)
+	  || (node->get_comdat_group ()
 	      && node->resolution != LDPR_PREVAILING_DEF
 	      && node->resolution != LDPR_PREVAILING_DEF_IRONLY
 	      && node->resolution != LDPR_PREVAILING_DEF_IRONLY_EXP));
@@ -1580,6 +1615,16 @@ symtab_in_same_comdat_p (symtab_node *on
 	two = cn->global.inlined_to;
     }
 
-  return DECL_COMDAT_GROUP (one->decl) == DECL_COMDAT_GROUP (two->decl);
+  return one->get_comdat_group () == two->get_comdat_group ();
+}
+
+/* Return comdat group of DECL.  */
+static inline tree
+decl_comdat_group (tree node)
+{
+  struct symtab_node *snode = symtab_get_node (node);
+  if (!snode)
+    return NULL;
+  return snode->get_comdat_group ();
 }
 #endif  /* GCC_CGRAPH_H  */
Index: tree-streamer-in.c
===================================================================
--- tree-streamer-in.c	(revision 210887)
+++ tree-streamer-in.c	(working copy)
@@ -760,7 +760,6 @@ lto_input_ts_decl_with_vis_tree_pointers
     }
 
   DECL_SECTION_NAME (expr) = stream_read_tree (ib, data_in);
-  DECL_COMDAT_GROUP (expr) = stream_read_tree (ib, data_in);
 }
 
 
Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 210887)
+++ lto-cgraph.c	(working copy)
@@ -395,6 +395,8 @@ lto_output_node (struct lto_simple_outpu
   ipa_opt_pass_d *pass;
   int i;
   bool alias_p;
+  const char *comdat;
+  tree group;
 
   boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node);
 
@@ -478,15 +480,24 @@ lto_output_node (struct lto_simple_outpu
       streamer_write_hwi_stream (ob->main_stream, ref);
     }
 
-  if (node->same_comdat_group && !boundary_p)
+  group = node->get_comdat_group ();
+  if (group)
+    comdat = IDENTIFIER_POINTER (group);
+  else
+    comdat = "";
+  lto_output_data_stream (ob->main_stream, comdat, strlen (comdat) + 1);
+  if (group)
     {
-      ref = lto_symtab_encoder_lookup (encoder,
-				       node->same_comdat_group);
-      gcc_assert (ref != LCC_NOT_FOUND);
+      if (node->same_comdat_group && !boundary_p)
+	{
+	  ref = lto_symtab_encoder_lookup (encoder,
+					   node->same_comdat_group);
+	  gcc_assert (ref != LCC_NOT_FOUND);
+	}
+      else
+	ref = LCC_NOT_FOUND;
+      streamer_write_hwi_stream (ob->main_stream, ref);
     }
-  else
-    ref = LCC_NOT_FOUND;
-  streamer_write_hwi_stream (ob->main_stream, ref);
 
   streamer_write_hwi_stream (ob->main_stream, node->tp_first_run);
 
@@ -551,6 +562,8 @@ lto_output_varpool_node (struct lto_simp
   struct bitpack_d bp;
   int ref;
   bool alias_p;
+  const char *comdat;
+  tree group;
 
   streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
 		       LTO_symtab_variable);
@@ -587,15 +600,24 @@ lto_output_varpool_node (struct lto_simp
 	  /* in_other_partition.  */
     }
   streamer_write_bitpack (&bp);
-  if (node->same_comdat_group && !boundary_p)
+  group = node->get_comdat_group ();
+  if (group)
+    comdat = IDENTIFIER_POINTER (group);
+  else
+    comdat = "";
+  lto_output_data_stream (ob->main_stream, comdat, strlen (comdat) + 1);
+  if (group)
     {
-      ref = lto_symtab_encoder_lookup (encoder,
-				       node->same_comdat_group);
-      gcc_assert (ref != LCC_NOT_FOUND);
+      if (node->same_comdat_group && !boundary_p)
+	{
+	  ref = lto_symtab_encoder_lookup (encoder,
+					   node->same_comdat_group);
+	  gcc_assert (ref != LCC_NOT_FOUND);
+	}
+      else
+	ref = LCC_NOT_FOUND;
+      streamer_write_hwi_stream (ob->main_stream, ref);
     }
-  else
-    ref = LCC_NOT_FOUND;
-  streamer_write_hwi_stream (ob->main_stream, ref);
   streamer_write_enum (ob->main_stream, ld_plugin_symbol_resolution,
 		       LDPR_NUM_KNOWN, node->resolution);
 }
@@ -946,6 +968,26 @@ output_symtab (void)
   output_refs (encoder);
 }
 
+/* Return COMDAT_GROUP encoded in IB as a plain string.  */
+
+static tree
+read_comdat_group (struct lto_input_block *ib)
+{
+  unsigned int len = strnlen (ib->data + ib->p, ib->len - ib->p - 1);
+  tree group;
+
+  if (ib->data[ib->p + len])
+    lto_section_overrun (ib);
+  if (!len)
+    {
+      ib->p++;
+      return NULL;
+    }
+  group = get_identifier (ib->data + ib->p);
+  ib->p += len;
+  return group;
+}
+
 /* Overwrite the information in NODE based on FILE_DATA, TAG, FLAGS,
    STACK_SIZE, SELF_TIME and SELF_SIZE.  This is called either to initialize
    NODE or to replace the values in it, for instance because the first
@@ -1034,6 +1076,7 @@ input_node (struct lto_file_decl_data *f
   int clone_ref;
   int order;
   int i, count;
+  tree group;
 
   order = streamer_read_hwi (ib) + order_base;
   clone_ref = streamer_read_hwi (ib);
@@ -1079,7 +1122,9 @@ input_node (struct lto_file_decl_data *f
   if (tag == LTO_symtab_analyzed_node)
     ref = streamer_read_hwi (ib);
 
-  ref2 = streamer_read_hwi (ib);
+  group = read_comdat_group (ib);
+  if (group)
+    ref2 = streamer_read_hwi (ib);
 
   /* Make sure that we have not read this node before.  Nodes that
      have already been read will have their tag stored in the 'aux'
@@ -1098,8 +1143,14 @@ input_node (struct lto_file_decl_data *f
   /* Store a reference for now, and fix up later to be a pointer.  */
   node->global.inlined_to = (cgraph_node_ptr) (intptr_t) ref;
 
-  /* Store a reference for now, and fix up later to be a pointer.  */
-  node->same_comdat_group = (symtab_node *) (intptr_t) ref2;
+  if (group)
+    {
+      node->set_comdat_group (group);
+      /* Store a reference for now, and fix up later to be a pointer.  */
+      node->same_comdat_group = (symtab_node *) (intptr_t) ref2;
+    }
+  else
+    node->same_comdat_group = (symtab_node *) (intptr_t) LCC_NOT_FOUND;
 
   if (node->thunk.thunk_p)
     {
@@ -1131,6 +1182,7 @@ input_varpool_node (struct lto_file_decl
   struct bitpack_d bp;
   int ref = LCC_NOT_FOUND;
   int order;
+  tree group;
 
   order = streamer_read_hwi (ib) + order_base;
   decl_index = streamer_read_uhwi (ib);
@@ -1168,9 +1220,16 @@ input_varpool_node (struct lto_file_decl
     }
   if (node->alias && !node->analyzed && node->weakref)
     node->alias_target = get_alias_symbol (node->decl);
-  ref = streamer_read_hwi (ib);
-  /* Store a reference for now, and fix up later to be a pointer.  */
-  node->same_comdat_group = (symtab_node *) (intptr_t) ref;
+  group = read_comdat_group (ib);
+  if (group)
+    {
+      node->set_comdat_group (group);
+      ref = streamer_read_hwi (ib);
+      /* Store a reference for now, and fix up later to be a pointer.  */
+      node->same_comdat_group = (symtab_node *) (intptr_t) ref;
+    }
+  else
+    node->same_comdat_group = (symtab_node *) (intptr_t) LCC_NOT_FOUND;
   node->resolution = streamer_read_enum (ib, ld_plugin_symbol_resolution,
 					        LDPR_NUM_KNOWN);
   gcc_assert (flag_ltrans
Index: c/c-decl.c
===================================================================
--- c/c-decl.c	(revision 210887)
+++ c/c-decl.c	(working copy)
@@ -2507,8 +2507,18 @@ merge_decls (tree newdecl, tree olddecl,
     switch (TREE_CODE (olddecl))
       {
       case FUNCTION_DECL:
-      case FIELD_DECL:
       case VAR_DECL:
+	{
+	  struct symtab_node *snode = olddecl->decl_with_vis.symtab_node;
+
+	  memcpy ((char *) olddecl + sizeof (struct tree_decl_common),
+		  (char *) newdecl + sizeof (struct tree_decl_common),
+		  tree_code_size (TREE_CODE (olddecl)) - sizeof (struct tree_decl_common));
+	  olddecl->decl_with_vis.symtab_node = snode;
+	  break;
+	}
+
+      case FIELD_DECL:
       case PARM_DECL:
       case LABEL_DECL:
       case RESULT_DECL:
@@ -2561,6 +2571,9 @@ duplicate_decls (tree newdecl, tree oldd
     }
 
   merge_decls (newdecl, olddecl, newtype, oldtype);
+
+  /* The NEWDECL will no longer be needed.  */
+  ggc_free (newdecl);
   return true;
 }
 
Index: trans-mem.c
===================================================================
--- trans-mem.c	(revision 210887)
+++ trans-mem.c	(working copy)
@@ -4852,7 +4852,7 @@ ipa_tm_create_version_alias (struct cgra
 
   /* Perform the same remapping to the comdat group.  */
   if (DECL_ONE_ONLY (new_decl))
-    DECL_COMDAT_GROUP (new_decl) = tm_mangle (DECL_COMDAT_GROUP (old_decl));
+    varpool_get_node (new_decl)->set_comdat_group (tm_mangle (DECL_COMDAT_GROUP (old_decl)));
 
   new_node = cgraph_same_body_alias (NULL, new_decl, info->new_decl);
   new_node->tm_clone = true;
@@ -4892,7 +4892,7 @@ ipa_tm_create_version (struct cgraph_nod
 
   /* Perform the same remapping to the comdat group.  */
   if (DECL_ONE_ONLY (new_decl))
-    DECL_COMDAT_GROUP (new_decl) = tm_mangle (DECL_COMDAT_GROUP (old_decl));
+    varpool_get_node (new_decl)->set_comdat_group (tm_mangle (DECL_COMDAT_GROUP (old_decl)));
 
   gcc_assert (!old_node->ipa_transforms_to_apply.exists ());
   new_node = cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL);
Index: config/mips/mips.c
===================================================================
--- config/mips/mips.c	(revision 210887)
+++ config/mips/mips.c	(working copy)
@@ -6275,7 +6275,7 @@ mips_start_unique_function (const char *
   TREE_PUBLIC (decl) = 1;
   TREE_STATIC (decl) = 1;
 
-  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
+  cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
 
   targetm.asm_out.unique_section (decl, 0);
   switch_to_section (get_named_section (decl, NULL, 0));
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 210887)
+++ config/i386/i386.c	(working copy)
@@ -9183,7 +9183,7 @@ ix86_code_end (void)
 #endif
       if (USE_HIDDEN_LINKONCE)
 	{
-	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
+	  cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
 
 	  targetm.asm_out.unique_section (decl, 0);
 	  switch_to_section (get_named_section (decl, NULL, 0));
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c	(revision 210887)
+++ config/rs6000/rs6000.c	(working copy)
@@ -32331,7 +32331,7 @@ rs6000_code_end (void)
 #if RS6000_WEAK
   if (USE_HIDDEN_LINKONCE)
     {
-      DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
+      cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
       targetm.asm_out.unique_section (decl, 0);
       switch_to_section (get_named_section (decl, NULL, 0));
       DECL_WEAK (decl) = 1;
Index: lto/lto-symtab.c
===================================================================
--- lto/lto-symtab.c	(revision 210887)
+++ lto/lto-symtab.c	(working copy)
@@ -644,7 +644,7 @@ lto_symtab_merge_symbols (void)
 		       && cnode2 != cnode)
 		cgraph_remove_node (cnode2);
 
-	      symtab_insert_node_to_hashtable (node);
+	      node->decl->decl_with_vis.symtab_node = node;
 	    }
 	}
     }
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 210887)
+++ lto/lto.c	(working copy)
@@ -1530,7 +1530,6 @@ compare_tree_sccs_1 (tree t1, tree t2, t
 	compare_tree_edges (DECL_ASSEMBLER_NAME (t1),
 			    DECL_ASSEMBLER_NAME (t2));
       compare_tree_edges (DECL_SECTION_NAME (t1), DECL_SECTION_NAME (t2));
-      compare_tree_edges (DECL_COMDAT_GROUP (t1), DECL_COMDAT_GROUP (t2));
     }
 
   if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL))
Index: tree-streamer-out.c
===================================================================
--- tree-streamer-out.c	(revision 210887)
+++ tree-streamer-out.c	(working copy)
@@ -662,7 +662,6 @@ write_ts_decl_with_vis_tree_pointers (st
     stream_write_tree (ob, NULL_TREE, false);
 
   stream_write_tree (ob, DECL_SECTION_NAME (expr), ref_p);
-  stream_write_tree (ob, DECL_COMDAT_GROUP (expr), ref_p);
 }
 
 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/7] Break up determine_known_aggregate_parts
  2014-05-21 13:31 ` [PATCH 4/7] Break up determine_known_aggregate_parts Martin Jambor
@ 2014-05-26  0:54   ` Jan Hubicka
  2014-06-06 13:28     ` Martin Jambor
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Hubicka @ 2014-05-26  0:54 UTC (permalink / raw)
  To: Martin Jambor; +Cc: GCC Patches, Jan Hubicka

> Hi,
> 
> the main purpose of this patch is to break up function
> determine_known_aggregate_parts so that the next patch can use the
> standalone bits and to make the changes slightly easier for review.
> 
> However, this patch also removes some of the offset checks which Honza
> correctly thought superfluous and even possibly filtering out useful
> information.
>  
> Bootstrapped and tested and LTO-bootstrapped on x86_64-linux.
> OK for trunk after the preceding patches get in?
> 
> Thanks,
> 
> Martin
> 
> 
> 2014-02-19  Martin Jambor  <mjambor@suse.cz>
> 
> 	* ipa-prop.c (get_place_in_agg_contents_list): New function.
> 	(build_agg_jump_func_from_list): Likewise.
> 	(determine_known_aggregate_parts): Renamed to
> 	determine_locally_known_aggregate_parts.  Moved some functionality
> 	to the two functions above, removed bound checks.

This is OK.
Does it depend on part 4?

Honza

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-22 13:34       ` Richard Biener
  2014-05-22 15:24         ` Jan Hubicka
@ 2014-05-26  1:01         ` Jan Hubicka
  1 sibling, 0 replies; 29+ messages in thread
From: Jan Hubicka @ 2014-05-26  1:01 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jan Hubicka

> On Thu, May 22, 2014 at 2:49 PM, Martin Jambor <mjambor@suse.cz> wrote:
> > Hi,
> >
> > On Wed, May 21, 2014 at 04:27:32PM +0200, Richard Biener wrote:
> >> On Wed, May 21, 2014 at 3:16 PM, Martin Jambor <mjambor@suse.cz> wrote:
> >> > Hi,
> >> >
> >> > this demonstrates how results of ipa-prop escape analysis from
> >> > previous patches can be used at a later stage of compilation by
> >> > directly returning them from gimple_call_arg_flags which currently
> >> > relies on fnspec annotations.
> >> >
> >> > Bootstrapped and tested on x86_64-linux and also passes LTO bootstrap.
> >> > I have only had a brief look at behavior of this in SPEC 2006 and for
> >> > example in astar 1.19% of invocations of gimple_call_arg_flags return
> >> > noescape where we previously never did and in calculix this increases
> >> > from 15.62% (from annotations) to 18.14%.  Noclobber flag is reported
> >> > far less often still but for example in gamess that number raises from
> >> > 5.21% to 7.66%.
> >> >
> >> > Thanks,
> >> >
> >> > Martin
> >> >
> >> >
> >> > 2014-04-30  Martin Jambor  <mjambor@suse.cz>
> >> >
> >> >         * gimple.c: Include cgraph.h.
> >> >         (gimple_call_arg_flags): Also query bitmaps in cgraph_node.
> >> >
> >> > Index: src/gcc/gimple.c
> >> > ===================================================================
> >> > --- src.orig/gcc/gimple.c
> >> > +++ src/gcc/gimple.c
> >> > @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.
> >> >  #include "demangle.h"
> >> >  #include "langhooks.h"
> >> >  #include "bitmap.h"
> >> > -
> >> > +#include "cgraph.h"
> >> >
> >> >  /* All the tuples have their operand vector (if present) at the very bottom
> >> >     of the structure.  Therefore, the offset required to find the
> >> > @@ -1349,32 +1349,50 @@ int
> >> >  gimple_call_arg_flags (const_gimple stmt, unsigned arg)
> >> >  {
> >> >    tree attr = gimple_call_fnspec (stmt);
> >> > +  int ret;
> >> >
> >> > -  if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
> >> > -    return 0;
> >> > -
> >> > -  switch (TREE_STRING_POINTER (attr)[1 + arg])
> >> > +  if (attr && 1 + arg < (unsigned) TREE_STRING_LENGTH (attr))
> >> >      {
> >> > -    case 'x':
> >> > -    case 'X':
> >> > -      return EAF_UNUSED;
> >> > -
> >> > -    case 'R':
> >> > -      return EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
> >> > -
> >> > -    case 'r':
> >> > -      return EAF_NOCLOBBER | EAF_NOESCAPE;
> >> > -
> >> > -    case 'W':
> >> > -      return EAF_DIRECT | EAF_NOESCAPE;
> >> > -
> >> > -    case 'w':
> >> > -      return EAF_NOESCAPE;
> >> > +      switch (TREE_STRING_POINTER (attr)[1 + arg])
> >> > +       {
> >> > +       case 'x':
> >> > +       case 'X':
> >> > +         ret = EAF_UNUSED;
> >> > +         break;
> >> > +       case 'R':
> >> > +         ret = EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE;
> >> > +         break;
> >> > +       case 'r':
> >> > +         ret = EAF_NOCLOBBER | EAF_NOESCAPE;
> >> > +         break;
> >> > +       case 'W':
> >> > +         ret = EAF_DIRECT | EAF_NOESCAPE;
> >> > +         break;
> >> > +       case 'w':
> >> > +         ret = EAF_NOESCAPE;
> >> > +         break;
> >> > +       case '.':
> >> > +       default:
> >> > +         ret = 0;
> >> > +       }
> >> > +    }
> >> > +  else
> >> > +    ret = 0;
> >> >
> >> > -    case '.':
> >> > -    default:
> >> > -      return 0;
> >> > +  tree callee_decl = gimple_call_fndecl (stmt);
> >> > +  if (callee_decl)
> >> > +    {
> >> > +      cgraph_node *callee_node = cgraph_get_node (callee_decl);
> >> > +      if (callee_node)
> >> > +       {
> >> > +         if (cgraph_param_noescape_p (callee_node, arg))
> >> > +           ret |= EAF_NOESCAPE;
> >> > +         if (cgraph_param_noclobber_p (callee_node, arg))
> >> > +           ret |= EAF_NOCLOBBER;
> >>
> >> That's quite expensive.  I guess we need a better way to store
> >> those?
> >
> > if we want to avoid the cgraph_node lookup, then I think we need to
> > store this information in the decl or struct function.  That is
> > certainly possible and might even be more appropriate.
> 
> Can we?  If the body is not readily available we only have decl and
> cgraph-node, not struct function.

Yep, indeed I think cgraph_node is good place to home this so it can
work with partitioning.   With the weekend changes to have direct pointer,
I guess the performance problems are gone.

My plan is to add a template that makes it easy to annotate symbol nodes
either by array (as we do by hand now in ipa-inline/ipa-prop and other places)
or by hashtable (for sparse data, like thunk information, or comdat information)
and move some of stuff we currently have in cgraph there
(rtl info, thunks, aliases, comdats, nested function tree can all go). For
performance critical stuff I have no problem adding it into cgraph nodes
themselves.

If someone wants to beat me and write GGC friendly template for this,
I would very welcome it.
> 
> I suppose we could exchange the struct function pointer in
> tree_function_decl for a cgraph_node pointer and put
> the struct function pointer into the cgraph_node.
> 
> Of course that may have impacts on FEs who might create
> struct function before creating a cgraph node.  But at least
> it would avoid enlarging FUNCTION_DECL.
> 
> In the end most of the tree_decl_with_vis stuff should move over to symtab
> and var-decls should get a varpool_node pointer as well.
> 
> Back to the call flags stuff - I also meant the representation of the
> "fn spec" attribute.  Rather than parsing that again and again move
> it to a better place (which you seem to invent?) and better unified
> representation.
> 
> Can you try if removing the cgraph hash is possible with the
> struct function pointer idea?

If there are no problems, I plan to move also DECL_SECTION and do
some cleanups to my weekend change (in particular I do not like the
way it works with duplicate_decl. Perhaps we don't really need to
duplicate this info).
We can experiment with assembler name and struct function next.

Honza
> 
> Thanks,
> Richard.
> 
> > Thanks,
> >
> > Martin
> >
> >>
> >> > +       }
> >> >      }
> >> > +
> >> > +  return ret;
> >> >  }
> >> >
> >> >  /* Detects return flags for the call STMT.  */
> >> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-24  7:39                 ` Jan Hubicka
@ 2014-05-26 13:03                   ` Rainer Orth
  2014-05-27 17:51                     ` Jan Hubicka
  0 siblings, 1 reply; 29+ messages in thread
From: Rainer Orth @ 2014-05-26 13:03 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Richard Biener, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 2334 bytes --]

Jan Hubicka <hubicka@ucw.cz> writes:

>> I'm fine with enlarging tree_function_decl for now - ideally we'd push
>> stuff from it elsewhere (like target and optimization option tree nodes,
>> or most of the visibility and symbol related stuff).  Not sure why
>> tree_type_decl inherits from tree_decl_non_common (and thus
>> tree_decl_with_vis).  Probably because of the non-common parts
>> being (ab-)used by FEs.  Otherwise I'd say simply put a symtab
>> node pointer into tree_decl_with_vis ... (can we move
>> section_name and comdat_group more easily than assembler_name?)
>
> Hi,
> this patch removes comdat_group pointer and adds direct symtab pointer.  As
> expected, the change is not completely easy. The main ugliness is in C++'s version
> of duplicate_decl, which now creates a duplicated decl with a duplicated symtab node
> and needs to remove it.  Another problem is copy_node and C's duplicate_decl,
> which do memcpy on a node and thus also copy the symtab pointer, which is not
> the right thing to do.
>
> On the other hand on middle-end side several things simplify, so I think overall
> the approach works relatively well.
>
> I have bootstrapped/regtested x86_64-linux and I plan to give it more testing
> tomorrow and commit if there are no complains.  Incrementally I would like then
> to cleanup way the decl_with_vis.symtab_node pointer is maintained.  I do not
> want to allow users to tamper with it, so I did not make accessor macro for
> it, however there are more direct uses than I would like: I will need to figure
> out how to reduce those.

This patch broke Solaris bootstrap:

/vol/gcc/src/hg/trunk/local/gcc/config/sol2.c: In function 'void solaris_elf_asm_comdat_section(const char*, unsigned int, tree)':
/vol/gcc/src/hg/trunk/local/gcc/config/sol2.c:213:17: error: 'decl_comdat_group' was not declared in this scope
/vol/gcc/src/hg/trunk/local/gcc/config/sol2.c: In function 'int solaris_define_comdat_signature(comdat_entry**, void*)':
/vol/gcc/src/hg/trunk/local/gcc/config/sol2.c:267:12: error: 'decl_comdat_group' was not declared in this scope

The following snippet allows a sparc-sun-solaris2.11 bootstrap to go
along further, only to break again later in libjava for what seems to be
unrelated reasons.

2014-05-26  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>

	* config/sol2.c: Include cgraph.h.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: sol2.patch --]
[-- Type: text/x-patch, Size: 332 bytes --]

diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
--- a/gcc/config/sol2.c
+++ b/gcc/config/sol2.c
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  
 #include "diagnostic-core.h"
 #include "ggc.h"
 #include "hash-table.h"
+#include "cgraph.h"
 
 tree solaris_pending_aligns, solaris_pending_inits, solaris_pending_finis;
 

[-- Attachment #3: Type: text/plain, Size: 553 bytes --]


I'm not sure if this is the right approach, though, using
get_comdat_group seems to be preferred!?

ISTM that other ports might have similar problems: darwin.c, mep/mep.c,
and mips/mips.c all use DECL_COMDAT_GROUP without including cgraph.h.

> 	* mips.c (mips_start_unique_function): Likewise.
> 	(ix86_code_end): Likewise.
> 	(rs6000_code_end): Likewise.

The last two entries lack the file names.

	Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-26 13:03                   ` Rainer Orth
@ 2014-05-27 17:51                     ` Jan Hubicka
  2014-05-27 18:16                       ` Rainer Orth
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Hubicka @ 2014-05-27 17:51 UTC (permalink / raw)
  To: Rainer Orth; +Cc: Jan Hubicka, Richard Biener, GCC Patches

> 
> This patch broke Solaris bootstrap:
> 
> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c: In function 'void solaris_elf_asm_comdat_section(const char*, unsigned int, tree)':
> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c:213:17: error: 'decl_comdat_group' was not declared in this scope
> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c: In function 'int solaris_define_comdat_signature(comdat_entry**, void*)':
> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c:267:12: error: 'decl_comdat_group' was not declared in this scope
> 
> The following snippet allows a sparc-sun-solaris2.11 bootstrap to go
> along further, only to break again later in libjava for what seems to be
> unrelated reasons.
> 
> 2014-05-26  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
> 
> 	* config/sol2.c: Include cgraph.h.

I moved decl_comdat_group offline that should fix the problem. Does it work now?

Honza

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags
  2014-05-27 17:51                     ` Jan Hubicka
@ 2014-05-27 18:16                       ` Rainer Orth
  0 siblings, 0 replies; 29+ messages in thread
From: Rainer Orth @ 2014-05-27 18:16 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Richard Biener, GCC Patches

Jan Hubicka <hubicka@ucw.cz> writes:

>> This patch broke Solaris bootstrap:
>> 
>> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c: In function 'void solaris_elf_asm_comdat_section(const char*, unsigned int, tree)':
>> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c:213:17: error: 'decl_comdat_group' was not declared in this scope
>> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c: In function 'int solaris_define_comdat_signature(comdat_entry**, void*)':
>> /vol/gcc/src/hg/trunk/local/gcc/config/sol2.c:267:12: error: 'decl_comdat_group' was not declared in this scope
>> 
>> The following snippet allows a sparc-sun-solaris2.11 bootstrap to go
>> along further, only to break again later in libjava for what seems to be
>> unrelated reasons.
>> 
>> 2014-05-26  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
>> 
>> 	* config/sol2.c: Include cgraph.h.
>
> I moved decl_comdat_group offline that should fix the problem. Does it work now?

It does indeed.

Thanks.
        Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/7] Break up determine_known_aggregate_parts
  2014-05-26  0:54   ` Jan Hubicka
@ 2014-06-06 13:28     ` Martin Jambor
  0 siblings, 0 replies; 29+ messages in thread
From: Martin Jambor @ 2014-06-06 13:28 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: GCC Patches

On Mon, May 26, 2014 at 02:54:17AM +0200, Jan Hubicka wrote:
> > Hi,
> > 
> > the main purpose of this patch is to break up function
> > determine_known_aggregate_parts so that the next patch can use the
> > standalone bits and to make the changes slightly easier for review.
> > 
> > However, this patch also removes some of the offset checks which Honza
> > correctly thought superfluous and even possibly filtering out useful
> > information.
> >  
> > Bootstrapped and tested and LTO-bootstrapped on x86_64-linux.
> > OK for trunk after the preceding patches get in?
> > 
> > Thanks,
> > 
> > Martin
> > 
> > 
> > 2014-02-19  Martin Jambor  <mjambor@suse.cz>
> > 
> > 	* ipa-prop.c (get_place_in_agg_contents_list): New function.
> > 	(build_agg_jump_func_from_list): Likewise.
> > 	(determine_known_aggregate_parts): Renamed to
> > 	determine_locally_known_aggregate_parts.  Moved some functionality
> > 	to the two functions above, removed bound checks.
> 
> This is OK.
> Does it depend on part 4?
> 

No, it does not.  I have re-tested the new order of the patches and
committed this one.

Thanks,

Martin

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/7] Add missing documentation of four IPA-CP params
  2014-05-21 13:31 ` [PATCH 1/7] Add missing documentation of four IPA-CP params Martin Jambor
  2014-05-21 15:58   ` Jeff Law
@ 2014-06-10 12:13   ` Gerald Pfeifer
  2014-06-29 23:07     ` Gerald Pfeifer
  1 sibling, 1 reply; 29+ messages in thread
From: Gerald Pfeifer @ 2014-06-10 12:13 UTC (permalink / raw)
  To: Martin Jambor; +Cc: gcc-patches, Jan Hubicka

On Wed, 21 May 2014, Martin Jambor wrote:
> +@item ipa-cp-loop-hint-bonus
> +When IPA-CP determines that a cloning candidate would make the number
> +of iterations of a loop known, it adds a bonus of
                                            ^^^^^
> +@option{ipa-cp-loop-hint-bonus} bonus to the profitability score of
> +the candidate.                  ^^^^^

That's a bit much bonus in there. :-)

> +@item ipa-cp-array-index-hint-bonus
> +When IPA-CP determines that a cloning candidate would make the index of
> +an array access known, it adds a bonus of
> +@option{ipa-cp-array-index-hint-bonus} bonus to the profitability
> +score of the candidate.

In here, too.

Gerald

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/7] Add missing documentation of four IPA-CP params
  2014-06-10 12:13   ` Gerald Pfeifer
@ 2014-06-29 23:07     ` Gerald Pfeifer
  2014-07-15 12:01       ` Martin Jambor
  0 siblings, 1 reply; 29+ messages in thread
From: Gerald Pfeifer @ 2014-06-29 23:07 UTC (permalink / raw)
  To: Martin Jambor; +Cc: gcc-patches, Jan Hubicka

On Tue, 10 Jun 2014, Gerald Pfeifer wrote:
> On Wed, 21 May 2014, Martin Jambor wrote:
>> +@item ipa-cp-loop-hint-bonus
>> +When IPA-CP determines that a cloning candidate would make the number
>> +of iterations of a loop known, it adds a bonus of
>                                            ^^^^^
>> +@option{ipa-cp-loop-hint-bonus} bonus to the profitability score of
>> +the candidate.                  ^^^^^
> 
> That's a bit much bonus in there. :-)
> 
>> +@item ipa-cp-array-index-hint-bonus
>> +When IPA-CP determines that a cloning candidate would make the index of
>> +an array access known, it adds a bonus of
>> +@option{ipa-cp-array-index-hint-bonus} bonus to the profitability
>> +score of the candidate.
> 
> In here, too.

Hmm, I expected you'd fix this?

Apparently not, so I just applied the following.

2014-06-30  Gerald Pfeifer  <gerald@pfeifer.com>

	* doc/invoke.texi (Optimize Options): Fix descriptions of
	ipa-cp-loop-hint-bonus and ipa-cp-array-index-hint-bonus.

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 212127)
+++ doc/invoke.texi	(working copy)
@@ -10163,13 +10163,13 @@
 @item ipa-cp-loop-hint-bonus
 When IPA-CP determines that a cloning candidate would make the number
 of iterations of a loop known, it adds a bonus of
-@option{ipa-cp-loop-hint-bonus} bonus to the profitability score of
+@option{ipa-cp-loop-hint-bonus} to the profitability score of
 the candidate.
 
 @item ipa-cp-array-index-hint-bonus
 When IPA-CP determines that a cloning candidate would make the index of
 an array access known, it adds a bonus of
-@option{ipa-cp-array-index-hint-bonus} bonus to the profitability
+@option{ipa-cp-array-index-hint-bonus} to the profitability
 score of the candidate.
 
 @item ipa-max-aa-steps

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/7] Add missing documentation of four IPA-CP params
  2014-06-29 23:07     ` Gerald Pfeifer
@ 2014-07-15 12:01       ` Martin Jambor
  0 siblings, 0 replies; 29+ messages in thread
From: Martin Jambor @ 2014-07-15 12:01 UTC (permalink / raw)
  To: Gerald Pfeifer; +Cc: gcc-patches, Jan Hubicka

On Mon, Jun 30, 2014 at 01:06:55AM +0200, Gerald Pfeifer wrote:
> On Tue, 10 Jun 2014, Gerald Pfeifer wrote:
> > On Wed, 21 May 2014, Martin Jambor wrote:
> >> +@item ipa-cp-loop-hint-bonus
> >> +When IPA-CP determines that a cloning candidate would make the number
> >> +of iterations of a loop known, it adds a bonus of
> >                                            ^^^^^
> >> +@option{ipa-cp-loop-hint-bonus} bonus to the profitability score of
> >> +the candidate.                  ^^^^^
> > 
> > That's a bit much bonus in there. :-)
> > 
> >> +@item ipa-cp-array-index-hint-bonus
> >> +When IPA-CP determines that a cloning candidate would make the index of
> >> +an array access known, it adds a bonus of
> >> +@option{ipa-cp-array-index-hint-bonus} bonus to the profitability
> >> +score of the candidate.
> > 
> > In here, too.
> 
> Hmm, I expected you'd fix this?
> 
> Apparently not, so I just applied the following.

I'm sorry, apparently your email somehow got into the group of things
to do after I returned from vacation.  That was a mistake, I did not
mean to postpone it for that long.  Thanks for correcting it yourself,

Martin

> 
> 2014-06-30  Gerald Pfeifer  <gerald@pfeifer.com>
> 
> 	* doc/invoke.texi (Optimize Options): Fix descriptions of
> 	ipa-cp-loop-hint-bonus and ipa-cp-array-index-hint-bonus.
> 
> Index: doc/invoke.texi
> ===================================================================
> --- doc/invoke.texi	(revision 212127)
> +++ doc/invoke.texi	(working copy)
> @@ -10163,13 +10163,13 @@
>  @item ipa-cp-loop-hint-bonus
>  When IPA-CP determines that a cloning candidate would make the number
>  of iterations of a loop known, it adds a bonus of
> -@option{ipa-cp-loop-hint-bonus} bonus to the profitability score of
> +@option{ipa-cp-loop-hint-bonus} to the profitability score of
>  the candidate.
>  
>  @item ipa-cp-array-index-hint-bonus
>  When IPA-CP determines that a cloning candidate would make the index of
>  an array access known, it adds a bonus of
> -@option{ipa-cp-array-index-hint-bonus} bonus to the profitability
> +@option{ipa-cp-array-index-hint-bonus} to the profitability
>  score of the candidate.
>  
>  @item ipa-max-aa-steps

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2014-07-15 11:58 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-21 13:31 [PATCH 0/7] ipa-prop escape analysis Martin Jambor
2014-05-21 13:31 ` [PATCH 4/7] Break up determine_known_aggregate_parts Martin Jambor
2014-05-26  0:54   ` Jan Hubicka
2014-06-06 13:28     ` Martin Jambor
2014-05-21 13:31 ` [PATCH 6/7] Real aggregate contents merge and application of deltas Martin Jambor
2014-05-21 13:31 ` [PATCH 3/7] IPA-CP escape and clobber analysis Martin Jambor
2014-05-21 14:51   ` Richard Biener
2014-05-23 14:50     ` Martin Jambor
2014-05-21 13:31 ` [PATCH 2/7] Analyze BBs in DOM order in ipa-prop.c Martin Jambor
2014-05-21 13:31 ` [PATCH 1/7] Add missing documentation of four IPA-CP params Martin Jambor
2014-05-21 15:58   ` Jeff Law
2014-06-10 12:13   ` Gerald Pfeifer
2014-06-29 23:07     ` Gerald Pfeifer
2014-07-15 12:01       ` Martin Jambor
2014-05-21 13:31 ` [PATCH 7/7] Plug ipa-prop escape analysis into gimple_call_arg_flags Martin Jambor
2014-05-21 14:27   ` Richard Biener
2014-05-22 12:49     ` Martin Jambor
2014-05-22 13:34       ` Richard Biener
2014-05-22 15:24         ` Jan Hubicka
2014-05-22 15:36           ` Richard Biener
2014-05-22 18:11             ` Jan Hubicka
2014-05-23 10:03               ` Richard Biener
2014-05-23 22:29                 ` Jan Hubicka
2014-05-24  7:39                 ` Jan Hubicka
2014-05-26 13:03                   ` Rainer Orth
2014-05-27 17:51                     ` Jan Hubicka
2014-05-27 18:16                       ` Rainer Orth
2014-05-26  1:01         ` Jan Hubicka
2014-05-21 13:31 ` [PATCH 5/7] Advanced aggregate jump function construction Martin Jambor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).