public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][6/n] tree LIM TLC
@ 2013-03-12 15:25 Richard Biener
  2013-03-12 15:29 ` Steven Bosscher
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2013-03-12 15:25 UTC (permalink / raw)
  To: gcc-patches


(Un-?)surprisingly the most effective compile-time reduction for
the testcase in PR39326 is to employ ao_ref caching for
alias oracle queries and caching of expanded affine-combinations
for affine disambiguations.

This reduces compile-time to a manageable amount in the first place
for me (so I'm sending it "late" in the series).

Bootstrap and regtest scheduled on x86_64-unknown-linux-gnu, queued
for 4.9.

Richard.

2013-03-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/39326
	* tree-ssa-loop-im.c (struct mem_ref): Replace mem member
	with an ao_ref typed one.  Add affine-combination cache members.
	(MEM_ANALYZABLE): Adjust.
	(memref_eq): Likewise.
	(mem_ref_alloc): Likewise.
	(gather_mem_refs_stmt): Likewise.
	(execute_sm_if_changed_flag_set): Likewise.
	(execute_sm): Likewise.
	(ref_always_accessed_p): Likewise.
	(refs_independent_p): Likewise.
	(can_sm_ref_p): Likewise.
	(mem_refs_may_alias_p): Use ao_ref members to query the oracle.
	Cache expanded affine combinations.

Index: trunk/gcc/tree-ssa-loop-im.c
===================================================================
*** trunk.orig/gcc/tree-ssa-loop-im.c	2013-03-12 15:11:12.000000000 +0100
--- trunk/gcc/tree-ssa-loop-im.c	2013-03-12 16:20:49.115169595 +0100
*************** typedef struct mem_ref_locs
*** 117,126 ****
  
  typedef struct mem_ref
  {
-   tree mem;			/* The memory itself.  */
    unsigned id;			/* ID assigned to the memory reference
  				   (its index in memory_accesses.refs_list)  */
    hashval_t hash;		/* Its hash value.  */
    bitmap stored;		/* The set of loops in that this memory location
  				   is stored to.  */
    vec<mem_ref_locs_p> accesses_in_loop;
--- 117,130 ----
  
  typedef struct mem_ref
  {
    unsigned id;			/* ID assigned to the memory reference
  				   (its index in memory_accesses.refs_list)  */
    hashval_t hash;		/* Its hash value.  */
+ 
+   /* The memory access itself and associated caching of alias-oracle
+      query meta-data.  */
+   ao_ref mem;			/* The ao_ref of this memory access.  */
+ 
    bitmap stored;		/* The set of loops in that this memory location
  				   is stored to.  */
    vec<mem_ref_locs_p> accesses_in_loop;
*************** typedef struct mem_ref
*** 142,147 ****
--- 146,155 ----
    bitmap indep_ref;		/* The set of memory references on that
  				   this reference is independent.  */
    bitmap dep_ref;		/* The complement of INDEP_REF.  */
+ 
+   /* The expanded affine combination of this memory access.  */
+   aff_tree aff_off;
+   double_int aff_size;
  } *mem_ref_p;
  
  
*************** static bool ref_indep_loop_p (struct loo
*** 186,192 ****
  #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
  
  /* Whether the reference was analyzable.  */
! #define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)
  
  static struct lim_aux_data *
  init_lim_data (gimple stmt)
--- 194,200 ----
  #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
  
  /* Whether the reference was analyzable.  */
! #define MEM_ANALYZABLE(REF) ((REF)->mem.ref != error_mark_node)
  
  static struct lim_aux_data *
  init_lim_data (gimple stmt)
*************** memref_eq (const void *obj1, const void
*** 1435,1441 ****
  {
    const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
  
!   return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
  }
  
  /* Releases list of memory reference locations ACCS.  */
--- 1443,1449 ----
  {
    const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
  
!   return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
  }
  
  /* Releases list of memory reference locations ACCS.  */
*************** static mem_ref_p
*** 1477,1483 ****
  mem_ref_alloc (tree mem, unsigned hash, unsigned id)
  {
    mem_ref_p ref = XNEW (struct mem_ref);
!   ref->mem = mem;
    ref->id = id;
    ref->hash = hash;
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
--- 1485,1491 ----
  mem_ref_alloc (tree mem, unsigned hash, unsigned id)
  {
    mem_ref_p ref = XNEW (struct mem_ref);
!   ao_ref_init (&ref->mem, mem);
    ref->id = id;
    ref->hash = hash;
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
*************** mem_ref_alloc (tree mem, unsigned hash,
*** 1487,1492 ****
--- 1495,1502 ----
    ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
    ref->accesses_in_loop.create (0);
  
+   ref->aff_off.type = NULL_TREE;
+ 
    return ref;
  }
  
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1586,1592 ****
        if (dump_file && (dump_flags & TDF_DETAILS))
  	{
  	  fprintf (dump_file, "Memory reference %u: ", id);
! 	  print_generic_expr (dump_file, ref->mem, TDF_SLIM);
  	  fprintf (dump_file, "\n");
  	}
      }
--- 1596,1602 ----
        if (dump_file && (dump_flags & TDF_DETAILS))
  	{
  	  fprintf (dump_file, "Memory reference %u: ", id);
! 	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
  	  fprintf (dump_file, "\n");
  	}
      }
*************** analyze_memory_references (struct loop *
*** 1638,1653 ****
     tree_to_aff_combination_expand.  */
  
  static bool
! mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
  {
    /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
       object and their offset differ in such a way that the locations cannot
       overlap, then they cannot alias.  */
-   double_int size1, size2;
-   aff_tree off1, off2;
  
    /* Perform basic offset and type-based disambiguation.  */
!   if (!refs_may_alias_p (mem1, mem2))
      return false;
  
    /* The expansion of addresses may be a bit expensive, thus we only do
--- 1648,1661 ----
     tree_to_aff_combination_expand.  */
  
  static bool
! mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2)
  {
    /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
       object and their offset differ in such a way that the locations cannot
       overlap, then they cannot alias.  */
  
    /* Perform basic offset and type-based disambiguation.  */
!   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
      return false;
  
    /* The expansion of addresses may be a bit expensive, thus we only do
*************** mem_refs_may_alias_p (tree mem1, tree me
*** 1655,1668 ****
    if (optimize < 2)
      return true;
  
!   get_inner_reference_aff (mem1, &off1, &size1);
!   get_inner_reference_aff (mem2, &off2, &size2);
!   aff_combination_expand (&off1, ttae_cache);
!   aff_combination_expand (&off2, ttae_cache);
!   aff_combination_scale (&off1, double_int_minus_one);
!   aff_combination_add (&off2, &off1);
  
!   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
      return false;
  
    return true;
--- 1663,1686 ----
    if (optimize < 2)
      return true;
  
!   if (mem1->aff_off.type == NULL_TREE)
!     {
!       get_inner_reference_aff (mem1->mem.ref, &mem1->aff_off, &mem1->aff_size);
!       aff_combination_expand (&mem1->aff_off, &memory_accesses.ttae_cache);
!       gcc_assert (mem1->aff_off.type != NULL_TREE);
!     }
!   if (mem2->aff_off.type == NULL_TREE)
!     {
!       get_inner_reference_aff (mem2->mem.ref, &mem2->aff_off, &mem2->aff_size);
!       aff_combination_expand (&mem2->aff_off, &memory_accesses.ttae_cache);
!       gcc_assert (mem2->aff_off.type != NULL_TREE);
!     }
! 
!   aff_tree tem = mem1->aff_off;
!   aff_combination_scale (&tem, double_int_minus_one);
!   aff_combination_add (&tem, &mem2->aff_off);
  
!   if (aff_comb_cannot_overlap_p (&tem, mem1->aff_size, mem2->aff_size))
      return false;
  
    return true;
*************** execute_sm_if_changed_flag_set (struct l
*** 1987,1993 ****
    mem_ref_loc_p loc;
    tree flag;
    vec<mem_ref_loc_p> locs = vNULL;
!   char *str = get_lsm_tmp_name (ref->mem, ~0);
  
    lsm_tmp_name_add ("_flag");
    flag = create_tmp_reg (boolean_type_node, str);
--- 2005,2011 ----
    mem_ref_loc_p loc;
    tree flag;
    vec<mem_ref_loc_p> locs = vNULL;
!   char *str = get_lsm_tmp_name (ref->mem.ref, ~0);
  
    lsm_tmp_name_add ("_flag");
    flag = create_tmp_reg (boolean_type_node, str);
*************** execute_sm (struct loop *loop, vec<edge>
*** 2029,2044 ****
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Executing store motion of ");
!       print_generic_expr (dump_file, ref->mem, 0);
        fprintf (dump_file, " from loop %d\n", loop->num);
      }
  
!   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
! 			      get_lsm_tmp_name (ref->mem, ~0));
  
    fmt_data.loop = loop;
    fmt_data.orig_loop = loop;
!   for_each_index (&ref->mem, force_move_till, &fmt_data);
  
    if (block_in_transaction (loop_preheader_edge (loop)->src)
        || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
--- 2047,2062 ----
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Executing store motion of ");
!       print_generic_expr (dump_file, ref->mem.ref, 0);
        fprintf (dump_file, " from loop %d\n", loop->num);
      }
  
!   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
! 			      get_lsm_tmp_name (ref->mem.ref, ~0));
  
    fmt_data.loop = loop;
    fmt_data.orig_loop = loop;
!   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
  
    if (block_in_transaction (loop_preheader_edge (loop)->src)
        || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
*************** execute_sm (struct loop *loop, vec<edge>
*** 2056,2062 ****
    /* FIXME/TODO: For the multi-threaded variant, we could avoid this
       load altogether, since the store is predicated by a flag.  We
       could, do the load only if it was originally in the loop.  */
!   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
    lim_data = init_lim_data (load);
    lim_data->max_loop = loop;
    lim_data->tgt_loop = loop;
--- 2074,2080 ----
    /* FIXME/TODO: For the multi-threaded variant, we could avoid this
       load altogether, since the store is predicated by a flag.  We
       could, do the load only if it was originally in the loop.  */
!   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
    lim_data = init_lim_data (load);
    lim_data->max_loop = loop;
    lim_data->tgt_loop = loop;
*************** execute_sm (struct loop *loop, vec<edge>
*** 2076,2086 ****
      if (!multi_threaded_model_p)
        {
  	gimple store;
! 	store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
  	gsi_insert_on_edge (ex, store);
        }
      else
!       execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
  }
  
  /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
--- 2094,2104 ----
      if (!multi_threaded_model_p)
        {
  	gimple store;
! 	store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
  	gsi_insert_on_edge (ex, store);
        }
      else
!       execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
  }
  
  /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
*************** ref_always_accessed_p (struct loop *loop
*** 2114,2120 ****
    struct loop *must_exec;
    tree base;
  
!   base = get_base_address (ref->mem);
    if (INDIRECT_REF_P (base)
        || TREE_CODE (base) == MEM_REF)
      base = TREE_OPERAND (base, 0);
--- 2132,2138 ----
    struct loop *must_exec;
    tree base;
  
!   base = ao_ref_base (&ref->mem);
    if (INDIRECT_REF_P (base)
        || TREE_CODE (base) == MEM_REF)
      base = TREE_OPERAND (base, 0);
*************** refs_independent_p (mem_ref_p ref1, mem_
*** 2187,2194 ****
      fprintf (dump_file, "Querying dependency of refs %u and %u: ",
  	     ref1->id, ref2->id);
  
!   if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
! 			    &memory_accesses.ttae_cache))
      {
        bitmap_set_bit (ref1->dep_ref, ref2->id);
        if (dump_file && (dump_flags & TDF_DETAILS))
--- 2205,2211 ----
      fprintf (dump_file, "Querying dependency of refs %u and %u: ",
  	     ref1->id, ref2->id);
  
!   if (mem_refs_may_alias_p (ref1, ref2))
      {
        bitmap_set_bit (ref1->dep_ref, ref2->id);
        if (dump_file && (dump_flags & TDF_DETAILS))
*************** can_sm_ref_p (struct loop *loop, mem_ref
*** 2284,2304 ****
      return false;
  
    /* It should be movable.  */
!   if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
!       || TREE_THIS_VOLATILE (ref->mem)
!       || !for_each_index (&ref->mem, may_move_till, loop))
      return false;
  
    /* If it can throw fail, we do not properly update EH info.  */
!   if (tree_could_throw_p (ref->mem))
      return false;
  
    /* If it can trap, it must be always executed in LOOP.
       Readonly memory locations may trap when storing to them, but
       tree_could_trap_p is a predicate for rvalues, so check that
       explicitly.  */
!   base = get_base_address (ref->mem);
!   if ((tree_could_trap_p (ref->mem)
         || (DECL_P (base) && TREE_READONLY (base)))
        && !ref_always_accessed_p (loop, ref, true))
      return false;
--- 2301,2321 ----
      return false;
  
    /* It should be movable.  */
!   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
!       || TREE_THIS_VOLATILE (ref->mem.ref)
!       || !for_each_index (&ref->mem.ref, may_move_till, loop))
      return false;
  
    /* If it can throw fail, we do not properly update EH info.  */
!   if (tree_could_throw_p (ref->mem.ref))
      return false;
  
    /* If it can trap, it must be always executed in LOOP.
       Readonly memory locations may trap when storing to them, but
       tree_could_trap_p is a predicate for rvalues, so check that
       explicitly.  */
!   base = ao_ref_base (&ref->mem);
!   if ((tree_could_trap_p (ref->mem.ref)
         || (DECL_P (base) && TREE_READONLY (base)))
        && !ref_always_accessed_p (loop, ref, true))
      return false;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][6/n] tree LIM TLC
  2013-03-12 15:25 [PATCH][6/n] tree LIM TLC Richard Biener
@ 2013-03-12 15:29 ` Steven Bosscher
  2013-03-12 15:33   ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Steven Bosscher @ 2013-03-12 15:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Tue, Mar 12, 2013 at 4:25 PM, Richard Biener wrote:
>
> (Un-?)surprisingly the most effective compile-time reduction for
> the testcase in PR39326 is to employ ao_ref caching for
> alias oracle queries and caching of expanded affine-combinations
> for affine disambiguations.
>
> This reduces compile-time to a manageable amount in the first place
> for me (so I'm sending it "late" in the series).

I suppose this renders my LIM patch obsolete.

Did you also look at the memory foot print?

Ciao!
Steven

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][6/n] tree LIM TLC
  2013-03-12 15:29 ` Steven Bosscher
@ 2013-03-12 15:33   ` Richard Biener
  2013-03-12 16:01     ` Steven Bosscher
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2013-03-12 15:33 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: gcc-patches

On Tue, 12 Mar 2013, Steven Bosscher wrote:

> On Tue, Mar 12, 2013 at 4:25 PM, Richard Biener wrote:
> >
> > (Un-?)surprisingly the most effective compile-time reduction for
> > the testcase in PR39326 is to employ ao_ref caching for
> > alias oracle queries and caching of expanded affine-combinations
> > for affine disambiguations.
> >
> > This reduces compile-time to a manageable amount in the first place
> > for me (so I'm sending it "late" in the series).
> 
> I suppose this renders my LIM patch obsolete.

Not really - it's still

 tree loop invariant motion: 588.31 (78%) usr

so limiting the O(n^2) dependence testing is a good thing.  But I
can take it over from here and implement that ontop of my patches
if you like.

> Did you also look at the memory foot print?

Yeah, unfortunately processing outermost loops separately doesn't
reduce peak memory consumption.  I'll look into getting rid of the
all-refs bitmaps, but I'm not there yet.

Currently the testcase peaks at 1.7GB for me (after LIM, then
it gets worse with DSE and IRA).  And I only tested -O1 so far.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][6/n] tree LIM TLC
  2013-03-12 15:33   ` Richard Biener
@ 2013-03-12 16:01     ` Steven Bosscher
  0 siblings, 0 replies; 4+ messages in thread
From: Steven Bosscher @ 2013-03-12 16:01 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1532 bytes --]

On Tue, Mar 12, 2013 at 4:33 PM, Richard Biener wrote:
> On Tue, 12 Mar 2013, Steven Bosscher wrote:

>> I suppose this renders my LIM patch obsolete.
>
> Not really - it's still
>
>  tree loop invariant motion: 588.31 (78%) usr
>
> so limiting the O(n^2) dependence testing is a good thing.  But I
> can take it over from here and implement that ontop of my patches
> if you like.

That'd be good, let's keep it in one hand, one set.


>> Did you also look at the memory foot print?
>
> Yeah, unfortunately processing outermost loops separately doesn't
> reduce peak memory consumption.  I'll look into getting rid of the
> all-refs bitmaps, but I'm not there yet.

A few more ideas (though probably not with as much impact):

Is it possible to use a bitmap_head for the (now merged)
dep_loop/indep_loop, instead of bitmap? Likewise for a few other
bitmaps, especially the vectors of bitmaps.

Put "struct depend" in an alloc pool. (Also allows one to wipe them
all out in free_lim_aux_data.)
Likewise for "struct mem_ref".

Use a shared mem_ref for the error_mark_node case (and hoist the
MEM_ANALYZABLE checks in refs_independent_p above the bitmap tests).

Use nameless temps instead of lsm_tmp_name_add.


> Currently the testcase peaks at 1.7GB for me (after LIM, then
> it gets worse with DSE and IRA).  And I only tested -O1 so far.

Try my DSE patch (corrected version attached).

What are you using now to measure per-pass memory usage? I'm still
using my old hack (also attached) but it's not quite optimal.

Ciao!
Steven

[-- Attachment #2: PR39326_RTLDSE.diff --]
[-- Type: application/octet-stream, Size: 3885 bytes --]

	PR middle-end/39326
	* dse.c (replace_read): If the stored value is a pseudo-register
	that is set only once, re-use it to replace the load instead of
	creating a new register.

Index: dse.c
===================================================================
--- dse.c	(revision 196576)
+++ dse.c	(working copy)
@@ -2011,7 +2011,7 @@ replace_read (store_info_t store_info, insn_info_t
 {
   enum machine_mode store_mode = GET_MODE (store_info->mem);
   enum machine_mode read_mode = GET_MODE (read_info->mem);
-  rtx insns, this_insn, read_reg;
+  rtx insns, this_insn, read_val, read_reg;
   basic_block bb;
 
   if (!dbg_cnt (dse))
@@ -2033,19 +2033,27 @@ replace_read (store_info_t store_info, insn_info_t
 	     GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
   start_sequence ();
   bb = BLOCK_FOR_INSN (read_insn->insn);
-  read_reg = get_stored_val (store_info,
+  read_val = get_stored_val (store_info,
 			     read_mode, read_info->begin, read_info->end,
 			     bb, false);
-  if (read_reg == NULL_RTX)
+  if (read_val == NULL_RTX)
     {
       end_sequence ();
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, " -- could not extract bits of stored value\n");
       return false;
     }
-  /* Force the value into a new register so that it won't be clobbered
-     between the store and the load.  */
-  read_reg = copy_to_mode_reg (read_mode, read_reg);
+
+  /* If READ_REG is a pseudo that is only set once, re-use the register.
+     Otherwise, force the value into a new register so that it won't be
+     clobbered between the store and the load.  */
+  if (! REG_P (read_val) || HARD_REGISTER_P (read_val)
+      || REGNO (read_val) >= DF_REG_SIZE (df)
+      || DF_REG_DEF_COUNT (REGNO (read_val)) != 1)
+    read_reg = copy_to_mode_reg (read_mode, read_val);
+  else
+    read_reg = read_val;
+
   insns = get_insns ();
   end_sequence ();
 
@@ -2079,16 +2087,15 @@ replace_read (store_info_t store_info, insn_info_t
 
   if (validate_change (read_insn->insn, loc, read_reg, 0))
     {
-      deferred_change_t deferred_change =
-	(deferred_change_t) pool_alloc (deferred_change_pool);
-
-      /* Insert this right before the store insn where it will be safe
-	 from later insns that might change it before the read.  */
-      emit_insn_before (insns, store_insn->insn);
-
-      /* And now for the kludge part: cselib croaks if you just
-	 return at this point.  There are two reasons for this:
-
+      if (insns != NULL_RTX)
+	/* Insert this right before the store insn where it will be safe
+	   from later insns that might change it before the read.  */
+	emit_insn_before (insns, store_insn->insn);
+
+      /* And now for the kludge part: cselib croaks if you have
+	 create a new register and just return at this point.
+	 There are two reasons for this:
+
 	 1) Cselib has an idea of how many pseudos there are and
 	 that does not include the new ones we just added.
 
@@ -2108,13 +2115,17 @@ replace_read (store_info_t store_info, insn_info_t
 	 and when we are finished with the block, we undo this.  We
 	 keep a table of mems to get rid of.  At the end of the basic
 	 block we can put them back.  */
+      if (read_reg != read_val)
+	{
+	  deferred_change_t deferred_change =
+	    (deferred_change_t) pool_alloc (deferred_change_pool);
+	  *loc = read_info->mem;
+	  deferred_change->next = deferred_change_list;
+	  deferred_change_list = deferred_change;
+	  deferred_change->loc = loc;
+	  deferred_change->reg = read_reg;
+	}
 
-      *loc = read_info->mem;
-      deferred_change->next = deferred_change_list;
-      deferred_change_list = deferred_change;
-      deferred_change->loc = loc;
-      deferred_change->reg = read_reg;
-
       /* Get rid of the read_info, from the point of view of the
 	 rest of dse, play like this read never happened.  */
       read_insn->read_rec = read_info->next;

[-- Attachment #3: passes_memstat.diff --]
[-- Type: application/octet-stream, Size: 2311 bytes --]

Index: passes.c
===================================================================
--- passes.c	(revision 196182)
+++ passes.c	(working copy)
@@ -77,15 +77,68 @@ struct opt_pass *current_pass;
 
 static void register_pass_name (struct opt_pass *, const char *);
 
+typedef struct
+{
+  unsigned long size,resident,share,text,lib,data,dt;
+} statm_t;
+
+static void
+read_proc_memory_status (statm_t &result)
+{
+  const char* statm_path = "/proc/self/statm";
+
+  FILE *f = fopen(statm_path,"r");
+  if (!f)
+    {
+      perror (statm_path);
+      gcc_unreachable ();
+    }
+  if (7 != fscanf (f, "%lu %lu %lu %lu %lu %lu %lu",
+		   &result.size, &result.resident, &result.share,
+		   &result.text, &result.lib, &result.data,
+		   &result.dt))
+    {
+      perror (statm_path);
+      gcc_unreachable ();
+    }
+  fclose(f);
+}
+
 /* Call from anywhere to find out what pass this is.  Useful for
    printing out debugging information deep inside an service
    routine.  */
+
+#include "bitmap.h"
+#include "regset.h"
+
+static size_t
+obstack_memory_used2 (struct obstack *h)
+{
+  struct _obstack_chunk* lp;
+  size_t nbytes = 0;
+
+  for (lp = h->chunk; lp != 0; lp = lp->prev)
+    {
+      nbytes += (size_t) (lp->limit - (char *) lp);
+    }
+  return nbytes;
+}
+
 void
 print_current_pass (FILE *file)
 {
   if (current_pass)
-    fprintf (file, "current pass = %s (%d)\n",
-	     current_pass->name, current_pass->static_pass_number);
+    {
+      statm_t statm;
+      int pagesize = getpagesize ();
+      unsigned bos = obstack_memory_used2 (&bitmap_default_obstack.obstack);
+      unsigned ros = obstack_memory_used2 (&reg_obstack.obstack);
+      read_proc_memory_status (statm);
+      fprintf (file, "current pass = %32s (%3d) %12lu %12lu %12lu %12u %12u\n",
+	       current_pass->name, current_pass->static_pass_number,
+	       statm.size * pagesize, statm.resident * pagesize,
+	       statm.share * pagesize, bos, ros);
+    }
   else
     fprintf (file, "no current pass.\n");
 }
@@ -2286,7 +2339,7 @@ execute_one_pass (struct opt_pass *pass)
       current_pass = NULL;
       return false;
     }
-
+print_current_pass (stderr);
   /* Pass execution event trigger: useful to identify passes being
      executed.  */
   invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass);

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-03-12 16:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-12 15:25 [PATCH][6/n] tree LIM TLC Richard Biener
2013-03-12 15:29 ` Steven Bosscher
2013-03-12 15:33   ` Richard Biener
2013-03-12 16:01     ` Steven Bosscher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).