[PATCH] Re-organize toplevel LIM dependence query
From: Richard Biener @ 2013-03-22 11:22 UTC
  To: gcc-patches


This re-organizes the toplevel LIM dependence query, which asks
whether a reference is independent in a whole loop nest.  The patch
makes it compute the answer for each sub-loop of the nest and derive
the overall answer from those per-sub-loop answers.  This avoids
repeated dependence tests on references contained in subloops, which
makes the reference dependence check cache even more useless, as can
be seen from instrumentation on the PR39326 testcase.  Before the
patch we do

101 lim "loop cache queries == 155636" 1
101 lim "loop cache hitrate == 11" 1
101 lim "ref cache queries == 1572939576" 1
101 lim "ref cache hitrate == 12" 1
124 lim "loop cache queries == 143444" 1
124 lim "loop cache hitrate == 12" 1
124 lim "ref cache queries == 294471688" 1
124 lim "ref cache hitrate == 24" 1

and after it (the first LIM pass performs quite a lot of store motion)

101 lim "loop cache queries == 2011316" 1
101 lim "loop cache hitrate == 3" 1
101 lim "ref cache queries == 1490087040" 1
101 lim "ref cache hitrate == 7" 1
124 lim "loop cache queries == 1199348" 1
124 lim "loop cache hitrate == 6" 1
124 lim "ref cache queries == 222856846" 1
124 lim "ref cache hitrate == 0" 1

so we increase the load (and cost) on the loop cache (because its
querying is now more fine-grained) and significantly reduce the
number of redundant reference dependence checks, down to the point
where the ref cache's benefit no longer outweighs its quadratic cost
in memory (removing it speeds up compile-time of the PR39326
testcase by 50% as well!)

With the recursive scheme we also avoid using the all_refs bitmaps,
which are a source of quadratic memory use in LIM, and the more
costly of the two can now easily be removed (separate patch).
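
To make the new scheme easier to follow, here is a compressed,
self-contained sketch of the recursion and of the two-bits-per-loop
cache.  All types and helper names below (struct sloop, DEP_BIT,
bit_p, set_bit, ref_stored_in_this_loop_p, ref_dep_on_loop_body_p,
ref_indep_loop_sketch) are made up for illustration only; the real
implementation is ref_indep_loop_p_2 and LOOP_DEP_BIT in the patch
below, which in addition propagates dependence bits to outer loops.

#include <stdbool.h>

struct sloop                       /* stand-in for GCC's struct loop */
{
  int num;
  struct sloop *inner;             /* first sub-loop */
  struct sloop *next;              /* next sibling */
};

/* Two cache bits per loop: offset 0 caches the answer against stores
   only, offset 1 the answer against all references.  */
#define DEP_BIT(loopnum, stored_p) (2 * (loopnum) + ((stored_p) ? 1 : 0))

static bool bit_p (const unsigned char *bits, int b)
{ return (bits[b / 8] >> (b % 8)) & 1; }
static void set_bit (unsigned char *bits, int b)
{ bits[b / 8] |= 1 << (b % 8); }

/* Hypothetical helpers standing in for LIM's bitmap walks:
   - ref_stored_in_this_loop_p: is REF itself stored in LOOP?
   - ref_dep_on_loop_body_p: does REF conflict with any reference
     (or, if !STORED_P, any store) in LOOP's own body, subloops
     excluded?  */
extern bool ref_stored_in_this_loop_p (struct sloop *loop, int ref);
extern bool ref_dep_on_loop_body_p (struct sloop *loop, int ref,
                                    bool stored_p);

/* Return true if REF is independent of all references (or of all
   stores, if !STORED_P) in LOOP and its subloops, caching the
   per-loop answers in INDEP/DEP.  */
static bool
ref_indep_loop_sketch (struct sloop *loop, int ref, bool stored_p,
                       unsigned char *indep, unsigned char *dep)
{
  stored_p |= ref_stored_in_this_loop_p (loop, ref);

  if (bit_p (indep, DEP_BIT (loop->num, stored_p)))
    return true;
  if (bit_p (dep, DEP_BIT (loop->num, stored_p)))
    return false;

  /* Recurse into the immediate sub-loops first.  Their results are
     cached, so a later query against a sub-loop alone is answered
     without re-doing any dependence test.  */
  for (struct sloop *inner = loop->inner; inner; inner = inner->next)
    if (!ref_indep_loop_sketch (inner, ref, stored_p, indep, dep))
      return false;

  /* Only LOOP's own body remains to be checked.  */
  bool indep_p = !ref_dep_on_loop_body_p (loop, ref, stored_p);
  set_bit (indep_p ? indep : dep, DEP_BIT (loop->num, stored_p));
  return indep_p;
}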

Bootstrapped and tested on x86_64-unknown-linux-gnu and committed.

Richard.

2013-03-22  Richard Biener  <rguenther@suse.de>

	* tree-ssa-loop-im.c (memory_references): Add refs_stored_in_loop
	bitmaps.
	(gather_mem_refs_in_loops): Perform store accumulation here.
	(create_vop_ref_mapping_loop): Remove.
	(create_vop_ref_mapping): Likewise.
	(analyze_memory_references): Initialize refs_stored_in_loop.
	(LOOP_DEP_BIT): New define to map to bits in (in)dep_loop
	bitmaps.
	(record_indep_loop): Remove.
	(record_dep_loop): New function.
	(ref_indep_loop_p_1): Adjust to only walk over references
	in the loop, not its subloops.
	(ref_indep_loop_p): Rename to ...
	(ref_indep_loop_p_2): ... this and recurse over the loop tree,
	maintaining a more fine-grained cache.
	(ref_indep_loop_p): Wrap ref_indep_loop_p_2.
	(tree_ssa_lim_finalize): Free refs_stored_in_loop.

Index: trunk/gcc/tree-ssa-loop-im.c
===================================================================
*** trunk.orig/gcc/tree-ssa-loop-im.c	2013-03-22 10:13:16.000000000 +0100
--- trunk/gcc/tree-ssa-loop-im.c	2013-03-22 10:35:18.347671377 +0100
*************** typedef struct mem_ref
*** 140,145 ****
--- 140,150 ----
    bitmap dep_ref;		/* The complement of INDEP_REF.  */
  } *mem_ref_p;
  
+ /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
+    to record (in)dependence against stores in the loop and its subloops, the
+    second to record (in)dependence against all references in the loop
+    and its subloops.  */
+ #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
  
  
  
*************** static struct
*** 156,167 ****
    /* The set of memory references accessed in each loop.  */
    vec<bitmap> refs_in_loop;
  
    /* The set of memory references accessed in each loop, including
       subloops.  */
    vec<bitmap> all_refs_in_loop;
  
!   /* The set of memory references stored in each loop, including
!      subloops.  */
    vec<bitmap> all_refs_stored_in_loop;
  
    /* Cache for expanding memory addresses.  */
--- 161,174 ----
    /* The set of memory references accessed in each loop.  */
    vec<bitmap> refs_in_loop;
  
+   /* The set of memory references stored in each loop.  */
+   vec<bitmap> refs_stored_in_loop;
+ 
    /* The set of memory references accessed in each loop, including
       subloops.  */
    vec<bitmap> all_refs_in_loop;
  
!   /* The set of memory references stored in each loop, including subloops.  */
    vec<bitmap> all_refs_stored_in_loop;
  
    /* Cache for expanding memory addresses.  */
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1575,1581 ****
      }
    bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id);
    if (is_stored)
!     mark_ref_stored (ref, loop);
    return;
  }
  
--- 1582,1591 ----
      }
    bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id);
    if (is_stored)
!     {
!       bitmap_set_bit (memory_accesses.refs_stored_in_loop[loop->num], ref->id);
!       mark_ref_stored (ref, loop);
!     }
    return;
  }
  
*************** gather_mem_refs_in_loops (void)
*** 1602,1610 ****
  {
    gimple_stmt_iterator bsi;
    basic_block bb, *bbs;
!   struct loop *loop;
    loop_iterator li;
-   bitmap lrefs, alrefs, alrefso;
    unsigned i, n;
  
    /* Initialize bb_loop_postorder with a mapping from loop->num to
--- 1612,1619 ----
  {
    gimple_stmt_iterator bsi;
    basic_block bb, *bbs;
!   struct loop *loop, *outer;
    loop_iterator li;
    unsigned i, n;
  
    /* Initialize bb_loop_postorder with a mapping from loop->num to
*************** gather_mem_refs_in_loops (void)
*** 1639,1694 ****
       the loop hierarchy.  */
    FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
      {
!       lrefs = memory_accesses.refs_in_loop[loop->num];
!       alrefs = memory_accesses.all_refs_in_loop[loop->num];
!       bitmap_ior_into (alrefs, lrefs);
! 
!       if (loop_outer (loop) == current_loops->tree_root)
  	continue;
  
!       alrefso = memory_accesses.all_refs_in_loop[loop_outer (loop)->num];
!       bitmap_ior_into (alrefso, alrefs);
!     }
! }
! 
! /* Create a mapping from virtual operands to references that touch them
!    in LOOP.  */
! 
! static void
! create_vop_ref_mapping_loop (struct loop *loop)
! {
!   bitmap refs = memory_accesses.refs_in_loop[loop->num];
!   struct loop *sloop;
!   bitmap_iterator bi;
!   unsigned i;
!   mem_ref_p ref;
! 
!   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
!     {
!       ref = memory_accesses.refs_list[i];
!       for (sloop = loop; sloop != current_loops->tree_root;
! 	   sloop = loop_outer (sloop))
! 	if (bitmap_bit_p (ref->stored, loop->num))
! 	  {
! 	    bitmap refs_stored
! 	      = memory_accesses.all_refs_stored_in_loop[sloop->num];
! 	    bitmap_set_bit (refs_stored, ref->id);
! 	  }
!     }
! }
! 
! /* For each non-clobbered virtual operand and each loop, record the memory
!    references in this loop that touch the operand.  */
! 
! static void
! create_vop_ref_mapping (void)
! {
!   loop_iterator li;
!   struct loop *loop;
! 
!   FOR_EACH_LOOP (li, loop, 0)
!     {
!       create_vop_ref_mapping_loop (loop);
      }
  }
  
--- 1648,1669 ----
       the loop hierarchy.  */
    FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
      {
!       /* Finalize the overall touched references (including subloops).  */
!       bitmap_ior_into (memory_accesses.all_refs_in_loop[loop->num],
! 		       memory_accesses.refs_in_loop[loop->num]);
!       bitmap_ior_into (memory_accesses.all_refs_stored_in_loop[loop->num],
! 		       memory_accesses.refs_stored_in_loop[loop->num]);
! 
!       /* Propagate the information about accessed memory references up
! 	 the loop hierarchy.  */
!       outer = loop_outer (loop);
!       if (outer == current_loops->tree_root)
  	continue;
  
!       bitmap_ior_into (memory_accesses.all_refs_in_loop[outer->num],
! 		       memory_accesses.all_refs_in_loop[loop->num]);
!       bitmap_ior_into (memory_accesses.all_refs_stored_in_loop[outer->num],
! 		       memory_accesses.all_refs_stored_in_loop[loop->num]);
      }
  }
  
*************** analyze_memory_references (void)
*** 1707,1712 ****
--- 1682,1688 ----
      (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
  
    memory_accesses.refs_in_loop.create (number_of_loops ());
+   memory_accesses.refs_stored_in_loop.create (number_of_loops ());
    memory_accesses.all_refs_in_loop.create (number_of_loops ());
    memory_accesses.all_refs_stored_in_loop.create (number_of_loops ());
  
*************** analyze_memory_references (void)
*** 1715,1720 ****
--- 1691,1698 ----
        empty = BITMAP_ALLOC (&lim_bitmap_obstack);
        memory_accesses.refs_in_loop.quick_push (empty);
        empty = BITMAP_ALLOC (&lim_bitmap_obstack);
+       memory_accesses.refs_stored_in_loop.quick_push (empty);
+       empty = BITMAP_ALLOC (&lim_bitmap_obstack);
        memory_accesses.all_refs_in_loop.quick_push (empty);
        empty = BITMAP_ALLOC (&lim_bitmap_obstack);
        memory_accesses.all_refs_stored_in_loop.quick_push (empty);
*************** analyze_memory_references (void)
*** 1723,1729 ****
    memory_accesses.ttae_cache = NULL;
  
    gather_mem_refs_in_loops ();
-   create_vop_ref_mapping ();
  }
  
  /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
--- 1701,1706 ----
*************** refs_independent_p (mem_ref_p ref1, mem_
*** 2299,2332 ****
      }
  }
  
! /* Records the information whether REF is independent in LOOP (according
!    to INDEP).  */
  
  static void
! record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep)
  {
!   if (indep)
!     bitmap_set_bit (ref->indep_loop, loop->num);
!   else
!     bitmap_set_bit (ref->dep_loop, loop->num);
  }
  
  /* Returns true if REF is independent on all other memory references in
     LOOP.  */
  
  static bool
! ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref)
  {
    bitmap refs_to_check;
    unsigned i;
    bitmap_iterator bi;
-   bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num);
    mem_ref_p aref;
  
!   if (stored)
!     refs_to_check = memory_accesses.all_refs_in_loop[loop->num];
    else
!     refs_to_check = memory_accesses.all_refs_stored_in_loop[loop->num];
  
    if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
      return false;
--- 2276,2309 ----
      }
  }
  
! /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
!    and its super-loops.  */
  
  static void
! record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
!   /* We can propagate dependent-in-loop bits up the loop
!      hierarchy to all outer loops.  */
!   while (loop != current_loops->tree_root
! 	 && bitmap_set_bit (ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
!     loop = loop_outer (loop);
  }
  
  /* Returns true if REF is independent on all other memory references in
     LOOP.  */
  
  static bool
! ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
    bitmap refs_to_check;
    unsigned i;
    bitmap_iterator bi;
    mem_ref_p aref;
  
!   if (stored_p)
!     refs_to_check = memory_accesses.refs_in_loop[loop->num];
    else
!     refs_to_check = memory_accesses.refs_stored_in_loop[loop->num];
  
    if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
      return false;
*************** ref_indep_loop_p_1 (struct loop *loop, m
*** 2335,2374 ****
      {
        aref = memory_accesses.refs_list[i];
        if (!refs_independent_p (ref, aref))
! 	{
! 	  ret = false;
! 	  record_indep_loop (loop, aref, false);
! 	  break;
! 	}
      }
  
!   return ret;
  }
  
  /* Returns true if REF is independent on all other memory references in
     LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
  
  static bool
! ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
  {
!   bool ret;
! 
!   gcc_checking_assert (MEM_ANALYZABLE (ref));
  
!   if (bitmap_bit_p (ref->indep_loop, loop->num))
      return true;
!   if (bitmap_bit_p (ref->dep_loop, loop->num))
      return false;
  
!   ret = ref_indep_loop_p_1 (loop, ref);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
! 	     ref->id, loop->num, ret ? "independent" : "dependent");
  
!   record_indep_loop (loop, ref, ret);
  
!   return ret;
  }
  
  /* Returns true if we can perform store motion of REF from LOOP.  */
--- 2312,2384 ----
      {
        aref = memory_accesses.refs_list[i];
        if (!refs_independent_p (ref, aref))
! 	return false;
      }
  
!   return true;
  }
  
  /* Returns true if REF is independent on all other memory references in
     LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
  
  static bool
! ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
!   stored_p |= bitmap_bit_p (ref->stored, loop->num);
  
!   if (bitmap_bit_p (ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
      return true;
!   if (bitmap_bit_p (ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
      return false;
  
!   struct loop *inner = loop->inner;
!   while (inner)
!     {
!       if (!ref_indep_loop_p_2 (inner, ref, stored_p))
! 	return false;
!       inner = inner->next;
!     }
! 
!   bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
! 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
  
!   /* Record the computed result in the cache.  */
!   if (indep_p)
!     {
!       if (bitmap_set_bit (ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
! 	  && stored_p)
! 	{
! 	  /* If it's independent against all refs then it's independent
! 	     against stores, too.  */
! 	  bitmap_set_bit (ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
! 	}
!     }
!   else
!     {
!       record_dep_loop (loop, ref, stored_p);
!       if (!stored_p)
! 	{
! 	  /* If it's dependent against stores it's dependent against
! 	     all refs, too.  */
! 	  record_dep_loop (loop, ref, true);
! 	}
!     }
  
!   return indep_p;
! }
! 
! /* Returns true if REF is independent on all other memory references in
!    LOOP.  */
! 
! static bool
! ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
! {
!   gcc_checking_assert (MEM_ANALYZABLE (ref));
! 
!   return ref_indep_loop_p_2 (loop, ref, false);
  }
  
  /* Returns true if we can perform store motion of REF from LOOP.  */
*************** tree_ssa_lim_finalize (void)
*** 2619,2624 ****
--- 2629,2635 ----
    memory_accesses.refs_list.release ();
  
    memory_accesses.refs_in_loop.release ();
+   memory_accesses.refs_stored_in_loop.release ();
    memory_accesses.all_refs_in_loop.release ();
    memory_accesses.all_refs_stored_in_loop.release ();
  
