public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Improve BB vectorization dependence analysis
@ 2015-11-09 12:55 Richard Biener
  2015-11-16 18:35 ` Alan Lawrence
  2015-11-23  4:37 ` H.J. Lu
  0 siblings, 2 replies; 5+ messages in thread
From: Richard Biener @ 2015-11-09 12:55 UTC (permalink / raw)
  To: gcc-patches


Currently BB vectorization computes all dependences inside a BB
region and gives up on vectorizing the whole region if it cannot
handle any one of them.

This is obviously not needed - BB vectorization can restrict the
dependence tests to those that are needed to apply the load/store
motion effectively performed by the vectorization (sinking all
participating loads/stores to the place of the last one).

With the analysis restructured that way, it is also easy to not give up
completely but to drop only the SLP instances we cannot vectorize (this
gives a slight bump in my SPEC CPU 2006 testing to 756 vectorized basic
block regions).

But first and foremost this patch is meant to reduce the dependence analysis
cost and to somewhat mitigate the compile-time effects of the first patch.

For fixing PR56118 only a cost model issue remains.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-11-09  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/56118
	* tree-vectorizer.h (vect_find_last_scalar_stmt_in_slp): Declare.
	* tree-vect-slp.c (vect_find_last_scalar_stmt_in_slp): Export.
	* tree-vect-data-refs.c (vect_slp_analyze_node_dependences): New
	function.
	(vect_slp_analyze_data_ref_dependences): Instead of computing
	all dependences of the region DRs just analyze the code motions
	SLP vectorization will perform.  Remove SLP instances that
	cannot have their store/load motions applied.
	(vect_analyze_data_refs): Allow DRs without a vectype
	in BB vectorization.

	* gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c: Adjust.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h.orig	2015-11-09 11:01:55.688175321 +0100
--- gcc/tree-vectorizer.h	2015-11-09 11:02:18.987432840 +0100
*************** extern void vect_detect_hybrid_slp (loop
*** 1075,1080 ****
--- 1075,1081 ----
  extern void vect_get_slp_defs (vec<tree> , slp_tree,
  			       vec<vec<tree> > *, int);
  extern bool vect_slp_bb (basic_block);
+ extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree);
  
  /* In tree-vect-patterns.c.  */
  /* Pattern recognition functions.
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig	2015-11-09 10:22:33.140125722 +0100
--- gcc/tree-vect-data-refs.c	2015-11-09 11:33:05.503874719 +0100
*************** vect_slp_analyze_data_ref_dependence (st
*** 581,586 ****
--- 581,629 ----
  }
  
  
+ /* Analyze dependences involved in the transform of SLP NODE.  */
+ 
+ static bool
+ vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node)
+ {
+   /* This walks over all stmts involved in the SLP load/store done
+      in NODE verifying we can sink them up to the last stmt in the
+      group.  */
+   gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
+   for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+     {
+       gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
+       if (access == last_access)
+ 	continue;
+       stmt_vec_info access_stmt_info = vinfo_for_stmt (access);
+       gimple_stmt_iterator gsi = gsi_for_stmt (access);
+       gsi_next (&gsi);
+       for (; gsi_stmt (gsi) != last_access; gsi_next (&gsi))
+ 	{
+ 	  gimple *stmt = gsi_stmt (gsi);
+ 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ 	  if (!STMT_VINFO_DATA_REF (stmt_info)
+ 	      || (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
+ 		  && DR_IS_READ (STMT_VINFO_DATA_REF (access_stmt_info))))
+ 	    continue;
+ 
+ 	  ddr_p ddr = initialize_data_dependence_relation
+ 	      (STMT_VINFO_DATA_REF (access_stmt_info),
+ 	       STMT_VINFO_DATA_REF (stmt_info), vNULL);
+ 	  if (vect_slp_analyze_data_ref_dependence (ddr))
+ 	    {
+ 	      /* ???  If the dependence analysis failed we can resort to the
+ 		 alias oracle which can handle more kinds of stmts.  */
+ 	      free_dependence_relation (ddr);
+ 	      return false;
+ 	    }
+ 	  free_dependence_relation (ddr);
+ 	}
+     }
+   return true;
+ }
+ 
+ 
  /* Function vect_analyze_data_ref_dependences.
  
     Examine all the data references in the basic-block, and make sure there
*************** vect_slp_analyze_data_ref_dependence (st
*** 590,610 ****
  bool
  vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
  {
-   struct data_dependence_relation *ddr;
-   unsigned int i;
- 
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                       "=== vect_slp_analyze_data_ref_dependences ===\n");
  
!   if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
! 				&BB_VINFO_DDRS (bb_vinfo),
! 				vNULL, true))
!     return false;
  
!   FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
!     if (vect_slp_analyze_data_ref_dependence (ddr))
!       return false;
  
    return true;
  }
--- 633,677 ----
  bool
  vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
  {
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                       "=== vect_slp_analyze_data_ref_dependences ===\n");
  
!   slp_instance instance;
!   slp_tree load;
!   unsigned int i, j;
!   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
!     {
!       bool remove = false;
!       /* Verify we can sink loads to the vectorized stmt insert location.  */
!       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, load)
! 	if (! vect_slp_analyze_node_dependences (instance, load))
! 	  {
! 	    remove = true;
! 	    break;
! 	  }
!       /* Verify we can sink stores to the vectorized stmt insert location.  */
!       slp_tree store = SLP_INSTANCE_TREE (instance);
!       if (!remove
! 	  && STMT_VINFO_DATA_REF
! 		(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0]))
! 	  && ! vect_slp_analyze_node_dependences (instance, store))
! 	remove = true;
!       if (remove)
! 	{
! 	  dump_printf_loc (MSG_NOTE, vect_location,
! 			   "removing SLP instance operations starting from: ");
! 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
! 			    SLP_TREE_SCALAR_STMTS
! 			      (SLP_INSTANCE_TREE (instance))[0], 0);
! 	  vect_free_slp_instance (instance);
! 	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
! 	}
!       i++;
!     }
  
!   if (!BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
!     return false;
  
    return true;
  }
*************** again:
*** 3715,3721 ****
              }
  
            if (is_a <bb_vec_info> (vinfo))
! 	    break;
  
  	  if (gatherscatter != SG_NONE || simd_lane_access)
  	    {
--- 3782,3793 ----
              }
  
            if (is_a <bb_vec_info> (vinfo))
! 	    {
! 	      /* No vector type is fine, the ref can still participate
! 	         in dependence analysis, we just can't vectorize it.  */
! 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
! 	      continue;
! 	    }
  
  	  if (gatherscatter != SG_NONE || simd_lane_access)
  	    {
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig	2015-11-09 11:01:55.720175675 +0100
--- gcc/tree-vect-slp.c	2015-11-09 11:02:19.004433028 +0100
*************** vect_supported_load_permutation_p (slp_i
*** 1426,1432 ****
  
  /* Find the last store in SLP INSTANCE.  */
  
! static gimple *
  vect_find_last_scalar_stmt_in_slp (slp_tree node)
  {
    gimple *last = NULL, *stmt;
--- 1426,1432 ----
  
  /* Find the last store in SLP INSTANCE.  */
  
! gimple *
  vect_find_last_scalar_stmt_in_slp (slp_tree node)
  {
    gimple *last = NULL, *stmt;
Index: gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c.orig	2015-11-06 12:11:17.347076131 +0100
--- gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c	2015-11-09 11:02:19.061433658 +0100
*************** A sum(A a,A b)
*** 13,16 ****
    return a;
  }
  
! /* { dg-final { scan-tree-dump-times "not vectorized: more than one data ref in stmt" 0 "slp2" } } */
--- 13,16 ----
    return a;
  }
  
! /* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-11-23  2:46 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-09 12:55 [PATCH] Improve BB vectorization dependence analysis Richard Biener
2015-11-16 18:35 ` Alan Lawrence
2015-11-17  9:54   ` Richard Biener
2015-11-17 11:04     ` Richard Biener
2015-11-23  4:37 ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).