public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Improve vect cost model for PR37150
@ 2016-11-04 13:22 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2016-11-04 13:22 UTC (permalink / raw)
  To: gcc-patches


The following implements some easy improvements to the SLP cost model
for PR37150, where excess cost is accounted for dead loads and permutes
when vectorizing a basic block.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

This still doesn't vectorize the testcase in that PR without
-fno-vect-cost-model, though.  Real improvements are only possible
by redoing the vectorizer data structures.

Richard.

2016-11-04  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/37150
	* tree-vectorizer.h (vect_transform_slp_perm_load): Add n_perms
	parameter.
	* tree-vect-slp.c (vect_supported_load_permutation_p): Adjust.
	(vect_analyze_slp_cost_1): Account for the real number of
	permutations emitted and for dead loads.
	(vect_transform_slp_perm_load): Add n_perms parameter counting
	the number of emitted permutations.
	* tree-vect-stmts.c (vectorizable_load): Adjust.

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 241791)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -1166,7 +1168,7 @@ extern int vect_get_known_peeling_cost (
 extern void vect_free_slp_instance (slp_instance);
 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
                                           gimple_stmt_iterator *, int,
-                                          slp_instance, bool);
+                                          slp_instance, bool, unsigned *);
 extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances,
 					 void *);
 extern bool vect_schedule_slp (vec_info *);
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 241791)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -1461,8 +1461,9 @@ vect_supported_load_permutation_p (slp_i
 	    {
 	      /* Verify the permutation can be generated.  */
 	      vec<tree> tem;
+	      unsigned n_perms;
 	      if (!vect_transform_slp_perm_load (node, tem, NULL,
-						 1, slp_instn, true))
+						 1, slp_instn, true, &n_perms))
 		{
 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
 				   vect_location,
@@ -1475,11 +1476,13 @@ vect_supported_load_permutation_p (slp_i
     }
 
   /* For loop vectorization verify we can generate the permutation.  */
+  unsigned n_perms;
   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
     if (node->load_permutation.exists ()
 	&& !vect_transform_slp_perm_load
 	      (node, vNULL, NULL,
-	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
+	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
+	       &n_perms))
       return false;
 
   return true;
@@ -1548,14 +1551,38 @@ vect_analyze_slp_cost_1 (slp_instance in
 	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
 	      stmt_info = vinfo_for_stmt (stmt);
 	      /* Record the cost for the permutation.  */
-	      record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
+	      unsigned n_perms;
+	      vect_transform_slp_perm_load (node, vNULL, NULL,
+					    ncopies_for_cost, instance, true,
+					    &n_perms);
+	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
 				stmt_info, 0, vect_body);
-	      /* And adjust the number of loads performed.  */
 	      unsigned nunits
 		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
-	      ncopies_for_cost
-	        = (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
-		   + nunits - 1) / nunits;
+	      /* And adjust the number of loads performed.  This handles
+	         redundancies as well as loads that are later dead.  */
+	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
+	      bitmap_clear (perm);
+	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
+		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
+	      ncopies_for_cost = 0;
+	      bool load_seen = false;
+	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
+		{
+		  if (i % nunits == 0)
+		    {
+		      if (load_seen)
+			ncopies_for_cost++;
+		      load_seen = false;
+		    }
+		  if (bitmap_bit_p (perm, i))
+		    load_seen = true;
+		}
+	      if (load_seen)
+		ncopies_for_cost++;
+	      gcc_assert (ncopies_for_cost
+			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
+			      + nunits - 1) / nunits);
 	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
 	    }
 	  /* Record the cost for the vector loads.  */
@@ -3402,7 +3489,8 @@ vect_create_mask_and_perm (gimple *stmt,
 bool
 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                               gimple_stmt_iterator *gsi, int vf,
-                              slp_instance slp_node_instance, bool analyze_only)
+                              slp_instance slp_node_instance, bool analyze_only,
+			      unsigned *n_perms)
 {
   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -3457,6 +3545,7 @@ vect_transform_slp_perm_load (slp_tree n
   int first_vec_index = -1;
   int second_vec_index = -1;
   bool noop_p = true;
+  *n_perms = 0;
 
   for (int j = 0; j < unroll_factor; j++)
     {
@@ -3513,6 +3602,9 @@ vect_transform_slp_perm_load (slp_tree n
 		  return false;
 		}
 
+	      if (! noop_p)
+		++*n_perms;
+
 	      if (!analyze_only)
 		{
 		  tree mask_vec = NULL_TREE;
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 241791)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -6978,8 +7041,11 @@ vectorizable_load (gimple *stmt, gimple_
 	    }
 	}
       if (slp_perm)
-	vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
-				      slp_node_instance, false);
+	{
+	  unsigned n_perms;
+	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+					slp_node_instance, false, &n_perms);
+	}
       return true;
     }
 
@@ -7497,8 +7563,10 @@ vectorizable_load (gimple *stmt, gimple_
 
       if (slp_perm)
         {
+	  unsigned n_perms;
           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
-                                             slp_node_instance, false))
+                                             slp_node_instance, false,
+					     &n_perms))
             {
               dr_chain.release ();
               return false;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2016-11-04 13:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-04 13:22 [PATCH] Improve vect cost model for PR37150 Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).