public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Address SLP cost-model issue of PR83202
@ 2017-11-29 13:55 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2017-11-29 13:55 UTC (permalink / raw)
  To: gcc-patches


SLP costing and code generation has the very old issue that it operates
on the SLP tree which, because it isn't a graph, contains redundancies.

The following patch builds upon the hashing infrastructure I added
a few months back and implements a workaround at code-generation and
costing time without changing the core data structures.

We simply can re-use vectorized stmts from a SLP node with exactly
the same set of scalar stmts.  Likewise we only have to cost them
once.

I've implemented the code-generation side to verify correctness
of the costing adjustment.  The simple example that we fail to
vectorize on x86_64 because the cost model sees it as never
profitable made me do this now (also because the implementation
turned out to be so simple).

Bootstrap and regtest in progress on x86_64-unknown-linux-gnu.

Richard.

2017-11-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83202
	* tree-vect-slp.c (scalar_stmts_set_t): New typedef.
	(bst_fail): Use it.
	(vect_analyze_slp_cost_1): Add visited set, do not account SLP
	nodes vectorized to the same stmts multiple times.
	(vect_analyze_slp_cost): Allocate a visited set and pass it down.
	(vect_analyze_slp_instance): Adjust.
	(scalar_stmts_to_slp_tree_map_t): New typedef.
	(vect_schedule_slp_instance): Add a map recording the SLP node
	representing the vectorized stmts for a set of scalar stmts.
	Avoid code-generating redundancies.
	(vect_schedule_slp): Allocate map and pass it down.

	* gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.

Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c	(working copy)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[4][2])
+{
+  for (int i = 0; i < 4; i++)
+    {
+      data[i][0] = data[i][0] * data[i][0];
+      data[i][1] = data[i][1] * data[i][1];
+    }
+}
+
+/* We should vectorize this with SLP and V2DF vectors.  */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 255229)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -961,7 +961,8 @@ bst_traits::equal (value_type existing,
   return true;
 }
 
-static hash_set <vec <gimple *>, bst_traits> *bst_fail;
+typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
+static scalar_stmts_set_t *bst_fail;
 
 static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo,
@@ -1674,7 +1675,8 @@ static void
 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
 			 stmt_vector_for_cost *prologue_cost_vec,
 			 stmt_vector_for_cost *body_cost_vec,
-			 unsigned ncopies_for_cost)
+			 unsigned ncopies_for_cost,
+			 scalar_stmts_set_t* visited)
 {
   unsigned i, j;
   slp_tree child;
@@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance in
   stmt_vec_info stmt_info;
   tree lhs;
 
+  /* If we already costed the exact same set of scalar stmts we're done.
+     We share the generated vector stmts for those.  */
+  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
+    return;
+
+  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
+
   /* Recurse down the SLP tree.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
-			       body_cost_vec, ncopies_for_cost);
+			       body_cost_vec, ncopies_for_cost, visited);
 
   /* Look at the first scalar stmt to determine the cost.  */
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance inst
 
   prologue_cost_vec.create (10);
   body_cost_vec.create (10);
+  scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
 			   &prologue_cost_vec, &body_cost_vec,
-			   ncopies_for_cost);
+			   ncopies_for_cost, visited);
+  delete visited;
 
   /* Record the prologue costs, which were delayed until we were
      sure that SLP was successful.  */
@@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vin
   /* Build the tree for the SLP instance.  */
   bool *matches = XALLOCAVEC (bool, group_size);
   unsigned npermutes = 0;
-  bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
+  bst_fail = new scalar_stmts_set_t ();
   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
 				   &max_nunits, &loads, matches, &npermutes,
 			      NULL, max_tree_size);
@@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree n
   return true;
 }
 
-
+typedef hash_map <vec <gimple *>, slp_tree,
+		  simple_hashmap_traits <bst_traits, slp_tree> >
+  scalar_stmts_to_slp_tree_map_t;
 
 /* Vectorize SLP instance tree in postorder.  */
 
 static bool
-vect_schedule_slp_instance (slp_tree node, slp_instance instance)
+vect_schedule_slp_instance (slp_tree node, slp_instance instance,
+			    scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   gimple *stmt;
   bool grouped_store, is_store;
@@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree nod
   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
     return false;
 
+  /* See if we have already vectorized the same set of stmts and reuse their
+     vectorized stmts.  */
+  slp_tree &leader
+    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
+  if (leader)
+    {
+      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
+      return false;
+    }
+
+  leader = node;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_schedule_slp_instance (child, instance);
+    vect_schedule_slp_instance (child, instance, bst_map);
 
   /* Push SLP node def-type to stmts.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
@@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo)
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
       /* Schedule the tree of INSTANCE.  */
+      scalar_stmts_to_slp_tree_map_t *bst_map
+	= new scalar_stmts_to_slp_tree_map_t ();
       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
-                                             instance);
+                                             instance, bst_map);
+      delete bst_map;
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
                          "vectorizing stmts using SLP.\n");

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2017-11-29 12:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-29 13:55 [PATCH] Address SLP cost-model issue of PR83202 Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).