public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/rguenth/heads/slp-reorg)] cost SLP invariant nodes directly
@ 2020-03-25 14:22 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2020-03-25 14:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:776665ba543ed685990a94045047576b35f90def

commit 776665ba543ed685990a94045047576b35f90def
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Mar 16 15:06:56 2020 +0100

    cost SLP invariant nodes directly
    
    This costs SLP nodes for externals and invariants separately from
    the stmts using the invariants.  This properly deals with possible
    CSE of them and is more in line with the rest of the SLP operation.

Diff:
---
 gcc/config/i386/i386.c |   5 +-
 gcc/target.def         |   2 +-
 gcc/targhooks.c        |   5 +-
 gcc/targhooks.h        |   2 +-
 gcc/tree-vect-loop.c   |  46 +++++++++---------
 gcc/tree-vect-slp.c    |  73 ++++++++++++++++++++++++++--
 gcc/tree-vect-stmts.c  | 128 ++++++++++++-------------------------------------
 gcc/tree-vectorizer.c  |   2 +-
 gcc/tree-vectorizer.h  |  18 +++++--
 9 files changed, 145 insertions(+), 136 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 48b957bc0ba..422c92aaf6a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21876,15 +21876,14 @@ ix86_init_cost (class loop *)
 static unsigned
 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		    enum vect_cost_for_stmt kind,
-		    class _stmt_vec_info *stmt_info, int misalign,
+		    class _stmt_vec_info *stmt_info, tree vectype,
+		    int misalign,
 		    enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
   bool scalar_p
     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = - 1;
 
   bool fp = false;
diff --git a/gcc/target.def b/gcc/target.def
index f8d26e63021..d907b59e484 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2031,7 +2031,7 @@ DEFHOOK
  "revised.",
  unsigned,
  (class vec_info *, void *data, int count, enum vect_cost_for_stmt kind,
-  class _stmt_vec_info *stmt_info, int misalign,
+  class _stmt_vec_info *stmt_info, tree vectype, int misalign,
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 4caab8cfbfa..3f57c10b1b0 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1350,13 +1350,12 @@ default_init_cost (class loop *loop_info ATTRIBUTE_UNUSED)
 unsigned
 default_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		       enum vect_cost_for_stmt kind,
-		       class _stmt_vec_info *stmt_info, int misalign,
+		       class _stmt_vec_info *stmt_info, tree vectype,
+		       int misalign,
 		       enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = targetm.vectorize.builtin_vectorization_cost (kind, vectype,
 								misalign);
    /* Statements in an inner loop relative to the loop being
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 72f3064e8f8..af7cb536640 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -119,7 +119,7 @@ extern opt_machine_mode default_get_mask_mode (machine_mode);
 extern bool default_empty_mask_is_expensive (unsigned);
 extern void *default_init_cost (class loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
-				       class _stmt_vec_info *, int,
+				       class _stmt_vec_info *, tree, int,
 				       enum vect_cost_model_location);
 extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d1ad1990bb1..d5d9cb6fb56 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1124,8 +1124,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 		    j, si)
     (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-			  si->kind, si->stmt_info, si->misalign,
-			  vect_body);
+			  si->kind, si->stmt_info, si->vectype,
+			  si->misalign, vect_body);
   unsigned dummy, body_cost = 0;
   finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
   destroy_cost_data (target_cost_data);
@@ -3295,9 +3295,9 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       /* If peeled iterations are known but number of scalar loop
          iterations are unknown, count a taken branch per peeled loop.  */
       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
-				 NULL, 0, vect_prologue);
+				 NULL, NULL_TREE, 0, vect_prologue);
       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
-				  NULL, 0, vect_epilogue);
+				  NULL, NULL_TREE, 0, vect_epilogue);
     }
   else
     {
@@ -3378,7 +3378,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3391,12 +3391,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
       if (len)
 	/* Count LEN - 1 ANDs and LEN comparisons.  */
 	(void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
-			      scalar_stmt, NULL, 0, vect_prologue);
+			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
       if (len)
 	{
@@ -3407,7 +3407,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
 	      nstmts += 1;
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
-				scalar_stmt, NULL, 0, vect_prologue);
+				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
 	}
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
@@ -3420,7 +3420,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3429,7 +3429,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			  NULL, 0, vect_prologue);
+			  NULL, NULL_TREE, 0, vect_prologue);
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -3465,8 +3465,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 			    j, si)
 	    (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-				  si->kind, si->stmt_info, si->misalign,
-				  vect_epilogue);
+				  si->kind, si->stmt_info, si->vectype,
+				  si->misalign, vect_epilogue);
 	}
 
       /* Calculate how many masks we need to generate.  */
@@ -3492,10 +3492,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	 probably better not to vectorize.  */
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks - 1, vector_stmt,
-			    NULL, 0, vect_body);
+			    NULL, NULL_TREE, 0, vect_body);
     }
   else if (npeel < 0)
     {
@@ -3517,26 +3517,28 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
          vector iterations are not known since peeled prologue iterations are
          not known. Hence guards remain the same.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       stmt_info_for_cost *si;
       int j;
       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
 	{
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_prologue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_prologue);
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_epilogue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_epilogue);
 	}
     }
@@ -3561,12 +3563,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_prologue);
+			      si->vectype, si->misalign, vect_prologue);
 
       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_epilogue);
+			      si->vectype, si->misalign, vect_epilogue);
 
       prologue_cost_vec.release ();
       epilogue_cost_vec.release ();
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 00e43e0d0b5..a1f08814a62 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2796,6 +2796,69 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
   return true;
 }
 
+/* Record in COST_VEC the prologue cost of building the vectors for the
+   invariant or constant operands represented by NODE.  */
+
+static void
+vect_prologue_cost_for_slp (vec_info *vinfo,
+			    slp_tree node,
+			    stmt_vector_for_cost *cost_vec)
+{
+  tree op = SLP_TREE_SCALAR_OPS (node)[0];
+
+  /* Without looking at the actual initializer a vector of
+     constants can be implemented as load from the constant pool.
+     When all elements are the same we can use a splat.  */
+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
+  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+  unsigned num_vects_to_check;
+  unsigned HOST_WIDE_INT const_nunits;
+  unsigned nelt_limit;
+  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+      && ! multiple_p (const_nunits, group_size))
+    {
+      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+      nelt_limit = const_nunits;
+    }
+  else
+    {
+      /* If either the vector has variable length or the vectors
+	 are composed of repeated whole groups we only need to
+	 cost construction once.  All vectors will be the same.  */
+      num_vects_to_check = 1;
+      nelt_limit = group_size;
+    }
+  tree elt = NULL_TREE;
+  unsigned nelt = 0;
+  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+    {
+      unsigned si = j % group_size;
+      if (nelt == 0)
+	elt = SLP_TREE_SCALAR_OPS (node)[si];
+      /* ???  We're just tracking whether all operands of a single
+	 vector initializer are the same, ideally we'd check if
+	 we emitted the same one already.  */
+      /* ???  Instead cost invariants/externals via
+	 vect_slp_analyze_node_operations.  */
+      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+	elt = NULL_TREE;
+      nelt++;
+      if (nelt == nelt_limit)
+	{
+	  /* No stmt_info is available here, so the vector type is passed
+	     to record_stmt_cost directly.  NOTE(review): the def-type test
+	     below is a plain truthiness check, whereas the code this
+	     replaces compared against vect_external_def -- confirm.  */
+	  record_stmt_cost (cost_vec, 1,
+			    SLP_TREE_DEF_TYPE (node)
+			    ? (elt ? scalar_to_vec : vec_construct)
+			    : vector_load,
+			    vectype, 0, vect_prologue);
+	  nelt = 0;
+	}
+    }
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -2811,9 +2874,6 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   int i, j;
   slp_tree child;
 
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-    return true;
-
   /* If we already analyzed the exact same set of scalar stmts we're done.
      We share the generated vector stmts for those.
      The SLP graph is acyclic so not caching whether we failed or succeeded
@@ -2823,6 +2883,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
       || lvisited.add (node))
     return true;
 
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    {
+      /* Assume we can code-generate all invariants, but cost them.  */
+      vect_prologue_cost_for_slp (vinfo, node, cost_vec);
+      return true;
+    }
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
 					   visited, lvisited, cost_vec))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 438835c7e04..1961cdcc5f8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -92,7 +92,8 @@ stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
 unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-		  int misalign, enum vect_cost_model_location where)
+		  tree vectype, int misalign,
+		  enum vect_cost_model_location where)
 {
   if ((kind == vector_load || kind == unaligned_load)
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -101,14 +102,37 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     kind = vector_scatter_store;
 
-  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
   body_cost_vec->safe_push (si);
 
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   return (unsigned)
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
 
+/* Overload using STMT_VINFO_VECTYPE (STMT_INFO) as the vector type.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind,
+			   stmt_info, STMT_VINFO_VECTYPE (stmt_info),
+			   misalign, where);
+}
+
+/* Overload passing a NULL stmt_vec_info together with an explicit VECTYPE.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, tree vectype,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind, NULL, vectype,
+			   misalign, where);
+}
+
+
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
 
 static tree
@@ -786,71 +810,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
   return opt_result::success ();
 }
 
-/* Compute the prologue cost for invariant or constant operands.  */
-
-static unsigned
-vect_prologue_cost_for_slp_op (vec_info *vinfo,
-			       slp_tree node, stmt_vec_info stmt_info,
-			       unsigned opno, enum vect_def_type dt,
-			       stmt_vector_for_cost *cost_vec)
-{
-  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-  tree op = gimple_op (stmt, opno);
-  unsigned prologue_cost = 0;
-
-  /* Without looking at the actual initializer a vector of
-     constants can be implemented as load from the constant pool.
-     When all elements are the same we can use a splat.  */
-  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
-  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-  unsigned num_vects_to_check;
-  unsigned HOST_WIDE_INT const_nunits;
-  unsigned nelt_limit;
-  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
-      && ! multiple_p (const_nunits, group_size))
-    {
-      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
-      nelt_limit = const_nunits;
-    }
-  else
-    {
-      /* If either the vector has variable length or the vectors
-	 are composed of repeated whole groups we only need to
-	 cost construction once.  All vectors will be the same.  */
-      num_vects_to_check = 1;
-      nelt_limit = group_size;
-    }
-  tree elt = NULL_TREE;
-  unsigned nelt = 0;
-  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
-    {
-      unsigned si = j % group_size;
-      if (nelt == 0)
-	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
-      /* ???  We're just tracking whether all operands of a single
-	 vector initializer are the same, ideally we'd check if
-	 we emitted the same one already.  */
-      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
-				 opno))
-	elt = NULL_TREE;
-      nelt++;
-      if (nelt == nelt_limit)
-	{
-	  /* ???  We need to pass down stmt_info for a vector type
-	     even if it points to the wrong stmt.  */
-	  prologue_cost += record_stmt_cost
-	      (cost_vec, 1,
-	       dt == vect_external_def
-	       ? (elt ? scalar_to_vec : vec_construct)
-	       : vector_load,
-	       stmt_info, 0, vect_prologue);
-	  nelt = 0;
-	}
-    }
-
-  return prologue_cost;
-}
-
 /* Function vect_model_simple_cost.
 
    Models cost for simple operations, i.e. those that only emit ncopies of a
@@ -858,7 +817,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (vec_info *vinfo,
+vect_model_simple_cost (vec_info *,
 			stmt_vec_info stmt_info, int ncopies,
 			enum vect_def_type *dt,
 			int ndts,
@@ -874,27 +833,7 @@ vect_model_simple_cost (vec_info *vinfo,
   if (node)
     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
-  if (node)
-    {
-      /* Scan operands and account for prologue cost of constants/externals.
-	 ???  This over-estimates cost for multiple uses and should be
-	 re-engineered.  */
-      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-      tree lhs = gimple_get_lhs (stmt);
-      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
-	{
-	  tree op = gimple_op (stmt, i);
-	  enum vect_def_type dt;
-	  if (!op || op == lhs)
-	    continue;
-	  if (vect_is_simple_use (op, vinfo, &dt)
-	      && (dt == vect_constant_def || dt == vect_external_def))
-	    prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
-							    stmt_info,
-							    i, dt, cost_vec);
-	}
-    }
-  else
+  if (!node)
     /* Cost the "broadcast" of a scalar operand in to a vector operand.
        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
        cost model.  */
@@ -995,7 +934,6 @@ cfun_returns (tree decl)
 
 static void
 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt,
 		       vect_memory_access_type memory_access_type,
 		       vec_load_store_type vls_type, slp_tree slp_node,
 		       stmt_vector_for_cost *cost_vec)
@@ -1010,11 +948,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
 
   if (vls_type == VLS_STORE_INVARIANT)
     {
-      if (slp_node)
-	prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
-							stmt_info,
-							1, dt, cost_vec);
-      else
+      if (!slp_node)
 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
 					   stmt_info, 0, vect_prologue);
     }
@@ -7524,7 +7458,7 @@ vectorizable_store (vec_info *vinfo,
 				  memory_access_type, &gs_info, mask);
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
+      vect_model_store_cost (vinfo, stmt_info, ncopies,
 			     memory_access_type, vls_type, slp_node, cost_vec);
       return true;
     }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 41ff6791966..584f13095ab 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -99,7 +99,7 @@ auto_purge_vect_location::~auto_purge_vect_location ()
 
 void
 dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
-		stmt_vec_info stmt_info, int misalign, unsigned cost,
+		stmt_vec_info stmt_info, tree, int misalign, unsigned cost,
 		enum vect_cost_model_location where)
 {
   fprintf (f, "%p ", data);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 20784251b6f..5d1371d4efd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -99,6 +99,7 @@ struct stmt_info_for_cost {
   enum vect_cost_for_stmt kind;
   enum vect_cost_model_location where;
   stmt_vec_info stmt_info;
+  tree vectype;
   int misalign;
 };
 
@@ -1355,7 +1356,7 @@ init_cost (class loop *loop_info)
 }
 
 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
-			    stmt_vec_info, int, unsigned,
+			    stmt_vec_info, tree, int, unsigned,
 			    enum vect_cost_model_location);
 
 /* Alias targetm.vectorize.add_stmt_cost.  */
@@ -1363,13 +1364,14 @@ extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
 static inline unsigned
 add_stmt_cost (vec_info *vinfo, void *data, int count,
 	       enum vect_cost_for_stmt kind,
-	       stmt_vec_info stmt_info, int misalign,
+	       stmt_vec_info stmt_info, tree vectype, int misalign,
 	       enum vect_cost_model_location where)
 {
   unsigned cost = targetm.vectorize.add_stmt_cost (vinfo, data, count, kind,
-						   stmt_info, misalign, where);
+						   stmt_info, vectype,
+						   misalign, where);
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
+    dump_stmt_cost (dump_file, data, count, kind, stmt_info, vectype, misalign,
 		    cost, where);
   return cost;
 }
@@ -1398,7 +1400,7 @@ add_stmt_costs (vec_info *vinfo, void *data, stmt_vector_for_cost *cost_vec)
   unsigned i;
   FOR_EACH_VEC_ELT (*cost_vec, i, cost)
     add_stmt_cost (vinfo, data, cost->count, cost->kind, cost->stmt_info,
-		   cost->misalign, cost->where);
+		   cost->vectype, cost->misalign, cost->where);
 }
 
 /*-----------------------------------------------------------------*/
@@ -1701,6 +1703,12 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     vec<tree> *);
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
+				  tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, stmt_vec_info,
+				  int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, tree,
 				  int, enum vect_cost_model_location);
 extern stmt_vec_info vect_finish_replace_stmt (vec_info *,
 					       stmt_vec_info, gimple *);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/rguenth/heads/slp-reorg)] cost SLP invariant nodes directly
@ 2020-03-23 16:01 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2020-03-23 16:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:cb624f05f1200868202dad124e10907db4f29089

commit cb624f05f1200868202dad124e10907db4f29089
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Mar 16 15:06:56 2020 +0100

    cost SLP invariant nodes directly
    
    This costs SLP nodes for externals and invariants separately from
    the stmts using the invariants.  This properly deals with possible
    CSE of them and is more in line with the rest of the SLP operation.

Diff:
---
 gcc/config/i386/i386.c |   5 +-
 gcc/target.def         |   2 +-
 gcc/targhooks.c        |   5 +-
 gcc/targhooks.h        |   2 +-
 gcc/tree-vect-loop.c   |  46 +++++++++---------
 gcc/tree-vect-slp.c    |  73 ++++++++++++++++++++++++++--
 gcc/tree-vect-stmts.c  | 128 ++++++++++++-------------------------------------
 gcc/tree-vectorizer.c  |   2 +-
 gcc/tree-vectorizer.h  |  18 +++++--
 9 files changed, 145 insertions(+), 136 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 48b957bc0ba..422c92aaf6a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21876,15 +21876,14 @@ ix86_init_cost (class loop *)
 static unsigned
 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		    enum vect_cost_for_stmt kind,
-		    class _stmt_vec_info *stmt_info, int misalign,
+		    class _stmt_vec_info *stmt_info, tree vectype,
+		    int misalign,
 		    enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
   bool scalar_p
     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = - 1;
 
   bool fp = false;
diff --git a/gcc/target.def b/gcc/target.def
index f8d26e63021..d907b59e484 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2031,7 +2031,7 @@ DEFHOOK
  "revised.",
  unsigned,
  (class vec_info *, void *data, int count, enum vect_cost_for_stmt kind,
-  class _stmt_vec_info *stmt_info, int misalign,
+  class _stmt_vec_info *stmt_info, tree vectype, int misalign,
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 4caab8cfbfa..3f57c10b1b0 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1350,13 +1350,12 @@ default_init_cost (class loop *loop_info ATTRIBUTE_UNUSED)
 unsigned
 default_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		       enum vect_cost_for_stmt kind,
-		       class _stmt_vec_info *stmt_info, int misalign,
+		       class _stmt_vec_info *stmt_info, tree vectype,
+		       int misalign,
 		       enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = targetm.vectorize.builtin_vectorization_cost (kind, vectype,
 								misalign);
    /* Statements in an inner loop relative to the loop being
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 72f3064e8f8..af7cb536640 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -119,7 +119,7 @@ extern opt_machine_mode default_get_mask_mode (machine_mode);
 extern bool default_empty_mask_is_expensive (unsigned);
 extern void *default_init_cost (class loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
-				       class _stmt_vec_info *, int,
+				       class _stmt_vec_info *, tree, int,
 				       enum vect_cost_model_location);
 extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d1ad1990bb1..d5d9cb6fb56 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1124,8 +1124,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 		    j, si)
     (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-			  si->kind, si->stmt_info, si->misalign,
-			  vect_body);
+			  si->kind, si->stmt_info, si->vectype,
+			  si->misalign, vect_body);
   unsigned dummy, body_cost = 0;
   finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
   destroy_cost_data (target_cost_data);
@@ -3295,9 +3295,9 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       /* If peeled iterations are known but number of scalar loop
          iterations are unknown, count a taken branch per peeled loop.  */
       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
-				 NULL, 0, vect_prologue);
+				 NULL, NULL_TREE, 0, vect_prologue);
       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
-				  NULL, 0, vect_epilogue);
+				  NULL, NULL_TREE, 0, vect_epilogue);
     }
   else
     {
@@ -3378,7 +3378,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3391,12 +3391,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
       if (len)
 	/* Count LEN - 1 ANDs and LEN comparisons.  */
 	(void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
-			      scalar_stmt, NULL, 0, vect_prologue);
+			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
       if (len)
 	{
@@ -3407,7 +3407,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
 	      nstmts += 1;
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
-				scalar_stmt, NULL, 0, vect_prologue);
+				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
 	}
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
@@ -3420,7 +3420,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3429,7 +3429,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			  NULL, 0, vect_prologue);
+			  NULL, NULL_TREE, 0, vect_prologue);
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -3465,8 +3465,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 			    j, si)
 	    (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-				  si->kind, si->stmt_info, si->misalign,
-				  vect_epilogue);
+				  si->kind, si->stmt_info, si->vectype,
+				  si->misalign, vect_epilogue);
 	}
 
       /* Calculate how many masks we need to generate.  */
@@ -3492,10 +3492,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	 probably better not to vectorize.  */
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks - 1, vector_stmt,
-			    NULL, 0, vect_body);
+			    NULL, NULL_TREE, 0, vect_body);
     }
   else if (npeel < 0)
     {
@@ -3517,26 +3517,28 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
          vector iterations are not known since peeled prologue iterations are
          not known. Hence guards remain the same.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       stmt_info_for_cost *si;
       int j;
       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
 	{
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_prologue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_prologue);
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_epilogue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_epilogue);
 	}
     }
@@ -3561,12 +3563,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_prologue);
+			      si->vectype, si->misalign, vect_prologue);
 
       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_epilogue);
+			      si->vectype, si->misalign, vect_epilogue);
 
       prologue_cost_vec.release ();
       epilogue_cost_vec.release ();
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 00e43e0d0b5..a1f08814a62 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2796,6 +2796,69 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
   return true;
 }
 
+/* Record in COST_VEC the prologue cost of building the vectors for the
+   invariant or constant operands represented by NODE.  */
+
+static void
+vect_prologue_cost_for_slp (vec_info *vinfo,
+			    slp_tree node,
+			    stmt_vector_for_cost *cost_vec)
+{
+  tree op = SLP_TREE_SCALAR_OPS (node)[0];
+
+  /* Without looking at the actual initializer a vector of
+     constants can be implemented as load from the constant pool.
+     When all elements are the same we can use a splat.  */
+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
+  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+  unsigned num_vects_to_check;
+  unsigned HOST_WIDE_INT const_nunits;
+  unsigned nelt_limit;
+  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+      && ! multiple_p (const_nunits, group_size))
+    {
+      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+      nelt_limit = const_nunits;
+    }
+  else
+    {
+      /* If either the vector has variable length or the vectors
+	 are composed of repeated whole groups we only need to
+	 cost construction once.  All vectors will be the same.  */
+      num_vects_to_check = 1;
+      nelt_limit = group_size;
+    }
+  tree elt = NULL_TREE;
+  unsigned nelt = 0;
+  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+    {
+      unsigned si = j % group_size;
+      if (nelt == 0)
+	elt = SLP_TREE_SCALAR_OPS (node)[si];
+      /* ???  We're just tracking whether all operands of a single
+	 vector initializer are the same, ideally we'd check if
+	 we emitted the same one already.  */
+      /* ???  Instead cost invariants/externals via
+	 vect_slp_analyze_node_operations.  */
+      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+	elt = NULL_TREE;
+      nelt++;
+      if (nelt == nelt_limit)
+	{
+	  /* There is no stmt_info for an invariant node, so the vector
+	     type is recorded directly.  NOTE(review): the replaced code
+	     tested dt == vect_external_def; this truth test picks
+	     vector_load only for a zero def type -- confirm intent.  */
+	  record_stmt_cost (cost_vec, 1,
+			    SLP_TREE_DEF_TYPE (node)
+			    ? (elt ? scalar_to_vec : vec_construct)
+			    : vector_load,
+			    vectype, 0, vect_prologue);
+	  nelt = 0;
+	}
+    }
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -2811,9 +2874,6 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   int i, j;
   slp_tree child;
 
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-    return true;
-
   /* If we already analyzed the exact same set of scalar stmts we're done.
      We share the generated vector stmts for those.
      The SLP graph is acyclic so not caching whether we failed or succeeded
@@ -2823,6 +2883,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
       || lvisited.add (node))
     return true;
 
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    {
+      /* Assume we can code-generate all invariants, but cost them.  */
+      vect_prologue_cost_for_slp (vinfo, node, cost_vec);
+      return true;
+    }
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
 					   visited, lvisited, cost_vec))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 438835c7e04..1961cdcc5f8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -92,7 +92,8 @@ stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
 unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-		  int misalign, enum vect_cost_model_location where)
+		  tree vectype, int misalign,
+		  enum vect_cost_model_location where)
 {
   if ((kind == vector_load || kind == unaligned_load)
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -101,14 +102,37 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     kind = vector_scatter_store;
 
-  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
   body_cost_vec->safe_push (si);
 
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   return (unsigned)
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
 
+/* Overload taking the vectype from STMT_INFO, or none if it is NULL.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+		  int misalign, enum vect_cost_model_location where)
+{
+  tree vectype = stmt_info ? STMT_VINFO_VECTYPE (stmt_info) : NULL_TREE;
+  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, vectype,
+			   misalign, where);
+}
+
+/* Overload for costs that have a vector type but no stmt_vec_info.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, tree vectype,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind, NULL, vectype,
+			   misalign, where);
+}
+
+
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
 
 static tree
@@ -786,71 +810,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
   return opt_result::success ();
 }
 
-/* Compute the prologue cost for invariant or constant operands.  */
-
-static unsigned
-vect_prologue_cost_for_slp_op (vec_info *vinfo,
-			       slp_tree node, stmt_vec_info stmt_info,
-			       unsigned opno, enum vect_def_type dt,
-			       stmt_vector_for_cost *cost_vec)
-{
-  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-  tree op = gimple_op (stmt, opno);
-  unsigned prologue_cost = 0;
-
-  /* Without looking at the actual initializer a vector of
-     constants can be implemented as load from the constant pool.
-     When all elements are the same we can use a splat.  */
-  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
-  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-  unsigned num_vects_to_check;
-  unsigned HOST_WIDE_INT const_nunits;
-  unsigned nelt_limit;
-  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
-      && ! multiple_p (const_nunits, group_size))
-    {
-      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
-      nelt_limit = const_nunits;
-    }
-  else
-    {
-      /* If either the vector has variable length or the vectors
-	 are composed of repeated whole groups we only need to
-	 cost construction once.  All vectors will be the same.  */
-      num_vects_to_check = 1;
-      nelt_limit = group_size;
-    }
-  tree elt = NULL_TREE;
-  unsigned nelt = 0;
-  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
-    {
-      unsigned si = j % group_size;
-      if (nelt == 0)
-	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
-      /* ???  We're just tracking whether all operands of a single
-	 vector initializer are the same, ideally we'd check if
-	 we emitted the same one already.  */
-      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
-				 opno))
-	elt = NULL_TREE;
-      nelt++;
-      if (nelt == nelt_limit)
-	{
-	  /* ???  We need to pass down stmt_info for a vector type
-	     even if it points to the wrong stmt.  */
-	  prologue_cost += record_stmt_cost
-	      (cost_vec, 1,
-	       dt == vect_external_def
-	       ? (elt ? scalar_to_vec : vec_construct)
-	       : vector_load,
-	       stmt_info, 0, vect_prologue);
-	  nelt = 0;
-	}
-    }
-
-  return prologue_cost;
-}
-
 /* Function vect_model_simple_cost.
 
    Models cost for simple operations, i.e. those that only emit ncopies of a
@@ -858,7 +817,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (vec_info *vinfo,
+vect_model_simple_cost (vec_info *,
 			stmt_vec_info stmt_info, int ncopies,
 			enum vect_def_type *dt,
 			int ndts,
@@ -874,27 +833,7 @@ vect_model_simple_cost (vec_info *vinfo,
   if (node)
     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
-  if (node)
-    {
-      /* Scan operands and account for prologue cost of constants/externals.
-	 ???  This over-estimates cost for multiple uses and should be
-	 re-engineered.  */
-      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-      tree lhs = gimple_get_lhs (stmt);
-      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
-	{
-	  tree op = gimple_op (stmt, i);
-	  enum vect_def_type dt;
-	  if (!op || op == lhs)
-	    continue;
-	  if (vect_is_simple_use (op, vinfo, &dt)
-	      && (dt == vect_constant_def || dt == vect_external_def))
-	    prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
-							    stmt_info,
-							    i, dt, cost_vec);
-	}
-    }
-  else
+  if (!node)
     /* Cost the "broadcast" of a scalar operand in to a vector operand.
        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
        cost model.  */
@@ -995,7 +934,6 @@ cfun_returns (tree decl)
 
 static void
 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt,
 		       vect_memory_access_type memory_access_type,
 		       vec_load_store_type vls_type, slp_tree slp_node,
 		       stmt_vector_for_cost *cost_vec)
@@ -1010,11 +948,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
 
   if (vls_type == VLS_STORE_INVARIANT)
     {
-      if (slp_node)
-	prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
-							stmt_info,
-							1, dt, cost_vec);
-      else
+      if (!slp_node)
 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
 					   stmt_info, 0, vect_prologue);
     }
@@ -7524,7 +7458,7 @@ vectorizable_store (vec_info *vinfo,
 				  memory_access_type, &gs_info, mask);
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
+      vect_model_store_cost (vinfo, stmt_info, ncopies,
 			     memory_access_type, vls_type, slp_node, cost_vec);
       return true;
     }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 41ff6791966..584f13095ab 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -99,7 +99,7 @@ auto_purge_vect_location::~auto_purge_vect_location ()
 
 void
 dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
-		stmt_vec_info stmt_info, int misalign, unsigned cost,
+		stmt_vec_info stmt_info, tree, int misalign, unsigned cost,
 		enum vect_cost_model_location where)
 {
   fprintf (f, "%p ", data);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 20784251b6f..5d1371d4efd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -99,6 +99,7 @@ struct stmt_info_for_cost {
   enum vect_cost_for_stmt kind;
   enum vect_cost_model_location where;
   stmt_vec_info stmt_info;
+  tree vectype;
   int misalign;
 };
 
@@ -1355,7 +1356,7 @@ init_cost (class loop *loop_info)
 }
 
 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
-			    stmt_vec_info, int, unsigned,
+			    stmt_vec_info, tree, int, unsigned,
 			    enum vect_cost_model_location);
 
 /* Alias targetm.vectorize.add_stmt_cost.  */
@@ -1363,13 +1364,14 @@ extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
 static inline unsigned
 add_stmt_cost (vec_info *vinfo, void *data, int count,
 	       enum vect_cost_for_stmt kind,
-	       stmt_vec_info stmt_info, int misalign,
+	       stmt_vec_info stmt_info, tree vectype, int misalign,
 	       enum vect_cost_model_location where)
 {
   unsigned cost = targetm.vectorize.add_stmt_cost (vinfo, data, count, kind,
-						   stmt_info, misalign, where);
+						   stmt_info, vectype,
+						   misalign, where);
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
+    dump_stmt_cost (dump_file, data, count, kind, stmt_info, vectype, misalign,
 		    cost, where);
   return cost;
 }
@@ -1398,7 +1400,7 @@ add_stmt_costs (vec_info *vinfo, void *data, stmt_vector_for_cost *cost_vec)
   unsigned i;
   FOR_EACH_VEC_ELT (*cost_vec, i, cost)
     add_stmt_cost (vinfo, data, cost->count, cost->kind, cost->stmt_info,
-		   cost->misalign, cost->where);
+		   cost->vectype, cost->misalign, cost->where);
 }
 
 /*-----------------------------------------------------------------*/
@@ -1701,6 +1703,12 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     vec<tree> *);
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
+				  tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, stmt_vec_info,
+				  int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, tree,
 				  int, enum vect_cost_model_location);
 extern stmt_vec_info vect_finish_replace_stmt (vec_info *,
 					       stmt_vec_info, gimple *);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/rguenth/heads/slp-reorg)] cost SLP invariant nodes directly
@ 2020-03-20  8:22 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2020-03-20  8:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2e9ce3ec018a345b77a6e458a8b8aecd4dd4fc49

commit 2e9ce3ec018a345b77a6e458a8b8aecd4dd4fc49
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Mar 16 15:06:56 2020 +0100

    cost SLP invariant nodes directly
    
    This costs SLP nodes for externals and invariants separately from
    the stmts using the invariants.  This properly deals with eventual
    CSE of them and is more in-line with the rest of the SLP operation.

Diff:
---
 gcc/config/i386/i386.c |   5 +-
 gcc/target.def         |   2 +-
 gcc/targhooks.c        |   5 +-
 gcc/targhooks.h        |   2 +-
 gcc/tree-vect-loop.c   |  46 +++++++++---------
 gcc/tree-vect-slp.c    |  73 ++++++++++++++++++++++++++--
 gcc/tree-vect-stmts.c  | 128 ++++++++++++-------------------------------------
 gcc/tree-vectorizer.c  |   2 +-
 gcc/tree-vectorizer.h  |  18 +++++--
 9 files changed, 145 insertions(+), 136 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 48b957bc0ba..422c92aaf6a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21876,15 +21876,14 @@ ix86_init_cost (class loop *)
 static unsigned
 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		    enum vect_cost_for_stmt kind,
-		    class _stmt_vec_info *stmt_info, int misalign,
+		    class _stmt_vec_info *stmt_info, tree vectype,
+		    int misalign,
 		    enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
   bool scalar_p
     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = - 1;
 
   bool fp = false;
diff --git a/gcc/target.def b/gcc/target.def
index f8d26e63021..d907b59e484 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2031,7 +2031,7 @@ DEFHOOK
  "revised.",
  unsigned,
  (class vec_info *, void *data, int count, enum vect_cost_for_stmt kind,
-  class _stmt_vec_info *stmt_info, int misalign,
+  class _stmt_vec_info *stmt_info, tree vectype, int misalign,
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 4caab8cfbfa..3f57c10b1b0 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1350,13 +1350,12 @@ default_init_cost (class loop *loop_info ATTRIBUTE_UNUSED)
 unsigned
 default_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		       enum vect_cost_for_stmt kind,
-		       class _stmt_vec_info *stmt_info, int misalign,
+		       class _stmt_vec_info *stmt_info, tree vectype,
+		       int misalign,
 		       enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = targetm.vectorize.builtin_vectorization_cost (kind, vectype,
 								misalign);
    /* Statements in an inner loop relative to the loop being
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 72f3064e8f8..af7cb536640 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -119,7 +119,7 @@ extern opt_machine_mode default_get_mask_mode (machine_mode);
 extern bool default_empty_mask_is_expensive (unsigned);
 extern void *default_init_cost (class loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
-				       class _stmt_vec_info *, int,
+				       class _stmt_vec_info *, tree, int,
 				       enum vect_cost_model_location);
 extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d1ad1990bb1..d5d9cb6fb56 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1124,8 +1124,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 		    j, si)
     (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-			  si->kind, si->stmt_info, si->misalign,
-			  vect_body);
+			  si->kind, si->stmt_info, si->vectype,
+			  si->misalign, vect_body);
   unsigned dummy, body_cost = 0;
   finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
   destroy_cost_data (target_cost_data);
@@ -3295,9 +3295,9 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       /* If peeled iterations are known but number of scalar loop
          iterations are unknown, count a taken branch per peeled loop.  */
       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
-				 NULL, 0, vect_prologue);
+				 NULL, NULL_TREE, 0, vect_prologue);
       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
-				  NULL, 0, vect_epilogue);
+				  NULL, NULL_TREE, 0, vect_epilogue);
     }
   else
     {
@@ -3378,7 +3378,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3391,12 +3391,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
       if (len)
 	/* Count LEN - 1 ANDs and LEN comparisons.  */
 	(void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
-			      scalar_stmt, NULL, 0, vect_prologue);
+			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
       if (len)
 	{
@@ -3407,7 +3407,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
 	      nstmts += 1;
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
-				scalar_stmt, NULL, 0, vect_prologue);
+				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
 	}
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
@@ -3420,7 +3420,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3429,7 +3429,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			  NULL, 0, vect_prologue);
+			  NULL, NULL_TREE, 0, vect_prologue);
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -3465,8 +3465,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 			    j, si)
 	    (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-				  si->kind, si->stmt_info, si->misalign,
-				  vect_epilogue);
+				  si->kind, si->stmt_info, si->vectype,
+				  si->misalign, vect_epilogue);
 	}
 
       /* Calculate how many masks we need to generate.  */
@@ -3492,10 +3492,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	 probably better not to vectorize.  */
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks - 1, vector_stmt,
-			    NULL, 0, vect_body);
+			    NULL, NULL_TREE, 0, vect_body);
     }
   else if (npeel < 0)
     {
@@ -3517,26 +3517,28 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
          vector iterations are not known since peeled prologue iterations are
          not known. Hence guards remain the same.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       stmt_info_for_cost *si;
       int j;
       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
 	{
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_prologue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_prologue);
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_epilogue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_epilogue);
 	}
     }
@@ -3561,12 +3563,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_prologue);
+			      si->vectype, si->misalign, vect_prologue);
 
       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_epilogue);
+			      si->vectype, si->misalign, vect_epilogue);
 
       prologue_cost_vec.release ();
       epilogue_cost_vec.release ();
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 41510d2218e..28a056ebe9d 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2832,6 +2832,69 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
   return true;
 }
 
+/* Record in COST_VEC the prologue cost of building the vectors for the
+   invariant or constant operands represented by NODE.  */
+
+static void
+vect_prologue_cost_for_slp (vec_info *vinfo,
+			    slp_tree node,
+			    stmt_vector_for_cost *cost_vec)
+{
+  tree op = SLP_TREE_SCALAR_OPS (node)[0];
+
+  /* Without looking at the actual initializer a vector of
+     constants can be implemented as load from the constant pool.
+     When all elements are the same we can use a splat.  */
+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
+  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+  unsigned num_vects_to_check;
+  unsigned HOST_WIDE_INT const_nunits;
+  unsigned nelt_limit;
+  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+      && ! multiple_p (const_nunits, group_size))
+    {
+      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+      nelt_limit = const_nunits;
+    }
+  else
+    {
+      /* If either the vector has variable length or the vectors
+	 are composed of repeated whole groups we only need to
+	 cost construction once.  All vectors will be the same.  */
+      num_vects_to_check = 1;
+      nelt_limit = group_size;
+    }
+  tree elt = NULL_TREE;
+  unsigned nelt = 0;
+  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+    {
+      unsigned si = j % group_size;
+      if (nelt == 0)
+	elt = SLP_TREE_SCALAR_OPS (node)[si];
+      /* ???  We're just tracking whether all operands of a single
+	 vector initializer are the same, ideally we'd check if
+	 we emitted the same one already.  */
+      /* ???  Instead cost invariants/externals via
+	 vect_slp_analyze_node_operations.  */
+      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+	elt = NULL_TREE;
+      nelt++;
+      if (nelt == nelt_limit)
+	{
+	  /* There is no stmt_info for an invariant node, so the vector
+	     type is recorded directly.  NOTE(review): the replaced code
+	     tested dt == vect_external_def; this truth test picks
+	     vector_load only for a zero def type -- confirm intent.  */
+	  record_stmt_cost (cost_vec, 1,
+			    SLP_TREE_DEF_TYPE (node)
+			    ? (elt ? scalar_to_vec : vec_construct)
+			    : vector_load,
+			    vectype, 0, vect_prologue);
+	  nelt = 0;
+	}
+    }
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -2847,9 +2910,6 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   int i, j;
   slp_tree child;
 
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-    return true;
-
   /* If we already analyzed the exact same set of scalar stmts we're done.
      We share the generated vector stmts for those.
      The SLP graph is acyclic so not caching whether we failed or succeeded
@@ -2859,6 +2919,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
       || lvisited.add (node))
     return true;
 
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    {
+      /* Assume we can code-generate all invariants, but cost them.  */
+      vect_prologue_cost_for_slp (vinfo, node, cost_vec);
+      return true;
+    }
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
 					   visited, lvisited, cost_vec))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 438835c7e04..1961cdcc5f8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -92,7 +92,8 @@ stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
 unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-		  int misalign, enum vect_cost_model_location where)
+		  tree vectype, int misalign,
+		  enum vect_cost_model_location where)
 {
   if ((kind == vector_load || kind == unaligned_load)
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -101,14 +102,37 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     kind = vector_scatter_store;
 
-  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
   body_cost_vec->safe_push (si);
 
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   return (unsigned)
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
 
+/* Overload with implicit vectype through STMT_INFO.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind,
+			   stmt_info, STMT_VINFO_VECTYPE (stmt_info),
+			   misalign, where);
+}
+
+/* Overload with implicit NULL stmt_vec_info but explicit vector type.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, tree vectype,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind, NULL, vectype,
+			   misalign, where);
+}
+
+
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
 
 static tree
@@ -786,71 +810,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
   return opt_result::success ();
 }
 
-/* Compute the prologue cost for invariant or constant operands.  */
-
-static unsigned
-vect_prologue_cost_for_slp_op (vec_info *vinfo,
-			       slp_tree node, stmt_vec_info stmt_info,
-			       unsigned opno, enum vect_def_type dt,
-			       stmt_vector_for_cost *cost_vec)
-{
-  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-  tree op = gimple_op (stmt, opno);
-  unsigned prologue_cost = 0;
-
-  /* Without looking at the actual initializer a vector of
-     constants can be implemented as load from the constant pool.
-     When all elements are the same we can use a splat.  */
-  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
-  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-  unsigned num_vects_to_check;
-  unsigned HOST_WIDE_INT const_nunits;
-  unsigned nelt_limit;
-  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
-      && ! multiple_p (const_nunits, group_size))
-    {
-      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
-      nelt_limit = const_nunits;
-    }
-  else
-    {
-      /* If either the vector has variable length or the vectors
-	 are composed of repeated whole groups we only need to
-	 cost construction once.  All vectors will be the same.  */
-      num_vects_to_check = 1;
-      nelt_limit = group_size;
-    }
-  tree elt = NULL_TREE;
-  unsigned nelt = 0;
-  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
-    {
-      unsigned si = j % group_size;
-      if (nelt == 0)
-	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
-      /* ???  We're just tracking whether all operands of a single
-	 vector initializer are the same, ideally we'd check if
-	 we emitted the same one already.  */
-      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
-				 opno))
-	elt = NULL_TREE;
-      nelt++;
-      if (nelt == nelt_limit)
-	{
-	  /* ???  We need to pass down stmt_info for a vector type
-	     even if it points to the wrong stmt.  */
-	  prologue_cost += record_stmt_cost
-	      (cost_vec, 1,
-	       dt == vect_external_def
-	       ? (elt ? scalar_to_vec : vec_construct)
-	       : vector_load,
-	       stmt_info, 0, vect_prologue);
-	  nelt = 0;
-	}
-    }
-
-  return prologue_cost;
-}
-
 /* Function vect_model_simple_cost.
 
    Models cost for simple operations, i.e. those that only emit ncopies of a
@@ -858,7 +817,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (vec_info *vinfo,
+vect_model_simple_cost (vec_info *,
 			stmt_vec_info stmt_info, int ncopies,
 			enum vect_def_type *dt,
 			int ndts,
@@ -874,27 +833,7 @@ vect_model_simple_cost (vec_info *vinfo,
   if (node)
     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
-  if (node)
-    {
-      /* Scan operands and account for prologue cost of constants/externals.
-	 ???  This over-estimates cost for multiple uses and should be
-	 re-engineered.  */
-      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-      tree lhs = gimple_get_lhs (stmt);
-      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
-	{
-	  tree op = gimple_op (stmt, i);
-	  enum vect_def_type dt;
-	  if (!op || op == lhs)
-	    continue;
-	  if (vect_is_simple_use (op, vinfo, &dt)
-	      && (dt == vect_constant_def || dt == vect_external_def))
-	    prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
-							    stmt_info,
-							    i, dt, cost_vec);
-	}
-    }
-  else
+  if (!node)
     /* Cost the "broadcast" of a scalar operand in to a vector operand.
        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
        cost model.  */
@@ -995,7 +934,6 @@ cfun_returns (tree decl)
 
 static void
 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt,
 		       vect_memory_access_type memory_access_type,
 		       vec_load_store_type vls_type, slp_tree slp_node,
 		       stmt_vector_for_cost *cost_vec)
@@ -1010,11 +948,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
 
   if (vls_type == VLS_STORE_INVARIANT)
     {
-      if (slp_node)
-	prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
-							stmt_info,
-							1, dt, cost_vec);
-      else
+      if (!slp_node)
 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
 					   stmt_info, 0, vect_prologue);
     }
@@ -7524,7 +7458,7 @@ vectorizable_store (vec_info *vinfo,
 				  memory_access_type, &gs_info, mask);
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
+      vect_model_store_cost (vinfo, stmt_info, ncopies,
 			     memory_access_type, vls_type, slp_node, cost_vec);
       return true;
     }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 41ff6791966..584f13095ab 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -99,7 +99,7 @@ auto_purge_vect_location::~auto_purge_vect_location ()
 
 void
 dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
-		stmt_vec_info stmt_info, int misalign, unsigned cost,
+		stmt_vec_info stmt_info, tree, int misalign, unsigned cost,
 		enum vect_cost_model_location where)
 {
   fprintf (f, "%p ", data);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 20784251b6f..5d1371d4efd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -99,6 +99,7 @@ struct stmt_info_for_cost {
   enum vect_cost_for_stmt kind;
   enum vect_cost_model_location where;
   stmt_vec_info stmt_info;
+  tree vectype;
   int misalign;
 };
 
@@ -1355,7 +1356,7 @@ init_cost (class loop *loop_info)
 }
 
 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
-			    stmt_vec_info, int, unsigned,
+			    stmt_vec_info, tree, int, unsigned,
 			    enum vect_cost_model_location);
 
 /* Alias targetm.vectorize.add_stmt_cost.  */
@@ -1363,13 +1364,14 @@ extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
 static inline unsigned
 add_stmt_cost (vec_info *vinfo, void *data, int count,
 	       enum vect_cost_for_stmt kind,
-	       stmt_vec_info stmt_info, int misalign,
+	       stmt_vec_info stmt_info, tree vectype, int misalign,
 	       enum vect_cost_model_location where)
 {
   unsigned cost = targetm.vectorize.add_stmt_cost (vinfo, data, count, kind,
-						   stmt_info, misalign, where);
+						   stmt_info, vectype,
+						   misalign, where);
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
+    dump_stmt_cost (dump_file, data, count, kind, stmt_info, vectype, misalign,
 		    cost, where);
   return cost;
 }
@@ -1398,7 +1400,7 @@ add_stmt_costs (vec_info *vinfo, void *data, stmt_vector_for_cost *cost_vec)
   unsigned i;
   FOR_EACH_VEC_ELT (*cost_vec, i, cost)
     add_stmt_cost (vinfo, data, cost->count, cost->kind, cost->stmt_info,
-		   cost->misalign, cost->where);
+		   cost->vectype, cost->misalign, cost->where);
 }
 
 /*-----------------------------------------------------------------*/
@@ -1701,6 +1703,12 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     vec<tree> *);
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
+				  tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, stmt_vec_info,
+				  int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, tree,
 				  int, enum vect_cost_model_location);
 extern stmt_vec_info vect_finish_replace_stmt (vec_info *,
 					       stmt_vec_info, gimple *);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/rguenth/heads/slp-reorg)] cost SLP invariant nodes directly
@ 2020-03-17  8:27 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2020-03-17  8:27 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9114af42dc61451b93760f0a0812cdb91ab55675

commit 9114af42dc61451b93760f0a0812cdb91ab55675
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Mar 16 15:06:56 2020 +0100

    cost SLP invariant nodes directly
    
    This costs SLP nodes for externals and invariants separately from
    the stmts using the invariants.  This properly deals with eventual
    CSE of them and is more in-line with the rest of the SLP operation.

Diff:
---
 gcc/config/i386/i386.c |   5 +-
 gcc/target.def         |   2 +-
 gcc/targhooks.c        |   5 +-
 gcc/targhooks.h        |   2 +-
 gcc/tree-vect-loop.c   |  46 +++++++++---------
 gcc/tree-vect-slp.c    |  73 ++++++++++++++++++++++++++--
 gcc/tree-vect-stmts.c  | 128 ++++++++++++-------------------------------------
 gcc/tree-vectorizer.c  |   2 +-
 gcc/tree-vectorizer.h  |  18 +++++--
 9 files changed, 145 insertions(+), 136 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6ec0a5111b1..20e7896c32e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21876,15 +21876,14 @@ ix86_init_cost (class loop *)
 static unsigned
 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		    enum vect_cost_for_stmt kind,
-		    class _stmt_vec_info *stmt_info, int misalign,
+		    class _stmt_vec_info *stmt_info, tree vectype,
+		    int misalign,
 		    enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
   bool scalar_p
     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = - 1;
 
   bool fp = false;
diff --git a/gcc/target.def b/gcc/target.def
index 6d5f4b0a108..443c60ef91d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2031,7 +2031,7 @@ DEFHOOK
  "revised.",
  unsigned,
  (class vec_info *, void *data, int count, enum vect_cost_for_stmt kind,
-  class _stmt_vec_info *stmt_info, int misalign,
+  class _stmt_vec_info *stmt_info, tree vectype, int misalign,
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 4caab8cfbfa..3f57c10b1b0 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1350,13 +1350,12 @@ default_init_cost (class loop *loop_info ATTRIBUTE_UNUSED)
 unsigned
 default_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		       enum vect_cost_for_stmt kind,
-		       class _stmt_vec_info *stmt_info, int misalign,
+		       class _stmt_vec_info *stmt_info, tree vectype,
+		       int misalign,
 		       enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = targetm.vectorize.builtin_vectorization_cost (kind, vectype,
 								misalign);
    /* Statements in an inner loop relative to the loop being
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 72f3064e8f8..af7cb536640 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -119,7 +119,7 @@ extern opt_machine_mode default_get_mask_mode (machine_mode);
 extern bool default_empty_mask_is_expensive (unsigned);
 extern void *default_init_cost (class loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
-				       class _stmt_vec_info *, int,
+				       class _stmt_vec_info *, tree, int,
 				       enum vect_cost_model_location);
 extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 67f1513e495..3e4f20d0e26 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1124,8 +1124,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 		    j, si)
     (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-			  si->kind, si->stmt_info, si->misalign,
-			  vect_body);
+			  si->kind, si->stmt_info, si->vectype,
+			  si->misalign, vect_body);
   unsigned dummy, body_cost = 0;
   finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
   destroy_cost_data (target_cost_data);
@@ -3295,9 +3295,9 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       /* If peeled iterations are known but number of scalar loop
          iterations are unknown, count a taken branch per peeled loop.  */
       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
-				 NULL, 0, vect_prologue);
+				 NULL, NULL_TREE, 0, vect_prologue);
       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
-				  NULL, 0, vect_epilogue);
+				  NULL, NULL_TREE, 0, vect_epilogue);
     }
   else
     {
@@ -3378,7 +3378,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3391,12 +3391,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
       if (len)
 	/* Count LEN - 1 ANDs and LEN comparisons.  */
 	(void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
-			      scalar_stmt, NULL, 0, vect_prologue);
+			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
       if (len)
 	{
@@ -3407,7 +3407,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
 	      nstmts += 1;
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
-				scalar_stmt, NULL, 0, vect_prologue);
+				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
 	}
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
@@ -3420,7 +3420,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3429,7 +3429,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			  NULL, 0, vect_prologue);
+			  NULL, NULL_TREE, 0, vect_prologue);
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -3465,8 +3465,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 			    j, si)
 	    (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-				  si->kind, si->stmt_info, si->misalign,
-				  vect_epilogue);
+				  si->kind, si->stmt_info, si->vectype,
+				  si->misalign, vect_epilogue);
 	}
 
       /* Calculate how many masks we need to generate.  */
@@ -3492,10 +3492,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	 probably better not to vectorize.  */
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks - 1, vector_stmt,
-			    NULL, 0, vect_body);
+			    NULL, NULL_TREE, 0, vect_body);
     }
   else if (npeel < 0)
     {
@@ -3517,26 +3517,28 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
          vector iterations are not known since peeled prologue iterations are
          not known. Hence guards remain the same.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       stmt_info_for_cost *si;
       int j;
       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
 	{
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_prologue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_prologue);
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_epilogue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_epilogue);
 	}
     }
@@ -3561,12 +3563,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_prologue);
+			      si->vectype, si->misalign, vect_prologue);
 
       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_epilogue);
+			      si->vectype, si->misalign, vect_epilogue);
 
       prologue_cost_vec.release ();
       epilogue_cost_vec.release ();
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 41510d2218e..28a056ebe9d 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2832,6 +2832,69 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
   return true;
 }
 
+/* Compute the prologue cost for invariant or constant operands represented
+   by NODE.  */
+
+static void
+vect_prologue_cost_for_slp (vec_info *vinfo,
+			    slp_tree node,
+			    stmt_vector_for_cost *cost_vec)
+{
+  tree op = SLP_TREE_SCALAR_OPS (node)[0];
+
+  /* Without looking at the actual initializer a vector of
+     constants can be implemented as load from the constant pool.
+     When all elements are the same we can use a splat.  */
+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
+  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+  unsigned num_vects_to_check;
+  unsigned HOST_WIDE_INT const_nunits;
+  unsigned nelt_limit;
+  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+      && ! multiple_p (const_nunits, group_size))
+    {
+      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+      nelt_limit = const_nunits;
+    }
+  else
+    {
+      /* If either the vector has variable length or the vectors
+	 are composed of repeated whole groups we only need to
+	 cost construction once.  All vectors will be the same.  */
+      num_vects_to_check = 1;
+      nelt_limit = group_size;
+    }
+  tree elt = NULL_TREE;
+  unsigned nelt = 0;
+  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+    {
+      unsigned si = j % group_size;
+      if (nelt == 0)
+	elt = SLP_TREE_SCALAR_OPS (node)[si];
+      /* ???  We're just tracking whether all operands of a single
+	 vector initializer are the same, ideally we'd check if
+	 we emitted the same one already.  */
+      /* ???  Instead cost invariants/externals via
+	 vect_slp_analyze_node_operations.  */
+      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+	elt = NULL_TREE;
+      nelt++;
+      if (nelt == nelt_limit)
+	{
+	  /* ???  We need to pass down stmt_info for a vector type
+	     even if it points to the wrong stmt.  But here we don't
+	     have one so we should amend things to record a vector
+	     type directly.  */
+	  record_stmt_cost (cost_vec, 1,
+			    SLP_TREE_DEF_TYPE (node)
+			    ? (elt ? scalar_to_vec : vec_construct)
+			    : vector_load,
+			    vectype, 0, vect_prologue);
+	  nelt = 0;
+	}
+    }
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -2847,9 +2910,6 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   int i, j;
   slp_tree child;
 
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-    return true;
-
   /* If we already analyzed the exact same set of scalar stmts we're done.
      We share the generated vector stmts for those.
      The SLP graph is acyclic so not caching whether we failed or succeeded
@@ -2859,6 +2919,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
       || lvisited.add (node))
     return true;
 
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    {
+      /* Assume we can code-generate all invariants, but cost them.  */
+      vect_prologue_cost_for_slp (vinfo, node, cost_vec);
+      return true;
+    }
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
 					   visited, lvisited, cost_vec))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 438835c7e04..1961cdcc5f8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -92,7 +92,8 @@ stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
 unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-		  int misalign, enum vect_cost_model_location where)
+		  tree vectype, int misalign,
+		  enum vect_cost_model_location where)
 {
   if ((kind == vector_load || kind == unaligned_load)
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -101,14 +102,37 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     kind = vector_scatter_store;
 
-  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
   body_cost_vec->safe_push (si);
 
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   return (unsigned)
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
 
+/* Overload with implicit vectype through STMT_INFO.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind,
+			   stmt_info, STMT_VINFO_VECTYPE (stmt_info),
+			   misalign, where);
+}
+
+/* Overload with implicit NULL stmt_vec_info but explicit vector type.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, tree vectype,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind, NULL, vectype,
+			   misalign, where);
+}
+
+
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
 
 static tree
@@ -786,71 +810,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
   return opt_result::success ();
 }
 
-/* Compute the prologue cost for invariant or constant operands.  */
-
-static unsigned
-vect_prologue_cost_for_slp_op (vec_info *vinfo,
-			       slp_tree node, stmt_vec_info stmt_info,
-			       unsigned opno, enum vect_def_type dt,
-			       stmt_vector_for_cost *cost_vec)
-{
-  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-  tree op = gimple_op (stmt, opno);
-  unsigned prologue_cost = 0;
-
-  /* Without looking at the actual initializer a vector of
-     constants can be implemented as load from the constant pool.
-     When all elements are the same we can use a splat.  */
-  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
-  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-  unsigned num_vects_to_check;
-  unsigned HOST_WIDE_INT const_nunits;
-  unsigned nelt_limit;
-  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
-      && ! multiple_p (const_nunits, group_size))
-    {
-      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
-      nelt_limit = const_nunits;
-    }
-  else
-    {
-      /* If either the vector has variable length or the vectors
-	 are composed of repeated whole groups we only need to
-	 cost construction once.  All vectors will be the same.  */
-      num_vects_to_check = 1;
-      nelt_limit = group_size;
-    }
-  tree elt = NULL_TREE;
-  unsigned nelt = 0;
-  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
-    {
-      unsigned si = j % group_size;
-      if (nelt == 0)
-	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
-      /* ???  We're just tracking whether all operands of a single
-	 vector initializer are the same, ideally we'd check if
-	 we emitted the same one already.  */
-      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
-				 opno))
-	elt = NULL_TREE;
-      nelt++;
-      if (nelt == nelt_limit)
-	{
-	  /* ???  We need to pass down stmt_info for a vector type
-	     even if it points to the wrong stmt.  */
-	  prologue_cost += record_stmt_cost
-	      (cost_vec, 1,
-	       dt == vect_external_def
-	       ? (elt ? scalar_to_vec : vec_construct)
-	       : vector_load,
-	       stmt_info, 0, vect_prologue);
-	  nelt = 0;
-	}
-    }
-
-  return prologue_cost;
-}
-
 /* Function vect_model_simple_cost.
 
    Models cost for simple operations, i.e. those that only emit ncopies of a
@@ -858,7 +817,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (vec_info *vinfo,
+vect_model_simple_cost (vec_info *,
 			stmt_vec_info stmt_info, int ncopies,
 			enum vect_def_type *dt,
 			int ndts,
@@ -874,27 +833,7 @@ vect_model_simple_cost (vec_info *vinfo,
   if (node)
     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
-  if (node)
-    {
-      /* Scan operands and account for prologue cost of constants/externals.
-	 ???  This over-estimates cost for multiple uses and should be
-	 re-engineered.  */
-      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-      tree lhs = gimple_get_lhs (stmt);
-      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
-	{
-	  tree op = gimple_op (stmt, i);
-	  enum vect_def_type dt;
-	  if (!op || op == lhs)
-	    continue;
-	  if (vect_is_simple_use (op, vinfo, &dt)
-	      && (dt == vect_constant_def || dt == vect_external_def))
-	    prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
-							    stmt_info,
-							    i, dt, cost_vec);
-	}
-    }
-  else
+  if (!node)
     /* Cost the "broadcast" of a scalar operand in to a vector operand.
        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
        cost model.  */
@@ -995,7 +934,6 @@ cfun_returns (tree decl)
 
 static void
 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt,
 		       vect_memory_access_type memory_access_type,
 		       vec_load_store_type vls_type, slp_tree slp_node,
 		       stmt_vector_for_cost *cost_vec)
@@ -1010,11 +948,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
 
   if (vls_type == VLS_STORE_INVARIANT)
     {
-      if (slp_node)
-	prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
-							stmt_info,
-							1, dt, cost_vec);
-      else
+      if (!slp_node)
 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
 					   stmt_info, 0, vect_prologue);
     }
@@ -7524,7 +7458,7 @@ vectorizable_store (vec_info *vinfo,
 				  memory_access_type, &gs_info, mask);
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
+      vect_model_store_cost (vinfo, stmt_info, ncopies,
 			     memory_access_type, vls_type, slp_node, cost_vec);
       return true;
     }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 41ff6791966..584f13095ab 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -99,7 +99,7 @@ auto_purge_vect_location::~auto_purge_vect_location ()
 
 void
 dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
-		stmt_vec_info stmt_info, int misalign, unsigned cost,
+		stmt_vec_info stmt_info, tree, int misalign, unsigned cost,
 		enum vect_cost_model_location where)
 {
   fprintf (f, "%p ", data);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 20784251b6f..5d1371d4efd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -99,6 +99,7 @@ struct stmt_info_for_cost {
   enum vect_cost_for_stmt kind;
   enum vect_cost_model_location where;
   stmt_vec_info stmt_info;
+  tree vectype;
   int misalign;
 };
 
@@ -1355,7 +1356,7 @@ init_cost (class loop *loop_info)
 }
 
 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
-			    stmt_vec_info, int, unsigned,
+			    stmt_vec_info, tree, int, unsigned,
 			    enum vect_cost_model_location);
 
 /* Alias targetm.vectorize.add_stmt_cost.  */
@@ -1363,13 +1364,14 @@ extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
 static inline unsigned
 add_stmt_cost (vec_info *vinfo, void *data, int count,
 	       enum vect_cost_for_stmt kind,
-	       stmt_vec_info stmt_info, int misalign,
+	       stmt_vec_info stmt_info, tree vectype, int misalign,
 	       enum vect_cost_model_location where)
 {
   unsigned cost = targetm.vectorize.add_stmt_cost (vinfo, data, count, kind,
-						   stmt_info, misalign, where);
+						   stmt_info, vectype,
+						   misalign, where);
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
+    dump_stmt_cost (dump_file, data, count, kind, stmt_info, vectype, misalign,
 		    cost, where);
   return cost;
 }
@@ -1398,7 +1400,7 @@ add_stmt_costs (vec_info *vinfo, void *data, stmt_vector_for_cost *cost_vec)
   unsigned i;
   FOR_EACH_VEC_ELT (*cost_vec, i, cost)
     add_stmt_cost (vinfo, data, cost->count, cost->kind, cost->stmt_info,
-		   cost->misalign, cost->where);
+		   cost->vectype, cost->misalign, cost->where);
 }
 
 /*-----------------------------------------------------------------*/
@@ -1701,6 +1703,12 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     vec<tree> *);
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
+				  tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, stmt_vec_info,
+				  int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, tree,
 				  int, enum vect_cost_model_location);
 extern stmt_vec_info vect_finish_replace_stmt (vec_info *,
 					       stmt_vec_info, gimple *);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/rguenth/heads/slp-reorg)] cost SLP invariant nodes directly
@ 2020-03-16 14:59 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2020-03-16 14:59 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7ef662823b01ad5ea028e9769a46f8e9a6618c6f

commit 7ef662823b01ad5ea028e9769a46f8e9a6618c6f
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Mar 16 15:06:56 2020 +0100

    cost SLP invariant nodes directly
    
    This costs SLP nodes for externals and invariants separately from
    the stmts using the invariants.  This properly deals with eventual
    CSE of them and is more in-line with the rest of the SLP operation.

Diff:
---
 gcc/config/i386/i386.c |   5 +-
 gcc/target.def         |   2 +-
 gcc/targhooks.c        |   5 +-
 gcc/targhooks.h        |   2 +-
 gcc/tree-vect-loop.c   |  46 +++++++++---------
 gcc/tree-vect-slp.c    |  73 ++++++++++++++++++++++++++--
 gcc/tree-vect-stmts.c  | 128 ++++++++++++-------------------------------------
 gcc/tree-vectorizer.c  |   2 +-
 gcc/tree-vectorizer.h  |  18 +++++--
 9 files changed, 145 insertions(+), 136 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6ec0a5111b1..20e7896c32e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21876,15 +21876,14 @@ ix86_init_cost (class loop *)
 static unsigned
 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		    enum vect_cost_for_stmt kind,
-		    class _stmt_vec_info *stmt_info, int misalign,
+		    class _stmt_vec_info *stmt_info, tree vectype,
+		    int misalign,
 		    enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
   bool scalar_p
     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = - 1;
 
   bool fp = false;
diff --git a/gcc/target.def b/gcc/target.def
index 6d5f4b0a108..443c60ef91d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2031,7 +2031,7 @@ DEFHOOK
  "revised.",
  unsigned,
  (class vec_info *, void *data, int count, enum vect_cost_for_stmt kind,
-  class _stmt_vec_info *stmt_info, int misalign,
+  class _stmt_vec_info *stmt_info, tree vectype, int misalign,
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 4caab8cfbfa..3f57c10b1b0 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1350,13 +1350,12 @@ default_init_cost (class loop *loop_info ATTRIBUTE_UNUSED)
 unsigned
 default_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 		       enum vect_cost_for_stmt kind,
-		       class _stmt_vec_info *stmt_info, int misalign,
+		       class _stmt_vec_info *stmt_info, tree vectype,
+		       int misalign,
 		       enum vect_cost_model_location where)
 {
   unsigned *cost = (unsigned *) data;
   unsigned retval = 0;
-
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   int stmt_cost = targetm.vectorize.builtin_vectorization_cost (kind, vectype,
 								misalign);
    /* Statements in an inner loop relative to the loop being
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 72f3064e8f8..af7cb536640 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -119,7 +119,7 @@ extern opt_machine_mode default_get_mask_mode (machine_mode);
 extern bool default_empty_mask_is_expensive (unsigned);
 extern void *default_init_cost (class loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
-				       class _stmt_vec_info *, int,
+				       class _stmt_vec_info *, tree, int,
 				       enum vect_cost_model_location);
 extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 67f1513e495..3e4f20d0e26 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1124,8 +1124,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 		    j, si)
     (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-			  si->kind, si->stmt_info, si->misalign,
-			  vect_body);
+			  si->kind, si->stmt_info, si->vectype,
+			  si->misalign, vect_body);
   unsigned dummy, body_cost = 0;
   finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
   destroy_cost_data (target_cost_data);
@@ -3295,9 +3295,9 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       /* If peeled iterations are known but number of scalar loop
          iterations are unknown, count a taken branch per peeled loop.  */
       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
-				 NULL, 0, vect_prologue);
+				 NULL, NULL_TREE, 0, vect_prologue);
       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
-				  NULL, 0, vect_epilogue);
+				  NULL, NULL_TREE, 0, vect_epilogue);
     }
   else
     {
@@ -3378,7 +3378,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3391,12 +3391,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
       (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
       if (len)
 	/* Count LEN - 1 ANDs and LEN comparisons.  */
 	(void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
-			      scalar_stmt, NULL, 0, vect_prologue);
+			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
       if (len)
 	{
@@ -3407,7 +3407,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
 	      nstmts += 1;
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
-				scalar_stmt, NULL, 0, vect_prologue);
+				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
 	}
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
@@ -3420,7 +3420,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3429,7 +3429,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			  NULL, 0, vect_prologue);
+			  NULL, NULL_TREE, 0, vect_prologue);
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -3465,8 +3465,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 			    j, si)
 	    (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
-				  si->kind, si->stmt_info, si->misalign,
-				  vect_epilogue);
+				  si->kind, si->stmt_info, si->vectype,
+				  si->misalign, vect_epilogue);
 	}
 
       /* Calculate how many masks we need to generate.  */
@@ -3492,10 +3492,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	 probably better not to vectorize.  */
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks, vector_stmt,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, num_masks - 1, vector_stmt,
-			    NULL, 0, vect_body);
+			    NULL, NULL_TREE, 0, vect_body);
     }
   else if (npeel < 0)
     {
@@ -3517,26 +3517,28 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
          vector iterations are not known since peeled prologue iterations are
          not known. Hence guards remain the same.  */
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_prologue);
+			    NULL, NULL_TREE, 0, vect_prologue);
       (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       (void) add_stmt_cost (loop_vinfo,
 			    target_cost_data, 1, cond_branch_not_taken,
-			    NULL, 0, vect_epilogue);
+			    NULL, NULL_TREE, 0, vect_epilogue);
       stmt_info_for_cost *si;
       int j;
       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
 	{
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_prologue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_prologue);
 	  (void) add_stmt_cost (loop_vinfo, target_cost_data,
 				si->count * peel_iters_epilogue,
-				si->kind, si->stmt_info, si->misalign,
+				si->kind, si->stmt_info, si->vectype,
+				si->misalign,
 				vect_epilogue);
 	}
     }
@@ -3561,12 +3563,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_prologue);
+			      si->vectype, si->misalign, vect_prologue);
 
       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
 	(void) add_stmt_cost (loop_vinfo,
 			      data, si->count, si->kind, si->stmt_info,
-			      si->misalign, vect_epilogue);
+			      si->vectype, si->misalign, vect_epilogue);
 
       prologue_cost_vec.release ();
       epilogue_cost_vec.release ();
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 41510d2218e..28a056ebe9d 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2832,6 +2832,69 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
   return true;
 }
 
+/* Record in COST_VEC the prologue cost of materializing the invariant or
+   constant operands of SLP node NODE; VINFO is used to find the vectype.  */
+
+static void
+vect_prologue_cost_for_slp (vec_info *vinfo,
+			    slp_tree node,
+			    stmt_vector_for_cost *cost_vec)
+{
+  tree op = SLP_TREE_SCALAR_OPS (node)[0];
+
+  /* Without looking at the actual initializer a vector of
+     constants can be implemented as load from the constant pool.
+     When all elements are the same we can use a splat.  */
+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
+  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+  unsigned num_vects_to_check;
+  unsigned HOST_WIDE_INT const_nunits;
+  unsigned nelt_limit;
+  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+      && ! multiple_p (const_nunits, group_size))
+    {
+      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+      nelt_limit = const_nunits;
+    }
+  else
+    {
+      /* If either the vector has variable length or the vectors
+	 are composed of repeated whole groups we only need to
+	 cost construction once.  All vectors will be the same.  */
+      num_vects_to_check = 1;
+      nelt_limit = group_size;
+    }
+  tree elt = NULL_TREE;
+  unsigned nelt = 0;
+  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+    {
+      unsigned si = j % group_size;
+      if (nelt == 0)
+	elt = SLP_TREE_SCALAR_OPS (node)[si];
+      /* ???  We're just tracking whether all operands of a single
+	 vector initializer are the same, ideally we'd check if
+	 we emitted the same one already.  */
+      /* NOTE(review): vect_slp_analyze_node_operations now calls
+	 this function for nodes that are not vect_internal_def.  */
+      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+	elt = NULL_TREE;
+      nelt++;
+      if (nelt == nelt_limit)
+	{
+	  /* The vector type is recorded directly; no stmt_info is
+	     passed.  NOTE(review): the operand below tests DEF_TYPE
+	     for non-zero, while the code this replaces compared the
+	     def type against vect_external_def -- confirm intent.  */
+	  record_stmt_cost (cost_vec, 1,
+			    SLP_TREE_DEF_TYPE (node)
+			    ? (elt ? scalar_to_vec : vec_construct)
+			    : vector_load,
+			    vectype, 0, vect_prologue);
+	  nelt = 0;
+	}
+    }
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -2847,9 +2910,6 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   int i, j;
   slp_tree child;
 
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-    return true;
-
   /* If we already analyzed the exact same set of scalar stmts we're done.
      We share the generated vector stmts for those.
      The SLP graph is acyclic so not caching whether we failed or succeeded
@@ -2859,6 +2919,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
       || lvisited.add (node))
     return true;
 
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    {
+      /* Assume we can code-generate all invariants, but cost them.  */
+      vect_prologue_cost_for_slp (vinfo, node, cost_vec);
+      return true;
+    }
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
 					   visited, lvisited, cost_vec))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 438835c7e04..1961cdcc5f8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -92,7 +92,8 @@ stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
 unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-		  int misalign, enum vect_cost_model_location where)
+		  tree vectype, int misalign,
+		  enum vect_cost_model_location where)
 {
   if ((kind == vector_load || kind == unaligned_load)
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -101,14 +102,37 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     kind = vector_scatter_store;
 
-  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
   body_cost_vec->safe_push (si);
 
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   return (unsigned)
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
 
+/* Overload taking the vectype from STMT_INFO (assumed non-NULL).  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind,
+			   stmt_info, STMT_VINFO_VECTYPE (stmt_info),
+			   misalign, where);
+}
+
+/* Overload for costs without a stmt: forwards NULL and the given VECTYPE.  */
+
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, tree vectype,
+		  int misalign, enum vect_cost_model_location where)
+{
+  return record_stmt_cost (body_cost_vec, count, kind, NULL, vectype,
+			   misalign, where);
+}
+
+
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
 
 static tree
@@ -786,71 +810,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
   return opt_result::success ();
 }
 
-/* Compute the prologue cost for invariant or constant operands.  */
-
-static unsigned
-vect_prologue_cost_for_slp_op (vec_info *vinfo,
-			       slp_tree node, stmt_vec_info stmt_info,
-			       unsigned opno, enum vect_def_type dt,
-			       stmt_vector_for_cost *cost_vec)
-{
-  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-  tree op = gimple_op (stmt, opno);
-  unsigned prologue_cost = 0;
-
-  /* Without looking at the actual initializer a vector of
-     constants can be implemented as load from the constant pool.
-     When all elements are the same we can use a splat.  */
-  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
-  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-  unsigned num_vects_to_check;
-  unsigned HOST_WIDE_INT const_nunits;
-  unsigned nelt_limit;
-  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
-      && ! multiple_p (const_nunits, group_size))
-    {
-      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
-      nelt_limit = const_nunits;
-    }
-  else
-    {
-      /* If either the vector has variable length or the vectors
-	 are composed of repeated whole groups we only need to
-	 cost construction once.  All vectors will be the same.  */
-      num_vects_to_check = 1;
-      nelt_limit = group_size;
-    }
-  tree elt = NULL_TREE;
-  unsigned nelt = 0;
-  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
-    {
-      unsigned si = j % group_size;
-      if (nelt == 0)
-	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
-      /* ???  We're just tracking whether all operands of a single
-	 vector initializer are the same, ideally we'd check if
-	 we emitted the same one already.  */
-      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
-				 opno))
-	elt = NULL_TREE;
-      nelt++;
-      if (nelt == nelt_limit)
-	{
-	  /* ???  We need to pass down stmt_info for a vector type
-	     even if it points to the wrong stmt.  */
-	  prologue_cost += record_stmt_cost
-	      (cost_vec, 1,
-	       dt == vect_external_def
-	       ? (elt ? scalar_to_vec : vec_construct)
-	       : vector_load,
-	       stmt_info, 0, vect_prologue);
-	  nelt = 0;
-	}
-    }
-
-  return prologue_cost;
-}
-
 /* Function vect_model_simple_cost.
 
    Models cost for simple operations, i.e. those that only emit ncopies of a
@@ -858,7 +817,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (vec_info *vinfo,
+vect_model_simple_cost (vec_info *,
 			stmt_vec_info stmt_info, int ncopies,
 			enum vect_def_type *dt,
 			int ndts,
@@ -874,27 +833,7 @@ vect_model_simple_cost (vec_info *vinfo,
   if (node)
     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
-  if (node)
-    {
-      /* Scan operands and account for prologue cost of constants/externals.
-	 ???  This over-estimates cost for multiple uses and should be
-	 re-engineered.  */
-      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-      tree lhs = gimple_get_lhs (stmt);
-      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
-	{
-	  tree op = gimple_op (stmt, i);
-	  enum vect_def_type dt;
-	  if (!op || op == lhs)
-	    continue;
-	  if (vect_is_simple_use (op, vinfo, &dt)
-	      && (dt == vect_constant_def || dt == vect_external_def))
-	    prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
-							    stmt_info,
-							    i, dt, cost_vec);
-	}
-    }
-  else
+  if (!node)
     /* Cost the "broadcast" of a scalar operand in to a vector operand.
        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
        cost model.  */
@@ -995,7 +934,6 @@ cfun_returns (tree decl)
 
 static void
 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt,
 		       vect_memory_access_type memory_access_type,
 		       vec_load_store_type vls_type, slp_tree slp_node,
 		       stmt_vector_for_cost *cost_vec)
@@ -1010,11 +948,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
 
   if (vls_type == VLS_STORE_INVARIANT)
     {
-      if (slp_node)
-	prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
-							stmt_info,
-							1, dt, cost_vec);
-      else
+      if (!slp_node)
 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
 					   stmt_info, 0, vect_prologue);
     }
@@ -7524,7 +7458,7 @@ vectorizable_store (vec_info *vinfo,
 				  memory_access_type, &gs_info, mask);
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
+      vect_model_store_cost (vinfo, stmt_info, ncopies,
 			     memory_access_type, vls_type, slp_node, cost_vec);
       return true;
     }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 41ff6791966..584f13095ab 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -99,7 +99,7 @@ auto_purge_vect_location::~auto_purge_vect_location ()
 
 void
 dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
-		stmt_vec_info stmt_info, int misalign, unsigned cost,
+		stmt_vec_info stmt_info, tree, int misalign, unsigned cost,
 		enum vect_cost_model_location where)
 {
   fprintf (f, "%p ", data);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 20784251b6f..5d1371d4efd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -99,6 +99,7 @@ struct stmt_info_for_cost {
   enum vect_cost_for_stmt kind;
   enum vect_cost_model_location where;
   stmt_vec_info stmt_info;
+  tree vectype;
   int misalign;
 };
 
@@ -1355,7 +1356,7 @@ init_cost (class loop *loop_info)
 }
 
 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
-			    stmt_vec_info, int, unsigned,
+			    stmt_vec_info, tree, int, unsigned,
 			    enum vect_cost_model_location);
 
 /* Alias targetm.vectorize.add_stmt_cost.  */
@@ -1363,13 +1364,14 @@ extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
 static inline unsigned
 add_stmt_cost (vec_info *vinfo, void *data, int count,
 	       enum vect_cost_for_stmt kind,
-	       stmt_vec_info stmt_info, int misalign,
+	       stmt_vec_info stmt_info, tree vectype, int misalign,
 	       enum vect_cost_model_location where)
 {
   unsigned cost = targetm.vectorize.add_stmt_cost (vinfo, data, count, kind,
-						   stmt_info, misalign, where);
+						   stmt_info, vectype,
+						   misalign, where);
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
+    dump_stmt_cost (dump_file, data, count, kind, stmt_info, vectype, misalign,
 		    cost, where);
   return cost;
 }
@@ -1398,7 +1400,7 @@ add_stmt_costs (vec_info *vinfo, void *data, stmt_vector_for_cost *cost_vec)
   unsigned i;
   FOR_EACH_VEC_ELT (*cost_vec, i, cost)
     add_stmt_cost (vinfo, data, cost->count, cost->kind, cost->stmt_info,
-		   cost->misalign, cost->where);
+		   cost->vectype, cost->misalign, cost->where);
 }
 
 /*-----------------------------------------------------------------*/
@@ -1701,6 +1703,12 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     vec<tree> *);
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
+				  tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, stmt_vec_info,
+				  int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt, tree,
 				  int, enum vect_cost_model_location);
 extern stmt_vec_info vect_finish_replace_stmt (vec_info *,
 					       stmt_vec_info, gimple *);


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-03-25 14:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-25 14:22 [gcc(refs/users/rguenth/heads/slp-reorg)] cost SLP invariant nodes directly Richard Biener
  -- strict thread matches above, loose matches on Subject: below --
2020-03-23 16:01 Richard Biener
2020-03-20  8:22 Richard Biener
2020-03-17  8:27 Richard Biener
2020-03-16 14:59 Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).