public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, vec-tails 04/10] Add masking cost
@ 2016-05-19 19:41 Ilya Enkovich
  2016-05-20  9:24 ` Richard Biener
  2016-06-16  6:17 ` Jeff Law
  0 siblings, 2 replies; 9+ messages in thread
From: Ilya Enkovich @ 2016-05-19 19:41 UTC (permalink / raw)
  To: gcc-patches

Hi,

This patch extends the vectorizer cost model to include masking cost by
adding new cost model locations and a new target hook to compute the
masking cost.

Thanks,
Ilya
--
gcc/

2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386.c (ix86_init_cost): Extend costs array.
	(ix86_add_stmt_masking_cost): New.
	(ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
	args.
	(TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
	* config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
	* config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
	* config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
	(rs6000_init_cost): Initialize new cost elements.
	(rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
	* config/spu/spu.c (spu_init_cost): Extend costs array.
	(spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
	* doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
	* doc/tm.texi: Regenerated.
	* target.def (add_stmt_masking_cost): New.
	(finish_cost): Add masking_prologue_cost and masking_body_cost args.
	* target.h (enum vect_cost_for_stmt): Add vector_mask_load and
	vector_mask_store.
	(enum vect_cost_model_location): Add vect_masking_prologue
	and vect_masking_body.
	* targhooks.c (default_builtin_vectorization_cost): Support
	vector_mask_load and vector_mask_store.
	(default_init_cost): Extend costs array.
	(default_add_stmt_masking_cost): New.
	(default_finish_cost): Add masking_prologue_cost and masking_body_cost
	args.
	* targhooks.h (default_add_stmt_masking_cost): New.
	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
	finish_cost call.
	* tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
	* tree-vectorizer.h (add_stmt_masking_cost): New.
	(finish_cost): Add masking_prologue_cost and masking_body_cost args.


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9f62089..6c2c364 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -53932,8 +53932,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
 static void *
 ix86_init_cost (struct loop *)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  unsigned *cost = XNEWVEC (unsigned, 5);
+  cost[vect_prologue] = 0;
+  cost[vect_body]     = 0;
+  cost[vect_epilogue] = 0;
+  cost[vect_masking_prologue] = 0;
+  cost[vect_masking_body] = 0;
   return cost;
 }
 
@@ -53974,16 +53978,56 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
   return retval;
 }
 
+/* Implement targetm.vectorize.add_stmt_masking_cost.  */
+
+static unsigned
+ix86_add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
+			    struct _stmt_vec_info *stmt_info, int misalign,
+			    enum vect_cost_model_location where)
+{
+  bool embedded_masking = false;
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+
+  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+  if (vectype)
+    {
+      machine_mode mode
+	= ix86_get_mask_mode (TYPE_VECTOR_SUBPARTS (vectype),
+			      tree_to_uhwi (TYPE_SIZE_UNIT (vectype)));
+      embedded_masking = !VECTOR_MODE_P (mode);
+    }
+  else
+    embedded_masking = TARGET_AVX512F;
+
+  if (embedded_masking || kind == vector_load)
+    return retval;
+
+  if (kind == vector_store)
+    return TARGET_INCREASE_MASK_STORE_COST ? 10 : 0;
+
+  int stmt_cost = ix86_builtin_vectorization_cost (vector_stmt, vectype, misalign);
+  retval = (unsigned) (count * stmt_cost);
+
+  cost[where] += retval;
+
+  return retval;
+}
+
 /* Implement targetm.vectorize.finish_cost.  */
 
 static void
 ix86_finish_cost (void *data, unsigned *prologue_cost,
-		  unsigned *body_cost, unsigned *epilogue_cost)
+		  unsigned *body_cost, unsigned *epilogue_cost,
+		  unsigned *masking_prologue_cost,
+		  unsigned *masking_body_cost)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost     = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  *masking_prologue_cost = cost[vect_masking_prologue];
+  *masking_body_cost = cost[vect_masking_body];
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
@@ -54964,6 +55008,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
 #undef TARGET_VECTORIZE_ADD_STMT_COST
 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
+#undef TARGET_VECTORIZE_ADD_STMT_MASKING_COST
+#define TARGET_VECTORIZE_ADD_STMT_MASKING_COST ix86_add_stmt_masking_cost
 #undef TARGET_VECTORIZE_FINISH_COST
 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d0b418b..b42cfa2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -501,6 +501,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 #define TARGET_ONE_IF_CONV_INSN \
 	ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_INCREASE_MASK_STORE_COST \
+	ix86_tune_features[X86_TUNE_INCREASE_MASK_STORE_COST]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 31a87b9..3bbcee8 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -527,6 +527,11 @@ DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
 	  m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
 
+/* X86_TUNE_INCREASE_MASK_STORE_COST: Increase cost of masked store for
+   some platforms.  */
+DEF_TUNE (X86_TUNE_INCREASE_MASK_STORE_COST, "increase_mask_store_cost",
+	  m_HASWELL | m_BDVER4 | m_ZNVER1)
+
 /*****************************************************************************/
 /* This never worked well before.                                            */
 /*****************************************************************************/
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0f70bb9..295deaf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5207,7 +5207,7 @@ rs6000_preferred_simd_mode (machine_mode mode)
 typedef struct _rs6000_cost_data
 {
   struct loop *loop_info;
-  unsigned cost[3];
+  unsigned cost[5];
 } rs6000_cost_data;
 
 /* Test for likely overcommitment of vector hardware resources.  If a
@@ -5269,6 +5269,8 @@ rs6000_init_cost (struct loop *loop_info)
   data->cost[vect_prologue] = 0;
   data->cost[vect_body]     = 0;
   data->cost[vect_epilogue] = 0;
+  data->cost[vect_masking_prologue] = 0;
+  data->cost[vect_masking_body] = 0;
   return data;
 }
 
@@ -5304,7 +5306,9 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 
 static void
 rs6000_finish_cost (void *data, unsigned *prologue_cost,
-		    unsigned *body_cost, unsigned *epilogue_cost)
+		    unsigned *body_cost, unsigned *epilogue_cost,
+		    unsigned *masking_prologue_cost,
+		    unsigned *masking_body_cost)
 {
   rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
 
@@ -5314,6 +5318,8 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
   *prologue_cost = cost_data->cost[vect_prologue];
   *body_cost     = cost_data->cost[vect_body];
   *epilogue_cost = cost_data->cost[vect_epilogue];
+  *masking_prologue_cost = cost_data->cost[vect_masking_prologue];
+  *masking_body_cost = cost_data->cost[vect_masking_body];
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index c3757eb..60d6e6b 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -6630,8 +6630,12 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 static void *
 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  unsigned *cost = XNEWVEC (unsigned, 5);
+  cost[vect_prologue] = 0;
+  cost[vect_body]     = 0;
+  cost[vect_epilogue] = 0;
+  cost[vect_masking_prologue] = 0;
+  cost[vect_masking_body] = 0;
   return cost;
 }
 
@@ -6667,12 +6671,16 @@ spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 
 static void
 spu_finish_cost (void *data, unsigned *prologue_cost,
-		 unsigned *body_cost, unsigned *epilogue_cost)
+		 unsigned *body_cost, unsigned *epilogue_cost,
+		 unsigned *masking_prologue_cost,
+		 unsigned *masking_body_cost)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost     = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  *masking_prologue_cost = cost[vect_masking_prologue];
+  *masking_body_cost = cost[vect_masking_body];
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 057ac9a..5d23910 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5720,8 +5720,12 @@ This hook should initialize target-specific data structures in preparation for m
 This hook should update the target-specific @var{data} in response to adding @var{count} copies of the given @var{kind} of statement to a loop or basic block.  The default adds the builtin vectorizer cost for the copies of the statement to the accumulator specified by @var{where}, (the prologue, body, or epilogue) and returns the amount added.  The return value should be viewed as a tentative cost that may later be revised.
 @end deftypefn
 
-@deftypefn {Target Hook} void TARGET_VECTORIZE_FINISH_COST (void *@var{data}, unsigned *@var{prologue_cost}, unsigned *@var{body_cost}, unsigned *@var{epilogue_cost})
-This hook should complete calculations of the cost of vectorizing a loop or basic block based on @var{data}, and return the prologue, body, and epilogue costs as unsigned integers.  The default returns the value of the three accumulators.
+@deftypefn {Target Hook} unsigned TARGET_VECTORIZE_ADD_STMT_MASKING_COST (void *@var{data}, int @var{count}, enum vect_cost_for_stmt @var{kind}, struct _stmt_vec_info *@var{stmt_info}, int @var{misalign}, enum vect_cost_model_location @var{where})
+This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} but adds cost of statement masking.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_VECTORIZE_FINISH_COST (void *@var{data}, unsigned *@var{prologue_cost}, unsigned *@var{body_cost}, unsigned *@var{epilogue_cost}, unsigned *@var{masking_prologue_cost}, unsigned *@var{masking_body_cost})
+This hook should complete calculations of the cost of vectorizing a loop or basic block based on @var{data}, and return the prologue, body, epilogue, masking prologue and masking body costs as unsigned integers.  The default returns the value of the five accumulators.
 @end deftypefn
 
 @deftypefn {Target Hook} void TARGET_VECTORIZE_DESTROY_COST_DATA (void *@var{data})
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 643f0eb..2e92b47 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4249,6 +4249,8 @@ address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_VECTORIZE_ADD_STMT_COST
 
+@hook TARGET_VECTORIZE_ADD_STMT_MASKING_COST
+
 @hook TARGET_VECTORIZE_FINISH_COST
 
 @hook TARGET_VECTORIZE_DESTROY_COST_DATA
diff --git a/gcc/target.def b/gcc/target.def
index 20f2b32..c1c6705 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1898,17 +1898,29 @@ DEFHOOK
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
+/* Similar to add_stmt_cost but records cost of statement masking.  */
+DEFHOOK
+(add_stmt_masking_cost,
+ "This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} "
+ "but adds cost of statement masking.",
+ unsigned,
+ (void *data, int count, enum vect_cost_for_stmt kind,
+  struct _stmt_vec_info *stmt_info, int misalign,
+  enum vect_cost_model_location where),
+ default_add_stmt_masking_cost)
+
 /* Target function to calculate the total cost of the current vectorized
    loop or block.  */
 DEFHOOK
 (finish_cost,
  "This hook should complete calculations of the cost of vectorizing a loop "
- "or basic block based on @var{data}, and return the prologue, body, and "
- "epilogue costs as unsigned integers.  The default returns the value of "
- "the three accumulators.",
+ "or basic block based on @var{data}, and return the prologue, body, epilogue, "
+ "masking prologue and masking body costs as unsigned integers.  The default "
+ "returns the value of the five accumulators.",
  void,
  (void *data, unsigned *prologue_cost, unsigned *body_cost,
-  unsigned *epilogue_cost),
+  unsigned *epilogue_cost, unsigned *masking_prologue_cost,
+  unsigned *masking_body_cost),
  default_finish_cost)
 
 /* Function to delete target-specific cost modeling data.  */
diff --git a/gcc/target.h b/gcc/target.h
index 43022bd..17e3803 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -170,7 +170,9 @@ enum vect_cost_for_stmt
   cond_branch_taken,
   vec_perm,
   vec_promote_demote,
-  vec_construct
+  vec_construct,
+  vector_mask_load,
+  vector_mask_store
 };
 
 /* Separate locations for which the vectorizer cost model should
@@ -178,7 +180,9 @@ enum vect_cost_for_stmt
 enum vect_cost_model_location {
   vect_prologue = 0,
   vect_body = 1,
-  vect_epilogue = 2
+  vect_epilogue = 2,
+  vect_masking_prologue = 3,
+  vect_masking_body = 4
 };
 
 /* The target structure.  This holds all the backend hooks.  */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 6b4601b..072345d 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -579,6 +579,8 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case cond_branch_not_taken:
       case vec_perm:
       case vec_promote_demote:
+      case vector_mask_load:
+      case vector_mask_store:
         return 1;
 
       case unaligned_load:
@@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned vector_size)
 void *
 default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  unsigned *cost = XNEWVEC (unsigned, 5);
+  cost[vect_prologue] = 0;
+  cost[vect_body]     = 0;
+  cost[vect_epilogue] = 0;
+  cost[vect_masking_prologue] = 0;
+  cost[vect_masking_body] = 0;
   return cost;
 }
 
@@ -1147,16 +1153,48 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
   return retval;
 }
 
+/* By default, the cost model assumes we use VEC_COND_EXPR to mask the
+   statement result.  For memory accesses we need to adjust the used mask
+   in case the access is already masked.  */
+
+unsigned
+default_add_stmt_masking_cost (void *data, int count,
+			       enum vect_cost_for_stmt kind,
+			       struct _stmt_vec_info *stmt_info,
+			       int misalign,
+			       enum vect_cost_model_location where)
+{
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+
+  if (kind == vector_load || kind == vector_store)
+    return retval;
+
+  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+  int stmt_cost = targetm.vectorize.builtin_vectorization_cost (vector_stmt,
+								vectype,
+								misalign);
+
+  retval = (unsigned) (count * stmt_cost);
+  cost[where] += retval;
+
+  return retval;
+}
+
 /* By default, the cost model just returns the accumulated costs.  */
 
 void
 default_finish_cost (void *data, unsigned *prologue_cost,
-		     unsigned *body_cost, unsigned *epilogue_cost)
+		     unsigned *body_cost, unsigned *epilogue_cost,
+		     unsigned *masking_prologue_cost,
+		     unsigned *masking_body_cost)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost     = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  *masking_prologue_cost = cost[vect_masking_prologue];
+  *masking_body_cost = cost[vect_masking_body];
 }
 
 /* Free the cost data.  */
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39..5a1c749 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -106,6 +106,10 @@ extern void *default_init_cost (struct loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
 				       struct _stmt_vec_info *, int,
 				       enum vect_cost_model_location);
+extern unsigned default_add_stmt_masking_cost (void *, int,
+					       enum vect_cost_for_stmt,
+					       struct _stmt_vec_info *, int,
+					       enum vect_cost_model_location);
 extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
 
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index a537ef4..e25a0ce 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3121,6 +3121,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
   int vec_outside_cost = 0;
   unsigned vec_prologue_cost = 0;
   unsigned vec_epilogue_cost = 0;
+  unsigned masking_prologue_cost = 0;
+  unsigned masking_inside_cost = 0;
   int scalar_single_iter_cost = 0;
   int scalar_outside_cost = 0;
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -3339,7 +3341,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   /* Complete the target-specific cost calculations.  */
   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
-	       &vec_inside_cost, &vec_epilogue_cost);
+	       &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
+	       &masking_inside_cost);
 
   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
   
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d713848..09480ea 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2454,6 +2454,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
   int i;
   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
+  unsigned int masking_prologue_cost = 0, masking_inside_cost = 0;
 
   /* Calculate scalar cost.  */
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
@@ -2472,7 +2473,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
 
   /* Complete the target-specific cost calculation.  */
   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
-	       &vec_inside_cost, &vec_epilogue_cost);
+	       &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
+	       &masking_inside_cost);
 
   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b269752..d3450b6 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -897,13 +897,27 @@ add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 					  stmt_info, misalign, where);
 }
 
+/* Alias targetm.vectorize.add_stmt_masking_cost.  */
+
+static inline unsigned
+add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
+		       stmt_vec_info stmt_info, int misalign,
+		       enum vect_cost_model_location where)
+{
+  return targetm.vectorize.add_stmt_masking_cost (data, count, kind,
+						  stmt_info, misalign, where);
+}
+
 /* Alias targetm.vectorize.finish_cost.  */
 
 static inline void
 finish_cost (void *data, unsigned *prologue_cost,
-	     unsigned *body_cost, unsigned *epilogue_cost)
+	     unsigned *body_cost, unsigned *epilogue_cost,
+	     unsigned *masking_prologue_cost,
+	     unsigned *masking_body_cost)
 {
-  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost);
+  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost,
+				 masking_prologue_cost, masking_body_cost);
 }
 
 /* Alias targetm.vectorize.destroy_cost_data.  */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-05-19 19:41 [PATCH, vec-tails 04/10] Add masking cost Ilya Enkovich
@ 2016-05-20  9:24 ` Richard Biener
  2016-05-20  9:44   ` Ilya Enkovich
  2016-06-16  6:17 ` Jeff Law
  1 sibling, 1 reply; 9+ messages in thread
From: Richard Biener @ 2016-05-20  9:24 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: GCC Patches

On Thu, May 19, 2016 at 9:40 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi,
>
> This patch extends vectorizer cost model to include masking cost by
> adding new cost model locations and new target hook to compute
> masking cost.

Can you explain a bit why you add separate overall
masking_prologue/body_cost rather than using the existing
prologue/body cost for that?

I realize that the current vectorizer cost infrastructure is a big mess,
but isn't it possible to achieve what you did with the current
add_stmt_cost hook (by inspecting stmt_info)?

Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * config/i386/i386.c (ix86_init_cost): Extend costs array.
>         (ix86_add_stmt_masking_cost): New.
>         (ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
>         args.
>         (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
>         * config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
>         * config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
>         * config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
>         (rs6000_init_cost): Initialize new cost elements.
>         (rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
>         * config/spu/spu.c (spu_init_cost): Extend costs array.
>         (spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
>         * doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
>         * doc/tm.texi: Regenerated.
>         * target.def (add_stmt_masking_cost): New.
>         (finish_cost): Add masking_prologue_cost and masking_body_cost args.
>         * target.h (enum vect_cost_for_stmt): Add vector_mask_load and
>         vector_mask_store.
>         (enum vect_cost_model_location): Add vect_masking_prologue
>         and vect_masking_body.
>         * targhooks.c (default_builtin_vectorization_cost): Support
>         vector_mask_load and vector_mask_store.
>         (default_init_cost): Extend costs array.
>         (default_add_stmt_masking_cost): New.
>         (default_finish_cost): Add masking_prologue_cost and masking_body_cost
>         args.
>         * targhooks.h (default_add_stmt_masking_cost): New.
>         * tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
>         finish_cost call.
>         * tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
>         * tree-vectorizer.h (add_stmt_masking_cost): New.
>         (finish_cost): Add masking_prologue_cost and masking_body_cost args.
>
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 9f62089..6c2c364 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -53932,8 +53932,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
>  static void *
>  ix86_init_cost (struct loop *)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body]     = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
>  }
>
> @@ -53974,16 +53978,56 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>    return retval;
>  }
>
> +/* Implement targetm.vectorize.add_stmt_masking_cost.  */
> +
> +static unsigned
> +ix86_add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
> +                           struct _stmt_vec_info *stmt_info, int misalign,
> +                           enum vect_cost_model_location where)
> +{
> +  bool embedded_masking = false;
> +  unsigned *cost = (unsigned *) data;
> +  unsigned retval = 0;
> +
> +  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
> +  if (vectype)
> +    {
> +      machine_mode mode
> +       = ix86_get_mask_mode (TYPE_VECTOR_SUBPARTS (vectype),
> +                             tree_to_uhwi (TYPE_SIZE_UNIT (vectype)));
> +      embedded_masking = !VECTOR_MODE_P (mode);
> +    }
> +  else
> +    embedded_masking = TARGET_AVX512F;
> +
> +  if (embedded_masking || kind == vector_load)
> +    return retval;
> +
> +  if (kind == vector_store)
> +    return TARGET_INCREASE_MASK_STORE_COST ? 10 : 0;
> +
> +  int stmt_cost = ix86_builtin_vectorization_cost (vector_stmt, vectype, misalign);
> +  retval = (unsigned) (count * stmt_cost);
> +
> +  cost[where] += retval;
> +
> +  return retval;
> +}
> +
>  /* Implement targetm.vectorize.finish_cost.  */
>
>  static void
>  ix86_finish_cost (void *data, unsigned *prologue_cost,
> -                 unsigned *body_cost, unsigned *epilogue_cost)
> +                 unsigned *body_cost, unsigned *epilogue_cost,
> +                 unsigned *masking_prologue_cost,
> +                 unsigned *masking_body_cost)
>  {
>    unsigned *cost = (unsigned *) data;
>    *prologue_cost = cost[vect_prologue];
>    *body_cost     = cost[vect_body];
>    *epilogue_cost = cost[vect_epilogue];
> +  *masking_prologue_cost = cost[vect_masking_prologue];
> +  *masking_body_cost = cost[vect_masking_body];
>  }
>
>  /* Implement targetm.vectorize.destroy_cost_data.  */
> @@ -54964,6 +55008,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
>  #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
>  #undef TARGET_VECTORIZE_ADD_STMT_COST
>  #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
> +#undef TARGET_VECTORIZE_ADD_STMT_MASKING_COST
> +#define TARGET_VECTORIZE_ADD_STMT_MASKING_COST ix86_add_stmt_masking_cost
>  #undef TARGET_VECTORIZE_FINISH_COST
>  #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
>  #undef TARGET_VECTORIZE_DESTROY_COST_DATA
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index d0b418b..b42cfa2 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -501,6 +501,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>         ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
>  #define TARGET_ONE_IF_CONV_INSN \
>         ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
> +#define TARGET_INCREASE_MASK_STORE_COST \
> +       ix86_tune_features[X86_TUNE_INCREASE_MASK_STORE_COST]
>
>  /* Feature tests against the various architecture variations.  */
>  enum ix86_arch_indices {
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 31a87b9..3bbcee8 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -527,6 +527,11 @@ DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
>  DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
>           m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
>
> +/* X86_TUNE_INCREASE_MASK_STORE_COST: Increase cost of masked store for
> +   some platforms.  */
> +DEF_TUNE (X86_TUNE_INCREASE_MASK_STORE_COST, "increase_mask_store_cost",
> +         m_HASWELL | m_BDVER4 | m_ZNVER1)
> +
>  /*****************************************************************************/
>  /* This never worked well before.                                            */
>  /*****************************************************************************/
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 0f70bb9..295deaf 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -5207,7 +5207,7 @@ rs6000_preferred_simd_mode (machine_mode mode)
>  typedef struct _rs6000_cost_data
>  {
>    struct loop *loop_info;
> -  unsigned cost[3];
> +  unsigned cost[5];
>  } rs6000_cost_data;
>
>  /* Test for likely overcommitment of vector hardware resources.  If a
> @@ -5269,6 +5269,8 @@ rs6000_init_cost (struct loop *loop_info)
>    data->cost[vect_prologue] = 0;
>    data->cost[vect_body]     = 0;
>    data->cost[vect_epilogue] = 0;
> +  data->cost[vect_masking_prologue] = 0;
> +  data->cost[vect_masking_body] = 0;
>    return data;
>  }
>
> @@ -5304,7 +5306,9 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>
>  static void
>  rs6000_finish_cost (void *data, unsigned *prologue_cost,
> -                   unsigned *body_cost, unsigned *epilogue_cost)
> +                   unsigned *body_cost, unsigned *epilogue_cost,
> +                   unsigned *masking_prologue_cost,
> +                   unsigned *masking_body_cost)
>  {
>    rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
>
> @@ -5314,6 +5318,8 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
>    *prologue_cost = cost_data->cost[vect_prologue];
>    *body_cost     = cost_data->cost[vect_body];
>    *epilogue_cost = cost_data->cost[vect_epilogue];
> +  *masking_prologue_cost = cost_data->cost[vect_masking_prologue];
> +  *masking_body_cost = cost_data->cost[vect_masking_body];
>  }
>
>  /* Implement targetm.vectorize.destroy_cost_data.  */
> diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
> index c3757eb..60d6e6b 100644
> --- a/gcc/config/spu/spu.c
> +++ b/gcc/config/spu/spu.c
> @@ -6630,8 +6630,12 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
>  static void *
>  spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body]     = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
>  }
>
> @@ -6667,12 +6671,16 @@ spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>
>  static void
>  spu_finish_cost (void *data, unsigned *prologue_cost,
> -                unsigned *body_cost, unsigned *epilogue_cost)
> +                unsigned *body_cost, unsigned *epilogue_cost,
> +                unsigned *masking_prologue_cost,
> +                unsigned *masking_body_cost)
>  {
>    unsigned *cost = (unsigned *) data;
>    *prologue_cost = cost[vect_prologue];
>    *body_cost     = cost[vect_body];
>    *epilogue_cost = cost[vect_epilogue];
> +  *masking_prologue_cost = cost[vect_masking_prologue];
> +  *masking_body_cost = cost[vect_masking_body];
>  }
>
>  /* Implement targetm.vectorize.destroy_cost_data.  */
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 057ac9a..5d23910 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -5720,8 +5720,12 @@ This hook should initialize target-specific data structures in preparation for m
>  This hook should update the target-specific @var{data} in response to adding @var{count} copies of the given @var{kind} of statement to a loop or basic block.  The default adds the builtin vectorizer cost for the copies of the statement to the accumulator specified by @var{where}, (the prologue, body, or epilogue) and returns the amount added.  The return value should be viewed as a tentative cost that may later be revised.
>  @end deftypefn
>
> -@deftypefn {Target Hook} void TARGET_VECTORIZE_FINISH_COST (void *@var{data}, unsigned *@var{prologue_cost}, unsigned *@var{body_cost}, unsigned *@var{epilogue_cost})
> -This hook should complete calculations of the cost of vectorizing a loop or basic block based on @var{data}, and return the prologue, body, and epilogue costs as unsigned integers.  The default returns the value of the three accumulators.
> +@deftypefn {Target Hook} unsigned TARGET_VECTORIZE_ADD_STMT_MASKING_COST (void *@var{data}, int @var{count}, enum vect_cost_for_stmt @var{kind}, struct _stmt_vec_info *@var{stmt_info}, int @var{misalign}, enum vect_cost_model_location @var{where})
> +This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} but adds cost of statement masking.
> +@end deftypefn
> +
> +@deftypefn {Target Hook} void TARGET_VECTORIZE_FINISH_COST (void *@var{data}, unsigned *@var{prologue_cost}, unsigned *@var{body_cost}, unsigned *@var{epilogue_cost}, unsigned *@var{masking_prologue_cost}, unsigned *@var{masking_body_cost})
> +This hook should complete calculations of the cost of vectorizing a loop or basic block based on @var{data}, and return the prologue, body, epilogue, masking prologue and masking body costs as unsigned integers.  The default returns the value of the five accumulators.
>  @end deftypefn
>
>  @deftypefn {Target Hook} void TARGET_VECTORIZE_DESTROY_COST_DATA (void *@var{data})
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 643f0eb..2e92b47 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -4249,6 +4249,8 @@ address;  but often a machine-dependent strategy can generate better code.
>
>  @hook TARGET_VECTORIZE_ADD_STMT_COST
>
> +@hook TARGET_VECTORIZE_ADD_STMT_MASKING_COST
> +
>  @hook TARGET_VECTORIZE_FINISH_COST
>
>  @hook TARGET_VECTORIZE_DESTROY_COST_DATA
> diff --git a/gcc/target.def b/gcc/target.def
> index 20f2b32..c1c6705 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -1898,17 +1898,29 @@ DEFHOOK
>    enum vect_cost_model_location where),
>   default_add_stmt_cost)
>
> +/* Similar to add_stmt_cost but records cost of statement masking.  */
> +DEFHOOK
> +(add_stmt_masking_cost,
> + "This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} "
> + "but adds cost of statement masking.",
> + unsigned,
> + (void *data, int count, enum vect_cost_for_stmt kind,
> +  struct _stmt_vec_info *stmt_info, int misalign,
> +  enum vect_cost_model_location where),
> + default_add_stmt_masking_cost)
> +
>  /* Target function to calculate the total cost of the current vectorized
>     loop or block.  */
>  DEFHOOK
>  (finish_cost,
>   "This hook should complete calculations of the cost of vectorizing a loop "
> - "or basic block based on @var{data}, and return the prologue, body, and "
> - "epilogue costs as unsigned integers.  The default returns the value of "
> - "the three accumulators.",
> + "or basic block based on @var{data}, and return the prologue, body, epilogue, "
> + "masking prologue and masking body costs as unsigned integers.  The default "
> + "returns the value of the five accumulators.",
>   void,
>   (void *data, unsigned *prologue_cost, unsigned *body_cost,
> -  unsigned *epilogue_cost),
> +  unsigned *epilogue_cost, unsigned *masking_prologue_cost,
> +  unsigned *masking_body_cost),
>   default_finish_cost)
>
>  /* Function to delete target-specific cost modeling data.  */
> diff --git a/gcc/target.h b/gcc/target.h
> index 43022bd..17e3803 100644
> --- a/gcc/target.h
> +++ b/gcc/target.h
> @@ -170,7 +170,9 @@ enum vect_cost_for_stmt
>    cond_branch_taken,
>    vec_perm,
>    vec_promote_demote,
> -  vec_construct
> +  vec_construct,
> +  vector_mask_load,
> +  vector_mask_store
>  };
>
>  /* Separate locations for which the vectorizer cost model should
> @@ -178,7 +180,9 @@ enum vect_cost_for_stmt
>  enum vect_cost_model_location {
>    vect_prologue = 0,
>    vect_body = 1,
> -  vect_epilogue = 2
> +  vect_epilogue = 2,
> +  vect_masking_prologue = 3,
> +  vect_masking_body = 4
>  };
>
>  /* The target structure.  This holds all the backend hooks.  */
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index 6b4601b..072345d 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -579,6 +579,8 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
>        case cond_branch_not_taken:
>        case vec_perm:
>        case vec_promote_demote:
> +      case vector_mask_load:
> +      case vector_mask_store:
>          return 1;
>
>        case unaligned_load:
> @@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned vector_size)
>  void *
>  default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body]     = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
>  }
>
> @@ -1147,16 +1153,48 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>    return retval;
>  }
>
> +/* By default, the cost model assumes we use VEC_COND_EXPR to mask
> +   statement result.  For memory accesses we need to adjust used mask
> +   in case access is already masked.  */
> +
> +unsigned
> +default_add_stmt_masking_cost (void *data, int count,
> +                              enum vect_cost_for_stmt kind,
> +                              struct _stmt_vec_info *stmt_info,
> +                              int misalign,
> +                              enum vect_cost_model_location where)
> +{
> +  unsigned *cost = (unsigned *) data;
> +  unsigned retval = 0;
> +
> +  if (kind == vector_load || kind == vector_store)
> +    return retval;
> +
> +  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
> +  int stmt_cost = targetm.vectorize.builtin_vectorization_cost (vector_stmt,
> +                                                               vectype,
> +                                                               misalign);
> +
> +  retval = (unsigned) (count * stmt_cost);
> +  cost[where] += retval;
> +
> +  return retval;
> +}
> +
>  /* By default, the cost model just returns the accumulated costs.  */
>
>  void
>  default_finish_cost (void *data, unsigned *prologue_cost,
> -                    unsigned *body_cost, unsigned *epilogue_cost)
> +                    unsigned *body_cost, unsigned *epilogue_cost,
> +                    unsigned *masking_prologue_cost,
> +                    unsigned *masking_body_cost)
>  {
>    unsigned *cost = (unsigned *) data;
>    *prologue_cost = cost[vect_prologue];
>    *body_cost     = cost[vect_body];
>    *epilogue_cost = cost[vect_epilogue];
> +  *masking_prologue_cost = cost[vect_masking_prologue];
> +  *masking_body_cost = cost[vect_masking_body];
>  }
>
>  /* Free the cost data.  */
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 7687c39..5a1c749 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -106,6 +106,10 @@ extern void *default_init_cost (struct loop *);
>  extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
>                                        struct _stmt_vec_info *, int,
>                                        enum vect_cost_model_location);
> +extern unsigned default_add_stmt_masking_cost (void *, int,
> +                                              enum vect_cost_for_stmt,
> +                                              struct _stmt_vec_info *, int,
> +                                              enum vect_cost_model_location);
>  extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
>  extern void default_destroy_cost_data (void *);
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index a537ef4..e25a0ce 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -3121,6 +3121,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>    int vec_outside_cost = 0;
>    unsigned vec_prologue_cost = 0;
>    unsigned vec_epilogue_cost = 0;
> +  unsigned masking_prologue_cost = 0;
> +  unsigned masking_inside_cost = 0;
>    int scalar_single_iter_cost = 0;
>    int scalar_outside_cost = 0;
>    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> @@ -3339,7 +3341,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>
>    /* Complete the target-specific cost calculations.  */
>    finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
> -              &vec_inside_cost, &vec_epilogue_cost);
> +              &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
> +              &masking_inside_cost);
>
>    vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
>
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index d713848..09480ea 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2454,6 +2454,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
>    int i;
>    unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
>    unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
> +  unsigned int masking_prologue_cost = 0, masking_inside_cost = 0;
>
>    /* Calculate scalar cost.  */
>    FOR_EACH_VEC_ELT (slp_instances, i, instance)
> @@ -2472,7 +2473,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
>
>    /* Complete the target-specific cost calculation.  */
>    finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
> -              &vec_inside_cost, &vec_epilogue_cost);
> +              &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
> +              &masking_inside_cost);
>
>    vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
>
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index b269752..d3450b6 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -897,13 +897,27 @@ add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>                                           stmt_info, misalign, where);
>  }
>
> +/* Alias targetm.vectorize.add_stmt_masking_cost.  */
> +
> +static inline unsigned
> +add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
> +                      stmt_vec_info stmt_info, int misalign,
> +                      enum vect_cost_model_location where)
> +{
> +  return targetm.vectorize.add_stmt_masking_cost (data, count, kind,
> +                                                 stmt_info, misalign, where);
> +}
> +
>  /* Alias targetm.vectorize.finish_cost.  */
>
>  static inline void
>  finish_cost (void *data, unsigned *prologue_cost,
> -            unsigned *body_cost, unsigned *epilogue_cost)
> +            unsigned *body_cost, unsigned *epilogue_cost,
> +            unsigned *masking_prologue_cost,
> +            unsigned *masking_body_cost)
>  {
> -  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost);
> +  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost,
> +                                masking_prologue_cost, masking_body_cost);
>  }
>
>  /* Alias targetm.vectorize.destroy_cost_data.  */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-05-20  9:24 ` Richard Biener
@ 2016-05-20  9:44   ` Ilya Enkovich
  2016-05-20 11:15     ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: Ilya Enkovich @ 2016-05-20  9:44 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

2016-05-20 12:24 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Thu, May 19, 2016 at 9:40 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> Hi,
>>
>> This patch extends vectorizer cost model to include masking cost by
>> adding new cost model locations and new target hook to compute
>> masking cost.
>
> Can you explain a bit why you add separate overall
> masking_prologue/body_cost rather
> than using the existing prologue/body cost for that?

When I make a decision I need vector loop cost without masking (what
we currently
have) and with masking (what I add).  This allows me to compute
profitability for
all options (scalar epilogue, combined epilogue, masked epilogue) and choose one
of them.  Using existing prologue/body cost would allow me to compute masking
profitability with no fall back to scalar loop profitability.


>
> I realize that the current vectorizer cost infrastructure is a big
> mess, but isn't it possible
> to achieve what you did with the current add_stmt_cost hook?  (by
> inspecting stmt_info)

Cost of a statement and cost of masking a statement are different things.
Two hooks called for the same statement return different values. I can
add vect_cost_for_stmt enum elements to cover masking but I thought
having stmt_masking_cost would be more clear.

Thanks,
Ilya

>
> Richard.
>
>> Thanks,
>> Ilya

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-05-20  9:44   ` Ilya Enkovich
@ 2016-05-20 11:15     ` Richard Biener
  2016-05-20 11:32       ` Ilya Enkovich
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2016-05-20 11:15 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: GCC Patches

On Fri, May 20, 2016 at 11:44 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> 2016-05-20 12:24 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Thu, May 19, 2016 at 9:40 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>> Hi,
>>>
>>> This patch extends vectorizer cost model to include masking cost by
>>> adding new cost model locations and new target hook to compute
>>> masking cost.
>>
>> Can you explain a bit why you add separate overall
>> masking_prologue/body_cost rather
>> than using the existing prologue/body cost for that?
>
> When I make a decision I need vector loop cost without masking (what
> we currently
> have) and with masking (what I add).  This allows me to compute
> profitability for
> all options (scalar epilogue, combined epilogue, masked epilogue) and choose one
> of them.  Using existing prologue/body cost would allow me compute masking
> profitability with no fall back to scalar loop profitability.

Yes, but for this kind of purpose you could simply re-start
separate costing via the init_cost hook?

>> I realize that the current vectorizer cost infrastructure is a big
>> mess, but isn't it possible
>> to achieve what you did with the current add_stmt_cost hook?  (by
>> inspecting stmt_info)
>
> Cost of a statement and cost of masking a statement are different things.
> Two hooks called for the same statement return different values. I can
> add vect_cost_for_stmt enum elements to cover masking but I thought
> having stmt_masking_cost would me more clear.

I agree we need some kind of overloading and I'm not against a separate hook
for this.  On a related note what is "masking cost" here?  I could imagine
that masking doesn't unconditionally add a cost to a stmt but its execution
cost may now depend on whether an element is masked or not.

Does the hook return the cost of the masked stmt or the cost of masking
the stmt only (so you need to do add_stmt_cost as well on the same stmt)?

Thanks,
Richard.

> Thanks,
> Ilya
>
>>
>> Richard.
>>
>>> Thanks,
>>> Ilya

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-05-20 11:15     ` Richard Biener
@ 2016-05-20 11:32       ` Ilya Enkovich
  2016-06-16  6:06         ` Jeff Law
  0 siblings, 1 reply; 9+ messages in thread
From: Ilya Enkovich @ 2016-05-20 11:32 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

2016-05-20 14:15 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Fri, May 20, 2016 at 11:44 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> 2016-05-20 12:24 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>> On Thu, May 19, 2016 at 9:40 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>>> Hi,
>>>>
>>>> This patch extends vectorizer cost model to include masking cost by
>>>> adding new cost model locations and new target hook to compute
>>>> masking cost.
>>>
>>> Can you explain a bit why you add separate overall
>>> masking_prologue/body_cost rather
>>> than using the existing prologue/body cost for that?
>>
>> When I make a decision I need vector loop cost without masking (what
>> we currently
>> have) and with masking (what I add).  This allows me to compute
>> profitability for
>> all options (scalar epilogue, combined epilogue, masked epilogue) and choose one
>> of them.  Using existing prologue/body cost would allow me compute masking
>> profitability with no fall back to scalar loop profitability.
>
> Yes, but for this kind of purpose you could simply re-start
> separate costing via the init_cost hook?

But that would require double scan through loop statements + double
profitability
estimations.  I compute masking cost during statements analysis
(see patch #05) in parallel with regular cost computations.  Note that the
masking cost is the cost of masking only.  Thus the cost of a masked vector
iteration is body cost + body masking cost.

>
>>> I realize that the current vectorizer cost infrastructure is a big
>>> mess, but isn't it possible
>>> to achieve what you did with the current add_stmt_cost hook?  (by
>>> inspecting stmt_info)
>>
>> Cost of a statement and cost of masking a statement are different things.
>> Two hooks called for the same statement return different values. I can
>> add vect_cost_for_stmt enum elements to cover masking but I thought
>> having stmt_masking_cost would me more clear.
>
> I agree we need some kind of overloading and I'm not against a separate hook
> for this.  On a related note what is "masking cost" here?  I could imagine
> that masking doesn't unconditionally add a cost to a stmt but its execution
> cost may now depend on whether an element is masked or not.
>
> Does the hook return the cost of the masked stmt or the cost of masking
> the stmt only (so you need to do add_stmt_cost as well on the same stmt)?

It returns the cost of masking the statement only.  Thus if a hardware has
no penalty for executing masked instruction then return value should be 0.

Thanks,
Ilya

>
> Thanks,
> Richard.
>
>> Thanks,
>> Ilya
>>
>>>
>>> Richard.
>>>
>>>> Thanks,
>>>> Ilya

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-05-20 11:32       ` Ilya Enkovich
@ 2016-06-16  6:06         ` Jeff Law
  0 siblings, 0 replies; 9+ messages in thread
From: Jeff Law @ 2016-06-16  6:06 UTC (permalink / raw)
  To: Ilya Enkovich, Richard Biener; +Cc: GCC Patches

On 05/20/2016 05:32 AM, Ilya Enkovich wrote:
> 2016-05-20 14:15 GMT+03:00 Richard Biener
> <richard.guenther@gmail.com>:
>> On Fri, May 20, 2016 at 11:44 AM, Ilya Enkovich
>> <enkovich.gnu@gmail.com> wrote:
>>> 2016-05-20 12:24 GMT+03:00 Richard Biener
>>> <richard.guenther@gmail.com>:
>>>> On Thu, May 19, 2016 at 9:40 PM, Ilya Enkovich
>>>> <enkovich.gnu@gmail.com> wrote:
>>>>> Hi,
>>>>>
>>>>> This patch extends vectorizer cost model to include masking
>>>>> cost by adding new cost model locations and new target hook
>>>>> to compute masking cost.
>>>>
>>>> Can you explain a bit why you add separate overall
>>>> masking_prologue/body_cost rather than using the existing
>>>> prologue/body cost for that?
>>>
>>> When I make a decision I need vector loop cost without masking
>>> (what we currently have) and with masking (what I add).  This
>>> allows me to compute profitability for all options (scalar
>>> epilogue, combined epilogue, masked epilogue) and choose one of
>>> them.  Using existing prologue/body cost would allow me compute
>>> masking profitability with no fall back to scalar loop
>>> profitability.
>>
>> Yes, but for this kind of purpose you could simply re-start
>> separate costing via the init_cost hook?
>
> But that would require double scan through loop statements + double
> profitability estimations.  I compute masking cost during statements
> analysis (see patch #05) in parallel with regular costs computations.
> Note that masking costs is a cost of masking only.  Thus cost of a
> vector masked iteration is body cost + body masking cost.
Unless there's some inherent reason not to, I prefer a single scan 
through the loop to compute the costs.  Presumably the cost of the 
epilogue is not derived from the cost of the loop, so we ought to be 
able to build the costs via a single scan.


>
>>
>>>> I realize that the current vectorizer cost infrastructure is a
>>>> big mess, but isn't it possible to achieve what you did with
>>>> the current add_stmt_cost hook?  (by inspecting stmt_info)
>>>
>>> Cost of a statement and cost of masking a statement are different
>>> things. Two hooks called for the same statement return different
>>> values. I can add vect_cost_for_stmt enum elements to cover
>>> masking but I thought having stmt_masking_cost would me more
>>> clear.
>>
>> I agree we need some kind of overloading and I'm not against a
>> separate hook for this.  On a related note what is "masking cost"
>> here?  I could imagine that masking doesn't unconditionally add a
>> cost to a stmt but its execution cost may now depend on whether an
>> element is masked or not.
>>
>> Does the hook return the cost of the masked stmt or the cost of
>> masking the stmt only (so you need to do add_stmt_cost as well on
>> the same stmt)?
>
> It returns the cost of masking the statement only.  Thus if a
> hardware has no penalty for executing masked instruction then return
> value should be 0.
Probably worth clarifying in the docs/code.

Presumably if there's some kind of micro-architectural cost based on 
prior statements, their masking state, dependencies, etc we'd need a 
more robust API for computing this cost.  But that's probably 
over-engineering at this point.

Jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-05-19 19:41 [PATCH, vec-tails 04/10] Add masking cost Ilya Enkovich
  2016-05-20  9:24 ` Richard Biener
@ 2016-06-16  6:17 ` Jeff Law
  2016-06-22 14:16   ` Ilya Enkovich
  1 sibling, 1 reply; 9+ messages in thread
From: Jeff Law @ 2016-06-16  6:17 UTC (permalink / raw)
  To: Ilya Enkovich, gcc-patches

On 05/19/2016 01:40 PM, Ilya Enkovich wrote:
> Hi,
>
> This patch extends vectorizer cost model to include masking cost by
> adding new cost model locations and new target hook to compute
> masking cost.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>
> 	* config/i386/i386.c (ix86_init_cost): Extend costs array.
> 	(ix86_add_stmt_masking_cost): New.
> 	(ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
> 	args.
> 	(TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
> 	* config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
> 	* config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
> 	* config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
> 	(rs6000_init_cost): Initialize new cost elements.
> 	(rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
> 	* config/spu/spu.c (spu_init_cost): Extend costs array.
> 	(spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
> 	* doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
> 	* doc/tm.texi: Regenerated.
> 	* target.def (add_stmt_masking_cost): New.
> 	(finish_cost): Add masking_prologue_cost and masking_body_cost args.
> 	* target.h (enum vect_cost_for_stmt): Add vector_mask_load and
> 	vector_mask_store.
> 	(enum vect_cost_model_location): Add vect_masking_prologue
> 	and vect_masking_body.
> 	* targhooks.c (default_builtin_vectorization_cost): Support
> 	vector_mask_load and vector_mask_store.
> 	(default_init_cost): Extend costs array.
> 	(default_add_stmt_masking_cost): New.
> 	(default_finish_cost): Add masking_prologue_cost and masking_body_cost
> 	args.
> 	* targhooks.h (default_add_stmt_masking_cost): New.
> 	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
> 	finish_cost call.
> 	* tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
> 	* tree-vectorizer.h (add_stmt_masking_cost): New.
> 	(finish_cost): Add masking_prologue_cost and masking_body_cost args.
>
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 9f62089..6c2c364 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -53932,8 +53932,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
>  static void *
>  ix86_init_cost (struct loop *)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body]     = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
Trivial nit -- no need or desire to use whitespace to line up the 
initializers.   It looks like others may have done this in the 
duplicated instances of finish_cost. But we shouldn't propagate that 
mistake into the init_cost hooks ;-)


@@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned 
vector_size)
>  void *
>  default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body]     = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
Here too.  There's others.  I won't point them all out.  Please double 
check for this nit in any added code.  You don't have to go back and fix 
existing problems of this nature.

I don't see anything here I really object to -- Richi and I may disagree 
on the compute-costs once in a single scan vs restarting the scan.  If 
Richi feels strongly about restarting for some reason, I'll defer to him 
-- he's done more work in the vectorizer than myself.

I'd suggest taking another stab at the docs for the hooks based on 
Richi's question about whether or not the hook returns the cost of the 
masked statement or the cost of masking the statement.

jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-06-16  6:17 ` Jeff Law
@ 2016-06-22 14:16   ` Ilya Enkovich
  2016-07-11 13:38     ` Ilya Enkovich
  0 siblings, 1 reply; 9+ messages in thread
From: Ilya Enkovich @ 2016-06-22 14:16 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

On 16 Jun 00:16, Jeff Law wrote:
> On 05/19/2016 01:40 PM, Ilya Enkovich wrote:
> >Hi,
> >
> >This patch extends vectorizer cost model to include masking cost by
> >adding new cost model locations and new target hook to compute
> >masking cost.
> >
> >Thanks,
> >Ilya
> >--
> >gcc/
> >
> >2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
> >
> >	* config/i386/i386.c (ix86_init_cost): Extend costs array.
> >	(ix86_add_stmt_masking_cost): New.
> >	(ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
> >	args.
> >	(TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
> >	* config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
> >	* config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
> >	* config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
> >	(rs6000_init_cost): Initialize new cost elements.
> >	(rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
> >	* config/spu/spu.c (spu_init_cost): Extend costs array.
> >	(spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
> >	* doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
> >	* doc/tm.texi: Regenerated.
> >	* target.def (add_stmt_masking_cost): New.
> >	(finish_cost): Add masking_prologue_cost and masking_body_cost args.
> >	* target.h (enum vect_cost_for_stmt): Add vector_mask_load and
> >	vector_mask_store.
> >	(enum vect_cost_model_location): Add vect_masking_prologue
> >	and vect_masking_body.
> >	* targhooks.c (default_builtin_vectorization_cost): Support
> >	vector_mask_load and vector_mask_store.
> >	(default_init_cost): Extend costs array.
> >	(default_add_stmt_masking_cost): New.
> >	(default_finish_cost): Add masking_prologue_cost and masking_body_cost
> >	args.
> >	* targhooks.h (default_add_stmt_masking_cost): New.
> >	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
> >	finish_cost call.
> >	* tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
> >	* tree-vectorizer.h (add_stmt_masking_cost): New.
> >	(finish_cost): Add masking_prologue_cost and masking_body_cost args.
> >
> >
> >diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> >index 9f62089..6c2c364 100644
> >--- a/gcc/config/i386/i386.c
> >+++ b/gcc/config/i386/i386.c
> >@@ -53932,8 +53932,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
> > static void *
> > ix86_init_cost (struct loop *)
> > {
> >-  unsigned *cost = XNEWVEC (unsigned, 3);
> >-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> >+  unsigned *cost = XNEWVEC (unsigned, 5);
> >+  cost[vect_prologue] = 0;
> >+  cost[vect_body]     = 0;
> >+  cost[vect_epilogue] = 0;
> >+  cost[vect_masking_prologue] = 0;
> >+  cost[vect_masking_body] = 0;
> >   return cost;
> Trivial nit -- no need or desire to use whitespace to line up the
> initializers.   It looks like others may have done this in the duplicated
> instances of finish_cost. But we shouldn't propagate that mistake into the
> init_cost hooks ;-)
> 
> 
> @@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned
> vector_size)
> > void *
> > default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
> > {
> >-  unsigned *cost = XNEWVEC (unsigned, 3);
> >-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> >+  unsigned *cost = XNEWVEC (unsigned, 5);
> >+  cost[vect_prologue] = 0;
> >+  cost[vect_body]     = 0;
> >+  cost[vect_epilogue] = 0;
> >+  cost[vect_masking_prologue] = 0;
> >+  cost[vect_masking_body] = 0;
> >   return cost;
> Here too.  There's others.  I won't point them all out.  Please double check
> for this nit in any added code.  You don't have to go back and fix existing
> problems of this nature.
> 
> I don't see anything here I really object to -- Richi and I may disagree on
> the compute-costs once in a single scan vs restarting the scan.  If Richi
> feels strongly about restarting for some reason, I'll defer to him -- he's
> done more work in the vectorizer than myself.
> 
> I'd suggest taking another stab at the docs for the hooks based on Richi's
> question about whether or not the hook returns the cost of hte masked
> statement or the cost of masking the statement.
> 
> jeff

Thanks for review.  Here is an updated version with initializers and
documentation fixed.

Thanks,
Ilya
--
gcc/

2016-05-22  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386.c (ix86_init_cost): Extend costs array.
	(ix86_add_stmt_masking_cost): New.
	(ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
	args.
	(TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
	* config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
	* config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
	* config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
	(rs6000_init_cost): Initialize new cost elements.
	(rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
	* config/spu/spu.c (spu_init_cost): Extend costs array.
	(spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
	* doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
	* doc/tm.texi: Regenerated.
	* target.def (add_stmt_masking_cost): New.
	(finish_cost): Add masking_prologue_cost and masking_body_cost args.
	* target.h (enum vect_cost_for_stmt): Add vector_mask_load and
	vector_mask_store.
	(enum vect_cost_model_location): Add vect_masking_prologue
	and vect_masking_body.
	* targhooks.c (default_builtin_vectorization_cost): Support
	vector_mask_load and vector_mask_store.
	(default_init_cost): Extend costs array.
	(default_add_stmt_masking_cost): New.
	(default_finish_cost): Add masking_prologue_cost and masking_body_cost
	args.
	* targhooks.h (default_add_stmt_masking_cost): New.
	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
	finish_cost call.
	* tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
	* tree-vectorizer.h (add_stmt_masking_cost): New.
	(finish_cost): Add masking_prologue_cost and masking_body_cost args.


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7e9f511..9ff7c91 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -54003,8 +54003,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
 static void *
 ix86_init_cost (struct loop *)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  unsigned *cost = XNEWVEC (unsigned, 5);
+  cost[vect_prologue] = 0;
+  cost[vect_body] = 0;
+  cost[vect_epilogue] = 0;
+  cost[vect_masking_prologue] = 0;
+  cost[vect_masking_body] = 0;
   return cost;
 }
 
@@ -54045,16 +54049,56 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
   return retval;
 }
 
+/* Implement targetm.vectorize.add_stmt_masking_cost.  */
+
+static unsigned
+ix86_add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
+			    struct _stmt_vec_info *stmt_info, int misalign,
+			    enum vect_cost_model_location where)
+{
+  bool embedded_masking = false;
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+
+  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+  if (vectype)
+    {
+      machine_mode mode
+	= ix86_get_mask_mode (TYPE_VECTOR_SUBPARTS (vectype),
+			      tree_to_uhwi (TYPE_SIZE_UNIT (vectype)));
+      embedded_masking = !VECTOR_MODE_P (mode);
+    }
+  else
+    embedded_masking = TARGET_AVX512F;
+
+  if (embedded_masking || kind == vector_load)
+    return retval;
+
+  if (kind == vector_store)
+    return TARGET_INCREASE_MASK_STORE_COST ? 10 : 0;
+
+  int stmt_cost = ix86_builtin_vectorization_cost (vector_stmt, vectype, misalign);
+  retval = (unsigned) (count * stmt_cost);
+
+  cost[where] += retval;
+
+  return retval;
+}
+
 /* Implement targetm.vectorize.finish_cost.  */
 
 static void
 ix86_finish_cost (void *data, unsigned *prologue_cost,
-		  unsigned *body_cost, unsigned *epilogue_cost)
+		  unsigned *body_cost, unsigned *epilogue_cost,
+		  unsigned *masking_prologue_cost,
+		  unsigned *masking_body_cost)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost     = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  *masking_prologue_cost = cost[vect_masking_prologue];
+  *masking_body_cost = cost[vect_masking_body];
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
@@ -55035,6 +55079,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
 #undef TARGET_VECTORIZE_ADD_STMT_COST
 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
+#undef TARGET_VECTORIZE_ADD_STMT_MASKING_COST
+#define TARGET_VECTORIZE_ADD_STMT_MASKING_COST ix86_add_stmt_masking_cost
 #undef TARGET_VECTORIZE_FINISH_COST
 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 47427f5..828344d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -503,6 +503,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 #define TARGET_ONE_IF_CONV_INSN \
 	ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_INCREASE_MASK_STORE_COST \
+	ix86_tune_features[X86_TUNE_INCREASE_MASK_STORE_COST]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 31a87b9..3bbcee8 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -527,6 +527,11 @@ DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
 	  m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
 
+/* X86_TUNE_INCREASE_MASK_STORE_COST: Increase cost of masked store for
+   some platforms.  */
+DEF_TUNE (X86_TUNE_INCREASE_MASK_STORE_COST, "increase_mask_store_cost",
+	  m_HASWELL | m_BDVER4 | m_ZNVER1)
+
 /*****************************************************************************/
 /* This never worked well before.                                            */
 /*****************************************************************************/
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index c6b2b6a..3b9d4c0 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5305,7 +5305,7 @@ rs6000_preferred_simd_mode (machine_mode mode)
 typedef struct _rs6000_cost_data
 {
   struct loop *loop_info;
-  unsigned cost[3];
+  unsigned cost[5];
 } rs6000_cost_data;
 
 /* Test for likely overcommitment of vector hardware resources.  If a
@@ -5367,6 +5367,8 @@ rs6000_init_cost (struct loop *loop_info)
   data->cost[vect_prologue] = 0;
   data->cost[vect_body]     = 0;
   data->cost[vect_epilogue] = 0;
+  data->cost[vect_masking_prologue] = 0;
+  data->cost[vect_masking_body] = 0;
   return data;
 }
 
@@ -5402,7 +5404,9 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 
 static void
 rs6000_finish_cost (void *data, unsigned *prologue_cost,
-		    unsigned *body_cost, unsigned *epilogue_cost)
+		    unsigned *body_cost, unsigned *epilogue_cost,
+		    unsigned *masking_prologue_cost,
+		    unsigned *masking_body_cost)
 {
   rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
 
@@ -5412,6 +5416,8 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
   *prologue_cost = cost_data->cost[vect_prologue];
   *body_cost     = cost_data->cost[vect_body];
   *epilogue_cost = cost_data->cost[vect_epilogue];
+  *masking_prologue_cost = cost_data->cost[vect_masking_prologue];
+  *masking_body_cost = cost_data->cost[vect_masking_body];
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index c3757eb..a9cb924 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -6630,8 +6630,12 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 static void *
 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  unsigned *cost = XNEWVEC (unsigned, 5);
+  cost[vect_prologue] = 0;
+  cost[vect_body] = 0;
+  cost[vect_epilogue] = 0;
+  cost[vect_masking_prologue] = 0;
+  cost[vect_masking_body] = 0;
   return cost;
 }
 
@@ -6667,12 +6671,16 @@ spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 
 static void
 spu_finish_cost (void *data, unsigned *prologue_cost,
-		 unsigned *body_cost, unsigned *epilogue_cost)
+		 unsigned *body_cost, unsigned *epilogue_cost,
+		 unsigned *masking_prologue_cost,
+		 unsigned *masking_body_cost)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost     = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  *masking_prologue_cost = cost[vect_masking_prologue];
+  *masking_body_cost = cost[vect_masking_body];
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f963a58..9eaacdc 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4249,6 +4249,8 @@ address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_VECTORIZE_ADD_STMT_COST
 
+@hook TARGET_VECTORIZE_ADD_STMT_MASKING_COST
+
 @hook TARGET_VECTORIZE_FINISH_COST
 
 @hook TARGET_VECTORIZE_DESTROY_COST_DATA
diff --git a/gcc/target.def b/gcc/target.def
index 6392e73..442ac14 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1898,17 +1898,32 @@ DEFHOOK
   enum vect_cost_model_location where),
  default_add_stmt_cost)
 
+/* Similar to add_stmt_cost but records cost of statement masking.  */
+DEFHOOK
+(add_stmt_masking_cost,
+ "This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} "
+ "but adds cost of statement masking.  Masking cost doesn't include cost "
+ "of mask creation and cost of the original unmasked statement.  Full cost "
+ "of masked statements should be computed using both add_stmt_cost and "
+ "add_stmt_masking_cost.",
+ unsigned,
+ (void *data, int count, enum vect_cost_for_stmt kind,
+  struct _stmt_vec_info *stmt_info, int misalign,
+  enum vect_cost_model_location where),
+ default_add_stmt_masking_cost)
+
 /* Target function to calculate the total cost of the current vectorized
    loop or block.  */
 DEFHOOK
 (finish_cost,
  "This hook should complete calculations of the cost of vectorizing a loop "
- "or basic block based on @var{data}, and return the prologue, body, and "
- "epilogue costs as unsigned integers.  The default returns the value of "
- "the three accumulators.",
+ "or basic block based on @var{data}, and return the prologue, body, epilogue, "
+ "masking prologue and masking body costs as unsigned integers.  The default "
+ "returns the value of the five accumulators.",
  void,
  (void *data, unsigned *prologue_cost, unsigned *body_cost,
-  unsigned *epilogue_cost),
+  unsigned *epilogue_cost, unsigned *masking_prologue_cost,
+  unsigned *masking_body_cost),
  default_finish_cost)
 
 /* Function to delete target-specific cost modeling data.  */
diff --git a/gcc/target.h b/gcc/target.h
index 43022bd..17e3803 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -170,7 +170,9 @@ enum vect_cost_for_stmt
   cond_branch_taken,
   vec_perm,
   vec_promote_demote,
-  vec_construct
+  vec_construct,
+  vector_mask_load,
+  vector_mask_store
 };
 
 /* Separate locations for which the vectorizer cost model should
@@ -178,7 +180,9 @@ enum vect_cost_for_stmt
 enum vect_cost_model_location {
   vect_prologue = 0,
   vect_body = 1,
-  vect_epilogue = 2
+  vect_epilogue = 2,
+  vect_masking_prologue = 3,
+  vect_masking_body = 4
 };
 
 /* The target structure.  This holds all the backend hooks.  */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 6b4601b..a0040e1 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -579,6 +579,8 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case cond_branch_not_taken:
       case vec_perm:
       case vec_promote_demote:
+      case vector_mask_load:
+      case vector_mask_store:
         return 1;
 
       case unaligned_load:
@@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned vector_size)
 void *
 default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  unsigned *cost = XNEWVEC (unsigned, 5);
+  cost[vect_prologue] = 0;
+  cost[vect_body] = 0;
+  cost[vect_epilogue] = 0;
+  cost[vect_masking_prologue] = 0;
+  cost[vect_masking_body] = 0;
   return cost;
 }
 
@@ -1147,16 +1153,48 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
   return retval;
 }
 
+/* By default, the cost model assumes we use VEC_COND_EXPR to mask
+   statement result.  For memory accesses we need to adjust used mask
+   in case access is already masked.  */
+
+unsigned
+default_add_stmt_masking_cost (void *data, int count,
+			       enum vect_cost_for_stmt kind,
+			       struct _stmt_vec_info *stmt_info,
+			       int misalign,
+			       enum vect_cost_model_location where)
+{
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+
+  if (kind == vector_load || kind == vector_store)
+    return retval;
+
+  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+  int stmt_cost = targetm.vectorize.builtin_vectorization_cost (vector_stmt,
+								vectype,
+								misalign);
+
+  retval = (unsigned) (count * stmt_cost);
+  cost[where] += retval;
+
+  return retval;
+}
+
 /* By default, the cost model just returns the accumulated costs.  */
 
 void
 default_finish_cost (void *data, unsigned *prologue_cost,
-		     unsigned *body_cost, unsigned *epilogue_cost)
+		     unsigned *body_cost, unsigned *epilogue_cost,
+		     unsigned *masking_prologue_cost,
+		     unsigned *masking_body_cost)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost     = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  *masking_prologue_cost = cost[vect_masking_prologue];
+  *masking_body_cost = cost[vect_masking_body];
 }
 
 /* Free the cost data.  */
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39..5a1c749 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -106,6 +106,11 @@ extern void *default_init_cost (struct loop *);
 extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
 				       struct _stmt_vec_info *, int,
 				       enum vect_cost_model_location);
+extern unsigned default_add_stmt_masking_cost (void *, int,
+					       enum vect_cost_for_stmt,
+					       struct _stmt_vec_info *, int,
+					       enum vect_cost_model_location);
-extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
+extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *,
+				 unsigned *, unsigned *);
 extern void default_destroy_cost_data (void *);
 
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 5ca9327..c75d234 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3125,6 +3125,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
   int vec_outside_cost = 0;
   unsigned vec_prologue_cost = 0;
   unsigned vec_epilogue_cost = 0;
+  unsigned masking_prologue_cost = 0;
+  unsigned masking_inside_cost = 0;
   int scalar_single_iter_cost = 0;
   int scalar_outside_cost = 0;
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -3343,7 +3345,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   /* Complete the target-specific cost calculations.  */
   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
-	       &vec_inside_cost, &vec_epilogue_cost);
+	       &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
+	       &masking_inside_cost);
 
   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
   
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 3de53d1..5d99763 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2460,6 +2460,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
   int i;
   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
+  unsigned int masking_prologue_cost = 0, masking_inside_cost = 0;
 
   /* Calculate scalar cost.  */
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
@@ -2478,7 +2479,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
 
   /* Complete the target-specific cost calculation.  */
   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
-	       &vec_inside_cost, &vec_epilogue_cost);
+	       &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
+	       &masking_inside_cost);
 
   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 26d84b4..8a61690 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -897,13 +897,27 @@ add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 					  stmt_info, misalign, where);
 }
 
+/* Alias targetm.vectorize.add_stmt_masking_cost.  */
+
+static inline unsigned
+add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
+		       stmt_vec_info stmt_info, int misalign,
+		       enum vect_cost_model_location where)
+{
+  return targetm.vectorize.add_stmt_masking_cost (data, count, kind,
+						  stmt_info, misalign, where);
+}
+
 /* Alias targetm.vectorize.finish_cost.  */
 
 static inline void
 finish_cost (void *data, unsigned *prologue_cost,
-	     unsigned *body_cost, unsigned *epilogue_cost)
+	     unsigned *body_cost, unsigned *epilogue_cost,
+	     unsigned *masking_prologue_cost,
+	     unsigned *masking_body_cost)
 {
-  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost);
+  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost,
+				 masking_prologue_cost, masking_body_cost);
 }
 
 /* Alias targetm.vectorize.destroy_cost_data.  */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, vec-tails 04/10] Add masking cost
  2016-06-22 14:16   ` Ilya Enkovich
@ 2016-07-11 13:38     ` Ilya Enkovich
  0 siblings, 0 replies; 9+ messages in thread
From: Ilya Enkovich @ 2016-07-11 13:38 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, Yuri Rumyantsev, Igor Zamyatin

Ping

2016-06-22 17:13 GMT+03:00 Ilya Enkovich <enkovich.gnu@gmail.com>:
> On 16 Jun 00:16, Jeff Law wrote:
>> On 05/19/2016 01:40 PM, Ilya Enkovich wrote:
>> >Hi,
>> >
>> >This patch extends vectorizer cost model to include masking cost by
>> >adding new cost model locations and new target hook to compute
>> >masking cost.
>> >
>> >Thanks,
>> >Ilya
>> >--
>> >gcc/
>> >
>> >2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>> >
>> >     * config/i386/i386.c (ix86_init_cost): Extend costs array.
>> >     (ix86_add_stmt_masking_cost): New.
>> >     (ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
>> >     args.
>> >     (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
>> >     * config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
>> >     * config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
>> >     * config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
>> >     (rs6000_init_cost): Initialize new cost elements.
>> >     (rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
>> >     * config/spu/spu.c (spu_init_cost): Extend costs array.
>> >     (spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
>> >     * doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
>> >     * doc/tm.texi: Regenerated.
>> >     * target.def (add_stmt_masking_cost): New.
>> >     (finish_cost): Add masking_prologue_cost and masking_body_cost args.
>> >     * target.h (enum vect_cost_for_stmt): Add vector_mask_load and
>> >     vector_mask_store.
>> >     (enum vect_cost_model_location): Add vect_masking_prologue
>> >     and vect_masking_body.
>> >     * targhooks.c (default_builtin_vectorization_cost): Support
>> >     vector_mask_load and vector_mask_store.
>> >     (default_init_cost): Extend costs array.
>> >     (default_add_stmt_masking_cost): New.
>> >     (default_finish_cost): Add masking_prologue_cost and masking_body_cost
>> >     args.
>> >     * targhooks.h (default_add_stmt_masking_cost): New.
>> >     * tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
>> >     finish_cost call.
>> >     * tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
>> >     * tree-vectorizer.h (add_stmt_masking_cost): New.
>> >     (finish_cost): Add masking_prologue_cost and masking_body_cost args.
>> >
>> >
>> >diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> >index 9f62089..6c2c364 100644
>> >--- a/gcc/config/i386/i386.c
>> >+++ b/gcc/config/i386/i386.c
>> >@@ -53932,8 +53932,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
>> > static void *
>> > ix86_init_cost (struct loop *)
>> > {
>> >-  unsigned *cost = XNEWVEC (unsigned, 3);
>> >-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
>> >+  unsigned *cost = XNEWVEC (unsigned, 5);
>> >+  cost[vect_prologue] = 0;
>> >+  cost[vect_body]     = 0;
>> >+  cost[vect_epilogue] = 0;
>> >+  cost[vect_masking_prologue] = 0;
>> >+  cost[vect_masking_body] = 0;
>> >   return cost;
>> Trivial nit -- no need or desire to use whitespace to line up the
>> initializers.   It looks like others may have done this in the duplicated
>> instances of finish_cost. But we shouldn't propagate that mistake into the
>> init_cost hooks ;-)
>>
>>
>> @@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned
>> vector_size)
>> > void *
>> > default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
>> > {
>> >-  unsigned *cost = XNEWVEC (unsigned, 3);
>> >-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
>> >+  unsigned *cost = XNEWVEC (unsigned, 5);
>> >+  cost[vect_prologue] = 0;
>> >+  cost[vect_body]     = 0;
>> >+  cost[vect_epilogue] = 0;
>> >+  cost[vect_masking_prologue] = 0;
>> >+  cost[vect_masking_body] = 0;
>> >   return cost;
>> Here too.  There's others.  I won't point them all out.  Please double check
>> for this nit in any added code.  You don't have to go back and fix existing
>> problems of this nature.
>>
>> I don't see anything here I really object to -- Richi and I may disagree on
>> the compute-costs once in a single scan vs restarting the scan.  If Richi
>> feels strongly about restarting for some reason, I'll defer to him -- he's
>> done more work in the vectorizer than myself.
>>
>> I'd suggest taking another stab at the docs for the hooks based on Richi's
>> question about whether or not the hook returns the cost of the masked
>> statement or the cost of masking the statement.
>>
>> jeff
>
> Thanks for review.  Here is an updated version with initializers and
> documentation fixed.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2016-05-22  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * config/i386/i386.c (ix86_init_cost): Extend costs array.
>         (ix86_add_stmt_masking_cost): New.
>         (ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
>         args.
>         (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
>         * config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
>         * config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
>         * config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
>         (rs6000_init_cost): Initialize new cost elements.
>         (rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
>         * config/spu/spu.c (spu_init_cost): Extend costs array.
>         (spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
>         * doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
>         * doc/tm.texi: Regenerated.
>         * target.def (add_stmt_masking_cost): New.
>         (finish_cost): Add masking_prologue_cost and masking_body_cost args.
>         * target.h (enum vect_cost_for_stmt): Add vector_mask_load and
>         vector_mask_store.
>         (enum vect_cost_model_location): Add vect_masking_prologue
>         and vect_masking_body.
>         * targhooks.c (default_builtin_vectorization_cost): Support
>         vector_mask_load and vector_mask_store.
>         (default_init_cost): Extend costs array.
>         (default_add_stmt_masking_cost): New.
>         (default_finish_cost): Add masking_prologue_cost and masking_body_cost
>         args.
>         * targhooks.h (default_add_stmt_masking_cost): New.
>         * tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
>         finish_cost call.
>         * tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
>         * tree-vectorizer.h (add_stmt_masking_cost): New.
>         (finish_cost): Add masking_prologue_cost and masking_body_cost args.
>
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 7e9f511..9ff7c91 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -54003,8 +54003,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
>  static void *
>  ix86_init_cost (struct loop *)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body] = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
>  }
>
> @@ -54045,16 +54049,56 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>    return retval;
>  }
>
> +/* Implement targetm.vectorize.add_stmt_masking_cost.  */
> +
> +static unsigned
> +ix86_add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
> +                           struct _stmt_vec_info *stmt_info, int misalign,
> +                           enum vect_cost_model_location where)
> +{
> +  bool embedded_masking = false;
> +  unsigned *cost = (unsigned *) data;
> +  unsigned retval = 0;
> +
> +  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
> +  if (vectype)
> +    {
> +      machine_mode mode
> +       = ix86_get_mask_mode (TYPE_VECTOR_SUBPARTS (vectype),
> +                             tree_to_uhwi (TYPE_SIZE_UNIT (vectype)));
> +      embedded_masking = !VECTOR_MODE_P (mode);
> +    }
> +  else
> +    embedded_masking = TARGET_AVX512F;
> +
> +  if (embedded_masking || kind == vector_load)
> +    return retval;
> +
> +  if (kind == vector_store)
> +    return TARGET_INCREASE_MASK_STORE_COST ? 10 : 0;
> +
> +  int stmt_cost = ix86_builtin_vectorization_cost (vector_stmt, vectype, misalign);
> +  retval = (unsigned) (count * stmt_cost);
> +
> +  cost[where] += retval;
> +
> +  return retval;
> +}
> +
>  /* Implement targetm.vectorize.finish_cost.  */
>
>  static void
>  ix86_finish_cost (void *data, unsigned *prologue_cost,
> -                 unsigned *body_cost, unsigned *epilogue_cost)
> +                 unsigned *body_cost, unsigned *epilogue_cost,
> +                 unsigned *masking_prologue_cost,
> +                 unsigned *masking_body_cost)
>  {
>    unsigned *cost = (unsigned *) data;
>    *prologue_cost = cost[vect_prologue];
>    *body_cost     = cost[vect_body];
>    *epilogue_cost = cost[vect_epilogue];
> +  *masking_prologue_cost = cost[vect_masking_prologue];
> +  *masking_body_cost = cost[vect_masking_body];
>  }
>
>  /* Implement targetm.vectorize.destroy_cost_data.  */
> @@ -55035,6 +55079,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
>  #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
>  #undef TARGET_VECTORIZE_ADD_STMT_COST
>  #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
> +#undef TARGET_VECTORIZE_ADD_STMT_MASKING_COST
> +#define TARGET_VECTORIZE_ADD_STMT_MASKING_COST ix86_add_stmt_masking_cost
>  #undef TARGET_VECTORIZE_FINISH_COST
>  #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
>  #undef TARGET_VECTORIZE_DESTROY_COST_DATA
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 47427f5..828344d 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -503,6 +503,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>         ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
>  #define TARGET_ONE_IF_CONV_INSN \
>         ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
> +#define TARGET_INCREASE_MASK_STORE_COST \
> +       ix86_tune_features[X86_TUNE_INCREASE_MASK_STORE_COST]
>
>  /* Feature tests against the various architecture variations.  */
>  enum ix86_arch_indices {
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 31a87b9..3bbcee8 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -527,6 +527,11 @@ DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
>  DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
>           m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
>
> +/* X86_TUNE_INCREASE_MASK_STORE_COST: Increase cost of masked store for
> +   some platforms.  */
> +DEF_TUNE (X86_TUNE_INCREASE_MASK_STORE_COST, "increase_mask_store_cost",
> +         m_HASWELL | m_BDVER4 | m_ZNVER1)
> +
>  /*****************************************************************************/
>  /* This never worked well before.                                            */
>  /*****************************************************************************/
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index c6b2b6a..3b9d4c0 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -5305,7 +5305,7 @@ rs6000_preferred_simd_mode (machine_mode mode)
>  typedef struct _rs6000_cost_data
>  {
>    struct loop *loop_info;
> -  unsigned cost[3];
> +  unsigned cost[5];
>  } rs6000_cost_data;
>
>  /* Test for likely overcommitment of vector hardware resources.  If a
> @@ -5367,6 +5367,8 @@ rs6000_init_cost (struct loop *loop_info)
>    data->cost[vect_prologue] = 0;
>    data->cost[vect_body]     = 0;
>    data->cost[vect_epilogue] = 0;
> +  data->cost[vect_masking_prologue] = 0;
> +  data->cost[vect_masking_body] = 0;
>    return data;
>  }
>
> @@ -5402,7 +5404,9 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>
>  static void
>  rs6000_finish_cost (void *data, unsigned *prologue_cost,
> -                   unsigned *body_cost, unsigned *epilogue_cost)
> +                   unsigned *body_cost, unsigned *epilogue_cost,
> +                   unsigned *masking_prologue_cost,
> +                   unsigned *masking_body_cost)
>  {
>    rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
>
> @@ -5412,6 +5416,8 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
>    *prologue_cost = cost_data->cost[vect_prologue];
>    *body_cost     = cost_data->cost[vect_body];
>    *epilogue_cost = cost_data->cost[vect_epilogue];
> +  *masking_prologue_cost = cost_data->cost[vect_masking_prologue];
> +  *masking_body_cost = cost_data->cost[vect_masking_body];
>  }
>
>  /* Implement targetm.vectorize.destroy_cost_data.  */
> diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
> index c3757eb..a9cb924 100644
> --- a/gcc/config/spu/spu.c
> +++ b/gcc/config/spu/spu.c
> @@ -6630,8 +6630,12 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
>  static void *
>  spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body] = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
>  }
>
> @@ -6667,12 +6671,16 @@ spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>
>  static void
>  spu_finish_cost (void *data, unsigned *prologue_cost,
> -                unsigned *body_cost, unsigned *epilogue_cost)
> +                unsigned *body_cost, unsigned *epilogue_cost,
> +                unsigned *masking_prologue_cost,
> +                unsigned *masking_body_cost)
>  {
>    unsigned *cost = (unsigned *) data;
>    *prologue_cost = cost[vect_prologue];
>    *body_cost     = cost[vect_body];
>    *epilogue_cost = cost[vect_epilogue];
> +  *masking_prologue_cost = cost[vect_masking_prologue];
> +  *masking_body_cost = cost[vect_masking_body];
>  }
>
>  /* Implement targetm.vectorize.destroy_cost_data.  */
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index f963a58..9eaacdc 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -4249,6 +4249,8 @@ address;  but often a machine-dependent strategy can generate better code.
>
>  @hook TARGET_VECTORIZE_ADD_STMT_COST
>
> +@hook TARGET_VECTORIZE_ADD_STMT_MASKING_COST
> +
>  @hook TARGET_VECTORIZE_FINISH_COST
>
>  @hook TARGET_VECTORIZE_DESTROY_COST_DATA
> diff --git a/gcc/target.def b/gcc/target.def
> index 6392e73..442ac14 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -1898,17 +1898,32 @@ DEFHOOK
>    enum vect_cost_model_location where),
>   default_add_stmt_cost)
>
> +/* Similar to add_stmt_cost but records cost of statement masking.  */
> +DEFHOOK
> +(add_stmt_masking_cost,
> + "This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} "
> + "but adds cost of statement masking.  Masking cost doesn't include cost "
> + "of mask creation and cost of the original unmasked statement.  Full cost "
> + "of masked statements should be computed using both add_stmt_cost and "
> + "add_stmt_masking_cost.",
> + unsigned,
> + (void *data, int count, enum vect_cost_for_stmt kind,
> +  struct _stmt_vec_info *stmt_info, int misalign,
> +  enum vect_cost_model_location where),
> + default_add_stmt_masking_cost)
> +
>  /* Target function to calculate the total cost of the current vectorized
>     loop or block.  */
>  DEFHOOK
>  (finish_cost,
>   "This hook should complete calculations of the cost of vectorizing a loop "
> - "or basic block based on @var{data}, and return the prologue, body, and "
> - "epilogue costs as unsigned integers.  The default returns the value of "
> - "the three accumulators.",
> + "or basic block based on @var{data}, and return the prologue, body, epilogue, "
> + "masking prologue and masking body costs as unsigned integers.  The default "
> + "returns the value of the five accumulators.",
>   void,
>   (void *data, unsigned *prologue_cost, unsigned *body_cost,
> -  unsigned *epilogue_cost),
> +  unsigned *epilogue_cost, unsigned *masking_prologue_cost,
> +  unsigned *masking_body_cost),
>   default_finish_cost)
>
>  /* Function to delete target-specific cost modeling data.  */
> diff --git a/gcc/target.h b/gcc/target.h
> index 43022bd..17e3803 100644
> --- a/gcc/target.h
> +++ b/gcc/target.h
> @@ -170,7 +170,9 @@ enum vect_cost_for_stmt
>    cond_branch_taken,
>    vec_perm,
>    vec_promote_demote,
> -  vec_construct
> +  vec_construct,
> +  vector_mask_load,
> +  vector_mask_store
>  };
>
>  /* Separate locations for which the vectorizer cost model should
> @@ -178,7 +180,9 @@ enum vect_cost_for_stmt
>  enum vect_cost_model_location {
>    vect_prologue = 0,
>    vect_body = 1,
> -  vect_epilogue = 2
> +  vect_epilogue = 2,
> +  vect_masking_prologue = 3,
> +  vect_masking_body = 4
>  };
>
>  /* The target structure.  This holds all the backend hooks.  */
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index 6b4601b..a0040e1 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -579,6 +579,8 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
>        case cond_branch_not_taken:
>        case vec_perm:
>        case vec_promote_demote:
> +      case vector_mask_load:
> +      case vector_mask_store:
>          return 1;
>
>        case unaligned_load:
> @@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned vector_size)
>  void *
>  default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
>  {
> -  unsigned *cost = XNEWVEC (unsigned, 3);
> -  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
> +  unsigned *cost = XNEWVEC (unsigned, 5);
> +  cost[vect_prologue] = 0;
> +  cost[vect_body] = 0;
> +  cost[vect_epilogue] = 0;
> +  cost[vect_masking_prologue] = 0;
> +  cost[vect_masking_body] = 0;
>    return cost;
>  }
>
> @@ -1147,16 +1153,48 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>    return retval;
>  }
>
> +/* By default, the cost model assumes we use VEC_COND_EXPR to mask
> +   the statement result.  For memory accesses we need to adjust the mask
> +   used in case the access is already masked.  */
> +
> +unsigned
> +default_add_stmt_masking_cost (void *data, int count,
> +                              enum vect_cost_for_stmt kind,
> +                              struct _stmt_vec_info *stmt_info,
> +                              int misalign,
> +                              enum vect_cost_model_location where)
> +{
> +  unsigned *cost = (unsigned *) data;
> +  unsigned retval = 0;
> +
> +  if (kind == vector_load || kind == vector_store)
> +    return retval;
> +
> +  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
> +  int stmt_cost = targetm.vectorize.builtin_vectorization_cost (vector_stmt,
> +                                                               vectype,
> +                                                               misalign);
> +
> +  retval = (unsigned) (count * stmt_cost);
> +  cost[where] += retval;
> +
> +  return retval;
> +}
> +
>  /* By default, the cost model just returns the accumulated costs.  */
>
>  void
>  default_finish_cost (void *data, unsigned *prologue_cost,
> -                    unsigned *body_cost, unsigned *epilogue_cost)
> +                    unsigned *body_cost, unsigned *epilogue_cost,
> +                    unsigned *masking_prologue_cost,
> +                    unsigned *masking_body_cost)
>  {
>    unsigned *cost = (unsigned *) data;
>    *prologue_cost = cost[vect_prologue];
>    *body_cost     = cost[vect_body];
>    *epilogue_cost = cost[vect_epilogue];
> +  *masking_prologue_cost = cost[vect_masking_prologue];
> +  *masking_body_cost = cost[vect_masking_body];
>  }
>
>  /* Free the cost data.  */
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 7687c39..5a1c749 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -106,6 +106,10 @@ extern void *default_init_cost (struct loop *);
>  extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
>                                        struct _stmt_vec_info *, int,
>                                        enum vect_cost_model_location);
> +extern unsigned default_add_stmt_masking_cost (void *, int,
> +                                              enum vect_cost_for_stmt,
> +                                              struct _stmt_vec_info *, int,
> +                                              enum vect_cost_model_location);
>  extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
>  extern void default_destroy_cost_data (void *);
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 5ca9327..c75d234 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -3125,6 +3125,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>    int vec_outside_cost = 0;
>    unsigned vec_prologue_cost = 0;
>    unsigned vec_epilogue_cost = 0;
> +  unsigned masking_prologue_cost = 0;
> +  unsigned masking_inside_cost = 0;
>    int scalar_single_iter_cost = 0;
>    int scalar_outside_cost = 0;
>    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> @@ -3343,7 +3345,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>
>    /* Complete the target-specific cost calculations.  */
>    finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
> -              &vec_inside_cost, &vec_epilogue_cost);
> +              &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
> +              &masking_inside_cost);
>
>    vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
>
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 3de53d1..5d99763 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2460,6 +2460,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
>    int i;
>    unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
>    unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
> +  unsigned int masking_prologue_cost = 0, masking_inside_cost = 0;
>
>    /* Calculate scalar cost.  */
>    FOR_EACH_VEC_ELT (slp_instances, i, instance)
> @@ -2478,7 +2479,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
>
>    /* Complete the target-specific cost calculation.  */
>    finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
> -              &vec_inside_cost, &vec_epilogue_cost);
> +              &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
> +              &masking_inside_cost);
>
>    vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
>
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 26d84b4..8a61690 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -897,13 +897,27 @@ add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
>                                           stmt_info, misalign, where);
>  }
>
> +/* Alias targetm.vectorize.add_stmt_masking_cost.  */
> +
> +static inline unsigned
> +add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
> +                      stmt_vec_info stmt_info, int misalign,
> +                      enum vect_cost_model_location where)
> +{
> +  return targetm.vectorize.add_stmt_masking_cost (data, count, kind,
> +                                                 stmt_info, misalign, where);
> +}
> +
>  /* Alias targetm.vectorize.finish_cost.  */
>
>  static inline void
>  finish_cost (void *data, unsigned *prologue_cost,
> -            unsigned *body_cost, unsigned *epilogue_cost)
> +            unsigned *body_cost, unsigned *epilogue_cost,
> +            unsigned *masking_prologue_cost,
> +            unsigned *masking_body_cost)
>  {
> -  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost);
> +  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost,
> +                                masking_prologue_cost, masking_body_cost);
>  }
>
>  /* Alias targetm.vectorize.destroy_cost_data.  */

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-07-11 13:38 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-19 19:41 [PATCH, vec-tails 04/10] Add masking cost Ilya Enkovich
2016-05-20  9:24 ` Richard Biener
2016-05-20  9:44   ` Ilya Enkovich
2016-05-20 11:15     ` Richard Biener
2016-05-20 11:32       ` Ilya Enkovich
2016-06-16  6:06         ` Jeff Law
2016-06-16  6:17 ` Jeff Law
2016-06-22 14:16   ` Ilya Enkovich
2016-07-11 13:38     ` Ilya Enkovich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).