From: Kewen Lin <linkw@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com, richard.sandiford@arm.com,
	segher@kernel.crashing.org, bergner@linux.ibm.com
Subject: [PATCH 4/9] vect: Adjust vectorizable_load costing on VMAT_ELEMENTWISE and VMAT_STRIDED_SLP
Date: Mon, 12 Jun 2023 21:03:25 -0500
Message-ID: <0281a2a022869efe379130aea6e0782e4827ef61.1686573640.git.linkw@linux.ibm.com>
In-Reply-To: <cover.1686573640.git.linkw@linux.ibm.com>

This patch adjusts the cost handling for VMAT_ELEMENTWISE
and VMAT_STRIDED_SLP in function vectorizable_load.  We no
longer call function vect_model_load_cost for them.

As PR82255 shows, we don't always need a vector construction
there; moving the costing next to the transform lets us cost
the vector construction only when it's actually needed.  It
also counts the number of loads consistently in some cases.
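
To illustrate (a hypothetical example, not taken from the patch or
the PR testcase): a load whose stride is only known at run time is
typically classified as VMAT_ELEMENTWISE, so the vectorizer emits one
scalar load per element and, when nloads > 1, glues the results
together with a CONSTRUCTOR, which is what the vec_construct cost
accounts for:

  /* Hypothetical strided load; the runtime stride makes the access
     VMAT_ELEMENTWISE, so each vector is assembled from scalar loads
     (scalar_load costs) plus one vec_construct per copy.  */
  int
  sum_strided (int *array, int n, int stride)
  {
    int sum = 0;
    for (int i = 0; i < n; i += stride)
      sum += array[i];
    return sum;
  }

By contrast, in the PR82255 testcase below the group of loads can be
covered by full vector loads (ltype is a vector type and nloads is 1),
so no CONSTRUCTOR and hence no vec_construct cost should be needed.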

	PR tree-optimization/82255

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_load): Adjust the cost handling
	on VMAT_ELEMENTWISE and VMAT_STRIDED_SLP without calling
	vect_model_load_cost.
	(vect_model_load_cost): Assert it won't get VMAT_ELEMENTWISE and
	VMAT_STRIDED_SLP any more, and remove their related handlings.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c: New test.

2023-06-13  Bill Schmidt  <wschmidt@linux.ibm.com>
	    Kewen Lin  <linkw@linux.ibm.com>
---
 .../vect/costmodel/ppc/costmodel-pr82255.c    |  31 ++++
 gcc/tree-vect-stmts.cc                        | 170 +++++++++++-------
 2 files changed, 134 insertions(+), 67 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c
new file mode 100644
index 00000000000..9317ee2e15b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+/* PR82255: Ensure we don't require a vec_construct cost when we aren't
+   going to generate a strided load.  */
+
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__))
+__attribute__ ((__const__));
+
+static int
+foo (unsigned char *w, int i, unsigned char *x, int j)
+{
+  int tot = 0;
+  for (int a = 0; a < 16; a++)
+    {
+#pragma GCC unroll 16
+      for (int b = 0; b < 16; b++)
+	tot += abs (w[b] - x[b]);
+      w += i;
+      x += j;
+    }
+  return tot;
+}
+
+void
+bar (unsigned char *w, unsigned char *x, int i, int *result)
+{
+  *result = foo (w, 16, x, i);
+}
+
+/* { dg-final { scan-tree-dump-times "vec_construct" 0 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 19c61d703c8..651dc800380 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1136,7 +1136,9 @@ vect_model_load_cost (vec_info *vinfo,
 		      stmt_vector_for_cost *cost_vec)
 {
   gcc_assert ((memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl)
-	      && memory_access_type != VMAT_INVARIANT);
+	      && memory_access_type != VMAT_INVARIANT
+	      && memory_access_type != VMAT_ELEMENTWISE
+	      && memory_access_type != VMAT_STRIDED_SLP);
 
   unsigned int inside_cost = 0, prologue_cost = 0;
   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -1221,8 +1223,7 @@ vect_model_load_cost (vec_info *vinfo,
     }
 
   /* The loads themselves.  */
-  if (memory_access_type == VMAT_ELEMENTWISE
-      || memory_access_type == VMAT_GATHER_SCATTER)
+  if (memory_access_type == VMAT_GATHER_SCATTER)
     {
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
@@ -1244,10 +1245,10 @@ vect_model_load_cost (vec_info *vinfo,
 			alignment_support_scheme, misalignment, first_stmt_p,
 			&inside_cost, &prologue_cost, 
 			cost_vec, cost_vec, true);
-  if (memory_access_type == VMAT_ELEMENTWISE
-      || memory_access_type == VMAT_STRIDED_SLP
-      || (memory_access_type == VMAT_GATHER_SCATTER
-	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
+
+  if (memory_access_type == VMAT_GATHER_SCATTER
+      && gs_info->ifn == IFN_LAST
+      && !gs_info->decl)
     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
 				     stmt_info, 0, vect_body);
 
@@ -9591,14 +9592,6 @@ vectorizable_load (vec_info *vinfo,
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
     {
-      if (costing_p)
-	{
-	  vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
-				memory_access_type, alignment_support_scheme,
-				misalignment, &gs_info, slp_node, cost_vec);
-	  return true;
-	}
-
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
       tree offvar;
@@ -9610,6 +9603,7 @@ vectorizable_load (vec_info *vinfo,
       unsigned int const_nunits = nunits.to_constant ();
       unsigned HOST_WIDE_INT cst_offset = 0;
       tree dr_offset;
+      unsigned int inside_cost = 0;
 
       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
@@ -9624,6 +9618,7 @@ vectorizable_load (vec_info *vinfo,
 	  first_stmt_info = stmt_info;
 	  first_dr_info = dr_info;
 	}
+
       if (slp && grouped_load)
 	{
 	  group_size = DR_GROUP_SIZE (first_stmt_info);
@@ -9640,43 +9635,44 @@ vectorizable_load (vec_info *vinfo,
 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
 	}
 
-      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
-      stride_base
-	= fold_build_pointer_plus
-	    (DR_BASE_ADDRESS (first_dr_info->dr),
-	     size_binop (PLUS_EXPR,
-			 convert_to_ptrofftype (dr_offset),
-			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
-      stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
+      if (!costing_p)
+	{
+	  dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
+	  stride_base = fold_build_pointer_plus (
+	    DR_BASE_ADDRESS (first_dr_info->dr),
+	    size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
+			convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
+	  stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
 
-      /* For a load with loop-invariant (but other than power-of-2)
-         stride (i.e. not a grouped access) like so:
+	  /* For a load with loop-invariant (but other than power-of-2)
+	     stride (i.e. not a grouped access) like so:
 
-	   for (i = 0; i < n; i += stride)
-	     ... = array[i];
+	       for (i = 0; i < n; i += stride)
+		 ... = array[i];
 
-	 we generate a new induction variable and new accesses to
-	 form a new vector (or vectors, depending on ncopies):
+	     we generate a new induction variable and new accesses to
+	     form a new vector (or vectors, depending on ncopies):
 
-	   for (j = 0; ; j += VF*stride)
-	     tmp1 = array[j];
-	     tmp2 = array[j + stride];
-	     ...
-	     vectemp = {tmp1, tmp2, ...}
-         */
+	       for (j = 0; ; j += VF*stride)
+		 tmp1 = array[j];
+		 tmp2 = array[j + stride];
+		 ...
+		 vectemp = {tmp1, tmp2, ...}
+	     */
 
-      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
-			    build_int_cst (TREE_TYPE (stride_step), vf));
+	  ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
+				build_int_cst (TREE_TYPE (stride_step), vf));
 
-      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+	  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
-      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
-      create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
-		 loop, &incr_gsi, insert_after,
-		 &offvar, NULL);
+	  stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
+	  ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
+	  create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
+		     loop, &incr_gsi, insert_after,
+		     &offvar, NULL);
 
-      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
+	  stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
+	}
 
       running_off = offvar;
       alias_off = build_int_cst (ref_type, 0);
@@ -9743,11 +9739,23 @@ vectorizable_load (vec_info *vinfo,
       unsigned int n_groups = 0;
       for (j = 0; j < ncopies; j++)
 	{
-	  if (nloads > 1)
+	  if (nloads > 1 && !costing_p)
 	    vec_alloc (v, nloads);
 	  gimple *new_stmt = NULL;
 	  for (i = 0; i < nloads; i++)
 	    {
+	      if (costing_p)
+		{
+		  if (VECTOR_TYPE_P (ltype))
+		    vect_get_load_cost (vinfo, stmt_info, 1,
+					alignment_support_scheme, misalignment,
+					false, &inside_cost, nullptr, cost_vec,
+					cost_vec, true);
+		  else
+		    inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
+						     stmt_info, 0, vect_body);
+		  continue;
+		}
 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
 					     group_el * elsz + cst_offset);
 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
@@ -9778,42 +9786,70 @@ vectorizable_load (vec_info *vinfo,
 		  group_el = 0;
 		}
 	    }
+
 	  if (nloads > 1)
 	    {
-	      tree vec_inv = build_constructor (lvectype, v);
-	      new_temp = vect_init_vector (vinfo, stmt_info,
-					   vec_inv, lvectype, gsi);
-	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
-	      if (lvectype != vectype)
+	      if (costing_p)
+		inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
+						 stmt_info, 0, vect_body);
+	      else
 		{
-		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
-						  VIEW_CONVERT_EXPR,
-						  build1 (VIEW_CONVERT_EXPR,
-							  vectype, new_temp));
-		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+		  tree vec_inv = build_constructor (lvectype, v);
+		  new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
+					       lvectype, gsi);
+		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
+		  if (lvectype != vectype)
+		    {
+		      new_stmt
+			= gimple_build_assign (make_ssa_name (vectype),
+					       VIEW_CONVERT_EXPR,
+					       build1 (VIEW_CONVERT_EXPR,
+						       vectype, new_temp));
+		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+						   gsi);
+		    }
 		}
 	    }
 
-	  if (slp)
+	  if (!costing_p)
 	    {
-	      if (slp_perm)
-		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+	      if (slp)
+		{
+		  if (slp_perm)
+		    dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+		  else
+		    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+		}
 	      else
-		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
-	    }
-	  else
-	    {
-	      if (j == 0)
-		*vec_stmt = new_stmt;
-	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+		{
+		  if (j == 0)
+		    *vec_stmt = new_stmt;
+		  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+		}
 	    }
 	}
       if (slp_perm)
 	{
 	  unsigned n_perms;
-	  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
-					false, &n_perms);
+	  if (costing_p)
+	    {
+	      unsigned n_loads;
+	      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
+					    true, &n_perms, &n_loads);
+	      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
+					       first_stmt_info, 0, vect_body);
+	    }
+	  else
+	    vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
+					  false, &n_perms);
 	}
+
+      if (costing_p && dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_model_load_cost: inside_cost = %u, "
+			 "prologue_cost = 0 .\n",
+			 inside_cost);
+
       return true;
     }
 
-- 
2.31.1
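
To double-check the costing change locally, something along these
lines should work on a Power target with vector support (the flags
are indicative only; the testsuite harness adds its own dump options
and target flags when running the costmodel/ppc tests):

  $ gcc -O2 -ftree-vectorize -fdump-tree-vect-details -S \
      gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c
  $ grep -c vec_construct costmodel-pr82255.c.*.vect

With the patch applied, the grep should find no vec_construct in the
vect dump, matching the scan-tree-dump-times check in the new test.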


Thread overview: 27+ messages
2023-06-13  2:03 [PATCH 0/9] vect: Move costing next to the transform for vect load Kewen Lin
2023-06-13  2:03 ` [PATCH 1/9] vect: Move vect_model_load_cost next to the transform in vectorizable_load Kewen Lin
2023-06-13  2:03 ` [PATCH 2/9] vect: Adjust vectorizable_load costing on VMAT_GATHER_SCATTER && gs_info.decl Kewen Lin
2023-06-30 11:11   ` Richard Biener
2023-07-03  2:57     ` [PATCH 2/9 v2] " Kewen.Lin
2023-06-13  2:03 ` [PATCH 3/9] vect: Adjust vectorizable_load costing on VMAT_INVARIANT Kewen Lin
2023-06-30 11:18   ` Richard Biener
2023-07-03  2:58     ` [PATCH 3/9 v2] " Kewen.Lin
2023-06-13  2:03 ` Kewen Lin [this message]
2023-07-02  8:58   ` [PATCH 4/9] vect: Adjust vectorizable_load costing on VMAT_ELEMENTWISE and VMAT_STRIDED_SLP Richard Sandiford
2023-07-03  3:19     ` Kewen.Lin
2023-07-22 15:58       ` Iain Sandoe
2023-07-24  1:50         ` Kewen.Lin
2023-06-13  2:03 ` [PATCH 5/9] vect: Adjust vectorizable_load costing on VMAT_GATHER_SCATTER Kewen Lin
2023-07-03  3:01   ` [PATCH 5/9 v2] " Kewen.Lin
2023-06-13  2:03 ` [PATCH 6/9] vect: Adjust vectorizable_load costing on VMAT_LOAD_STORE_LANES Kewen Lin
2023-06-13  2:03 ` [PATCH 7/9] vect: Adjust vectorizable_load costing on VMAT_CONTIGUOUS_REVERSE Kewen Lin
2023-06-13  2:03 ` [PATCH 8/9] vect: Adjust vectorizable_load costing on VMAT_CONTIGUOUS_PERMUTE Kewen Lin
2023-06-14  8:17   ` Hongtao Liu
2023-06-19  7:23     ` Kewen.Lin
2023-06-13  2:03 ` [PATCH 9/9] vect: Adjust vectorizable_load costing on VMAT_CONTIGUOUS Kewen Lin
2023-07-03  3:06   ` [PATCH 9/9 v2] " Kewen.Lin
2023-06-26  6:00 ` [PATCH 0/9] vect: Move costing next to the transform for vect load Kewen.Lin
2023-06-30 11:37 ` Richard Biener
2023-07-02  9:13   ` Richard Sandiford
2023-07-03  3:39   ` Kewen.Lin
2023-07-03  8:42     ` Richard Biener
