From: Kewen Lin <linkw@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com, richard.sandiford@arm.com,
	segher@kernel.crashing.org, bergner@linux.ibm.com
Subject: [PATCH 5/9] vect: Adjust vectorizable_load costing on VMAT_GATHER_SCATTER
Date: Mon, 12 Jun 2023 21:03:26 -0500
Message-ID: <ea7b896e3cbc7ffc13e802a12f3ece1c625b0c4d.1686573640.git.linkw@linux.ibm.com>
In-Reply-To: <cover.1686573640.git.linkw@linux.ibm.com>

This patch adjusts the cost handling on VMAT_GATHER_SCATTER
in function vectorizable_load.  We don't call function
vect_model_load_cost for it any more.

It mainly concerns gather loads implemented with an IFN
and emulated gather loads; the new inline costing follows
the handling previously done in function
vect_model_load_cost.  This patch shouldn't cause any
functional changes.
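
To illustrate the costing scheme being preserved: an IFN gather is
costed as N scalar loads (N = number of vector elements), while an
emulated gather is costed as N offset vector element extracts plus
N scalar loads plus one vector construction.  A minimal standalone
sketch of that accounting follows; the helper names and unit costs
are hypothetical, the real numbers come from the target cost hooks
behind record_stmt_cost:

  #include <cstdio>

  /* Cost kinds mirroring the vect_cost_for_stmt values used in the
     patch.  The unit costs below are illustrative only; GCC queries
     the target instead.  */
  enum cost_kind { scalar_load, vec_to_scalar, vec_construct };

  static unsigned
  unit_cost (cost_kind kind)
  {
    switch (kind)
      {
      case scalar_load:   return 1;
      case vec_to_scalar: return 1;
      case vec_construct: return 3;
      }
    return 0;
  }

  /* IFN gather: costed as N scalar loads per vector copy.  */
  static unsigned
  ifn_gather_cost (unsigned nunits)
  {
    return nunits * unit_cost (scalar_load);
  }

  /* Emulated gather: N offset element extracts, N scalar loads, and
     one vector construction gathering the loaded values.  */
  static unsigned
  emulated_gather_cost (unsigned nunits)
  {
    return nunits * unit_cost (vec_to_scalar)
           + nunits * unit_cost (scalar_load)
           + unit_cost (vec_construct);
  }

  int
  main ()
  {
    printf ("V4 IFN gather cost: %u\n", ifn_gather_cost (4));
    printf ("V4 emulated gather cost: %u\n", emulated_gather_cost (4));
    return 0;
  }

With these made-up units a V4 gather costs 4 via the IFN path and 11
when emulated, so the emulated form stays penalized as before.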

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_load): Adjust the cost handling on
	VMAT_GATHER_SCATTER without calling vect_model_load_cost.
	(vect_model_load_cost): Adjust the assertion on VMAT_GATHER_SCATTER,
	remove VMAT_GATHER_SCATTER related handling and the related parameter
	gs_info.
---
 gcc/tree-vect-stmts.cc | 123 +++++++++++++++++++++++++----------------
 1 file changed, 75 insertions(+), 48 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 651dc800380..a3fd0bf879e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1131,11 +1131,10 @@ vect_model_load_cost (vec_info *vinfo,
 		      vect_memory_access_type memory_access_type,
 		      dr_alignment_support alignment_support_scheme,
 		      int misalignment,
-		      gather_scatter_info *gs_info,
 		      slp_tree slp_node,
 		      stmt_vector_for_cost *cost_vec)
 {
-  gcc_assert ((memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl)
+  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
 	      && memory_access_type != VMAT_INVARIANT
 	      && memory_access_type != VMAT_ELEMENTWISE
 	      && memory_access_type != VMAT_STRIDED_SLP);
@@ -1222,35 +1221,9 @@ vect_model_load_cost (vec_info *vinfo,
                          group_size);
     }
 
-  /* The loads themselves.  */
-  if (memory_access_type == VMAT_GATHER_SCATTER)
-    {
-      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
-      if (memory_access_type == VMAT_GATHER_SCATTER
-	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
-	/* For emulated gathers N offset vector element extracts
-	   (we assume the scalar scaling and ptr + offset add is consumed by
-	   the load).  */
-	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
-					 vec_to_scalar, stmt_info, 0,
-					 vect_body);
-      /* N scalar loads plus gathering them into a vector.  */
-      inside_cost += record_stmt_cost (cost_vec,
-				       ncopies * assumed_nunits,
-				       scalar_load, stmt_info, 0, vect_body);
-    }
-  else
-    vect_get_load_cost (vinfo, stmt_info, ncopies,
-			alignment_support_scheme, misalignment, first_stmt_p,
-			&inside_cost, &prologue_cost, 
-			cost_vec, cost_vec, true);
-
-  if (memory_access_type == VMAT_GATHER_SCATTER
-      && gs_info->ifn == IFN_LAST
-      && !gs_info->decl)
-    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
-				     stmt_info, 0, vect_body);
+  vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
+		      misalignment, first_stmt_p, &inside_cost, &prologue_cost,
+		      cost_vec, cost_vec, true);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -10137,6 +10110,7 @@ vectorizable_load (vec_info *vinfo,
     }
   tree vec_mask = NULL_TREE;
   poly_uint64 group_elt = 0;
+  unsigned int inside_cost = 0;
   for (j = 0; j < ncopies; j++)
     {
       /* 1. Create the vector or array pointer update chain.  */
@@ -10268,23 +10242,25 @@ vectorizable_load (vec_info *vinfo,
 	  /* Record that VEC_ARRAY is now dead.  */
 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
 	}
-      else if (!costing_p)
+      else
 	{
 	  for (i = 0; i < vec_num; i++)
 	    {
 	      tree final_mask = NULL_TREE;
-	      if (loop_masks
-		  && memory_access_type != VMAT_INVARIANT)
-		final_mask = vect_get_loop_mask (gsi, loop_masks,
-						 vec_num * ncopies,
-						 vectype, vec_num * j + i);
-	      if (vec_mask)
-		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
-					       final_mask, vec_mask, gsi);
-
-	      if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
-					       gsi, stmt_info, bump);
+	      if (!costing_p)
+		{
+		  if (loop_masks && memory_access_type != VMAT_INVARIANT)
+		    final_mask
+		      = vect_get_loop_mask (gsi, loop_masks, vec_num * ncopies,
+					    vectype, vec_num * j + i);
+		  if (vec_mask)
+		    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+						   final_mask, vec_mask, gsi);
+
+		  if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+		    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+						   gsi, stmt_info, bump);
+		}
 
 	      /* 2. Create the vector-load in the loop.  */
 	      switch (alignment_support_scheme)
@@ -10298,6 +10274,16 @@ vectorizable_load (vec_info *vinfo,
 		    if (memory_access_type == VMAT_GATHER_SCATTER
 			&& gs_info.ifn != IFN_LAST)
 		      {
+			if (costing_p)
+			  {
+			    unsigned int cnunits
+			      = vect_nunits_for_cost (vectype);
+			    inside_cost
+			      += record_stmt_cost (cost_vec, cnunits,
+						   scalar_load, stmt_info, 0,
+						   vect_body);
+			    goto vec_num_loop_costing_end;
+			  }
 			if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
 			  vec_offset = vec_offsets[vec_num * j + i];
 			tree zero = build_zero_cst (vectype);
@@ -10322,6 +10308,25 @@ vectorizable_load (vec_info *vinfo,
 			gcc_assert (!final_mask);
 			unsigned HOST_WIDE_INT const_nunits
 			  = nunits.to_constant ();
+			if (costing_p)
+			  {
+			    /* For emulated gathers N offset vector element extracts
+			       (ptr + offset add is consumed by the load).  */
+			    inside_cost
+			      += record_stmt_cost (cost_vec, const_nunits,
+						   vec_to_scalar, stmt_info, 0,
+						   vect_body);
+			    /* N scalar loads plus gathering them into a
+			       vector.  */
+			    inside_cost
+			      += record_stmt_cost (cost_vec, const_nunits,
+						   scalar_load, stmt_info, 0,
+						   vect_body);
+			    inside_cost
+			      += record_stmt_cost (cost_vec, 1, vec_construct,
+						   stmt_info, 0, vect_body);
+			    goto vec_num_loop_costing_end;
+			  }
 			unsigned HOST_WIDE_INT const_offset_nunits
 			  = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
 			      .to_constant ();
@@ -10374,6 +10379,9 @@ vectorizable_load (vec_info *vinfo,
 			break;
 		      }
 
+		    if (costing_p)
+		      goto vec_num_loop_costing_end;
+
 		    align =
 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
 		    if (alignment_support_scheme == dr_aligned)
@@ -10544,6 +10552,8 @@ vectorizable_load (vec_info *vinfo,
 		  }
 		case dr_explicit_realign:
 		  {
+		    if (costing_p)
+		      goto vec_num_loop_costing_end;
 		    tree ptr, bump;
 
 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
@@ -10606,6 +10616,8 @@ vectorizable_load (vec_info *vinfo,
 		  }
 		case dr_explicit_realign_optimized:
 		  {
+		    if (costing_p)
+		      goto vec_num_loop_costing_end;
 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
 		      new_temp = copy_ssa_name (dataref_ptr);
 		    else
@@ -10702,10 +10714,14 @@ vectorizable_load (vec_info *vinfo,
 						 gsi, stmt_info, bump);
 		  group_elt = 0;
 		}
+vec_num_loop_costing_end:
+	      ;
 	    }
 	  /* Bump the vector pointer to account for a gap or for excess
 	     elements loaded for a permuted SLP load.  */
-	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
+	  if (!costing_p
+	      && maybe_ne (group_gap_adj, 0U)
+	      && slp_perm)
 	    {
 	      poly_wide_int bump_val
 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
@@ -10754,9 +10770,20 @@ vectorizable_load (vec_info *vinfo,
     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 
   if (costing_p)
-    vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
-			  alignment_support_scheme, misalignment, &gs_info,
-			  slp_node, cost_vec);
+    {
+      if (memory_access_type == VMAT_GATHER_SCATTER)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "vect_model_load_cost: inside_cost = %u, "
+			     "prologue_cost = 0 .\n",
+			     inside_cost);
+	}
+      else
+	vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
+			      alignment_support_scheme, misalignment, slp_node,
+			      cost_vec);
+    }
 
   return true;
 }
-- 
2.31.1


