public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4744] Refactor x86 vectorized gather path
@ 2023-10-19 12:28 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-10-19 12:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b068886dcd7eb4a88dd82643a7a6176215471889

commit r14-4744-gb068886dcd7eb4a88dd82643a7a6176215471889
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Oct 18 14:39:21 2023 +0200

    Refactor x86 vectorized gather path
    
    The following moves the builtin decl gather vectorization path
    alongside the internal function and emulated gather vectorization
    paths, simplifying the existing function down to generating the call
    and the required conversions to the actual argument types.  This
    thereby exposes the unique support for the offset or data vector
    having twice as many lanes as the other.  It also makes the code
    path handle SLP in principle (but the SLP build needs adjustments
    for this; a patch is coming).
    
            * tree-vect-stmts.cc (vect_build_gather_load_calls): Rename
            to ...
            (vect_build_one_gather_load_call): ... this.  Refactor,
            inline widening/narrowing support ...
            (vectorizable_load): ... here, do gather vectorization
            with builtin decls along other gather vectorization.

Diff:
---
 gcc/tree-vect-stmts.cc | 406 ++++++++++++++++++++++---------------------------
 1 file changed, 179 insertions(+), 227 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index e5ff44c25f10..ee5f56bbbdaf 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2595,268 +2595,99 @@ vect_build_zero_merge_argument (vec_info *vinfo,
 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
    the gather load operation.  If the load is conditional, MASK is the
-   unvectorized condition and MASK_DT is its definition type, otherwise
-   MASK is null.  */
+   vectorized condition, otherwise MASK is null.  PTR is the base
+   pointer and OFFSET is the vectorized offset.  */
 
-static void
-vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
-			      gimple_stmt_iterator *gsi,
-			      gimple **vec_stmt,
-			      gather_scatter_info *gs_info,
-			      tree mask,
-			      stmt_vector_for_cost *cost_vec)
+static gimple *
+vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
+				 gimple_stmt_iterator *gsi,
+				 gather_scatter_info *gs_info,
+				 tree ptr, tree offset, tree mask)
 {
-  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
-  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
-  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
-  edge pe = loop_preheader_edge (loop);
-  enum { NARROW, NONE, WIDEN } modifier;
-  poly_uint64 gather_off_nunits
-    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
-
-  /* FIXME: Keep the previous costing way in vect_model_load_cost by costing
-     N scalar loads, but it should be tweaked to use target specific costs
-     on related gather load calls.  */
-  if (cost_vec)
-    {
-      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
-      unsigned int inside_cost;
-      inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
-				      scalar_load, stmt_info, 0, vect_body);
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_NOTE, vect_location,
-			 "vect_model_load_cost: inside_cost = %d, "
-			 "prologue_cost = 0 .\n",
-			 inside_cost);
-      return;
-    }
-
   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
-  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+  /* ptrtype */ arglist = TREE_CHAIN (arglist);
   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   tree scaletype = TREE_VALUE (arglist);
-  tree real_masktype = masktype;
+  tree var;
   gcc_checking_assert (types_compatible_p (srctype, rettype)
 		       && (!mask
 			   || TREE_CODE (masktype) == INTEGER_TYPE
 			   || types_compatible_p (srctype, masktype)));
-  if (mask)
-    masktype = truth_type_for (srctype);
-
-  tree mask_halftype = masktype;
-  tree perm_mask = NULL_TREE;
-  tree mask_perm_mask = NULL_TREE;
-  if (known_eq (nunits, gather_off_nunits))
-    modifier = NONE;
-  else if (known_eq (nunits * 2, gather_off_nunits))
-    {
-      modifier = WIDEN;
 
-      /* Currently widening gathers and scatters are only supported for
-	 fixed-length vectors.  */
-      int count = gather_off_nunits.to_constant ();
-      vec_perm_builder sel (count, count, 1);
-      for (int i = 0; i < count; ++i)
-	sel.quick_push (i | (count / 2));
-
-      vec_perm_indices indices (sel, 1, count);
-      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
-					      indices);
-    }
-  else if (known_eq (nunits, gather_off_nunits * 2))
+  tree op = offset;
+  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
     {
-      modifier = NARROW;
-
-      /* Currently narrowing gathers and scatters are only supported for
-	 fixed-length vectors.  */
-      int count = nunits.to_constant ();
-      vec_perm_builder sel (count, count, 1);
-      sel.quick_grow (count);
-      for (int i = 0; i < count; ++i)
-	sel[i] = i < count / 2 ? i : i + count / 2;
-      vec_perm_indices indices (sel, 2, count);
-      perm_mask = vect_gen_perm_mask_checked (vectype, indices);
-
-      ncopies *= 2;
-
-      if (mask && VECTOR_TYPE_P (real_masktype))
-	{
-	  for (int i = 0; i < count; ++i)
-	    sel[i] = i | (count / 2);
-	  indices.new_vector (sel, 2, count);
-	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
-	}
-      else if (mask)
-	mask_halftype = truth_type_for (gs_info->offset_vectype);
-    }
-  else
-    gcc_unreachable ();
-
-  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
-  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
-
-  tree ptr = fold_convert (ptrtype, gs_info->base);
-  if (!is_gimple_min_invariant (ptr))
-    {
-      gimple_seq seq;
-      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
-      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
-      gcc_assert (!new_bb);
+      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
+			    TYPE_VECTOR_SUBPARTS (idxtype)));
+      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
+      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
+      gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+      op = var;
     }
 
-  tree scale = build_int_cst (scaletype, gs_info->scale);
-
-  tree vec_oprnd0 = NULL_TREE;
-  tree vec_mask = NULL_TREE;
   tree src_op = NULL_TREE;
   tree mask_op = NULL_TREE;
-  tree prev_res = NULL_TREE;
-
-  if (!mask)
-    {
-      src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
-      mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
-    }
-
-  auto_vec<tree> vec_oprnds0;
-  auto_vec<tree> vec_masks;
-  vect_get_vec_defs_for_operand (vinfo, stmt_info,
-				 modifier == WIDEN ? ncopies / 2 : ncopies,
-				 gs_info->offset, &vec_oprnds0);
   if (mask)
-    vect_get_vec_defs_for_operand (vinfo, stmt_info,
-				   modifier == NARROW ? ncopies / 2 : ncopies,
-				   mask, &vec_masks, masktype);
-  for (int j = 0; j < ncopies; ++j)
     {
-      tree op, var;
-      if (modifier == WIDEN && (j & 1))
-	op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
-				   perm_mask, stmt_info, gsi);
-      else
-	op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
-
-      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
-	{
-	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
-				TYPE_VECTOR_SUBPARTS (idxtype)));
-	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
-	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
-	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
-	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-	  op = var;
-	}
-
-      if (mask)
+      if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
 	{
-	  if (mask_perm_mask && (j & 1))
-	    mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
-					    mask_perm_mask, stmt_info, gsi);
-	  else
-	    {
-	      if (modifier == NARROW)
-		{
-		  if ((j & 1) == 0)
-		    vec_mask = vec_masks[j / 2];
-		}
-	      else
-		vec_mask = vec_masks[j];
-
-	      mask_op = vec_mask;
-	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
-		{
-		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
-		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
-		  gcc_assert (known_eq (sub1, sub2));
-		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
-		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
-		  gassign *new_stmt
-		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
-		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-		  mask_op = var;
-		}
-	    }
-	  if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
-	    {
-	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
-	      gassign *new_stmt
-		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
-						    : VEC_UNPACK_LO_EXPR,
-				       mask_op);
-	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-	      mask_op = var;
-	    }
-	  src_op = mask_op;
-	}
-
-      tree mask_arg = mask_op;
-      if (masktype != real_masktype)
-	{
-	  tree utype, optype = TREE_TYPE (mask_op);
-	  if (VECTOR_TYPE_P (real_masktype)
-	      || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
-	    utype = real_masktype;
+	  tree utype, optype = TREE_TYPE (mask);
+	  if (VECTOR_TYPE_P (masktype)
+	      || TYPE_MODE (masktype) == TYPE_MODE (optype))
+	    utype = masktype;
 	  else
 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
-	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
+	  tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
 	  gassign *new_stmt
-	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
+	      = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 	  mask_arg = var;
-	  if (!useless_type_conversion_p (real_masktype, utype))
+	  if (!useless_type_conversion_p (masktype, utype))
 	    {
 	      gcc_assert (TYPE_PRECISION (utype)
-			  <= TYPE_PRECISION (real_masktype));
-	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
+			  <= TYPE_PRECISION (masktype));
+	      var = vect_get_new_ssa_name (masktype, vect_scalar_var);
 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 	      mask_arg = var;
 	    }
 	  src_op = build_zero_cst (srctype);
-	}
-      gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
-					    mask_arg, scale);
-
-      if (!useless_type_conversion_p (vectype, rettype))
-	{
-	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
-				TYPE_VECTOR_SUBPARTS (rettype)));
-	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
-	  gimple_call_set_lhs (new_stmt, op);
-	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-	  var = make_ssa_name (vec_dest);
-	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
-	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
-	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	  mask_op = mask_arg;
 	}
       else
 	{
-	  var = make_ssa_name (vec_dest, new_stmt);
-	  gimple_call_set_lhs (new_stmt, var);
-	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	  src_op = mask;
+	  mask_op = mask;
 	}
+    }
+  else
+    {
+      src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
+      mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
+    }
 
-      if (modifier == NARROW)
-	{
-	  if ((j & 1) == 0)
-	    {
-	      prev_res = var;
-	      continue;
-	    }
-	  var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
-				      stmt_info, gsi);
-	  new_stmt = SSA_NAME_DEF_STMT (var);
-	}
+  tree scale = build_int_cst (scaletype, gs_info->scale);
+  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
+					mask_op, scale);
 
-      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+  if (!useless_type_conversion_p (vectype, rettype))
+    {
+      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
+			    TYPE_VECTOR_SUBPARTS (rettype)));
+      op = vect_get_new_ssa_name (rettype, vect_simple_var);
+      gimple_call_set_lhs (new_stmt, op);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
+      new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
     }
-  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
+  return new_stmt;
 }
 
 /* Build a scatter store call while vectorizing STMT_INFO.  Insert new
@@ -10112,13 +9943,6 @@ vectorizable_load (vec_info *vinfo,
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
   ensure_base_align (dr_info);
 
-  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
-    {
-      vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
-				    mask, cost_vec);
-      return true;
-    }
-
   if (memory_access_type == VMAT_INVARIANT)
     {
       gcc_assert (!grouped_load && !mask && !bb_vinfo);
@@ -11016,6 +10840,134 @@ vectorizable_load (vec_info *vinfo,
 		  new_stmt = call;
 		  data_ref = NULL_TREE;
 		}
+	      else if (gs_info.decl)
+		{
+		  /* The builtin decls path for gather is legacy, x86 only.  */
+		  gcc_assert (!final_len && nunits.is_constant ());
+		  if (costing_p)
+		    {
+		      unsigned int cnunits = vect_nunits_for_cost (vectype);
+		      inside_cost
+			= record_stmt_cost (cost_vec, cnunits, scalar_load,
+					    stmt_info, 0, vect_body);
+		      continue;
+		    }
+		  poly_uint64 offset_nunits
+		    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+		  if (known_eq (nunits, offset_nunits))
+		    {
+		      new_stmt = vect_build_one_gather_load_call
+				   (vinfo, stmt_info, gsi, &gs_info,
+				    dataref_ptr, vec_offsets[vec_num * j + i],
+				    final_mask);
+		      data_ref = NULL_TREE;
+		    }
+		  else if (known_eq (nunits, offset_nunits * 2))
+		    {
+		      /* We have an offset vector with half the number of
+			 lanes but the builtins will produce full vectype
+			 data with just the lower lanes filled.  */
+		      new_stmt = vect_build_one_gather_load_call
+			  (vinfo, stmt_info, gsi, &gs_info,
+			   dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
+			   final_mask);
+		      tree low = make_ssa_name (vectype);
+		      gimple_set_lhs (new_stmt, low);
+		      vect_finish_stmt_generation (vinfo, stmt_info,
+						   new_stmt, gsi);
+
+		      /* Move the upper half of final_mask to its low half.  */
+		      if (final_mask
+			  && !SCALAR_INT_MODE_P
+				(TYPE_MODE (TREE_TYPE (final_mask))))
+			{
+			  int count = nunits.to_constant ();
+			  vec_perm_builder sel (count, count, 1);
+			  sel.quick_grow (count);
+			  for (int i = 0; i < count; ++i)
+			    sel[i] = i | (count / 2);
+			  vec_perm_indices indices (sel, 2, count);
+			  tree perm_mask = vect_gen_perm_mask_checked
+					     (TREE_TYPE (final_mask), indices);
+			  new_stmt = gimple_build_assign (NULL_TREE,
+							  VEC_PERM_EXPR,
+							  final_mask,
+							  final_mask,
+							  perm_mask);
+			  final_mask = make_ssa_name (TREE_TYPE (final_mask));
+			  gimple_set_lhs (new_stmt, final_mask);
+			  vect_finish_stmt_generation (vinfo, stmt_info,
+						       new_stmt, gsi);
+			}
+		      else if (final_mask)
+			{
+			  new_stmt = gimple_build_assign (NULL_TREE,
+							  VEC_UNPACK_HI_EXPR,
+							  final_mask);
+			  final_mask = make_ssa_name
+			    (truth_type_for (gs_info.offset_vectype));
+			  gimple_set_lhs (new_stmt, final_mask);
+			  vect_finish_stmt_generation (vinfo, stmt_info,
+						       new_stmt, gsi);
+			}
+
+		      new_stmt = vect_build_one_gather_load_call
+				   (vinfo, stmt_info, gsi, &gs_info,
+				    dataref_ptr,
+				    vec_offsets[2 * vec_num * j + 2 * i + 1],
+				    final_mask);
+		      tree high = make_ssa_name (vectype);
+		      gimple_set_lhs (new_stmt, high);
+		      vect_finish_stmt_generation (vinfo, stmt_info,
+						   new_stmt, gsi);
+
+		      /* Compose low + high.  */
+		      int count = nunits.to_constant ();
+		      vec_perm_builder sel (count, count, 1);
+		      sel.quick_grow (count);
+		      for (int i = 0; i < count; ++i)
+			sel[i] = i < count / 2 ? i : i + count / 2;
+		      vec_perm_indices indices (sel, 2, count);
+		      tree perm_mask
+			= vect_gen_perm_mask_checked (vectype, indices);
+		      new_stmt = gimple_build_assign (NULL_TREE,
+						      VEC_PERM_EXPR,
+						      low, high, perm_mask);
+		      data_ref = NULL_TREE;
+		    }
+		  else if (known_eq (nunits * 2, offset_nunits))
+		    {
+		      /* We have an offset vector with double the number of
+			 lanes.  Select the low/high part accordingly.  */
+		      vec_offset = vec_offsets[(vec_num * j + i) / 2];
+		      if ((vec_num * j + i) & 1)
+			{
+			  int count = offset_nunits.to_constant ();
+			  vec_perm_builder sel (count, count, 1);
+			  sel.quick_grow (count);
+			  for (int i = 0; i < count; ++i)
+			    sel[i] = i | (count / 2);
+			  vec_perm_indices indices (sel, 2, count);
+			  tree perm_mask = vect_gen_perm_mask_checked
+					     (TREE_TYPE (vec_offset), indices);
+			  new_stmt = gimple_build_assign (NULL_TREE,
+							  VEC_PERM_EXPR,
+							  vec_offset,
+							  vec_offset,
+							  perm_mask);
+			  vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
+			  gimple_set_lhs (new_stmt, vec_offset);
+			  vect_finish_stmt_generation (vinfo, stmt_info,
+						       new_stmt, gsi);
+			}
+		      new_stmt = vect_build_one_gather_load_call
+				   (vinfo, stmt_info, gsi, &gs_info,
+				    dataref_ptr, vec_offset, final_mask);
+		      data_ref = NULL_TREE;
+		    }
+		  else
+		    gcc_unreachable ();
+		}
 	      else
 		{
 		  /* Emulated gather-scatter.  */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-19 12:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-19 12:28 [gcc r14-4744] Refactor x86 vectorized gather path Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).