public inbox for gcc-patches@gcc.gnu.org
* [PATCH] Add SLP_TREE_MEMORY_ACCESS_TYPE
@ 2024-06-06 14:01 Richard Biener
  2024-06-11  7:32 ` Richard Sandiford
  0 siblings, 1 reply; 2+ messages in thread
From: Richard Biener @ 2024-06-06 14:01 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford

It turns out target costing code looks at STMT_VINFO_MEMORY_ACCESS_TYPE
to identify operations from (emulated) gathers, for example.  This
doesn't work for SLP loads since we do not set
STMT_VINFO_MEMORY_ACCESS_TYPE there, as the vectorization strategy
might differ between different stmt uses.  It seems we got away with
setting it for stores though.
The following adds a memory_access_type field to slp_tree and sets it
from the load and store vectorization code.  The costing code generally
does not record the SLP node (that was done only selectively, for some
corner cases).  The costing is really in need of a big overhaul; the
following just massages the two relevant ops to fix
gcc.target/i386/pr88531-2[bc].c FAILs when switching on SLP for
non-grouped stores.  In particular, the cost hook currently gets either
an SLP node or a stmt_info, but not both.
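
For illustration, with the new field a cost hook can look up the access
type from whichever of the two it gets passed, along the lines of the
following sketch (not part of the patch; the helper name is made up):

  static vect_memory_access_type
  costed_mem_access_type (stmt_vec_info stmt_info, slp_tree node)
  {
    /* Prefer the SLP node's classification when costing SLP operations,
       fall back to the stmt_info one for non-SLP.  */
    if (node)
      return SLP_TREE_MEMORY_ACCESS_TYPE (node);
    return STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info);
  }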

So the following is a hack(?).  Other targets are possibly affected as
well.  I do want to postpone rewriting all of the costing until after
the all-SLP transition.

Any comments?

	* tree-vectorizer.h (_slp_tree::memory_access_type): Add.
	(SLP_TREE_MEMORY_ACCESS_TYPE): New.
	(record_stmt_cost): Add another overload.
	* tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize
	memory_access_type.
	* tree-vect-stmts.cc (vectorizable_store): Set
	SLP_TREE_MEMORY_ACCESS_TYPE.
	(vectorizable_load): Likewise.  Also record the SLP node
	when costing the emulated gather offset decomposition and
	vector composition.
	* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Also
	recognize SLP emulated gather/scatter.
---
 gcc/config/i386/i386.cc |  22 ++++++---
 gcc/tree-vect-slp.cc    |   1 +
 gcc/tree-vect-stmts.cc  |  16 +++++--
 gcc/tree-vectorizer.h   | 102 ++++++++++++++++++++++++----------------
 4 files changed, 91 insertions(+), 50 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4126ab24a79..32ecf31d8d1 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25150,13 +25150,21 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
      (AGU and load ports).  Try to account for this by scaling the
      construction cost by the number of elements involved.  */
   if ((kind == vec_construct || kind == vec_to_scalar)
-      && stmt_info
-      && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
-	  || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
-      && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
-	   && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
-	       != INTEGER_CST))
-	  || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
+      && ((stmt_info
+	   && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
+	       || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
+	   && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
+		&& (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
+		    != INTEGER_CST))
+	       || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
+		   == VMAT_GATHER_SCATTER)))
+	  || (node
+	      && ((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+		  && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
+					    (SLP_TREE_REPRESENTATIVE (node))))
+		      != INTEGER_CST))
+		  || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
+		      == VMAT_GATHER_SCATTER)))))
     {
       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e1e47b786c2..c359e8a0bbc 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -122,6 +122,7 @@ _slp_tree::_slp_tree ()
   SLP_TREE_CODE (this) = ERROR_MARK;
   SLP_TREE_VECTYPE (this) = NULL_TREE;
   SLP_TREE_REPRESENTATIVE (this) = NULL;
+  SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_INVARIANT;
   SLP_TREE_REF_COUNT (this) = 1;
   this->failed = NULL;
   this->max_nunits = 1;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index bd7dd149d11..8049c458136 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8316,6 +8316,8 @@ vectorizable_store (vec_info *vinfo,
   if (costing_p) /* transformation not required.  */
     {
       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
+      if (slp_node)
+	SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
 
       if (loop_vinfo
 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
@@ -8356,7 +8358,10 @@ vectorizable_store (vec_info *vinfo,
 	  && first_stmt_info != stmt_info)
 	return true;
     }
-  gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
+  if (slp_node)
+    gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
+  else
+    gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
 
   /* Transform.  */
 
@@ -10144,6 +10149,8 @@ vectorizable_load (vec_info *vinfo,
 
       if (!slp)
 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
+      else
+	SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
 
       if (loop_vinfo
 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
@@ -10168,6 +10175,9 @@ vectorizable_load (vec_info *vinfo,
   if (!slp)
     gcc_assert (memory_access_type
 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
+  else
+    gcc_assert (memory_access_type
+		== SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
 
   if (dump_enabled_p () && !costing_p)
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -11235,7 +11245,7 @@ vectorizable_load (vec_info *vinfo,
 			 offset add is consumed by the load).  */
 		      inside_cost = record_stmt_cost (cost_vec, const_nunits,
 						      vec_to_scalar, stmt_info,
-						      0, vect_body);
+						      slp_node, 0, vect_body);
 		      /* N scalar loads plus gathering them into a
 			 vector.  */
 		      inside_cost
@@ -11243,7 +11253,7 @@ vectorizable_load (vec_info *vinfo,
 					    stmt_info, 0, vect_body);
 		      inside_cost
 			= record_stmt_cost (cost_vec, 1, vec_construct,
-					    stmt_info, 0, vect_body);
+					    stmt_info, slp_node, 0, vect_body);
 		      continue;
 		    }
 		  unsigned HOST_WIDE_INT const_offset_nunits
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 97ec9c341e7..121ce7ad3cf 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -161,6 +161,46 @@ struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice>
   static bool equal (const value_type &, const compare_type &);
 };
 
+/* Describes how we're going to vectorize an individual load or store,
+   or a group of loads or stores.  */
+enum vect_memory_access_type {
+  /* An access to an invariant address.  This is used only for loads.  */
+  VMAT_INVARIANT,
+
+  /* A simple contiguous access.  */
+  VMAT_CONTIGUOUS,
+
+  /* A contiguous access that goes down in memory rather than up,
+     with no additional permutation.  This is used only for stores
+     of invariants.  */
+  VMAT_CONTIGUOUS_DOWN,
+
+  /* A simple contiguous access in which the elements need to be permuted
+     after loading or before storing.  Only used for loop vectorization;
+     SLP uses separate permutes.  */
+  VMAT_CONTIGUOUS_PERMUTE,
+
+  /* A simple contiguous access in which the elements need to be reversed
+     after loading or before storing.  */
+  VMAT_CONTIGUOUS_REVERSE,
+
+  /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
+  VMAT_LOAD_STORE_LANES,
+
+  /* An access in which each scalar element is loaded or stored
+     individually.  */
+  VMAT_ELEMENTWISE,
+
+  /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
+     SLP accesses.  Each unrolled iteration uses a contiguous load
+     or store for the whole group, but the groups from separate iterations
+     are combined in the same way as for VMAT_ELEMENTWISE.  */
+  VMAT_STRIDED_SLP,
+
+  /* The access uses gather loads or scatter stores.  */
+  VMAT_GATHER_SCATTER
+};
+
 /************************************************************************
   SLP
  ************************************************************************/
@@ -225,6 +265,10 @@ struct _slp_tree {
 
   int vertex;
 
+  /* Classifies how the load or store is going to be implemented
+     for loop vectorization.  */
+  vect_memory_access_type memory_access_type;
+
   /* If not NULL this is a cached failed SLP discovery attempt with
      the lanes that failed during SLP discovery as 'false'.  This is
      a copy of the matches array.  */
@@ -312,6 +356,7 @@ public:
 #define SLP_TREE_REPRESENTATIVE(S)		 (S)->representative
 #define SLP_TREE_LANES(S)			 (S)->lanes
 #define SLP_TREE_CODE(S)			 (S)->code
+#define SLP_TREE_MEMORY_ACCESS_TYPE(S)		 (S)->memory_access_type
 
 enum vect_partial_vector_style {
     vect_partial_vectors_none,
@@ -1193,46 +1238,6 @@ enum vec_load_store_type {
   VLS_STORE_INVARIANT
 };
 
-/* Describes how we're going to vectorize an individual load or store,
-   or a group of loads or stores.  */
-enum vect_memory_access_type {
-  /* An access to an invariant address.  This is used only for loads.  */
-  VMAT_INVARIANT,
-
-  /* A simple contiguous access.  */
-  VMAT_CONTIGUOUS,
-
-  /* A contiguous access that goes down in memory rather than up,
-     with no additional permutation.  This is used only for stores
-     of invariants.  */
-  VMAT_CONTIGUOUS_DOWN,
-
-  /* A simple contiguous access in which the elements need to be permuted
-     after loading or before storing.  Only used for loop vectorization;
-     SLP uses separate permutes.  */
-  VMAT_CONTIGUOUS_PERMUTE,
-
-  /* A simple contiguous access in which the elements need to be reversed
-     after loading or before storing.  */
-  VMAT_CONTIGUOUS_REVERSE,
-
-  /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
-  VMAT_LOAD_STORE_LANES,
-
-  /* An access in which each scalar element is loaded or stored
-     individually.  */
-  VMAT_ELEMENTWISE,
-
-  /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
-     SLP accesses.  Each unrolled iteration uses a contiguous load
-     or store for the whole group, but the groups from separate iterations
-     are combined in the same way as for VMAT_ELEMENTWISE.  */
-  VMAT_STRIDED_SLP,
-
-  /* The access uses gather loads or scatter stores.  */
-  VMAT_GATHER_SCATTER
-};
-
 class dr_vec_info {
 public:
   /* The data reference itself.  */
@@ -2287,6 +2292,23 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
 			   STMT_VINFO_VECTYPE (stmt_info), misalign, where);
 }
 
+/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO and
+   SLP node specified.  */
+
+inline unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+		  slp_tree node,
+		  int misalign, enum vect_cost_model_location where)
+{
+  if (node)
+    return record_stmt_cost (body_cost_vec, count, kind, node,
+			     STMT_VINFO_VECTYPE (stmt_info), misalign, where);
+  else
+    return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
+			     STMT_VINFO_VECTYPE (stmt_info), misalign, where);
+}
+
 extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
 extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
 					 gimple_stmt_iterator *);
-- 
2.35.3


* Re: [PATCH] Add SLP_TREE_MEMORY_ACCESS_TYPE
  2024-06-06 14:01 [PATCH] Add SLP_TREE_MEMORY_ACCESS_TYPE Richard Biener
@ 2024-06-11  7:32 ` Richard Sandiford
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2024-06-11  7:32 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Richard Biener <rguenther@suse.de> writes:
> It turns out target costing code looks at STMT_VINFO_MEMORY_ACCESS_TYPE
> to identify operations from (emulated) gathers, for example.  This
> doesn't work for SLP loads since we do not set
> STMT_VINFO_MEMORY_ACCESS_TYPE there, as the vectorization strategy
> might differ between different stmt uses.  It seems we got away with
> setting it for stores though.
> The following adds a memory_access_type field to slp_tree and sets it
> from the load and store vectorization code.  The costing code generally
> does not record the SLP node (that was done only selectively, for some
> corner cases).  The costing is really in need of a big overhaul; the
> following just massages the two relevant ops to fix
> gcc.target/i386/pr88531-2[bc].c FAILs when switching on SLP for
> non-grouped stores.  In particular, the cost hook currently gets either
> an SLP node or a stmt_info, but not both.

Yeah, agree that costing needs an overhaul, and that it only makes sense
to do that once the final shape of the SLP-only form is clearer.

> So the following is a hack(?).  Other targets are possibly affected as
> well.  I do want to postpone rewriting all of the costing until after
> the all-SLP transition.

Yeah, AArch64 uses STMT_VINFO_MEMORY_ACCESS_TYPE.  I think there it'd
be better to wait for a bit (for the reasons above), rather than
propagate the intermediate stage through the current code.  If this
ends up being the state for GCC 15, we can adjust the costs later
in stage 1.

> Any comments?

LGTM FWIW.

Thanks,
Richard
