public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4745] tree-optimization/111131 - SLP for non-IFN gathers
@ 2023-10-19 12:28 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-10-19 12:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:beab5b95c581452adeb26efd59ae84a61fb3b429

commit r14-4745-gbeab5b95c581452adeb26efd59ae84a61fb3b429
Author: Richard Biener <rguenther@suse.de>
Date:   Thu Oct 19 10:33:01 2023 +0200

    tree-optimization/111131 - SLP for non-IFN gathers
    
    The following implements SLP vectorization support for gathers
    without relying on IFNs being pattern detected (and supported by
    the target).  That includes support for emulated gathers but also
    the legacy x86 builtin path.
    
            PR tree-optimization/111131
            * tree-vect-loop.cc (update_epilogue_loop_vinfo): Make
            sure to update all gather/scatter stmt DRs, not only those
            that eventually got VMAT_GATHER_SCATTER set.
            * tree-vect-slp.cc (_slp_oprnd_info::first_gs_info): Add.
            (vect_get_and_check_slp_defs): Handle gathers/scatters,
            adding the offset as SLP operand and comparing base and scale.
            (vect_build_slp_tree_1): Handle gathers.
            (vect_build_slp_tree_2): Likewise.
    
            * gcc.dg/vect/vect-gather-1.c: Now expected to vectorize
            everywhere.
            * gcc.dg/vect/vect-gather-2.c: Expected to not SLP anywhere.
            Massage the scale case to more reliably produce a different
            one.  Scan for the specific messages.
            * gcc.dg/vect/vect-gather-3.c: Masked gather is also supported
            for AVX2, but not emulated.
            * gcc.dg/vect/vect-gather-4.c: Expected to not SLP anywhere.
            Massage to more properly ensure this.
            * gcc.dg/vect/tsvc/vect-tsvc-s353.c: Expect to vectorize
            everywhere.

Diff:
---
 gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-gather-1.c       |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-gather-2.c       | 13 ++++---
 gcc/testsuite/gcc.dg/vect/vect-gather-3.c       |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-gather-4.c       |  6 ++--
 gcc/tree-vect-loop.cc                           |  6 ++--
 gcc/tree-vect-slp.cc                            | 45 +++++++++++++++++++++++--
 7 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
index 98ba75224710..2c4fa3f59915 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
@@ -44,4 +44,4 @@ int main (int argc, char **argv)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! riscv_v } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
index e3bbf5c0bf8d..5f6640d9ab65 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
@@ -58,4 +58,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
index a1f6ba458a97..4c23b8083332 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
@@ -8,6 +8,7 @@ f1 (int *restrict y, int *restrict x1, int *restrict x2,
 {
   for (int i = 0; i < N; ++i)
     {
+      /* Different base.  */
       y[i * 2] = x1[indices[i * 2]] + 1;
       y[i * 2 + 1] = x2[indices[i * 2 + 1]] + 2;
     }
@@ -18,8 +19,9 @@ f2 (int *restrict y, int *restrict x, int *restrict indices)
 {
   for (int i = 0; i < N; ++i)
     {
-      y[i * 2] = x[indices[i * 2]] + 1;
-      y[i * 2 + 1] = x[indices[i * 2 + 1] * 2] + 2;
+      /* Different scale.  */
+      y[i * 2] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2] * 4) + 1;
+      y[i * 2 + 1] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2 + 1] * 2) + 2;
     }
 }
 
@@ -28,9 +30,12 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
 {
   for (int i = 0; i < N; ++i)
     {
+      /* Different type.  */
       y[i * 2] = x[indices[i * 2]] + 1;
-      y[i * 2 + 1] = x[(unsigned int) indices[i * 2 + 1]] + 2;
+      y[i * 2 + 1] = x[((unsigned int *) indices)[i * 2 + 1]] + 2;
     }
 }
 
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
+/* { dg-final { scan-tree-dump "different gather base" vect { target { ! vect_gather_load_ifn } } } } */
+/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! vect_gather_load_ifn } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
index adfef3bf407f..30ba6789e038 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
@@ -62,4 +62,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { vect_gather_load_ifn && vect_masked_load } } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { { vect_gather_load_ifn || avx2 } && vect_masked_load } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
index ee2e4e4999a8..1ce63e691999 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
@@ -39,10 +39,10 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
       y[i * 2] = (indices[i * 2] < N * 2
 		  ? x[indices[i * 2]] + 1
 		  : 1);
-      y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
-		      ? x[(unsigned int) indices[i * 2 + 1]] + 2
+      y[i * 2 + 1] = (((unsigned int *)indices)[i * 2 + 1] < N * 2
+		      ? x[((unsigned int *) indices)[i * 2 + 1]] + 2
 		      : 2);
     }
 }
 
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ebab1953b9c0..8877ebde2466 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11362,8 +11362,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 	 updated offset we set using ADVANCE.  Instead we have to make sure the
 	 reference in the data references point to the corresponding copy of
 	 the original in the epilogue.  */
-      if (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo))
-	  == VMAT_GATHER_SCATTER)
+      if (STMT_VINFO_GATHER_SCATTER_P (vect_stmt_to_vectorize (stmt_vinfo)))
 	{
 	  DR_REF (dr)
 	    = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
@@ -11372,6 +11371,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 	    = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
 				     &find_in_mapping, &mapping);
 	}
+      else
+	gcc_assert (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo))
+		    != VMAT_GATHER_SCATTER);
       DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
       stmt_vinfo->dr_aux.stmt = stmt_vinfo;
       /* The vector size of the epilogue is smaller than that of the main loop
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d081999a763c..8efff2e912d7 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -283,10 +283,11 @@ typedef struct _slp_oprnd_info
   vec<tree> ops;
   /* Information about the first statement, its vector def-type, type, the
      operand itself in case it's constant, and an indication if it's a pattern
-     stmt.  */
+     stmt and gather/scatter info.  */
   tree first_op_type;
   enum vect_def_type first_dt;
   bool any_pattern;
+  gather_scatter_info first_gs_info;
 } *slp_oprnd_info;
 
 
@@ -609,6 +610,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
   unsigned int i, number_of_oprnds;
   enum vect_def_type dt = vect_uninitialized_def;
   slp_oprnd_info oprnd_info;
+  gather_scatter_info gs_info;
   unsigned int commutative_op = -1U;
   bool first = stmt_num == 0;
 
@@ -660,6 +662,19 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 
       oprnd_info = (*oprnds_info)[i];
 
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	{
+	  gcc_assert (number_of_oprnds == 1);
+	  if (!is_a <loop_vec_info> (vinfo)
+	      || !vect_check_gather_scatter (stmt_info,
+					     as_a <loop_vec_info> (vinfo),
+					     first ? &oprnd_info->first_gs_info
+					     : &gs_info))
+	    return -1;
+
+	  oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
+	}
+
       stmt_vec_info def_stmt_info;
       if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
 	{
@@ -792,6 +807,25 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 	  return 1;
 	}
 
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	{
+	  if (!operand_equal_p (oprnd_info->first_gs_info.base,
+				gs_info.base))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "Build SLP failed: different gather base\n");
+	      return 1;
+	    }
+	  if (oprnd_info->first_gs_info.scale != gs_info.scale)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "Build SLP failed: different gather scale\n");
+	      return 1;
+	    }
+	}
+
       /* Not first stmt of the group, check that the def-stmt/s match
 	 the def-stmt/s of the first stmt.  Allow different definition
 	 types for reduction chains: the first stmt must be a
@@ -1235,6 +1269,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 			|| rhs_code == INDIRECT_REF
 			|| rhs_code == COMPONENT_REF
 			|| rhs_code == MEM_REF)))
+	      || (ldst_p
+		  && (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+		      != STMT_VINFO_GATHER_SCATTER_P (first_stmt_info)))
 	      || first_stmt_ldst_p != ldst_p
 	      || first_stmt_phi_p != phi_p)
 	    {
@@ -1357,12 +1394,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
 	      && rhs_code != CFN_GATHER_LOAD
 	      && rhs_code != CFN_MASK_GATHER_LOAD
+	      && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
 	      /* Not grouped loads are handled as externals for BB
 		 vectorization.  For loop vectorization we can handle
 		 splats the same we handle single element interleaving.  */
 	      && (is_a <bb_vec_info> (vinfo)
-		  || stmt_info != first_stmt_info
-		  || STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
+		  || stmt_info != first_stmt_info))
 	    {
 	      /* Not grouped load.  */
 	      if (dump_enabled_p ())
@@ -1858,6 +1895,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
 		    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
 		    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
 	{
 	  *max_nunits = this_max_nunits;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-19 12:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-19 12:28 [gcc r14-4745] tree-optimization/111131 - SLP for non-IFN gathers Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).