public inbox for gcc-patches@gcc.gnu.org
* [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
@ 2023-10-20  7:37 Richard Biener
  2023-10-20  7:51 ` Lehua Ding
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2023-10-20  7:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: lehua.ding

I went a little bit too simple with implementing SLP gather support
for emulated and builtin-based gathers.  The following fixes the
conflict that appears when running into .MASK_LOAD, where we rely
on vect_get_operand_map and the bolted-on STMT_VINFO_GATHER_SCATTER_P
checking wrecks that.  The fix properly integrates gather handling
with vect_get_operand_map, adding another special index referring
to the offset analyzed by vect_check_gather_scatter.
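
To make the encoding concrete, here is a small standalone sketch of
how such a map is laid out and consumed.  This is not code from the
patch; the _sketch names and the printf reporting are made up for
illustration, but the layout (element 0 is the operand count, the
remaining elements are argument positions, negative values are
special) and the meaning of -1/-2/-3 follow the patch below.

#include <cstdio>

/* Mirrors the new off_arg2_map = { 2, -3, 2 } used for a .MASK_LOAD
   that was recognized as a gather; the _sketch suffix marks this as
   an illustration, not GCC code.  */
static const int off_arg2_map_sketch[] = { 2, -3, 2 };

static void
consume_map_sketch (const int *map)
{
  int number_of_oprnds = *map++;   /* { 2, -3, 2 } -> two SLP operands */
  for (int i = 0; i < number_of_oprnds; ++i)
    {
      int opno = map[i];
      if (opno == -3)
        /* Operand is the offset vect_check_gather_scatter analyzed,
           not a direct argument of the stmt.  */
        std::printf ("operand %d: gather/scatter offset\n", i);
      else if (opno < 0)
        /* -1 / -2 select the operands of an embedded comparison.  */
        std::printf ("operand %d: embedded comparison operand %d\n",
                     i, -1 - opno);
      else
        /* Plain case: argument number OPNO of the stmt.  */
        std::printf ("operand %d: stmt argument %d\n", i, opno);
    }
}

int
main ()
{
  consume_map_sketch (off_arg2_map_sketch);
}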

This unbreaks aarch64 (and hopefully riscv); I'll follow up with
more fixes and testsuite coverage for x86, where I think I got
masked gather SLP support wrong.
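
For reference, the kind of input that ends up on this path looks
roughly like the loop below.  This is an illustrative example I'm
sketching here, not the testcase from this thread, and whether it
really becomes a .MASK_LOAD-based gather with an SLP group of two
depends on the target, the types and the flags used.

void
foo (double *out, const double *data, const int *idx,
     const int *cond, int n)
{
  for (int i = 0; i < n; ++i)
    {
      /* After if-conversion the two conditional indirect loads become
         masked (emulated or builtin) gathers feeding a store group of
         two, which is the combination SLP discovery has to handle.  */
      double x = cond[i] ? data[idx[2 * i]] : 0.0;
      double y = cond[i] ? data[idx[2 * i + 1]] : 0.0;
      out[2 * i] = x;
      out[2 * i + 1] = y;
    }
}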

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

	* tree-vect-slp.cc (off_map, off_op0_map, off_arg2_map,
	off_arg3_arg2_map): New.
	(vect_get_operand_map): Get flag whether the stmt was
	recognized as gather or scatter and use the above
	accordingly.
	(vect_get_and_check_slp_defs): Adjust.
	(vect_build_slp_tree_2): Likewise.
---
 gcc/tree-vect-slp.cc | 57 +++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8efff2e912d..c905ed40a94 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -508,6 +508,10 @@ static const int arg2_map[] = { 1, 2 };
 static const int arg1_arg4_map[] = { 2, 1, 4 };
 static const int arg3_arg2_map[] = { 2, 3, 2 };
 static const int op1_op0_map[] = { 2, 1, 0 };
+static const int off_map[] = { 1, -3 };
+static const int off_op0_map[] = { 2, -3, 0 };
+static const int off_arg2_map[] = { 2, -3, 2 };
+static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
 static const int mask_call_maps[6][7] = {
   { 1, 1, },
   { 2, 1, 2, },
@@ -525,11 +529,14 @@ static const int mask_call_maps[6][7] = {
    - for each child node, the index of the argument associated with that node.
      The special index -1 is the first operand of an embedded comparison and
      the special index -2 is the second operand of an embedded comparison.
+     The special index -3 is the offset of a gather as analyzed by
+     vect_check_gather_scatter.
 
    SWAP is as for vect_get_and_check_slp_defs.  */
 
 static const int *
-vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
+vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
+		      unsigned char swap = 0)
 {
   if (auto assign = dyn_cast<const gassign *> (stmt))
     {
@@ -539,6 +546,8 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
       if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison
 	  && swap)
 	return op1_op0_map;
+      if (gather_scatter_p)
+	return gimple_vdef (stmt) ? off_op0_map : off_map;
     }
   gcc_assert (!swap);
   if (auto call = dyn_cast<const gcall *> (stmt))
@@ -547,7 +556,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
 	switch (gimple_call_internal_fn (call))
 	  {
 	  case IFN_MASK_LOAD:
-	    return arg2_map;
+	    return gather_scatter_p ? off_arg2_map : arg2_map;
 
 	  case IFN_GATHER_LOAD:
 	    return arg1_map;
@@ -556,7 +565,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
 	    return arg1_arg4_map;
 
 	  case IFN_MASK_STORE:
-	    return arg3_arg2_map;
+	    return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
 
 	  case IFN_MASK_CALL:
 	    {
@@ -611,6 +620,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
   enum vect_def_type dt = vect_uninitialized_def;
   slp_oprnd_info oprnd_info;
   gather_scatter_info gs_info;
+  bool gs_p = false;
   unsigned int commutative_op = -1U;
   bool first = stmt_num == 0;
 
@@ -620,7 +630,9 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
     return -1;
 
   number_of_oprnds = gimple_num_args (stmt_info->stmt);
-  const int *map = vect_get_operand_map (stmt_info->stmt, swap);
+  const int *map
+    = vect_get_operand_map (stmt_info->stmt,
+			    STMT_VINFO_GATHER_SCATTER_P (stmt_info), swap);
   if (map)
     number_of_oprnds = *map++;
   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
@@ -642,8 +654,22 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
   enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds);
   for (i = 0; i < number_of_oprnds; i++)
     {
+      oprnd_info = (*oprnds_info)[i];
       int opno = map ? map[i] : int (i);
-      if (opno < 0)
+      if (opno == -3)
+	{
+	  gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+	  if (!is_a <loop_vec_info> (vinfo)
+	      || !vect_check_gather_scatter (stmt_info,
+					     as_a <loop_vec_info> (vinfo),
+					     first ? &oprnd_info->first_gs_info
+					     : &gs_info))
+	    return -1;
+
+	  gs_p = true;
+	  oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
+	}
+      else if (opno < 0)
 	oprnd = TREE_OPERAND (gimple_arg (stmt_info->stmt, 0), -1 - opno);
       else
 	{
@@ -660,21 +686,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
       if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
 	oprnd = TREE_OPERAND (oprnd, 0);
 
-      oprnd_info = (*oprnds_info)[i];
-
-      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-	{
-	  gcc_assert (number_of_oprnds == 1);
-	  if (!is_a <loop_vec_info> (vinfo)
-	      || !vect_check_gather_scatter (stmt_info,
-					     as_a <loop_vec_info> (vinfo),
-					     first ? &oprnd_info->first_gs_info
-					     : &gs_info))
-	    return -1;
-
-	  oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
-	}
-
       stmt_vec_info def_stmt_info;
       if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
 	{
@@ -807,7 +818,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 	  return 1;
 	}
 
-      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      if (gs_p)
 	{
 	  if (!operand_equal_p (oprnd_info->first_gs_info.base,
 				gs_info.base))
@@ -1817,7 +1828,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
     return NULL;
 
   nops = gimple_num_args (stmt_info->stmt);
-  if (const int *map = vect_get_operand_map (stmt_info->stmt))
+  if (const int *map = vect_get_operand_map (stmt_info->stmt,
+					     STMT_VINFO_GATHER_SCATTER_P
+					       (stmt_info)))
     nops = map[0];
 
   /* If the SLP node is a PHI (induction or reduction), terminate
-- 
2.35.3


* Re: [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
  2023-10-20  7:37 [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD Richard Biener
@ 2023-10-20  7:51 ` Lehua Ding
  2023-10-20  8:28   ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Lehua Ding @ 2023-10-20  7:51 UTC (permalink / raw)
  To: Richard Biener, gcc-patches; +Cc: Robin Dapp

Hi Richard,

I recompiled the testcase with the fixup patch and still get the same ICE.

-- 
Best,
Lehua (RiVAI)
lehua.ding@rivai.ai


* Re: [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
  2023-10-20  7:51 ` Lehua Ding
@ 2023-10-20  8:28   ` Richard Biener
  2023-10-20  8:37     ` Lehua Ding
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2023-10-20  8:28 UTC (permalink / raw)
  To: Lehua Ding; +Cc: gcc-patches, Robin Dapp

On Fri, 20 Oct 2023, Lehua Ding wrote:

> Hi Richard,
> 
> I recompiled the testcase with the fixup patch and still get the same ICE.

The following fixes it.

From 377e911b1b64298def75ba9d9c46fdd22fe4cf84 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 20 Oct 2023 10:25:31 +0200
Subject: [PATCH] Rewrite more refs for epilogue vectorization
To: gcc-patches@gcc.gnu.org

The following makes sure to rewrite the references of all
gathers/scatters detected by dataref analysis as well as of stmts
classified as VMAT_GATHER_SCATTER.  Maybe we need to rewrite all
refs; the following covers the cases I've run into so far.

	* tree-vect-loop.cc (update_epilogue_loop_vinfo): Rewrite
	both STMT_VINFO_GATHER_SCATTER_P and VMAT_GATHER_SCATTER
	stmt refs.
---
 gcc/tree-vect-loop.cc | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8877ebde246..4a8b0a18800 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11361,8 +11361,12 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
       /* Data references for gather loads and scatter stores do not use the
 	 updated offset we set using ADVANCE.  Instead we have to make sure the
 	 reference in the data references point to the corresponding copy of
-	 the original in the epilogue.  */
-      if (STMT_VINFO_GATHER_SCATTER_P (vect_stmt_to_vectorize (stmt_vinfo)))
+	 the original in the epilogue.  Make sure to update both
+	 gather/scatters recognized by dataref analysis and also other
+	 refs that get_load_store_type classified as VMAT_GATHER_SCATTER.  */
+      auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
+      if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER
+	  || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo))
 	{
 	  DR_REF (dr)
 	    = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
@@ -11371,9 +11375,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 	    = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
 				     &find_in_mapping, &mapping);
 	}
-      else
-	gcc_assert (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo))
-		    != VMAT_GATHER_SCATTER);
       DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
       stmt_vinfo->dr_aux.stmt = stmt_vinfo;
       /* The vector size of the epilogue is smaller than that of the main loop
-- 
2.35.3



* Re: [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
  2023-10-20  8:28   ` Richard Biener
@ 2023-10-20  8:37     ` Lehua Ding
  0 siblings, 0 replies; 5+ messages in thread
From: Lehua Ding @ 2023-10-20  8:37 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Robin Dapp

Hi Richard,

On 2023/10/20 16:28, Richard Biener wrote:
> On Fri, 20 Oct 2023, Lehua Ding wrote:
> 
>> Hi Richard,
>>
>> I recompiled the testcase with the fixup patch and still get the same ICE.
> 
> The following fixes it.

Using this patch did fix it; thank you very much.

-- 
Best,
Lehua (RiVAI)
lehua.ding@rivai.ai



* Re: [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
@ 2023-10-20  9:09 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2023-10-20  9:09 UTC (permalink / raw)
  To: gcc-patches

The following is what I have installed.  Compared to the version
posted earlier it additionally records, per operand, whether the
first stmt used a gather/scatter offset and rejects SLP groups that
mix gather and non-gather defs for the same operand.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

From a3feee109136696c5593d9bc9efd0d145f89a81d Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 20 Oct 2023 09:30:45 +0200
Subject: [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
To: gcc-patches@gcc.gnu.org

I went a little bit too simple with implementing SLP gather support
for emulated and builtin-based gathers.  The following fixes the
conflict that appears when running into .MASK_LOAD, where we rely
on vect_get_operand_map and the bolted-on STMT_VINFO_GATHER_SCATTER_P
checking wrecks that.  The fix properly integrates gather handling
with vect_get_operand_map, adding another special index referring
to the offset analyzed by vect_check_gather_scatter.

This unbreaks aarch64 (and hopefully riscv); I'll follow up with
more fixes and testsuite coverage for x86, where I think I got
masked gather SLP support wrong.

	* tree-vect-slp.cc (off_map, off_op0_map, off_arg2_map,
	off_arg3_arg2_map): New.
	(vect_get_operand_map): Get flag whether the stmt was
	recognized as gather or scatter and use the above
	accordingly.
	(vect_get_and_check_slp_defs): Adjust.
	(vect_build_slp_tree_2): Likewise.
---
 gcc/tree-vect-slp.cc | 74 +++++++++++++++++++++++++++++++-------------
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8efff2e912d..24bf6582f8d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -287,6 +287,7 @@ typedef struct _slp_oprnd_info
   tree first_op_type;
   enum vect_def_type first_dt;
   bool any_pattern;
+  bool first_gs_p;
   gather_scatter_info first_gs_info;
 } *slp_oprnd_info;
 
@@ -309,6 +310,7 @@ vect_create_oprnd_info (int nops, int group_size)
       oprnd_info->first_dt = vect_uninitialized_def;
       oprnd_info->first_op_type = NULL_TREE;
       oprnd_info->any_pattern = false;
+      oprnd_info->first_gs_p = false;
       oprnds_info.quick_push (oprnd_info);
     }
 
@@ -508,6 +510,10 @@ static const int arg2_map[] = { 1, 2 };
 static const int arg1_arg4_map[] = { 2, 1, 4 };
 static const int arg3_arg2_map[] = { 2, 3, 2 };
 static const int op1_op0_map[] = { 2, 1, 0 };
+static const int off_map[] = { 1, -3 };
+static const int off_op0_map[] = { 2, -3, 0 };
+static const int off_arg2_map[] = { 2, -3, 2 };
+static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
 static const int mask_call_maps[6][7] = {
   { 1, 1, },
   { 2, 1, 2, },
@@ -525,11 +531,14 @@ static const int mask_call_maps[6][7] = {
    - for each child node, the index of the argument associated with that node.
      The special index -1 is the first operand of an embedded comparison and
      the special index -2 is the second operand of an embedded comparison.
+     The special index -3 is the offset of a gather as analyzed by
+     vect_check_gather_scatter.
 
    SWAP is as for vect_get_and_check_slp_defs.  */
 
 static const int *
-vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
+vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
+		      unsigned char swap = 0)
 {
   if (auto assign = dyn_cast<const gassign *> (stmt))
     {
@@ -539,6 +548,8 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
       if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison
 	  && swap)
 	return op1_op0_map;
+      if (gather_scatter_p)
+	return gimple_vdef (stmt) ? off_op0_map : off_map;
     }
   gcc_assert (!swap);
   if (auto call = dyn_cast<const gcall *> (stmt))
@@ -547,7 +558,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
 	switch (gimple_call_internal_fn (call))
 	  {
 	  case IFN_MASK_LOAD:
-	    return arg2_map;
+	    return gather_scatter_p ? off_arg2_map : arg2_map;
 
 	  case IFN_GATHER_LOAD:
 	    return arg1_map;
@@ -556,7 +567,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
 	    return arg1_arg4_map;
 
 	  case IFN_MASK_STORE:
-	    return arg3_arg2_map;
+	    return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
 
 	  case IFN_MASK_CALL:
 	    {
@@ -611,6 +622,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
   enum vect_def_type dt = vect_uninitialized_def;
   slp_oprnd_info oprnd_info;
   gather_scatter_info gs_info;
+  unsigned int gs_op = -1u;
   unsigned int commutative_op = -1U;
   bool first = stmt_num == 0;
 
@@ -620,7 +632,9 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
     return -1;
 
   number_of_oprnds = gimple_num_args (stmt_info->stmt);
-  const int *map = vect_get_operand_map (stmt_info->stmt, swap);
+  const int *map
+    = vect_get_operand_map (stmt_info->stmt,
+			    STMT_VINFO_GATHER_SCATTER_P (stmt_info), swap);
   if (map)
     number_of_oprnds = *map++;
   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
@@ -642,8 +656,30 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
   enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds);
   for (i = 0; i < number_of_oprnds; i++)
     {
+      oprnd_info = (*oprnds_info)[i];
       int opno = map ? map[i] : int (i);
-      if (opno < 0)
+      if (opno == -3)
+	{
+	  gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+	  if (!is_a <loop_vec_info> (vinfo)
+	      || !vect_check_gather_scatter (stmt_info,
+					     as_a <loop_vec_info> (vinfo),
+					     first ? &oprnd_info->first_gs_info
+					     : &gs_info))
+	    return -1;
+
+	  if (first)
+	    {
+	      oprnd_info->first_gs_p = true;
+	      oprnd = oprnd_info->first_gs_info.offset;
+	    }
+	  else
+	    {
+	      gs_op = i;
+	      oprnd = gs_info.offset;
+	    }
+	}
+      else if (opno < 0)
 	oprnd = TREE_OPERAND (gimple_arg (stmt_info->stmt, 0), -1 - opno);
       else
 	{
@@ -660,21 +696,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
       if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
 	oprnd = TREE_OPERAND (oprnd, 0);
 
-      oprnd_info = (*oprnds_info)[i];
-
-      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-	{
-	  gcc_assert (number_of_oprnds == 1);
-	  if (!is_a <loop_vec_info> (vinfo)
-	      || !vect_check_gather_scatter (stmt_info,
-					     as_a <loop_vec_info> (vinfo),
-					     first ? &oprnd_info->first_gs_info
-					     : &gs_info))
-	    return -1;
-
-	  oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
-	}
-
       stmt_vec_info def_stmt_info;
       if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
 	{
@@ -807,7 +828,14 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 	  return 1;
 	}
 
-      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      if ((gs_op == i) != oprnd_info->first_gs_p)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "Build SLP failed: mixed gather and non-gather\n");
+	  return 1;
+	}
+      else if (gs_op == i)
 	{
 	  if (!operand_equal_p (oprnd_info->first_gs_info.base,
 				gs_info.base))
@@ -1817,7 +1845,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
     return NULL;
 
   nops = gimple_num_args (stmt_info->stmt);
-  if (const int *map = vect_get_operand_map (stmt_info->stmt))
+  if (const int *map = vect_get_operand_map (stmt_info->stmt,
+					     STMT_VINFO_GATHER_SCATTER_P
+					       (stmt_info)))
     nops = map[0];
 
   /* If the SLP node is a PHI (induction or reduction), terminate
-- 
2.35.3

