public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] VECT: Enhance SLP of MASK_LEN_GATHER_LOAD[PR111721]
@ 2023-10-11 12:27 Juzhe-Zhong
  2023-10-11 12:50 ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Juzhe-Zhong @ 2023-10-11 12:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, rguenther, Juzhe-Zhong

This patch fixes the following FAILs in the RISC-V regression tests:

FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects  scan-tree-dump vect "Loop contains only SLP stmts"
FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP stmts"
FAIL: gcc.dg/vect/vect-gather-3.c -flto -ffat-lto-objects  scan-tree-dump vect "Loop contains only SLP stmts"
FAIL: gcc.dg/vect/vect-gather-3.c scan-tree-dump vect "Loop contains only SLP stmts"

The root cause of these FAILs is that GCC's SLP fails on MASK_LEN_GATHER_LOAD.

For RVV, we build MASK_LEN_GATHER_LOAD with a dummy mask (-1) in tree-vect-patterns.cc when the
situation is the same as for GATHER_LOAD (no conditional mask).

So we make MASK_LEN_GATHER_LOAD follow the same flow as GATHER_LOAD if the mask argument is a dummy mask.

gcc/ChangeLog:

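	* tree-vect-slp.cc (vect_get_operand_map): Add MASK_LEN_GATHER_LOAD;
	use arg1_map when its mask argument is the dummy mask (-1).
	(vect_build_slp_tree_1): Handle CFN_MASK_LEN_GATHER_LOAD like the
	other gather loads.
	(vect_build_slp_tree_2): Allow IFN_MASK_LEN_GATHER_LOAD in the assert.
	* tree-vect-stmts.cc (vectorizable_load): Skip the SLP mask child
	lookup and the scalar mask check when the internal function has a
	len index.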

---
 gcc/tree-vect-slp.cc   | 18 ++++++++++++++++--
 gcc/tree-vect-stmts.cc |  4 ++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index fa098f9ff4e..712c04ec278 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -544,6 +544,17 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
 	  case IFN_MASK_GATHER_LOAD:
 	    return arg1_arg4_map;
 
+	  case IFN_MASK_LEN_GATHER_LOAD:
+	    /* In tree-vect-patterns.cc, we will have these 2 situations:
+
+		- Unconditional gather load transforms
+		  into MASK_LEN_GATHER_LOAD with dummy mask which is -1.
+
+		- Conditional gather load transforms
+		  into MASK_LEN_GATHER_LOAD with real conditional mask.*/
+	    return integer_minus_onep (gimple_call_arg (call, 4)) ? arg1_map
+								  : nullptr;
+
 	  case IFN_MASK_STORE:
 	    return arg3_arg2_map;
 
@@ -1077,7 +1088,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 
 	  if (cfn == CFN_MASK_LOAD
 	      || cfn == CFN_GATHER_LOAD
-	      || cfn == CFN_MASK_GATHER_LOAD)
+	      || cfn == CFN_MASK_GATHER_LOAD
+	      || cfn == CFN_MASK_LEN_GATHER_LOAD)
 	    ldst_p = true;
 	  else if (cfn == CFN_MASK_STORE)
 	    {
@@ -1337,6 +1349,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
 	      && rhs_code != CFN_GATHER_LOAD
 	      && rhs_code != CFN_MASK_GATHER_LOAD
+	      && rhs_code != CFN_MASK_LEN_GATHER_LOAD
 	      /* Not grouped loads are handled as externals for BB
 		 vectorization.  For loop vectorization we can handle
 		 splats the same we handle single element interleaving.  */
@@ -1837,7 +1850,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
       if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
 	gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
 		    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-		    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+		    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
+		    || gimple_call_internal_p (stmt, IFN_MASK_LEN_GATHER_LOAD));
       else
 	{
 	  *max_nunits = this_max_nunits;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index cd7c1090d88..263acf5d3cd 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9575,9 +9575,9 @@ vectorizable_load (vec_info *vinfo,
 	return false;
 
       mask_index = internal_fn_mask_index (ifn);
-      if (mask_index >= 0 && slp_node)
+      if (mask_index >= 0 && slp_node && internal_fn_len_index (ifn) < 0)
 	mask_index = vect_slp_child_index_for_operand (call, mask_index);
-      if (mask_index >= 0
+      if (mask_index >= 0 && internal_fn_len_index (ifn) < 0
 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
 				      &mask, NULL, &mask_dt, &mask_vectype))
 	return false;
-- 
2.36.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2023-10-13  4:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-11 12:27 [PATCH] VECT: Enhance SLP of MASK_LEN_GATHER_LOAD[PR111721] Juzhe-Zhong
2023-10-11 12:50 ` Richard Biener
2023-10-11 16:02   ` 钟居哲
2023-10-12  9:44     ` Richard Biener
2023-10-12  9:50       ` juzhe.zhong
2023-10-12  9:55         ` Richard Biener
2023-10-12 10:18           ` juzhe.zhong
2023-10-12 11:12             ` Richard Biener
2023-10-12 10:36           ` juzhe.zhong
2023-10-12 11:13             ` Richard Biener
2023-10-12 10:57           ` juzhe.zhong
2023-10-12 11:14             ` Richard Biener
2023-10-12 11:24               ` juzhe.zhong
2023-10-13  4:33               ` juzhe.zhong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).