public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V4] SCCVN: Add LEN_MASK_STORE and fix LEN_STORE
@ 2023-06-27  6:47 juzhe.zhong
  2023-06-27  7:33 ` Richard Biener
  0 siblings, 1 reply; 11+ messages in thread
From: juzhe.zhong @ 2023-06-27  6:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, rguenther, pan2.li, Ju-Zhe Zhong

From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

Hi, Richi.

I tried to understand your last email and to refactor the do-while loop using VECTOR_CST_NELTS.

This patch works fine for LEN_MASK_STORE, and the compiler can now CSE the redundant store.
I have added a testcase to this patch to test VN for LEN_MASK_STORE.

I am not sure whether I am on the same page with you.

Feel free to correct me. Thanks.

gcc/ChangeLog:

        * tree-ssa-sccvn.cc (vn_reference_lookup_3): Add LEN_MASK_STORE and fix LEN_STORE.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c: New test.

---
 .../rvv/autovec/partial/len_maskstore_vn-1.c  | 30 +++++++++++++++++++
 gcc/tree-ssa-sccvn.cc                         | 24 +++++++++++----
 2 files changed, 49 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c
new file mode 100644
index 00000000000..0b2d03693dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv_zvl256b -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax -O3 -fdump-tree-fre5" } */
+
+void __attribute__((noinline,noclone))
+foo (int *out, int *res)
+{
+  int mask[] = { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+  int i;
+  for (i = 0; i < 16; ++i)
+    {
+      if (mask[i])
+        out[i] = i;
+    }
+  int o0 = out[0];
+  int o7 = out[7];
+  int o14 = out[14];
+  int o15 = out[15];
+  res[0] = o0;
+  res[2] = o7;
+  res[4] = o14;
+  res[6] = o15;
+}
+
+/* Vectorization produces .LEN_MASK_STORE, unrolling will unroll the two
+   vector iterations.  FRE5 after that should be able to CSE
+   out[7] and out[15], but leave out[0] and out[14] alone.  */
+/* { dg-final { scan-tree-dump " = o0_\[0-9\]+;" "fre5" } } */
+/* { dg-final { scan-tree-dump " = 7;" "fre5" } } */
+/* { dg-final { scan-tree-dump " = o14_\[0-9\]+;" "fre5" } } */
+/* { dg-final { scan-tree-dump " = 15;" "fre5" } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 11061a374a2..242d82d6274 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -3304,6 +3304,16 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
 	  if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
 	    return (void *)-1;
 	  break;
+	case IFN_LEN_MASK_STORE:
+	  len = gimple_call_arg (call, 2);
+	  bias = gimple_call_arg (call, 5);
+	  if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
+	    return (void *)-1;
+	  mask = gimple_call_arg (call, internal_fn_mask_index (fn));
+	  mask = vn_valueize (mask);
+	  if (TREE_CODE (mask) != VECTOR_CST)
+	    return (void *)-1;
+	  break;
 	default:
 	  return (void *)-1;
 	}
@@ -3344,11 +3354,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
 	      tree vectype = TREE_TYPE (def_rhs);
 	      unsigned HOST_WIDE_INT elsz
 		= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+	      /* Initialize actual_len to UINT_MAX so that mask_idx < actual_len
+		 is always true for MASK_STORE.  */
+	      unsigned actual_len = UINT_MAX;
+	      if (len)
+		actual_len = tree_to_uhwi (len) + tree_to_shwi (bias);
+	      unsigned nunits
+		= MIN (actual_len, VECTOR_CST_NELTS (mask).coeffs[0]);
 	      if (mask)
 		{
 		  HOST_WIDE_INT start = 0, length = 0;
-		  unsigned mask_idx = 0;
-		  do
+		  for (unsigned mask_idx = 0; mask_idx < nunits; mask_idx++)
 		    {
 		      if (integer_zerop (VECTOR_CST_ELT (mask, mask_idx)))
 			{
@@ -3371,9 +3387,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
 			}
 		      else
 			length += elsz;
-		      mask_idx++;
 		    }
-		  while (known_lt (mask_idx, TYPE_VECTOR_SUBPARTS (vectype)));
 		  if (length != 0)
 		    {
 		      pd.rhs_off = start;
@@ -3389,7 +3403,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
 		{
 		  pd.offset = offset2i;
 		  pd.size = (tree_to_uhwi (len)
-			     + -tree_to_shwi (bias)) * BITS_PER_UNIT;
+			     + tree_to_shwi (bias)) * BITS_PER_UNIT;
 		  if (BYTES_BIG_ENDIAN)
 		    pd.rhs_off = pd.size - tree_to_uhwi (TYPE_SIZE (vectype));
 		  else
-- 
2.36.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-06-27  8:59 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-27  6:47 [PATCH V4] SCCVN: Add LEN_MASK_STORE and fix LEN_STORE juzhe.zhong
2023-06-27  7:33 ` Richard Biener
2023-06-27  7:41   ` juzhe.zhong
2023-06-27  7:47     ` Richard Biener
2023-06-27  8:01       ` juzhe.zhong
2023-06-27  8:28         ` Richard Biener
2023-06-27  8:09       ` juzhe.zhong
2023-06-27  8:34         ` Richard Biener
2023-06-27  8:47           ` juzhe.zhong
2023-06-27  8:56             ` Richard Biener
2023-06-27  8:59               ` juzhe.zhong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).