public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* PR111648: Fix wrong code-gen due to incorrect VEC_PERM_EXPR folding
@ 2023-10-04 10:16 Prathamesh Kulkarni
  2023-10-09 11:35 ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Prathamesh Kulkarni @ 2023-10-04 10:16 UTC (permalink / raw)
  To: gcc Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 1818 bytes --]

Hi,
The attached patch attempts to fix PR111648.
As mentioned in PR, the issue is when a1 is a multiple of vector
length, we end up creating following encoding in result: { base_elem,
arg[0], arg[1], ... } (assuming S = 1),
where arg is chosen input vector, which is incorrect, since the
encoding originally in arg would be: { arg[0], arg[1], arg[2], ... }

For the test-case mentioned in PR, vectorizer pass creates
VEC_PERM_EXPR<arg0, arg, sel> where:
arg0: { -16, -9, -10, -11 }
arg1: { -12, -5, -6, -7 }
sel = { 3, 4, 5, 6 }

arg0, arg1 and sel are encoded with npatterns = 1 and nelts_per_pattern = 3.
Since a1 = 4 and arg_len = 4, it ended up creating the result with
following encoding:
res = { arg0[3], arg1[0], arg1[1] } // npatterns = 1, nelts_per_pattern = 3
      = { -11, -12, -5 }

So for res[3], it used S = (-5) - (-12) = 7
And hence computed it as -5 + 7 = 2.
instead of selecting arg1[2], ie, -6.

The patch tweaks valid_mask_for_fold_vec_perm_cst_p to punt if a1 is a multiple
of vector length, so a1 ... ae select elements only from stepped part
of the pattern
from input vector and return false for this case.

Since the vectors are VLS, fold_vec_perm_cst then sets:
res_npatterns = res_nelts
res_nelts_per_pattern  = 1
which seems to fix the issue by encoding all the elements.

The patch resulted in Case 4 and Case 5 failing from test_nunits_min_2 because
they used sel = { 0, 0, 1, ... } and {len, 0, 1, ... } respectively,
which used a1 = 0, and thus selected arg1[0].

I removed Case 4 because it was already covered in test_nunits_min_4,
and moved Case 5 to test_nunits_min_4, with sel = { len, 1, 2, ... }
and added a new Case 9 to test for this issue.

Passes bootstrap+test on aarch64-linux-gnu with and without SVE,
and on x86_64-linux-gnu.
Does the patch look OK ?

Thanks,
Prathamesh

[-- Attachment #2: gnu-958-3.txt --]
[-- Type: text/plain, Size: 8013 bytes --]

[PR111648] Fix wrong code-gen due to incorrect VEC_PERM_EXPR folding.

gcc/ChangeLog:
	PR tree-optimization/111648
	* fold-const.cc (valid_mask_for_fold_vec_perm_cst_p): Punt if a1
	is a multiple of vector length.
	(test_nunits_min_2): Remove Case 4 and move Case 5 to ...
	(test_nunits_min_4): ... here and rename case numbers. Also add
	Case 9.

gcc/testsuite/ChangeLog:
	PR tree-optimization/111648
	* gcc.dg/vect/pr111648.c: New test.


diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 4f8561509ff..c5f421d6b76 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -10682,8 +10682,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
 	  return false;
 	}
 
-      /* Ensure that the stepped sequence always selects from the same
-	 input pattern.  */
+      /* Ensure that the stepped sequence always selects from the stepped
+	 part of same input pattern.  */
       unsigned arg_npatterns
 	= ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
 			  : VECTOR_CST_NPATTERNS (arg1);
@@ -10694,6 +10694,20 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
 	    *reason = "step is not multiple of npatterns";
 	  return false;
 	}
+
+      /* If a1 is a multiple of len, it will select base element of input
+	 vector resulting in following encoding:
+	 { base_elem, arg[0], arg[1], ... } where arg is the chosen input
+	 vector. This encoding is not originally present in arg, since it's
+	 defined as:
+	 { arg[0], arg[1], arg[2], ... }.  */
+
+      if (multiple_p (a1, arg_len))
+	{
+	  if (reason)
+	    *reason = "selecting base element of input vector";
+	  return false;
+	}
     }
 
   return true;
@@ -17425,47 +17439,6 @@ test_nunits_min_2 (machine_mode vmode)
 	tree expected_res[] = { ARG0(0), ARG1(0), ARG0(1), ARG1(1) };
 	validate_res (2, 2, res, expected_res);
       }
-
-      /* Case 4: mask = {0, 0, 1, ...} // (1, 3)
-	 Test that the stepped sequence of the pattern selects from
-	 same input pattern. Since input vectors have npatterns = 2,
-	 and step (a2 - a1) = 1, step is not a multiple of npatterns
-	 in input vector. So return NULL_TREE.  */
-      {
-	tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
-	tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
-	poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
-
-	vec_perm_builder builder (len, 1, 3);
-	poly_uint64 mask_elems[] = { 0, 0, 1 };
-	builder_push_elems (builder, mask_elems);
-
-	vec_perm_indices sel (builder, 2, len);
-	const char *reason;
-	tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel,
-				      &reason);
-	ASSERT_TRUE (res == NULL_TREE);
-	ASSERT_TRUE (!strcmp (reason, "step is not multiple of npatterns"));
-      }
-
-      /* Case 5: mask = {len, 0, 1, ...} // (1, 3)
-	 Test that stepped sequence of the pattern selects from arg0.
-	 res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3)  */
-      {
-	tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
-	tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
-	poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
-
-	vec_perm_builder builder (len, 1, 3);
-	poly_uint64 mask_elems[] = { len, 0, 1 };
-	builder_push_elems (builder, mask_elems);
-
-	vec_perm_indices sel (builder, 2, len);
-	tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
-
-	tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
-	validate_res (1, 3, res, expected_res);
-      }
     }
 }
 
@@ -17528,7 +17501,26 @@ test_nunits_min_4 (machine_mode vmode)
 	validate_res (1, 3, res, expected_res);
       }
 
-      /* Case 4:
+      /* Case 4: mask = {len, 1, 2, ...} // (1, 3)
+	 Test that stepped sequence of the pattern selects from arg0.
+	 res = { arg1[0], arg0[1], arg0[2], ... } // (1, 3)  */
+      {
+	tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
+	tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
+	poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+
+	vec_perm_builder builder (len, 1, 3);
+	poly_uint64 mask_elems[] = { len, 1, 2 };
+	builder_push_elems (builder, mask_elems);
+
+	vec_perm_indices sel (builder, 2, len);
+	tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
+
+	tree expected_res[] = { ARG1(0), ARG0(1), ARG0(2) };
+	validate_res (1, 3, res, expected_res);
+      }
+
+      /* Case 5:
 	sel = { len, 0, 2, ... } // (1, 3)
 	This should return NULL because we cross the input vectors.
 	Because,
@@ -17561,7 +17553,7 @@ test_nunits_min_4 (machine_mode vmode)
 	ASSERT_TRUE (!strcmp (reason, "crossed input vectors"));
       }
 
-      /* Case 5: npatterns(arg0) = 4 > npatterns(sel) = 2
+      /* Case 6: npatterns(arg0) = 4 > npatterns(sel) = 2
 	 mask = { 0, len, 1, len + 1, ...} // (2, 2)
 	 res = { arg0[0], arg1[0], arg0[1], arg1[1], ... } // (2, 2)
 
@@ -17583,7 +17575,7 @@ test_nunits_min_4 (machine_mode vmode)
 	validate_res (2, 2, res, expected_res);
       }
 
-      /* Case 6: Test combination in sel, where one pattern is dup and other
+      /* Case 7: Test combination in sel, where one pattern is dup and other
 	 is stepped sequence.
 	 sel = { 0, 0, 0, 1, 0, 2, ... } // (2, 3)
 	 res = { arg0[0], arg0[0], arg0[0],
@@ -17605,7 +17597,7 @@ test_nunits_min_4 (machine_mode vmode)
 	validate_res (2, 3, res, expected_res);
       }
 
-      /* Case 7: PR111048: Check that we set arg_npatterns correctly,
+      /* Case 8: PR111048: Check that we set arg_npatterns correctly,
 	 when arg0, arg1 and sel have different number of patterns.
 	 arg0 is of shape (1, 1)
 	 arg1 is of shape (4, 1)
@@ -17634,6 +17626,51 @@ test_nunits_min_4 (machine_mode vmode)
 	ASSERT_TRUE (res == NULL_TREE);
 	ASSERT_TRUE (!strcmp (reason, "step is not multiple of npatterns"));
       }
+
+      /* Case 9: PR111648 - a1 is multiple of vector length,
+	 which results in incorrect encoding. Verify that we return
+	 NULL for this case.
+	 sel = { base_elem, len, len+1, ... } // (1, 3)
+	 In this case, the single pattern is: { base_elem len, len+1, ...}
+	 Let's assume that base_elem is used for indexing into arg0,
+	 and a1 ... ae chooses elements from arg1.
+	 So res = { arg0[base_elem], arg1[0], arg1[1], ... } // (1, 3)
+	 Which creates an incorrect encoding with S = arg1[1] - arg1[0]
+	 while the original encoding in arg1 is
+	 arg1: { arg1[0], arg1[1], arg1[2], ... }
+	 with S = arg1[2] - arg1[1].
+
+	 As a concrete example, for above PR:
+	 arg0: { -16, -9, -10, -11 }
+	 arg1: { -12, -5, -6, -7 }
+	 sel = { 3, 4, 5, 6 }
+
+	 arg0, arg1 and sel are encoded with npatterns = 1 and nelts_per_pattern = 3.
+	 Since a1 = 4 and arg_len = 4, it ended up creating the result with
+	 following encoding:
+	 res = { arg0[3], arg1[0], arg1[1] } // (1, 3)
+	     = { -11, -12, -5 }
+
+	 So for res[3], it used S = (-5) - (-12) = 7
+	 And hence computed it as -5 + 7 = 2.
+	 instead of arg1[2], ie, -6, which is the correct value.
+	 Ensure that valid_mask_for_fold_vec_perm_cst returns false for this case.  */
+      {
+	tree arg0 = build_vec_cst_rand (vmode, 1, 3);
+	tree arg1 = build_vec_cst_rand (vmode, 1, 3);
+	poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+
+	vec_perm_builder builder (len, 1, 3);
+	poly_uint64 mask_elems[] = { 0, len, len+1 };
+	builder_push_elems (builder, mask_elems);
+
+	vec_perm_indices sel (builder, 2, len);
+	const char *reason;
+	tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, &reason);
+	ASSERT_TRUE (res == NULL_TREE);
+	ASSERT_TRUE (!strcmp (reason,
+			      "selecting base element of input vector"));
+      }
     }
 }
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr111648.c b/gcc/testsuite/gcc.dg/vect/pr111648.c
new file mode 100644
index 00000000000..093e2b02654
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr111648.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int a;
+int *b = &a;
+static int **c = &b;
+static int d;
+short e;
+short f;
+
+_Bool foo ()
+{
+  f = -21;
+  for (; f < -5; f++) {
+    e = f ^ 3;
+    d = *b;
+    **c = e;
+  }
+
+  return d == -6;
+}
+
+/* { dg-final { scan-tree-dump "return 1" "optimized" } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-10-18 19:05 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-04 10:16 PR111648: Fix wrong code-gen due to incorrect VEC_PERM_EXPR folding Prathamesh Kulkarni
2023-10-09 11:35 ` Richard Sandiford
2023-10-11 11:12   ` Prathamesh Kulkarni
2023-10-11 11:27     ` Prathamesh Kulkarni
2023-10-12  9:54       ` Prathamesh Kulkarni
2023-10-16 21:10         ` Richard Sandiford
2023-10-17 18:53           ` Prathamesh Kulkarni
2023-10-18 17:52             ` Richard Sandiford
2023-10-18 19:04               ` Prathamesh Kulkarni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).