public inbox for gcc-patches@gcc.gnu.org
* Extend fold_vec_perm to fold VEC_PERM_EXPR in VLA manner
@ 2022-08-17 12:39 Prathamesh Kulkarni
  2022-08-29  6:08 ` Prathamesh Kulkarni
  2022-09-05 10:21 ` Richard Sandiford
  0 siblings, 2 replies; 29+ messages in thread
From: Prathamesh Kulkarni @ 2022-08-17 12:39 UTC
  To: gcc Patches, Richard Biener, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 671 bytes --]

Hi,
The attached prototype patch extends fold_vec_perm to fold VEC_PERM_EXPR
in a VLA manner, and currently handles the following cases:
(a) fixed-length arg0, arg1 and fixed-length sel.
(b) fixed-length arg0, arg1 and VLA sel.
(c) VLA arg0, arg1 and VLA sel, with arg0 and arg1 being VECTOR_CSTs.
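
To illustrate case (b) with a made-up example: take fixed-length
arg0 = {1, 2, 3, 4} and arg1 = {5, 6, 7, 8}, and a VLA "dup" selector
sel = {0, 4, 1, 5, ...} encoded with npatterns == 4 and
nelts_per_pattern == 1, so the 4-element block of indices repeats.
sel indexes the concatenation {1, 2, 3, 4, 5, 6, 7, 8}, and the fold
produces the VLA result {1, 5, 2, 6, ...}, i.e. the block {1, 5, 2, 6}
repeating for the whole (runtime) length.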

It seems to work for the VLA tests written in
test_vec_perm_vla_folding (), and I am working through the fallout
observed in regression testing.

Does the approach taken in the patch look like the right direction?
I am not sure if I have got the conversion from "sel_index" to an
index into either arg0 or arg1 entirely correct (a standalone sketch
of that arithmetic follows below).
I would be grateful for suggestions on the patch.
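
To make that "sel_index" conversion concrete, here is a standalone
sketch of the same arithmetic in plain C++ (plain ints instead of
trees; the element values are made up to match the a0/b0/c0/d0
example in the comment in fold_vec_perm, with step S == 1):

#include <cassert>
#include <cstdio>

int
main ()
{
  /* arg0 encodes {a0, b0, a1, b1, a1 + S, b1 + S, ...} and arg1
     encodes {c0, d0, c1, d1, c1 + S, d1 + S, ...}, both with
     npatterns == 2 and nelts_per_pattern == 3 (6 encoded elements).  */
  const unsigned input_npatterns = 2;
  const int arg0[] = { 1, 11, 2, 12, 3, 13 };
  const int arg1[] = { 41, 51, 42, 52, 43, 53 };

  /* The conceptual combined vector interleaves the patterns of arg0
     and arg1 and therefore has 2 * input_npatterns == 4 patterns:
     {a0, b0, c0, d0, a1, b1, c1, d1, ...}.  */
  const unsigned sel_index = 7;
  unsigned pattern = sel_index % (2 * input_npatterns);	/* == 3 */
  unsigned elem_index_in_pattern = sel_index / (2 * input_npatterns); /* == 1 */

  const int *arg = arg0;
  const char *arg_name = "arg0";
  if (pattern >= input_npatterns)
    {
      arg = arg1;
      arg_name = "arg1";
      pattern -= input_npatterns;	/* == 1 */
    }

  /* Index of the selected element within the chosen argument.  This
     sketch indexes the encoded elements directly; the patch instead
     calls vector_cst_elt, which can also extrapolate stepped patterns
     past the encoded prefix.  */
  unsigned elem_index = elem_index_in_pattern * input_npatterns + pattern;
  assert (arg == arg1 && elem_index == 3);
  printf ("sel_index 7 -> %s[%u] == %d (d1)\n", arg_name, elem_index,
	  arg[elem_index]);
  return 0;
}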

Thanks,
Prathamesh

[-- Attachment #2: fold-vec-perm-1.txt --]
[-- Type: text/plain, Size: 11841 bytes --]

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 4f4ec81c8d4..5e12260211e 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -85,6 +85,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "vec-perm-indices.h"
 #include "asan.h"
 #include "gimple-range.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "print-tree.h"
 
 /* Nonzero if we are folding constants inside an initializer or a C++
    manifestly-constant-evaluated context; zero otherwise.
@@ -10496,40 +10499,6 @@ fold_mult_zconjz (location_t loc, tree type, tree expr)
 			  build_zero_cst (itype));
 }
 
-
-/* Helper function for fold_vec_perm.  Store elements of VECTOR_CST or
-   CONSTRUCTOR ARG into array ELTS, which has NELTS elements, and return
-   true if successful.  */
-
-static bool
-vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts)
-{
-  unsigned HOST_WIDE_INT i, nunits;
-
-  if (TREE_CODE (arg) == VECTOR_CST
-      && VECTOR_CST_NELTS (arg).is_constant (&nunits))
-    {
-      for (i = 0; i < nunits; ++i)
-	elts[i] = VECTOR_CST_ELT (arg, i);
-    }
-  else if (TREE_CODE (arg) == CONSTRUCTOR)
-    {
-      constructor_elt *elt;
-
-      FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (arg), i, elt)
-	if (i >= nelts || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
-	  return false;
-	else
-	  elts[i] = elt->value;
-    }
-  else
-    return false;
-  for (; i < nelts; i++)
-    elts[i]
-      = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);
-  return true;
-}
-
 /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL
    selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
    NULL_TREE otherwise.  */
@@ -10537,45 +10506,149 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts)
 tree
 fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
 {
-  unsigned int i;
-  unsigned HOST_WIDE_INT nelts;
-  bool need_ctor = false;
+  poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+  poly_uint64 arg1_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1));
+
+  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type),
+			sel.length ()));
+  gcc_assert (known_eq (arg0_len, arg1_len));
 
-  if (!sel.length ().is_constant (&nelts))
-    return NULL_TREE;
-  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts)
-	      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts)
-	      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts));
   if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type)
       || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type))
     return NULL_TREE;
 
-  tree *in_elts = XALLOCAVEC (tree, nelts * 2);
-  if (!vec_cst_ctor_to_array (arg0, nelts, in_elts)
-      || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts))
+  unsigned input_npatterns = 0;
+  unsigned out_npatterns = sel.encoding ().npatterns ();
+  unsigned out_nelts_per_pattern = sel.encoding ().nelts_per_pattern ();
+
+  /* FIXME: How to reshape a fixed-length VECTOR_CST, so that
+     npatterns == vector.length () and nelts_per_pattern == 1?
+     It seems the vector is canonicalized to minimize npatterns.  */
+
+  if (arg0_len.is_constant ())
+    {
+      /* If arg0, arg1 are fixed-width vectors, and sel is VLA,
+         ensure that it is a dup sequence and has the same period
+	 as the input vectors.  */
+
+      if (!sel.length ().is_constant ()
+	  && (sel.encoding ().nelts_per_pattern () > 2
+	      || !known_eq (arg0_len, sel.encoding ().npatterns ())))
+	return NULL_TREE;
+
+      input_npatterns = arg0_len.to_constant ();
+
+      if (sel.length ().is_constant ())
+	{
+	  out_npatterns = sel.length ().to_constant ();
+	  out_nelts_per_pattern = 1;
+	}
+    }
+  else if (TREE_CODE (arg0) == VECTOR_CST
+	   && TREE_CODE (arg1) == VECTOR_CST)
+    {
+      unsigned npatterns = VECTOR_CST_NPATTERNS (arg0);
+      unsigned input_nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (arg0);
+
+      /* If arg0, arg1 are VLA, then ensure that:
+	 (a) sel has the same length as the input vectors.
+	 (b) arg0 and arg1 have the same encoding.
+	 (c) sel has the same number of patterns as the input vectors.
+	 (d) if sel is a stepped sequence, then it has the same
+	     encoding as the input vectors.  */
+
+      if (!known_eq (arg0_len, sel.length ())
+	  || npatterns != VECTOR_CST_NPATTERNS (arg1)
+	  || input_nelts_per_pattern != VECTOR_CST_NELTS_PER_PATTERN (arg1)
+	  || npatterns != sel.encoding ().npatterns ()
+	  || (sel.encoding ().nelts_per_pattern () > 2
+	      && sel.encoding ().nelts_per_pattern () != input_nelts_per_pattern))
+	return NULL_TREE;
+
+      input_npatterns = npatterns;
+    }
+  else
     return NULL_TREE;
 
-  tree_vector_builder out_elts (type, nelts, 1);
-  for (i = 0; i < nelts; i++)
+  tree_vector_builder out_elts_builder (type, out_npatterns,
+					out_nelts_per_pattern);
+  bool need_ctor = false;
+  unsigned out_encoded_nelts = out_npatterns * out_nelts_per_pattern;
+
+  for (unsigned i = 0; i < out_encoded_nelts; i++)
     {
-      HOST_WIDE_INT index;
-      if (!sel[i].is_constant (&index))
+      HOST_WIDE_INT sel_index;
+      if (!sel[i].is_constant (&sel_index))
 	return NULL_TREE;
-      if (!CONSTANT_CLASS_P (in_elts[index]))
-	need_ctor = true;
-      out_elts.quick_push (unshare_expr (in_elts[index]));
+
+      /* Convert sel_index to an index into either arg0 or arg1.
+	 For example:
+	 arg0: {a0, b0, a1, b1, a1 + S, b1 + S, ...}
+	 arg1: {c0, d0, c1, d1, c1 + S, d1 + S, ...}
+	 Both have npatterns == 2, nelts_per_pattern == 3.
+	 Then the combined vector would be:
+	 {a0, b0, c0, d0, a1, b1, c1, d1, a1 + S, b1 + S, c1 + S, d1 + S, ... }
+	 This combined vector will have
+	 npatterns = 2 * input_npatterns == 4.
+	 sel_index is used to index this combined vector.
+
+	 Since we don't explicitly build the combined vector, we convert
+	 sel_index to the corresponding index into either arg0 or arg1.
+	 For example, if sel_index == 7,
+	 pattern = 7 % 4 == 3.
+	 Since pattern >= input_npatterns, the elem will come from:
+	 pattern = 3 - input_npatterns, i.e., pattern 1 of arg1.
+	 elem_index_in_pattern = 7 / 4 == 1.
+	 So the actual index of the element in arg1 would be: 1 + (1 * 2) == 3.
+	 So, sel_index == 7 corresponds to arg1[3], i.e., d1.  */
+
+      unsigned pattern = sel_index % (2 * input_npatterns);
+      unsigned elem_index_in_pattern = sel_index / (2 * input_npatterns);
+      tree arg;
+      if (pattern < input_npatterns)
+	arg = arg0;
+      else
+	{
+	  arg = arg1;
+	  pattern -= input_npatterns;
+	}
+
+      unsigned elem_index = (elem_index_in_pattern * input_npatterns) + pattern;
+      tree elem;
+      if (TREE_CODE (arg) == VECTOR_CST)
+	{
+	  /* If arg is a fixed-width vector and elem_index lies outside
+	     the encoded elements, punt.  */
+	  if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)).is_constant ()
+	      && elem_index >= vector_cst_encoded_nelts (arg))
+	    return NULL_TREE;
+	  elem = vector_cst_elt (arg, elem_index);
+	}
+      else
+	{
+	  gcc_assert (TREE_CODE (arg) == CONSTRUCTOR);
+	  if (elem_index >= CONSTRUCTOR_NELTS (arg))
+	    return NULL_TREE;
+	  elem = CONSTRUCTOR_ELT (arg, elem_index)->value;
+	  if (VECTOR_TYPE_P (TREE_TYPE (elem)))
+	    return NULL_TREE;
+	  need_ctor = true;
+	}
+
+      out_elts_builder.quick_push (unshare_expr (elem));
     }
 
   if (need_ctor)
     {
       vec<constructor_elt, va_gc> *v;
-      vec_alloc (v, nelts);
-      for (i = 0; i < nelts; i++)
-	CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts[i]);
+      vec_alloc (v, out_encoded_nelts);
+
+      for (unsigned i = 0; i < out_encoded_nelts; i++)
+	CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts_builder[i]);
       return build_constructor (type, v);
     }
-  else
-    return out_elts.build ();
+
+  return out_elts_builder.build ();
 }
 
 /* Try to fold a pointer difference of type TYPE two address expressions of
@@ -16912,6 +16985,91 @@ test_vec_duplicate_folding ()
   ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0));
 }
 
+static tree
+build_vec_int_cst (unsigned npatterns, unsigned nelts_per_pattern,
+		   int *encoded_elems)
+{
+  scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (integer_type_node);
+  machine_mode vmode = targetm.vectorize.preferred_simd_mode (int_mode);
+  poly_uint64 nunits = GET_MODE_NUNITS (vmode);
+  tree vectype = build_vector_type (integer_type_node, nunits);
+
+  tree_vector_builder builder (vectype, npatterns, nelts_per_pattern);
+  for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++)
+    builder.quick_push (build_int_cst (integer_type_node, encoded_elems[i]));
+  return builder.build ();
+}
+
+static void
+vpe_verify_res (tree res, unsigned npatterns, unsigned nelts_per_pattern,
+		int *encoded_elems)
+{
+  ASSERT_TRUE (res != NULL_TREE);
+  ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) == npatterns);
+  ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) == nelts_per_pattern);
+
+  for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++)
+    ASSERT_TRUE (wi::to_wide (VECTOR_CST_ELT (res, i))
+			      == encoded_elems[i]);
+}
+
+static void
+test_vec_perm_vla_folding ()
+{
+  /* For all cases
+     arg0: {1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33, ...}, npatterns == 4, nelts_per_pattern == 3.
+     arg1: {41, 51, 61, 71, 42, 52, 62, 72, 43, 53, 63, 73 ...}, npatterns == 4, nelts_per_pattern == 3.  */
+
+  int arg0_elems[] = { 1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33 };
+  tree arg0 = build_vec_int_cst (4, 3, arg0_elems);
+
+  int arg1_elems[] = { 41, 51, 61, 71, 42, 52, 62, 72, 43, 53, 63, 73 };
+  tree arg1 = build_vec_int_cst (4, 3, arg1_elems);
+
+  if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).is_constant ()
+      || TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)).is_constant ())
+    return;
+
+  /* Case 1: Dup mask sequence.
+     mask = {0, 9, 3, 12, ...}, npatterns == 4, nelts_per_pattern == 1.
+     expected result: {1, 12, 31, 42, ...}, npatterns == 4, nelts_per_pattern == 1.  */
+  {
+    int mask_elems[] = {0, 9, 3, 12};
+    tree mask = build_vec_int_cst (4, 1, mask_elems);
+    if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)).is_constant ())
+      return;
+    tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask);
+    int res_encoded_elems[] = {1, 12, 31, 42};
+    vpe_verify_res (res, 4, 1, res_encoded_elems);
+  }
+
+  /* Case 2:
+     mask = {0, 4, 1, 5, 8, 12, 9, 13, ...}, npatterns == 4, nelts_per_pattern == 2.
+     expected result: {1, 41, 11, 51, 2, 42, 12, 52, ...}, npatterns == 4, nelts_per_pattern == 2.  */
+  {
+    int mask_elems[] = {0, 4, 1, 5, 8, 12, 9, 13};
+    tree mask = build_vec_int_cst (4, 2, mask_elems);
+    if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)).is_constant ())
+      return;
+    tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask);
+    int res_encoded_elems[] = {1, 41, 11, 51, 2, 42, 12, 52};
+    vpe_verify_res (res, 4, 2, res_encoded_elems);
+  }
+
+  /* Case 3: Stepped mask sequence.
+     mask = {0, 4, 1, 5, 8, 12, 9, 13, 16, 20, 17, 21}, npatterns == 4, nelts_per_pattern == 3.
+     expected result = {1, 41, 11, 51, 2, 42, 12, 52, 3, 43, 13, 53 ...}, npatterns == 4, nelts_per_pattern == 3.  */
+  {
+    int mask_elems[] = {0, 4, 1, 5, 8, 12, 9, 13, 16, 20, 17, 21};
+    tree mask = build_vec_int_cst (4, 3, mask_elems);
+    if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)).is_constant ())
+      return;
+    tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask);
+    int res_encoded_elems[] = {1, 41, 11, 51, 2, 42, 12, 52, 3, 43, 13, 53};
+    vpe_verify_res (res, 4, 3, res_encoded_elems);
+  }
+}
+
 /* Run all of the selftests within this file.  */
 
 void
@@ -16920,6 +17078,7 @@ fold_const_cc_tests ()
   test_arithmetic_folding ();
   test_vector_folding ();
   test_vec_duplicate_folding ();
+  test_vec_perm_vla_folding ();
 }
 
 } // namespace selftest
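
As a sanity check on the expected encodings in the selftests above,
the following standalone C++ model (again mine, not part of the patch)
decodes the VLA encodings and applies the permutation through the
conceptual combined vector; it reproduces the Case 3 expectation for
all 12 encoded positions:

#include <cassert>
#include <cstdio>
#include <vector>

/* Decode element IDX of a VLA vector whose NP * NPP encoded elements
   are in ENC (stored NP at a time, i.e. element I < NP * NPP is just
   ENC[I]).  Past the encoded prefix each pattern continues as a
   linear series; assumes NPP >= 2 when extrapolating.  */
static int
vla_elt (const std::vector<int> &enc, unsigned np, unsigned npp,
	 unsigned idx)
{
  unsigned p = idx % np, j = idx / np;
  if (j < npp)
    return enc[idx];
  int step = enc[(npp - 1) * np + p] - enc[(npp - 2) * np + p];
  return enc[(npp - 1) * np + p] + (int) (j - (npp - 1)) * step;
}

int
main ()
{
  const unsigned np = 4, npp = 3;
  std::vector<int> arg0 = {1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33};
  std::vector<int> arg1 = {41, 51, 61, 71, 42, 52, 62, 72, 43, 53, 63, 73};
  std::vector<int> mask = {0, 4, 1, 5, 8, 12, 9, 13, 16, 20, 17, 21};
  const int expected[] = {1, 41, 11, 51, 2, 42, 12, 52, 3, 43, 13, 53};

  for (unsigned i = 0; i < np * npp; i++)
    {
      unsigned sel_index = vla_elt (mask, np, npp, i);
      /* The combined vector interleaves the patterns of arg0 and arg1
	 and so has 2 * np patterns.  */
      unsigned pattern = sel_index % (2 * np);
      unsigned eip = sel_index / (2 * np);
      const std::vector<int> &arg = pattern < np ? arg0 : arg1;
      if (pattern >= np)
	pattern -= np;
      assert (vla_elt (arg, np, npp, eip * np + pattern) == expected[i]);
    }
  printf ("Case 3 expected encoding verified.\n");
  return 0;
}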

