diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 4f4ec81c8d4..5e12260211e 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -85,6 +85,9 @@ along with GCC; see the file COPYING3. If not see #include "vec-perm-indices.h" #include "asan.h" #include "gimple-range.h" +#include "tree-pretty-print.h" +#include "gimple-pretty-print.h" +#include "print-tree.h" /* Nonzero if we are folding constants inside an initializer or a C++ manifestly-constant-evaluated context; zero otherwise. @@ -10496,40 +10499,6 @@ fold_mult_zconjz (location_t loc, tree type, tree expr) build_zero_cst (itype)); } - -/* Helper function for fold_vec_perm. Store elements of VECTOR_CST or - CONSTRUCTOR ARG into array ELTS, which has NELTS elements, and return - true if successful. */ - -static bool -vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) -{ - unsigned HOST_WIDE_INT i, nunits; - - if (TREE_CODE (arg) == VECTOR_CST - && VECTOR_CST_NELTS (arg).is_constant (&nunits)) - { - for (i = 0; i < nunits; ++i) - elts[i] = VECTOR_CST_ELT (arg, i); - } - else if (TREE_CODE (arg) == CONSTRUCTOR) - { - constructor_elt *elt; - - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (arg), i, elt) - if (i >= nelts || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE) - return false; - else - elts[i] = elt->value; - } - else - return false; - for (; i < nelts; i++) - elts[i] - = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node); - return true; -} - /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, NULL_TREE otherwise. */ @@ -10537,45 +10506,149 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) tree fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) { - unsigned int i; - unsigned HOST_WIDE_INT nelts; - bool need_ctor = false; + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + poly_uint64 arg1_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)); + + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), + sel.length ())); + gcc_assert (known_eq (arg0_len, arg1_len)); - if (!sel.length ().is_constant (&nelts)) - return NULL_TREE; - gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts)); if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) return NULL_TREE; - tree *in_elts = XALLOCAVEC (tree, nelts * 2); - if (!vec_cst_ctor_to_array (arg0, nelts, in_elts) - || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts)) + unsigned input_npatterns = 0; + unsigned out_npatterns = sel.encoding ().npatterns (); + unsigned out_nelts_per_pattern = sel.encoding ().nelts_per_pattern (); + + /* FIXME: How to reshape fixed length vector_cst, so that + npatterns == vector.length () and nelts_per_pattern == 1 ? + It seems the vector is canonicalized to minimize npatterns. */ + + if (arg0_len.is_constant ()) + { + /* If arg0, arg1 are fixed width vectors, and sel is VLA, + ensure that it is a dup sequence and has same period + as input vector. */ + + if (!sel.length ().is_constant () + && (sel.encoding ().nelts_per_pattern () > 2 + || !known_eq (arg0_len, sel.encoding ().npatterns ()))) + return NULL_TREE; + + input_npatterns = arg0_len.to_constant (); + + if (sel.length ().is_constant ()) + { + out_npatterns = sel.length ().to_constant (); + out_nelts_per_pattern = 1; + } + } + else if (TREE_CODE (arg0) == VECTOR_CST + && TREE_CODE (arg1) == VECTOR_CST) + { + unsigned npatterns = VECTOR_CST_NPATTERNS (arg0); + unsigned input_nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (arg0); + + /* If arg0, arg1 are VLA, then ensure that, + (a) sel also has same length as input vectors. + (b) arg0 and arg1 have same encoding. + (c) sel has same number of patterns as input vectors. + (d) if sel is a stepped sequence, then it has same + encoding as input vectors. */ + + if (!known_eq (arg0_len, sel.length ()) + || npatterns != VECTOR_CST_NPATTERNS (arg1) + || input_nelts_per_pattern != VECTOR_CST_NELTS_PER_PATTERN (arg1) + || npatterns != sel.encoding ().npatterns () + || (sel.encoding ().nelts_per_pattern () > 2 + && sel.encoding ().nelts_per_pattern () != input_nelts_per_pattern)) + return NULL_TREE; + + input_npatterns = npatterns; + } + else return NULL_TREE; - tree_vector_builder out_elts (type, nelts, 1); - for (i = 0; i < nelts; i++) + tree_vector_builder out_elts_builder (type, out_npatterns, + out_nelts_per_pattern); + bool need_ctor = false; + unsigned out_encoded_nelts = out_npatterns * out_nelts_per_pattern; + + for (unsigned i = 0; i < out_encoded_nelts; i++) { - HOST_WIDE_INT index; - if (!sel[i].is_constant (&index)) + HOST_WIDE_INT sel_index; + if (!sel[i].is_constant (&sel_index)) return NULL_TREE; - if (!CONSTANT_CLASS_P (in_elts[index])) - need_ctor = true; - out_elts.quick_push (unshare_expr (in_elts[index])); + + /* Convert sel_index to index of either arg0 or arg1. + For eg: + arg0: {a0, b0, a1, b1, a1 + S, b1 + S, ...} + arg1: {c0, d0, c1, d1, c1 + S, d1 + S, ...} + Both have npatterns == 2, nelts_per_pattern == 3. + Then the combined vector would be: + {a0, b0, c0, d0, a1, b1, c1, d1, a1 + S, b1 + S, c1 + S, d1 + S, ... } + This combined vector will have, + npatterns = 2 * input_npatterns == 4. + sel_index is used to index this above combined vector. + + Since we don't explicitly build the combined vector, we convert + sel_index to corresponding index for either arg0 or arg1. + For eg, if sel_index == 7, + pattern = 7 % 4 == 3. + Since pattern > input_npatterns, the elem will come from: + pattern = 3 - input_npatterns ie, pattern 1 from arg1. + elem_index_in_pattern = 7 / 4 == 1. + So the actual index of the element in arg1 would be: 1 + (1 * 2) == 3. + So, sel_index == 7 corresponds to arg1[3], ie, d1. */ + + unsigned pattern = sel_index % (2 * input_npatterns); + unsigned elem_index_in_pattern = sel_index / (2 * input_npatterns); + tree arg; + if (pattern < input_npatterns) + arg = arg0; + else + { + arg = arg1; + pattern -= input_npatterns; + } + + unsigned elem_index = (elem_index_in_pattern * input_npatterns) + pattern; + tree elem; + if (TREE_CODE (arg) == VECTOR_CST) + { + /* If arg is fixed width vector, and elem_index goes out of range, + then return NULL_TREE. */ + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)).is_constant () + && elem_index > vector_cst_encoded_nelts (arg)) + return NULL_TREE; + elem = vector_cst_elt (arg, elem_index); + } + else + { + gcc_assert (TREE_CODE (arg) == CONSTRUCTOR); + if (elem_index >= CONSTRUCTOR_NELTS (arg)) + return NULL_TREE; + elem = CONSTRUCTOR_ELT (arg, elem_index)->value; + if (VECTOR_TYPE_P (TREE_TYPE (elem))) + return NULL_TREE; + need_ctor = true; + } + + out_elts_builder.quick_push (unshare_expr (elem)); } if (need_ctor) { vec *v; - vec_alloc (v, nelts); - for (i = 0; i < nelts; i++) - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts[i]); + vec_alloc (v, out_encoded_nelts); + + for (unsigned i = 0; i < out_encoded_nelts; i++) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts_builder[i]); return build_constructor (type, v); } - else - return out_elts.build (); + + return out_elts_builder.build (); } /* Try to fold a pointer difference of type TYPE two address expressions of @@ -16912,6 +16985,91 @@ test_vec_duplicate_folding () ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0)); } +static tree +build_vec_int_cst (unsigned npatterns, unsigned nelts_per_pattern, + int *encoded_elems) +{ + scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (integer_type_node); + machine_mode vmode = targetm.vectorize.preferred_simd_mode (int_mode); + poly_uint64 nunits = GET_MODE_NUNITS (vmode); + tree vectype = build_vector_type (integer_type_node, nunits); + + tree_vector_builder builder (vectype, npatterns, nelts_per_pattern); + for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++) + builder.quick_push (build_int_cst (integer_type_node, encoded_elems[i])); + return builder.build (); +} + +static void +vpe_verify_res (tree res, unsigned npatterns, unsigned nelts_per_pattern, + int *encoded_elems) +{ + ASSERT_TRUE (res != NULL_TREE); + ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) == npatterns); + ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) == nelts_per_pattern); + + for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++) + ASSERT_TRUE (wi::to_wide (VECTOR_CST_ELT (res, i)) + == encoded_elems[i]); +} + +static void +test_vec_perm_vla_folding () +{ + /* For all cases + arg0: {1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33, ...}, npatterns == 4, nelts_per_pattern == 3. + arg1: {41, 51, 61, 71, 42, 52, 62, 72, 43, 53, 63, 73 ...}, npatterns == 4, nelts_per_pattern == 3. */ + + int arg0_elems[] = { 1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33 }; + tree arg0 = build_vec_int_cst (4, 3, arg0_elems); + + int arg1_elems[] = { 41, 51, 61, 71, 42, 52, 62, 72, 43, 53, 63, 73 }; + tree arg1 = build_vec_int_cst (4, 3, arg1_elems); + + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).is_constant () + || TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)).is_constant ()) + return; + + /* Case 1: Dup mask sequence. + mask = {0, 9, 3, 11, ...}, npatterns == 4, nelts_per_pattern == 1. + expected result: {1, 21, 31, 32, ...}, npatterns == 4, nelts_per_pattern == 1. */ + { + int mask_elems[] = {0, 9, 3, 12}; + tree mask = build_vec_int_cst (4, 1, mask_elems); + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)).is_constant ()) + return; + tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask); + int res_encoded_elems[] = {1, 12, 31, 42}; + vpe_verify_res (res, 4, 1, res_encoded_elems); + } + + /* Case 2: + mask = {0, 4, 1, 5, 8, 12, 9, 13 ...}, npatterns == 4, nelts_per_pattern == 2. + expected result: {1, 41, 11, 51, 2, 12, 42, 52, ...}, npatterns == 4, nelts_per_pattern == 2. */ + { + int mask_elems[] = {0, 4, 1, 5, 8, 12, 9, 13}; + tree mask = build_vec_int_cst (4, 2, mask_elems); + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)).is_constant ()) + return; + tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask); + int res_encoded_elems[] = {1, 41, 11, 51, 2, 42, 12, 52}; + vpe_verify_res (res, 4, 2, res_encoded_elems); + } + + /* Case 3: Stepped mask sequence. + mask = {0, 4, 1, 5, 8, 12, 9, 13, 16, 20, 17, 21}, npatterns == 4, nelts_per_pattern == 3. + expected result = {1, 41, 11, 51, 2, 42, 12, 52, 3, 43, 13, 53 ...}, npatterns == 4, nelts_per_pattern == 3. */ + { + int mask_elems[] = {0, 4, 1, 5, 8, 12, 9, 13, 16, 20, 17, 21}; + tree mask = build_vec_int_cst (4, 3, mask_elems); + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)).is_constant ()) + return; + tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask); + int res_encoded_elems[] = {1, 41, 11, 51, 2, 42, 12, 52, 3, 43, 13, 53}; + vpe_verify_res (res, 4, 3, res_encoded_elems); + } +} + /* Run all of the selftests within this file. */ void @@ -16920,6 +17078,7 @@ fold_const_cc_tests () test_arithmetic_folding (); test_vector_folding (); test_vec_duplicate_folding (); + test_vec_perm_vla_folding (); } } // namespace selftest