diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 9f7beae14e5..2f45979d4ac 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -85,6 +85,9 @@ along with GCC; see the file COPYING3. If not see #include "vec-perm-indices.h" #include "asan.h" #include "gimple-range.h" +#include +#include "tree-pretty-print.h" +#include "print-tree.h" /* Nonzero if we are folding constants inside an initializer or a C++ manifestly-constant-evaluated context; zero otherwise. @@ -10494,38 +10497,55 @@ fold_mult_zconjz (location_t loc, tree type, tree expr) build_zero_cst (itype)); } +/* Check if PATTERN in SEL selects either ARG0 or ARG1, + and return the selected arg, otherwise return NULL_TREE. */ -/* Helper function for fold_vec_perm. Store elements of VECTOR_CST or - CONSTRUCTOR ARG into array ELTS, which has NELTS elements, and return - true if successful. */ - -static bool -vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) +static tree +get_vector_for_pattern (tree arg0, tree arg1, + const vec_perm_indices &sel, unsigned pattern, + unsigned sel_npatterns, int &S) { - unsigned HOST_WIDE_INT i, nunits; + unsigned sel_nelts_per_pattern = sel.encoding ().nelts_per_pattern (); - if (TREE_CODE (arg) == VECTOR_CST - && VECTOR_CST_NELTS (arg).is_constant (&nunits)) + poly_uint64 n1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + poly_uint64 nsel = sel.length (); + poly_uint64 esel; + + if (!multiple_p (nsel, sel_npatterns, &esel)) + return NULL_TREE; + + poly_uint64 a1 = sel[pattern + sel_npatterns]; + S = 0; + if (sel_nelts_per_pattern == 3) { - for (i = 0; i < nunits; ++i) - elts[i] = VECTOR_CST_ELT (arg, i); + poly_uint64 a2 = sel[pattern + 2 * sel_npatterns]; + S = (a2 - a1).to_constant (); + if (S != 0 && !pow2p_hwi (S)) + return NULL_TREE; } - else if (TREE_CODE (arg) == CONSTRUCTOR) + + poly_uint64 ae = a1 + (esel - 2) * S; + uint64_t q1, qe; + poly_uint64 r1, re; + + if (!can_div_trunc_p (a1, n1, &q1, &r1) + || !can_div_trunc_p (ae, n1, &qe, &re) + || (q1 != qe)) + return NULL_TREE; + + tree arg = ((q1 & 1) == 0) ? arg0 : arg1; + + if (S < 0) { - constructor_elt *elt; + poly_uint64 a0 = sel[pattern]; + if (!known_eq (S, a1 - a0)) + return NULL_TREE; - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (arg), i, elt) - if (i >= nelts || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE) - return false; - else - elts[i] = elt->value; + if (!known_gt (re, VECTOR_CST_NPATTERNS (arg))) + return NULL_TREE; } - else - return false; - for (; i < nelts; i++) - elts[i] - = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node); - return true; + + return arg; } /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL @@ -10539,41 +10559,135 @@ fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) unsigned HOST_WIDE_INT nelts; bool need_ctor = false; - if (!sel.length ().is_constant (&nelts)) - return NULL_TREE; - gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts)); + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), sel.length ()) + && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), + TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)))); if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) return NULL_TREE; - tree *in_elts = XALLOCAVEC (tree, nelts * 2); - if (!vec_cst_ctor_to_array (arg0, nelts, in_elts) - || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts)) + unsigned res_npatterns = 0; + unsigned res_nelts_per_pattern = 0; + unsigned sel_npatterns = 0; + tree *vector_for_pattern = NULL; + + if (TREE_CODE (arg0) == VECTOR_CST + && TREE_CODE (arg1) == VECTOR_CST + && !sel.length ().is_constant ()) + { + unsigned arg0_npatterns = VECTOR_CST_NPATTERNS (arg0); + unsigned arg1_npatterns = VECTOR_CST_NPATTERNS (arg1); + sel_npatterns = sel.encoding ().npatterns (); + + if (!pow2p_hwi (arg0_npatterns) + || !pow2p_hwi (arg1_npatterns) + || !pow2p_hwi (sel_npatterns)) + return NULL_TREE; + + unsigned N = 1; + vector_for_pattern = XALLOCAVEC (tree, sel_npatterns); + for (unsigned i = 0; i < sel_npatterns; i++) + { + int S = 0; + tree op = get_vector_for_pattern (arg0, arg1, sel, i, sel_npatterns, S); + if (!op) + return NULL_TREE; + vector_for_pattern[i] = op; + unsigned N_pattern = + (S > 0) ? std::max(S, VECTOR_CST_NPATTERNS (op)) / S : 1; + N = std::max (N, N_pattern); + } + + res_npatterns + = std::max (sel_npatterns * N, std::max (arg0_npatterns, arg1_npatterns)); + + res_nelts_per_pattern + = std::max(sel.encoding ().nelts_per_pattern (), + std::max (VECTOR_CST_NELTS_PER_PATTERN (arg0), + VECTOR_CST_NELTS_PER_PATTERN (arg1))); + } + else if (sel.length ().is_constant (&nelts) + && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).is_constant () + && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).to_constant () == nelts) + { + /* For VLS vectors, treat all vectors with + npatterns = nelts, nelts_per_pattern = 1. */ + res_npatterns = sel_npatterns = nelts; + res_nelts_per_pattern = 1; + vector_for_pattern = XALLOCAVEC (tree, nelts); + for (unsigned i = 0; i < nelts; i++) + { + HOST_WIDE_INT index; + if (!sel[i].is_constant (&index)) + return NULL_TREE; + vector_for_pattern[i] = (index < nelts) ? arg0 : arg1; + } + } + else return NULL_TREE; - tree_vector_builder out_elts (type, nelts, 1); - for (i = 0; i < nelts; i++) + tree_vector_builder out_elts (type, res_npatterns, + res_nelts_per_pattern); + unsigned res_nelts = res_npatterns * res_nelts_per_pattern; + for (unsigned i = 0; i < res_nelts; i++) { - HOST_WIDE_INT index; - if (!sel[i].is_constant (&index)) - return NULL_TREE; - if (!CONSTANT_CLASS_P (in_elts[index])) - need_ctor = true; - out_elts.quick_push (unshare_expr (in_elts[index])); + /* For VLA vectors, i % sel_npatterns would give the original + pattern the element belongs to, which is sufficient to get the arg. + Even if sel_npatterns has been multiplied by N, + they will always come from the same input vector. + For VLS vectors, sel_npatterns == res_nelts == nelts, + so i % sel_npatterns == i since i < nelts */ + + tree arg = vector_for_pattern[i % sel_npatterns]; + unsigned HOST_WIDE_INT index; + + if (arg == arg0) + { + if (!sel[i].is_constant ()) + return NULL_TREE; + index = sel[i].to_constant (); + } + else + { + gcc_assert (arg == arg1); + poly_uint64 n1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + uint64_t q; + poly_uint64 r; + + /* Divide sel[i] by input vector length, to obtain remainder, + which would be the index for either input vector. */ + if (!can_div_trunc_p (sel[i], n1, &q, &r)) + return NULL_TREE; + + if (!r.is_constant (&index)) + return NULL_TREE; + } + + tree elem; + if (TREE_CODE (arg) == CONSTRUCTOR) + { + gcc_assert (index < nelts); + if (index >= vec_safe_length (CONSTRUCTOR_ELTS (arg))) + return NULL_TREE; + elem = CONSTRUCTOR_ELT (arg, index)->value; + if (VECTOR_TYPE_P (TREE_TYPE (elem))) + return NULL_TREE; + need_ctor = true; + } + else + elem = vector_cst_elt (arg, index); + out_elts.quick_push (elem); } if (need_ctor) { vec *v; - vec_alloc (v, nelts); - for (i = 0; i < nelts; i++) + vec_alloc (v, res_nelts); + for (i = 0; i < res_nelts; i++) CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts[i]); return build_constructor (type, v); } - else - return out_elts.build (); + return out_elts.build (); } /* Try to fold a pointer difference of type TYPE two address expressions of @@ -16910,6 +17024,97 @@ test_vec_duplicate_folding () ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0)); } +static tree +build_vec_int_cst (unsigned npatterns, unsigned nelts_per_pattern, + int *encoded_elems) +{ + scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (integer_type_node); + machine_mode vmode = targetm.vectorize.preferred_simd_mode (int_mode); + //machine_mode vmode = VNx4SImode; + poly_uint64 nunits = GET_MODE_NUNITS (vmode); + tree vectype = build_vector_type (integer_type_node, nunits); + + tree_vector_builder builder (vectype, npatterns, nelts_per_pattern); + for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++) + builder.quick_push (build_int_cst (integer_type_node, encoded_elems[i])); + return builder.build (); +} + +static void +test_vec_perm_vla_folding () +{ + int arg0_elems[] = { 1, 11, 2, 12, 3, 13 }; + tree arg0 = build_vec_int_cst (2, 3, arg0_elems); + + int arg1_elems[] = { 21, 31, 22, 32, 23, 33 }; + tree arg1 = build_vec_int_cst (2, 3, arg1_elems); + + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).is_constant () + || TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)).is_constant ()) + return; + + /* Case 1: For mask: {0, 1, 2, ...}, npatterns == 1, nelts_per_pattern == 3, + should select arg0. */ + { + int mask_elems[] = {0, 1, 2}; + tree mask = build_vec_int_cst (1, 3, mask_elems); + tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask); + ASSERT_TRUE (res != NULL_TREE); + ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) == 2); + ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) == 3); + + unsigned res_nelts = vector_cst_encoded_nelts (res); + for (unsigned i = 0; i < res_nelts; i++) + ASSERT_TRUE (operand_equal_p (VECTOR_CST_ELT (res, i), + VECTOR_CST_ELT (arg0, i), 0)); + } + + /* Case 2: For mask: {4, 5, 6, ...}, npatterns == 1, nelts_per_pattern == 3, + should return NULL because for len = 4 + 4x, + if x == 0, we select from arg1 + if x > 0, we select from arg0 + and thus cannot determine result at compile time. */ + { + int mask_elems[] = {4, 5, 6}; + tree mask = build_vec_int_cst (1, 3, mask_elems); + tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask); + gcc_assert (res == NULL_TREE); + } + + /* Case 3: + mask: {0, 0, 0, 1, 0, 2, ...} + npatterns == 2, nelts_per_pattern == 3 + Pattern {0, ...} should select arg0[0], ie, 1. + Pattern {0, 1, 2, ...} should select arg0: {1, 11, 2, ...}, + so res = {1, 1, 1, 11, 1, 2, ...}. */ + { + int mask_elems[] = {0, 0, 0, 1, 0, 2}; + tree mask = build_vec_int_cst (2, 3, mask_elems); + tree res = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (arg0), arg0, arg1, mask); + ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) == 4); + ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) == 3); + + /* Check encoding: {1, 1, 1, 11, 1, 2, 1, 12, 1, 3, 1, 13, ...} */ + int res_encoded_elems[] = {1, 1, 1, 11, 1, 2, 1, 12, 1, 3, 1, 13}; + for (unsigned i = 0; i < vector_cst_encoded_nelts (res); i++) + ASSERT_TRUE (wi::to_wide(VECTOR_CST_ELT (res, i)) == res_encoded_elems[i]); + } + + /* Case 4: + mask: {0, 4 + 4x, 0, 5 + 4x, 0, 6 + 4x, ...} + npatterns == 2, nelts_per_pattern == 3 + Pattern {0, ...} should select arg0[1] + Pattern {4 + 4x, 5 + 4x, 6 + 4x, ...} should select from arg1, since: + a1 = 5 + 4x + ae = (5 + 4x) + ((4 + 4x) / 2 - 2) * 1 + = 5 + 6x + Since a1/4+4x == ae/4+4x == 1, we select arg1[0], arg1[1], arg1[2], ... + res: {1, 21, 1, 31, 1, 22, ... } + FIXME: How to build vector with poly_int elems ? */ + + /* Case 5: S < 0. */ +} + /* Run all of the selftests within this file. */ void @@ -16918,6 +17123,7 @@ fold_const_cc_tests () test_arithmetic_folding (); test_vector_folding (); test_vec_duplicate_folding (); + test_vec_perm_vla_folding (); } } // namespace selftest