diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index e80be8049e1..b1ed90e629b 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -85,6 +85,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "vec-perm-indices.h"
 #include "asan.h"
 #include "gimple-range.h"
+#include <utility>
 
 /* Nonzero if we are folding constants inside an initializer or a C++
    manifestly-constant-evaluated context; zero otherwise.
@@ -10544,6 +10545,141 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts)
   return true;
 }
 
+/* Helper for fold_vec_perm_vla.  For pattern number PATTERN of selector
+   SEL, determine which input vector (ARG0 or ARG1) the whole pattern
+   selects from, and the step S between successive selected elements.
+   Return the {vector, step} pair, or {NULL_TREE, 0} if this cannot be
+   determined at compile time.  */
+
+static std::pair<tree, int>
+get_vector_for_pattern (tree arg0, tree arg1, const vec_perm_indices &sel,
+			unsigned pattern)
+{
+  unsigned sel_nelts_per_pattern = sel.encoding ().nelts_per_pattern ();
+  gcc_assert (sel_nelts_per_pattern == 3);
+
+  unsigned sel_npatterns = sel.encoding ().npatterns ();
+  poly_uint64 len = sel.length ();
+  poly_uint64 esel;
+
+  if (!multiple_p (len, sel_npatterns, &esel))
+    return std::make_pair (NULL_TREE, 0);
+
+  /* The pattern is a1, a2 = a1 + S, a3 = a1 + 2*S, ...; the step must
+     be a power-of-2 compile-time constant.  */
+  poly_uint64 a1 = sel[pattern + sel_npatterns];
+  poly_uint64 a2 = sel[pattern + 2 * sel_npatterns];
+  poly_uint64 diff = a2 - a1;
+  if (!diff.is_constant ())
+    return std::make_pair (NULL_TREE, 0);
+  int S = diff.to_constant ();
+  if (!pow2p_hwi (S))
+    return std::make_pair (NULL_TREE, 0);
+
+  /* a1 ... ae must all lie in the same input vector.  */
+  poly_uint64 ae = a1 + (esel - 2) * S;
+  uint64_t q1, qe;
+  poly_uint64 r1, re;
+  if (!can_div_trunc_p (a1, len, &q1, &r1)
+      || !can_div_trunc_p (ae, len, &qe, &re)
+      || (q1 != qe))
+    return std::make_pair (NULL_TREE, 0);
+
+  tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
+
+  if (S < 0
+      && !known_eq (S, a1 - sel[pattern])
+      && !known_gt (re, VECTOR_CST_NPATTERNS (arg)))
+    return std::make_pair (NULL_TREE, 0);
+
+  return std::make_pair (arg, S);
+}
+
+/* Attempt to fold permutation of VECTOR_CSTs ARG0 and ARG1 with the
+   (possibly variable-length) selector SEL.  Return the folded
+   VECTOR_CST of TYPE on success, NULL_TREE otherwise.  */
+
+static tree
+fold_vec_perm_vla (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
+{
+  if (TREE_CODE (arg0) != VECTOR_CST
+      || TREE_CODE (arg1) != VECTOR_CST)
+    return NULL_TREE;
+
+  unsigned arg0_npatterns = VECTOR_CST_NPATTERNS (arg0);
+  unsigned arg1_npatterns = VECTOR_CST_NPATTERNS (arg1);
+  unsigned sel_npatterns = sel.encoding ().npatterns ();
+  unsigned sel_nelts_per_pattern = sel.encoding ().nelts_per_pattern ();
+  poly_uint64 nelts = sel.length ();
+
+  if (!pow2p_hwi (arg0_npatterns)
+      || !pow2p_hwi (arg1_npatterns)
+      || !pow2p_hwi (sel_npatterns))
+    return NULL_TREE;
+
+  /* Compute the factor N by which SEL's patterns have to be multiplied
+     so that each pattern of the result selects elements from a single
+     pattern of the chosen input vector.  */
+  unsigned N = 1;
+  if (sel_nelts_per_pattern == 3)
+    for (unsigned i = 0; i < sel_npatterns; i++)
+      {
+	std::pair<tree, int> ret = get_vector_for_pattern (arg0, arg1, sel, i);
+	tree arg = ret.first;
+	if (!arg)
+	  return NULL_TREE;
+
+	int S = ret.second;
+	/* S can be 0 if one of the patterns is a dup but
+	   other is a stepped sequence.  For eg: {0, 0, 0, 1, 0, 2, ...}  */
+	unsigned N_pattern
+	  = (S > 0) ? std::max<unsigned> (S, VECTOR_CST_NPATTERNS (arg)) / S : 1;
+	N = std::max (N, N_pattern);
+      }
+
+  unsigned res_npatterns
+    = std::max (sel_npatterns * N, std::max (arg0_npatterns, arg1_npatterns));
+
+  unsigned res_nelts_per_pattern
+    = std::max (sel_nelts_per_pattern,
+		std::max (VECTOR_CST_NELTS_PER_PATTERN (arg0),
+			  VECTOR_CST_NELTS_PER_PATTERN (arg1)));
+
+  tree_vector_builder out_elts (type, res_npatterns, res_nelts_per_pattern);
+  for (unsigned i = 0; i < res_npatterns * res_nelts_per_pattern; i++)
+    {
+      /* Get pattern corresponding to sel[i] and use that to figure out
+	 the input vector.
+	 For a stepped sequence, a0 may choose different vector,
+	 however a1 ... ae must select from same pattern from same vector.
+	 So if i < sel_npatterns, set pattern_index to index of a0,
+	 and if i >= sel_npatterns, set pattern_index to index of a1.  */
+
+      unsigned pattern_index = i % sel_npatterns;
+      if (i >= sel_npatterns)
+	pattern_index += sel_npatterns;
+
+      uint64_t q;
+      poly_uint64 r;
+      if (!can_div_trunc_p (sel[pattern_index], nelts, &q, &r))
+	return NULL_TREE;
+      tree arg = ((q & 1) == 0) ? arg0 : arg1;
+
+      unsigned HOST_WIDE_INT index;
+      if (sel[i].is_constant ())
+	index = sel[i].to_constant ();
+      else
+	{
+	  poly_uint64 diff = sel[i] - nelts;
+	  if (!diff.is_constant (&index))
+	    return NULL_TREE;
+	}
+      tree elem = vector_cst_elt (arg, index);
+      out_elts.quick_push (elem);
+    }
+  return out_elts.build ();
+}
+
 /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL
    selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
    NULL_TREE otherwise.  */
@@ -10555,15 +10691,20 @@ fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
   unsigned HOST_WIDE_INT nelts;
   bool need_ctor = false;
 
-  if (!sel.length ().is_constant (&nelts))
-    return NULL_TREE;
-  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts)
-	      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts)
-	      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts));
+  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), sel.length ())
+	      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)),
+			   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1))));
+
   if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type)
       || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type))
     return NULL_TREE;
 
+  if (tree res = fold_vec_perm_vla (type, arg0, arg1, sel))
+    return res;
+
+  if (!sel.length ().is_constant (&nelts))
+    return NULL_TREE;
+
   tree *in_elts = XALLOCAVEC (tree, nelts * 2);
   if (!vec_cst_ctor_to_array (arg0, nelts, in_elts)
       || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts))
@@ -16958,6 +17099,199 @@ test_vec_duplicate_folding ()
   ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0));
 }
 
+/* Build a VECTOR_CST of VNx4SI mode with NPATTERNS patterns and
+   NELTS_PER_PATTERN elements per pattern, with the encoded elements
+   taken from ENCODED_ELEMS.  */
+
+static tree
+build_vec_int_cst (unsigned npatterns, unsigned nelts_per_pattern,
+		   int *encoded_elems)
+{
+  machine_mode vmode = VNx4SImode;
+  poly_uint64 nunits = GET_MODE_NUNITS (vmode);
+  tree vectype = build_vector_type (integer_type_node, nunits);
+
+  tree_vector_builder builder (vectype, npatterns, nelts_per_pattern);
+  for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++)
+    builder.quick_push (build_int_cst (integer_type_node, encoded_elems[i]));
+  return builder.build ();
+}
+
+/* Build a vec_perm_indices selector with MASK_NPATTERNS patterns,
+   MASK_NELTS_PER_PATTERN elements per pattern and encoded elements
+   MASK_ELEMS, and use it to permute ARG0 and ARG1.  */
+
+static tree
+fold_vec_perm_vla_mask (tree type, tree arg0, tree arg1,
+			unsigned mask_npatterns,
+			unsigned mask_nelts_per_pattern,
+			poly_uint64 *mask_elems)
+{
+  poly_uint64 len = TYPE_VECTOR_SUBPARTS (type);
+  vec_perm_builder builder (len, mask_npatterns, mask_nelts_per_pattern);
+  for (unsigned i = 0; i < mask_npatterns * mask_nelts_per_pattern; i++)
+    builder.quick_push (mask_elems[i]);
+  vec_perm_indices sel (builder, (arg0 == arg1) ? 1 : 2, len);
+  return fold_vec_perm_vla (type, arg0, arg1, sel);
+}
+
+/* Check that RES is a VECTOR_CST with NPATTERNS patterns and
+   NELTS_PER_PATTERN elements per pattern, and that its encoded
+   elements match RES_ELEMS.  */
+
+static void
+check_vec_perm_vla_result (tree res, int *res_elems,
+			   unsigned npatterns, unsigned nelts_per_pattern)
+{
+  ASSERT_TRUE (TREE_CODE (res) == VECTOR_CST);
+  ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) == npatterns);
+  ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) == nelts_per_pattern);
+
+  for (unsigned i = 0; i < vector_cst_encoded_nelts (res); i++)
+    ASSERT_TRUE (wi::to_wide (VECTOR_CST_ELT (res, i)) == res_elems[i]);
+}
+
+/* Test folding of VEC_PERM_EXPR on variable-length vector constants.  */
+
+static void
+test_vec_perm_vla_folding ()
+{
+  int arg0_elems[] = { 1, 11, 2, 12, 3, 13 };
+  tree arg0 = build_vec_int_cst (2, 3, arg0_elems);
+
+  int arg1_elems[] = { 21, 31, 22, 32, 23, 33 };
+  tree arg1 = build_vec_int_cst (2, 3, arg1_elems);
+
+  if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).is_constant ()
+      || TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)).is_constant ())
+    return;
+
+  /* Case 1: mask is {0, 1, 2, 3, ...}.
+     npatterns = 4, nelts_per_pattern = 1.
+     All elements in mask < 4 + 4x.  */
+  {
+    poly_uint64 mask_elems[] = { 0, 1, 2, 3 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       4, 1, mask_elems);
+    int res_elems[] = { arg0_elems[0], arg0_elems[1],
+			arg0_elems[2], arg0_elems[3] };
+    check_vec_perm_vla_result (res, res_elems, 4, 1);
+  }
+
+  /* Case 2: mask = {0, 4, 1, 5, ...}.
+     npatterns = 4, nelts_per_pattern = 1.
+     Should return NULL, because the result cannot be determined at
+     compile time: len = 4 + 4x, so {4, 5} can select either from arg0
+     or arg1 depending on the runtime length of the vector.  */
+  {
+    poly_uint64 mask_elems[] = { 0, 4, 1, 5 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       4, 1, mask_elems);
+    ASSERT_TRUE (res == NULL_TREE);
+  }
+
+  /* Case 3: mask = {4 + 4x, 5 + 4x, 6 + 4x, 7 + 4x, ...}.
+     npatterns = 4, nelts_per_pattern = 1.
+     All elements in mask >= 4 + 4x.  */
+  {
+    poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+    poly_uint64 mask_elems[] = { len, len + 1, len + 2, len + 3 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       4, 1, mask_elems);
+    int res_elems[] = { arg1_elems[0], arg1_elems[1],
+			arg1_elems[2], arg1_elems[3] };
+    check_vec_perm_vla_result (res, res_elems, 4, 1);
+  }
+
+  /* Case 4: mask = {0, 1, 4 + 4x, 5 + 4x, ...}.
+     npatterns = 4, nelts_per_pattern = 1.
+     res = { arg0[0], arg0[1], arg1[0], arg1[1], ... }  */
+  {
+    poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+    poly_uint64 mask_elems[] = { 0, 1, len, len + 1 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       4, 1, mask_elems);
+    int res_elems[] = { arg0_elems[0], arg0_elems[1],
+			arg1_elems[0], arg1_elems[1] };
+    check_vec_perm_vla_result (res, res_elems, 4, 1);
+  }
+
+  /* Case 5: mask = {0, 1, 2, 3, 4 + 4x, 5 + 4x, 6 + 4x, 7 + 4x, ...}.
+     npatterns = 4, nelts_per_pattern = 2.
+     res = { arg0[0], arg0[1], arg0[2], arg0[3],
+	     arg1[0], arg1[1], arg1[2], arg1[3], ... }  */
+  {
+    poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+    poly_uint64 mask_elems[] = { 0, 1, 2, 3, len, len + 1,
+				 len + 2, len + 3 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       4, 2, mask_elems);
+    int res_elems[] = { arg0_elems[0], arg0_elems[1],
+			arg0_elems[2], arg0_elems[3],
+			arg1_elems[0], arg1_elems[1],
+			arg1_elems[2], arg1_elems[3] };
+    check_vec_perm_vla_result (res, res_elems, 4, 2);
+  }
+
+  /* Case 6: mask = {0, ...}.
+     npatterns = 1, nelts_per_pattern = 1.
+     Test for npatterns (mask) < npatterns (arg0).  */
+  {
+    poly_uint64 mask_elems[] = { 0 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       1, 1, mask_elems);
+    int res_elems[] = { arg0_elems[0] };
+    check_vec_perm_vla_result (res, res_elems, 1, 1);
+  }
+
+  /* Case 7: mask = {0, 1, 2, ...}.
+     npatterns = 1, nelts_per_pattern = 3.
+     Since {0, 1, 2} would select {1, 11, 2}, using SEL's encoding
+     directly would be incorrect.  Re-encode SEL so that each pattern
+     selects elements from the same pattern of arg0.  The pattern is
+     divided into npatterns (arg0) / S = 2 / 1 = 2 distinct patterns.
+     Re-encoded sel: {0, 1, 2, 3, 4, 5, ...}
+     with patterns {0, 2, 4, ...} and {1, 3, 5, ...}.
+     Now each pattern selects elements only from the same pattern
+     of arg0.
+     Expected res: {1, 11, 2, 12, 3, 13, ...}  */
+  {
+    poly_uint64 mask_elems[] = { 0, 1, 2 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       1, 3, mask_elems);
+    int res_elems[] = { arg0_elems[0], arg0_elems[1], arg0_elems[2],
+			arg0_elems[3], arg0_elems[4], arg0_elems[5] };
+    check_vec_perm_vla_result (res, res_elems, 2, 3);
+  }
+
+  /* Case 8: mask = {len, 0, 1, ...}.
+     npatterns = 1, nelts_per_pattern = 3.
+     Test the case where a0 selects a different vector from a1 ... ae.  */
+  {
+    poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+    poly_uint64 mask_elems[] = { len, 0, 1 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       1, 3, mask_elems);
+    int res_elems[] = { arg1_elems[0], arg0_elems[0], arg0_elems[1],
+			arg0_elems[2], arg0_elems[3], arg0_elems[4] };
+    check_vec_perm_vla_result (res, res_elems, 2, 3);
+  }
+
+  /* Case 9: mask = {len, len + 1, len + 2, ...}.
+     npatterns = 1, nelts_per_pattern = 3.
+     All elements select from arg1.
+     Expected res: {21, 31, 22, 32, 23, 33, ...}  */
+  {
+    poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+    poly_uint64 mask_elems[] = { len, len + 1, len + 2 };
+    tree res = fold_vec_perm_vla_mask (TREE_TYPE (arg0), arg0, arg1,
+				       1, 3, mask_elems);
+    int res_elems[] = { arg1_elems[0], arg1_elems[1], arg1_elems[2],
+			arg1_elems[3], arg1_elems[4], arg1_elems[5] };
+    check_vec_perm_vla_result (res, res_elems, 2, 3);
+  }
+}
+
 /* Run all of the selftests within this file.  */
 
 void
@@ -16966,6 +17300,7 @@ fold_const_cc_tests ()
   test_arithmetic_folding ();
   test_vector_folding ();
   test_vec_duplicate_folding ();
+  test_vec_perm_vla_folding ();
 }
 
 } // namespace selftest