From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 111496 invoked by alias); 2 Jan 2018 13:08:15 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 111445 invoked by uid 89); 2 Jan 2018 13:08:14 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-10.4 required=5.0 tests=AWL,BAYES_00,FREEMAIL_FROM,GIT_PATCH_1,GIT_PATCH_2,KAM_ASCII_DIVIDERS,RCVD_IN_DNSWL_NONE,SPF_PASS autolearn=ham version=3.3.2 spammy=perm, ****, OTHER X-HELO: mail-wm0-f50.google.com Received: from mail-wm0-f50.google.com (HELO mail-wm0-f50.google.com) (74.125.82.50) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 02 Jan 2018 13:08:12 +0000 Received: by mail-wm0-f50.google.com with SMTP id i11so60713131wmf.4 for ; Tue, 02 Jan 2018 05:08:11 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:mime-version:in-reply-to:references:from:date :message-id:subject:to; bh=AWfW0AEphZisXlDIBqv0mrBfFf8xVu+zuEPJNGjh5rQ=; b=Q7xEfdd9/FYWb7k+cVrneYXOmUwkEQFDmLJBrYx/xZTySOFfIn/EZuZZGZLA59LP9k HHs51Tory9Qlhb213JMNKG+7y8EXUxXgQVM/+bsVvrqFJzOh7PfaHN9jRna5Eyhxa91E ur/ps+E9Dran9Eq1nNmyzsdSWQDKWTo2qpXL/rpmbkHkPRxWetOvSj1ixvHcO3QMedor rDLNo8And08PhMPBdTdMvnje/59c09Ee0CZOKfiMeTHQu9flaeP6jTj6F4jyqgsxjQ1Y avxyF6Neo+F9Dhm/2wXMMDboF1BS4aIv7YpECg1YWgz0QCVIc3rEGsjWhqvj+xbKf12M T5MQ== X-Gm-Message-State: AKGB3mJidee27L9QOF5pEb2cOxclswnQy97S2bLRDjGsfxWJz4/5XTVN nkhLBhlW+yHpEE2TaMI52QSFxCZD1YxwaTxrx66XGA== X-Google-Smtp-Source: ACJfBovB870AniPyRgGiLrul32tR1XjlXMHNfPPCL7d0vHfuXpFk1ccSafngV/OJbPXzQWYQ/DRPNNnMMVqP3hhqRjw= X-Received: by 10.80.175.161 with SMTP id h30mr62731992edd.292.1514898489912; Tue, 02 Jan 2018 05:08:09 -0800 (PST) MIME-Version: 1.0 Received: by 10.80.167.196 with HTTP; Tue, 2 Jan 2018 05:08:09 -0800 (PST) In-Reply-To: <877etvlc4g.fsf@linaro.org> References: <87indfmrgt.fsf@linaro.org> <877etvlc4g.fsf@linaro.org> From: Richard Biener Date: Tue, 02 Jan 2018 13:08:00 -0000 Message-ID: Subject: Re: [10/13] Rework VEC_PERM_EXPR folding To: GCC Patches , Richard Sandiford Content-Type: text/plain; charset="UTF-8" X-IsSubscribed: yes X-SW-Source: 2018-01/txt/msg00025.txt.bz2 On Sun, Dec 10, 2017 at 12:23 AM, Richard Sandiford wrote: > This patch reworks the VEC_PERM_EXPR folding so that more of it works > for variable-length vectors. E.g. it means that we can now recognise > variable-length permutes that reduce to a single vector, or cases in > which a variable-length permute only needs one input. There should be > no functional change for fixed-length vectors. Ok. Richard. > > 2017-12-09 Richard Sandiford > > gcc/ > * selftest.h (selftest::vec_perm_indices_c_tests): Declare. > * selftest-run-tests.c (selftest::run_tests): Call it. > * vector-builder.h (vector_builder::operator ==): New function. > (vector_builder::operator !=): Likewise. > * vec-perm-indices.h (vec_perm_indices::series_p): Declare. > (vec_perm_indices::all_from_input_p): New function. > * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. > (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. > * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder > instead of reading the VECTOR_CST directly. Detect whether both > vector inputs are the same before constructing the vec_perm_indices, > and update the number of inputs argument accordingly. Use the > utility functions added above. Only construct sel2 if we need to. > > Index: gcc/selftest.h > =================================================================== > *** gcc/selftest.h 2017-12-09 23:06:55.002855594 +0000 > --- gcc/selftest.h 2017-12-09 23:21:51.517599734 +0000 > *************** extern void vec_c_tests (); > *** 201,206 **** > --- 201,207 ---- > extern void wide_int_cc_tests (); > extern void predict_c_tests (); > extern void simplify_rtx_c_tests (); > + extern void vec_perm_indices_c_tests (); > > extern int num_passes; > > Index: gcc/selftest-run-tests.c > =================================================================== > *** gcc/selftest-run-tests.c 2017-12-09 23:06:55.002855594 +0000 > --- gcc/selftest-run-tests.c 2017-12-09 23:21:51.517599734 +0000 > *************** selftest::run_tests () > *** 73,78 **** > --- 73,79 ---- > > /* Mid-level data structures. */ > input_c_tests (); > + vec_perm_indices_c_tests (); > tree_c_tests (); > gimple_c_tests (); > rtl_tests_c_tests (); > Index: gcc/vector-builder.h > =================================================================== > *** gcc/vector-builder.h 2017-12-09 23:06:55.002855594 +0000 > --- gcc/vector-builder.h 2017-12-09 23:21:51.518600090 +0000 > *************** #define GCC_VECTOR_BUILDER_H > *** 97,102 **** > --- 97,105 ---- > bool encoded_full_vector_p () const; > T elt (unsigned int) const; > > + bool operator == (const Derived &) const; > + bool operator != (const Derived &x) const { return !operator == (x); } > + > void finalize (); > > protected: > *************** vector_builder::new_vector ( > *** 168,173 **** > --- 171,196 ---- > this->truncate (0); > } > > + /* Return true if this vector and OTHER have the same elements and > + are encoded in the same way. */ > + > + template > + bool > + vector_builder::operator == (const Derived &other) const > + { > + if (m_full_nelts != other.m_full_nelts > + || m_npatterns != other.m_npatterns > + || m_nelts_per_pattern != other.m_nelts_per_pattern) > + return false; > + > + unsigned int nelts = encoded_nelts (); > + for (unsigned int i = 0; i < nelts; ++i) > + if (!derived ()->equal_p ((*this)[i], other[i])) > + return false; > + > + return true; > + } > + > /* Return the value of vector element I, which might or might not be > encoded explicitly. */ > > Index: gcc/vec-perm-indices.h > =================================================================== > *** gcc/vec-perm-indices.h 2017-12-09 23:20:13.233112018 +0000 > --- gcc/vec-perm-indices.h 2017-12-09 23:21:51.517599734 +0000 > *************** typedef int_vector_builder *** 62,68 **** > --- 62,70 ---- > > element_type clamp (element_type) const; > element_type operator[] (unsigned int i) const; > + bool series_p (unsigned int, unsigned int, element_type, element_type) const; > bool all_in_range_p (element_type, element_type) const; > + bool all_from_input_p (unsigned int) const; > > private: > vec_perm_indices (const vec_perm_indices &); > *************** vec_perm_indices::operator[] (unsigned i > *** 119,122 **** > --- 121,133 ---- > return clamp (m_encoding.elt (i)); > } > > + /* Return true if the permutation vector only selects elements from > + input I. */ > + > + inline bool > + vec_perm_indices::all_from_input_p (unsigned int i) const > + { > + return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input); > + } > + > #endif > Index: gcc/vec-perm-indices.c > =================================================================== > *** gcc/vec-perm-indices.c 2017-12-09 23:20:13.233112018 +0000 > --- gcc/vec-perm-indices.c 2017-12-09 23:21:51.517599734 +0000 > *************** Software Foundation; either version 3, o > *** 28,33 **** > --- 28,34 ---- > #include "rtl.h" > #include "memmodel.h" > #include "emit-rtl.h" > + #include "selftest.h" > > /* Switch to a new permutation vector that selects between NINPUTS vector > inputs that have NELTS_PER_INPUT elements each. Take the elements of the > *************** vec_perm_indices::rotate_inputs (int del > *** 85,90 **** > --- 86,139 ---- > m_encoding[i] = clamp (m_encoding[i] + element_delta); > } > > + /* Return true if index OUT_BASE + I * OUT_STEP selects input > + element IN_BASE + I * IN_STEP. */ > + > + bool > + vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, > + element_type in_base, element_type in_step) const > + { > + /* Check the base value. */ > + if (clamp (m_encoding.elt (out_base)) != clamp (in_base)) > + return false; > + > + unsigned int full_nelts = m_encoding.full_nelts (); > + unsigned int npatterns = m_encoding.npatterns (); > + > + /* Calculate which multiple of OUT_STEP elements we need to get > + back to the same pattern. */ > + unsigned int cycle_length = least_common_multiple (out_step, npatterns); > + > + /* Check the steps. */ > + in_step = clamp (in_step); > + out_base += out_step; > + unsigned int limit = 0; > + for (;;) > + { > + /* Succeed if we've checked all the elements in the vector. */ > + if (out_base >= full_nelts) > + return true; > + > + if (out_base >= npatterns) > + { > + /* We've got to the end of the "foreground" values. Check > + 2 elements from each pattern in the "background" values. */ > + if (limit == 0) > + limit = out_base + cycle_length * 2; > + else if (out_base >= limit) > + return true; > + } > + > + element_type v0 = m_encoding.elt (out_base - out_step); > + element_type v1 = m_encoding.elt (out_base); > + if (clamp (v1 - v0) != in_step) > + return false; > + > + out_base += out_step; > + } > + return true; > + } > + > /* Return true if all elements of the permutation vector are in the range > [START, START + SIZE). */ > > *************** vec_perm_indices_to_rtx (machine_mode mo > *** 180,182 **** > --- 229,280 ---- > RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); > return gen_rtx_CONST_VECTOR (mode, v); > } > + > + #if CHECKING_P > + > + namespace selftest { > + > + /* Test a 12-element vector. */ > + > + static void > + test_vec_perm_12 (void) > + { > + vec_perm_builder builder (12, 12, 1); > + for (unsigned int i = 0; i < 4; ++i) > + { > + builder.quick_push (i * 5); > + builder.quick_push (3 + i); > + builder.quick_push (2 + 3 * i); > + } > + vec_perm_indices indices (builder, 1, 12); > + ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); > + ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); > + ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); > + ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); > + ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); > + > + ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); > + ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); > + > + ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); > + ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); > + > + ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); > + ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); > + > + ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); > + ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); > + ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); > + } > + > + /* Run selftests for this file. */ > + > + void > + vec_perm_indices_c_tests () > + { > + test_vec_perm_12 (); > + } > + > + } // namespace selftest > + > + #endif > Index: gcc/fold-const.c > =================================================================== > *** gcc/fold-const.c 2017-12-09 23:18:12.040041251 +0000 > --- gcc/fold-const.c 2017-12-09 23:21:51.517599734 +0000 > *************** fold_ternary_loc (location_t loc, enum t > *** 11547,11645 **** > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > ! unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; > ! bool need_mask_canon = false; > ! bool need_mask_canon2 = false; > ! bool all_in_vec0 = true; > ! bool all_in_vec1 = true; > ! bool maybe_identity = true; > ! bool single_arg = (op0 == op1); > ! bool changed = false; > ! > ! mask2 = 2 * nelts - 1; > ! mask = single_arg ? (nelts - 1) : mask2; > ! gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); > ! vec_perm_builder sel (nelts, nelts, 1); > ! vec_perm_builder sel2 (nelts, nelts, 1); > ! for (i = 0; i < nelts; i++) > ! { > ! tree val = VECTOR_CST_ELT (arg2, i); > ! if (TREE_CODE (val) != INTEGER_CST) > ! return NULL_TREE; > ! > ! /* Make sure that the perm value is in an acceptable > ! range. */ > ! wi::tree_to_wide_ref t = wi::to_wide (val); > ! need_mask_canon |= wi::gtu_p (t, mask); > ! need_mask_canon2 |= wi::gtu_p (t, mask2); > ! unsigned int elt = t.to_uhwi () & mask; > ! unsigned int elt2 = t.to_uhwi () & mask2; > ! > ! if (elt < nelts) > ! all_in_vec1 = false; > ! else > ! all_in_vec0 = false; > ! > ! if ((elt & (nelts - 1)) != i) > ! maybe_identity = false; > ! > ! sel.quick_push (elt); > ! sel2.quick_push (elt2); > ! } > > ! if (maybe_identity) > ! { > ! if (all_in_vec0) > ! return op0; > ! if (all_in_vec1) > ! return op1; > ! } > > ! if (all_in_vec0) > ! op1 = op0; > ! else if (all_in_vec1) > ! { > ! op0 = op1; > ! for (i = 0; i < nelts; i++) > ! sel[i] -= nelts; > ! need_mask_canon = true; > } > > - vec_perm_indices indices (sel, 2, nelts); > if ((TREE_CODE (op0) == VECTOR_CST > || TREE_CODE (op0) == CONSTRUCTOR) > && (TREE_CODE (op1) == VECTOR_CST > || TREE_CODE (op1) == CONSTRUCTOR)) > { > ! tree t = fold_vec_perm (type, op0, op1, indices); > if (t != NULL_TREE) > return t; > } > > ! if (op0 == op1 && !single_arg) > ! changed = true; > > ! /* Some targets are deficient and fail to expand a single > ! argument permutation while still allowing an equivalent > ! 2-argument version. */ > ! if (need_mask_canon && arg2 == op2 > ! && !can_vec_perm_const_p (TYPE_MODE (type), indices, false) > ! && can_vec_perm_const_p (TYPE_MODE (type), > ! vec_perm_indices (sel2, 2, nelts), > ! false)) > { > ! need_mask_canon = need_mask_canon2; > ! sel.truncate (0); > ! sel.splice (sel2); > ! } > ! > ! if (need_mask_canon && arg2 == op2) > ! { > ! tree eltype = TREE_TYPE (TREE_TYPE (arg2)); > ! tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); > ! for (i = 0; i < nelts; i++) > ! tsel.quick_push (build_int_cst (eltype, sel[i])); > ! op2 = tsel.build (); > changed = true; > } > > --- 11547,11611 ---- > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > ! /* Build a vector of integers from the tree mask. */ > ! vec_perm_builder builder; > ! if (!tree_to_vec_perm_builder (&builder, arg2)) > ! return NULL_TREE; > > ! /* Create a vec_perm_indices for the integer vector. */ > ! unsigned int nelts = TYPE_VECTOR_SUBPARTS (type); > ! bool single_arg = (op0 == op1); > ! vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); > > ! /* Check for cases that fold to OP0 or OP1 in their original > ! element order. */ > ! if (sel.series_p (0, 1, 0, 1)) > ! return op0; > ! if (sel.series_p (0, 1, nelts, 1)) > ! return op1; > ! > ! if (!single_arg) > ! { > ! if (sel.all_from_input_p (0)) > ! op1 = op0; > ! else if (sel.all_from_input_p (1)) > ! { > ! op0 = op1; > ! sel.rotate_inputs (1); > ! } > } > > if ((TREE_CODE (op0) == VECTOR_CST > || TREE_CODE (op0) == CONSTRUCTOR) > && (TREE_CODE (op1) == VECTOR_CST > || TREE_CODE (op1) == CONSTRUCTOR)) > { > ! tree t = fold_vec_perm (type, op0, op1, sel); > if (t != NULL_TREE) > return t; > } > > ! bool changed = (op0 == op1 && !single_arg); > > ! /* Generate a canonical form of the selector. */ > ! if (arg2 == op2 && sel.encoding () != builder) > { > ! /* Some targets are deficient and fail to expand a single > ! argument permutation while still allowing an equivalent > ! 2-argument version. */ > ! if (sel.ninputs () == 2 > ! || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); > ! else > ! { > ! vec_perm_indices sel2 (builder, 2, nelts); > ! if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); > ! else > ! /* Not directly supported with either encoding, > ! so use the preferred form. */ > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); > ! } > changed = true; > } >