From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1130) id CDFEF382F0A5; Tue, 30 Aug 2022 14:44:11 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org CDFEF382F0A5 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1661870651; bh=3e8wpmMJOxesza6pr+J9JcrerGmRyM403+nzFb6+B+4=; h=From:To:Subject:Date:From; b=lde+Waoxgl6jRZV/8poE/S6daumesX/ZMSPe281MBB3se+22KPg5Xli0iMPsaDdol IogEmXo1hVlXeyx0H5lO+Ql3xAg7RDN0Q0LNalVfbFTPGKJ6WMuK4P6P9O5XsFq8zx YAMw0wpBOfJVuu+ljJOatqHEyKruwFbyTLs0ThhM= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Richard Sandiford To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-2283] Split code out of vectorizable_slp_permutation X-Act-Checkin: gcc X-Git-Author: Richard Sandiford X-Git-Refname: refs/heads/trunk X-Git-Oldrev: 25c2a50cc343eb7c2500b69a6556551d5221393f X-Git-Newrev: 5edc67b773372bf013f98a357912381d99de65f5 Message-Id: <20220830144411.CDFEF382F0A5@sourceware.org> Date: Tue, 30 Aug 2022 14:44:11 +0000 (GMT) List-Id: https://gcc.gnu.org/g:5edc67b773372bf013f98a357912381d99de65f5 commit r13-2283-g5edc67b773372bf013f98a357912381d99de65f5 Author: Richard Sandiford Date: Tue Aug 30 15:43:45 2022 +0100 Split code out of vectorizable_slp_permutation A later patch needs to test whether the target supports a lane_permutation_t without having to construct a full SLP node to test that. This patch splits out most of the work of vectorizable_slp_permutation into a subroutine, so that properties of the permutation can be passed explicitly without disturbing the main interface. The new subroutine still uses an slp_tree argument to get things like the number of lanes and the vector type. That's a bit clunky, but it seemed like the least worst option. gcc/ * tree-vect-slp.cc (vectorizable_slp_permutation_1): Split out from... (vectorizable_slp_permutation): ...here. 
Diff: --- gcc/tree-vect-slp.cc | 98 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 32 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index dab5daddcc5..13c242e5012 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -6976,20 +6976,22 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt); } -/* Vectorize the SLP permutations in NODE as specified - in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP - child number and lane number. - Interleaving of two two-lane two-child SLP subtrees (not supported): - [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ] - A blend of two four-lane two-child SLP subtrees: - [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ] - Highpart of a four-lane one-child SLP subtree (not supported): - [ { 0, 2 }, { 0, 3 } ] - Where currently only a subset is supported by code generating below. */ +/* Subroutine of vectorizable_slp_permutation. Check whether the target + can perform permutation PERM on the (1 or 2) input nodes in CHILDREN. + If GSI is nonnull, emit the permutation there. -static bool -vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, - slp_tree node, stmt_vector_for_cost *cost_vec) + When GSI is null, the only purpose of NODE is to give properties + of the result, such as the vector type and number of SLP lanes. + The node does not need to be a VEC_PERM_EXPR. + + If the target supports the operation, return the number of individual + VEC_PERM_EXPRs needed, otherwise return -1. Print information to the + dump file if DUMP_P is true. 
*/ + +static int +vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, + slp_tree node, lane_permutation_t &perm, + vec<slp_tree> &children, bool dump_p) { tree vectype = SLP_TREE_VECTYPE (node); @@ -7001,7 +7003,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); bool repeating_p = multiple_p (nunits, SLP_TREE_LANES (node)); tree op_vectype = NULL_TREE; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + FOR_EACH_VEC_ELT (children, i, child) if (SLP_TREE_VECTYPE (child)) { op_vectype = SLP_TREE_VECTYPE (child); @@ -7009,25 +7011,24 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, } if (!op_vectype) op_vectype = vectype; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + FOR_EACH_VEC_ELT (children, i, child) { if ((SLP_TREE_DEF_TYPE (child) != vect_internal_def && !vect_maybe_update_slp_op_vectype (child, op_vectype)) || !types_compatible_p (SLP_TREE_VECTYPE (child), op_vectype) || !types_compatible_p (TREE_TYPE (vectype), TREE_TYPE (op_vectype))) { - if (dump_enabled_p ()) + if (dump_p) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Unsupported vector types in lane permutation\n"); - return false; + return -1; } if (SLP_TREE_LANES (child) != SLP_TREE_LANES (node)) repeating_p = false; } - vec<std::pair<unsigned, unsigned> > &perm = SLP_TREE_LANE_PERMUTATION (node); gcc_assert (perm.length () == SLP_TREE_LANES (node)); - if (dump_enabled_p ()) + if (dump_p) { dump_printf_loc (MSG_NOTE, vect_location, "vectorizing permutation"); @@ -7076,11 +7077,11 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, /* Calculate every element of every permute mask vector explicitly, instead of relying on the pattern described above. 
*/ if (!nunits.is_constant (&npatterns)) - return false; + return -1; nelts_per_pattern = ncopies = 1; if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo)) if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&ncopies)) - return false; + return -1; noutputs_per_mask = 1; } unsigned olanes = ncopies * SLP_TREE_LANES (node); @@ -7093,13 +7094,13 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, auto_vec<std::pair<std::pair<unsigned, unsigned>, unsigned> > vperm; auto_vec<unsigned> active_lane; vperm.create (olanes); - active_lane.safe_grow_cleared (SLP_TREE_CHILDREN (node).length (), true); + active_lane.safe_grow_cleared (children.length (), true); for (unsigned i = 0; i < ncopies; ++i) { for (unsigned pi = 0; pi < perm.length (); ++pi) { std::pair<unsigned, unsigned> p = perm[pi]; - tree vtype = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node)[p.first]); + tree vtype = SLP_TREE_VECTYPE (children[p.first]); if (repeating_p) vperm.quick_push ({{p.first, 0}, p.second + active_lane[p.first]}); else @@ -7112,12 +7113,19 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, } } /* Advance to the next group. 
*/ - for (unsigned j = 0; j < SLP_TREE_CHILDREN (node).length (); ++j) - active_lane[j] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node)[j]); + for (unsigned j = 0; j < children.length (); ++j) + active_lane[j] += SLP_TREE_LANES (children[j]); } - if (dump_enabled_p ()) + if (dump_p) { + dump_printf_loc (MSG_NOTE, vect_location, + "vectorizing permutation"); + for (unsigned i = 0; i < perm.length (); ++i) + dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second); + if (repeating_p) + dump_printf (MSG_NOTE, " (repeat %d)\n", SLP_TREE_LANES (node)); + dump_printf (MSG_NOTE, "\n"); dump_printf_loc (MSG_NOTE, vect_location, "as"); for (unsigned i = 0; i < vperm.length (); ++i) { @@ -7163,12 +7171,12 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, } else { - if (dump_enabled_p ()) + if (dump_p) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "permutation requires at " "least three vectors\n"); gcc_assert (!gsi); - return false; + return -1; } mask[index++] = mask_element; @@ -7190,7 +7198,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, TYPE_VECTOR_SUBPARTS (op_vectype), &c) || c != 2))) { - if (dump_enabled_p ()) + if (dump_p) { dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7203,7 +7211,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, dump_printf (MSG_MISSED_OPTIMIZATION, "}\n"); } gcc_assert (!gsi); - return false; + return -1; } if (!identity_p) @@ -7214,8 +7222,8 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, second_vec = first_vec; slp_tree - first_node = SLP_TREE_CHILDREN (node)[first_vec.first], - second_node = SLP_TREE_CHILDREN (node)[second_vec.first]; + first_node = children[first_vec.first], + second_node = children[second_vec.first]; tree mask_vec = NULL_TREE; if (!identity_p) @@ -7240,6 +7248,32 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, } } + return nperms; +} + +/* Vectorize 
the SLP permutations in NODE as specified + in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP + child number and lane number. + Interleaving of two two-lane two-child SLP subtrees (not supported): + [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ] + A blend of two four-lane two-child SLP subtrees: + [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ] + Highpart of a four-lane one-child SLP subtree (not supported): + [ { 0, 2 }, { 0, 3 } ] + Where currently only a subset is supported by code generating below. */ + +static bool +vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, + slp_tree node, stmt_vector_for_cost *cost_vec) +{ + tree vectype = SLP_TREE_VECTYPE (node); + lane_permutation_t &perm = SLP_TREE_LANE_PERMUTATION (node); + int nperms = vectorizable_slp_permutation_1 (vinfo, gsi, node, perm, + SLP_TREE_CHILDREN (node), + dump_enabled_p ()); + if (nperms < 0) + return false; + if (!gsi) record_stmt_cost (cost_vec, nperms, vec_perm, node, vectype, 0, vect_body);