From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1666) id 6C94738708CB; Mon, 13 May 2024 14:27:33 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6C94738708CB DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1715610453; bh=FbYluuz9w+O94PnhL6sDLoQakozh8GIe8RXTTlTI6WE=; h=From:To:Subject:Date:From; b=HrsfkXHWYViC6HCkzlnp5a/DrXlLgpkt9loPlbUbwSaqPX3Pm2ZYQFuD7oWZy6yhN xIfS89j6q42YDdTWamWZbV3H5+bNEoBnzRW/FuWt2+83owtsGAmeSaEn7glNfvuWst e3KI2M8KgiFP4ZrqUdXE3tSwEGhVanv/8hXmZu04= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Richard Biener To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/rguenth/heads/vect-force-slp)] Do single-lane SLP discovery for reductions X-Act-Checkin: gcc X-Git-Author: Richard Biener X-Git-Refname: refs/users/rguenth/heads/vect-force-slp X-Git-Oldrev: 95f3686d4788d7420422d514996c5b6e7a8facfd X-Git-Newrev: b423891ad43d003a565e7b5c6ed648e446bd3c7c Message-Id: <20240513142733.6C94738708CB@sourceware.org> Date: Mon, 13 May 2024 14:27:33 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b423891ad43d003a565e7b5c6ed648e446bd3c7c commit b423891ad43d003a565e7b5c6ed648e446bd3c7c Author: Richard Biener Date: Fri Feb 23 11:45:50 2024 +0100 Do single-lane SLP discovery for reductions The following performs single-lane SLP discovery for reductions. This exposes a latent issue with reduction SLP in outer loop vectorization and makes gcc.dg/vect/vect-outer-4[fgkl].c FAIL execution. * tree-vect-slp.cc (vect_build_slp_tree_2): Only multi-lane discoveries are reduction chains and need special backedge treatment. (vect_analyze_slp): Fall back to single-lane SLP discovery for reductions. Make sure to try single-lane SLP reduction for all reductions as fallback. Diff: --- gcc/tree-vect-slp.cc | 58 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ecc185aae885..f39cde3a8d50 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1918,7 +1918,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, /* Reduction chain backedge defs are filled manually. ??? Need a better way to identify a SLP reduction chain PHI. Or a better overall way to SLP match those. */ - if (all_same && def_type == vect_reduction_def) + if (stmts.length () > 1 + && all_same && def_type == vect_reduction_def) skip_args[loop_latch_edge (loop)->dest_idx] = true; } else if (def_type != vect_internal_def) @@ -3911,7 +3912,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) } /* Find SLP sequences starting from groups of reductions. */ - if (loop_vinfo->reductions.length () > 1) + if (loop_vinfo->reductions.length () > 0) { /* Collect reduction statements. */ vec scalar_stmts; @@ -3934,17 +3935,54 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) && gimple_assign_rhs_code (g) != WIDEN_SUM_EXPR && gimple_assign_rhs_code (g) != SAD_EXPR))) scalar_stmts.quick_push (next_info); + else if (param_vect_single_lane_slp != 0) + { + vec stmts; + vec roots = vNULL; + vec remain = vNULL; + stmts.create (1); + stmts.quick_push (next_info); + bool res = vect_build_slp_instance (vinfo, + slp_inst_kind_reduc_group, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL); + gcc_assert (res); + } } - if (scalar_stmts.length () > 1) + vec roots = vNULL; + vec remain = vNULL; + vec saved_stmts = vNULL; + if (param_vect_single_lane_slp != 0) + /* ??? scalar_stmts ownership and arg passing sucks. */ + saved_stmts = scalar_stmts.copy (); + if ((scalar_stmts.length () <= 1 + || !vect_build_slp_instance (loop_vinfo, + slp_inst_kind_reduc_group, + scalar_stmts, roots, remain, + max_tree_size, &limit, bst_map, + NULL)) + && param_vect_single_lane_slp != 0) { - vec roots = vNULL; - vec remain = vNULL; - vect_build_slp_instance (loop_vinfo, slp_inst_kind_reduc_group, - scalar_stmts, roots, remain, - max_tree_size, &limit, bst_map, NULL); + if (scalar_stmts.length () <= 1) + scalar_stmts.release (); + /* Do SLP discovery for single-lane reductions. */ + for (auto stmt_info : saved_stmts) + { + vec stmts; + vec roots = vNULL; + vec remain = vNULL; + stmts.create (1); + stmts.quick_push (vect_stmt_to_vectorize (stmt_info)); + bool res = vect_build_slp_instance (vinfo, + slp_inst_kind_reduc_group, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL); + gcc_assert (res); + } + saved_stmts.release (); } - else - scalar_stmts.release (); } }