From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1923) id 2E0483858C2F; Tue, 23 Jan 2024 20:57:41 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2E0483858C2F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1706043462; bh=M5dNESSQ+VJamSceg+Xc7S+1uwC1NRoD7+CFWSQmrzU=; h=From:To:Subject:Date:From; b=iQ+btTZsUpa+t0KQdoeRrUwUO9yliWejl2K2rWGQZqnEsiRLEzxrx/knYm6J58Rp7 qCW/0+glq4JsyZUUGTUuVetC8O7fVB3aV5AWN+kwB4MB5RXDwl7jzR24EBDKlnQVcl RgyCW1BfpcbI6oZrV1E4jcS8LGqaE+wUXn8LZJes= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Philipp Tomsich To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix issues and improve vect_slp_optimize_permutes. X-Act-Checkin: gcc X-Git-Author: Manolis Tsamis X-Git-Refname: refs/vendors/vrull/heads/slp-improvements X-Git-Oldrev: 962f4ee487340f09823314783cc746a2c854c496 X-Git-Newrev: 042460234612e9817e26d6f1c1dd41aa144a4c20 Message-Id: <20240123205742.2E0483858C2F@sourceware.org> Date: Tue, 23 Jan 2024 20:57:41 +0000 (GMT) List-Id: https://gcc.gnu.org/g:042460234612e9817e26d6f1c1dd41aa144a4c20 commit 042460234612e9817e26d6f1c1dd41aa144a4c20 Author: Manolis Tsamis Date: Thu Nov 23 11:28:44 2023 +0100 Fix issues and improve vect_slp_optimize_permutes. * Don't use tree_to_vec_perm_builder to get the elements of the permute constant vectors. This function uses vector_cst_encoded_nelts internally, which may return fewer elements than VECTOR_CST_NELTS and, in turn, fewer elements than are observed on the GIMPLE statement. Because we iterate based on NELTS, this would cause memory corruption issues. The resulting code is also cleaner and more performant. * Call update_stmt after rewriting permutes, because otherwise we may get SSA imm_use list issues. 
Ref #346 Diff: --- gcc/tree-vect-slp.cc | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 68b745f8445..8dd9d85d9b8 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -8385,33 +8385,36 @@ vect_slp_optimize_permutes (function *fun) continue; vec_perm_builder sel (nelts, nelts, 1); - vec_perm_builder sel1_1, sel1_2, sel2_1, sel2_2; - if (!tree_to_vec_perm_builder (&sel1_1, gimple_assign_rhs3 (src1_1)) - || !tree_to_vec_perm_builder (&sel1_2, gimple_assign_rhs3 (src1_2)) - || !tree_to_vec_perm_builder (&sel2_1, gimple_assign_rhs3 (src2_1)) - || !tree_to_vec_perm_builder (&sel2_2, gimple_assign_rhs3 (src2_2))) - continue; + vec_perm_builder sel_1 (nelts, nelts, 1); + vec_perm_builder sel_2 (nelts, nelts, 1); /* Rewrite the permutations based on MASK1_LANES/MASK2_LANES. */ for (i = 0; i < nelts; i++) { + /* Calculate new mask for STMT2. */ tree val = VECTOR_CST_ELT (mask2, i); - unsigned HOST_WIDE_INT j + unsigned HOST_WIDE_INT lane = TREE_INT_CST_LOW (val) & (2 * nelts - 1); - unsigned off = (j < nelts) ? 0 : nelts; - unsigned HOST_WIDE_INT new_lane = *mask2_lanes.get (j - off) + off; + unsigned off = (lane < nelts) ? 0 : nelts; + unsigned HOST_WIDE_INT new_lane + = *mask2_lanes.get (lane - off) + off; sel.quick_push (new_lane); - if (!mask1_lanes.get (i)) - { - sel1_1[i] = nelts + sel2_1[i]; - sel1_2[i] = nelts + sel2_2[i]; - } + /* Calculate new masks for SRC1_1 and SRC1_2. */ + bool use_src1 = mask1_lanes.get (i); + tree mask1 = gimple_assign_rhs3 (use_src1 ? src1_1 : src2_1); + tree mask2 = gimple_assign_rhs3 (use_src1 ? src1_2 : src2_2); + unsigned HOST_WIDE_INT lane1 + = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask1, i)) & (nelts - 1); + unsigned HOST_WIDE_INT lane2 + = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask2, i)) & (nelts - 1); + sel_1.quick_push (lane1 + (use_src1 ? 0 : nelts)); + sel_2.quick_push (lane2 + (use_src1 ? 
0 : nelts)); } vec_perm_indices indices (sel, 2, nelts); - vec_perm_indices indices1_1 (sel1_1, 2, nelts); - vec_perm_indices indices1_2 (sel1_2, 2, nelts); + vec_perm_indices indices1_1 (sel_1, 2, nelts); + vec_perm_indices indices1_2 (sel_2, 2, nelts); tree vectype = TREE_TYPE (gimple_assign_lhs (stmt2)); if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices) @@ -8429,6 +8432,10 @@ vect_slp_optimize_permutes (function *fun) gimple_assign_set_rhs3 (src1_1, vect_gen_perm_mask_checked (vectype, indices1_1)); gimple_assign_set_rhs3 (src1_2, vect_gen_perm_mask_checked (vectype, indices1_2)); + + update_stmt (stmt2); + update_stmt (src1_1); + update_stmt (src1_2); } }