From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1923) id 63D0C3858C62; Wed, 17 Jan 2024 19:15:27 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 63D0C3858C62 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1705518927; bh=RQqdQBfhbEfKf0GlNlLcFe/qh8QnYMorbE6Zpl4oMsw=; h=From:To:Subject:Date:From; b=jaC5qpGoukFxOAlp3xBxEUq5bVqv4+SB1vLSoHwyFigQX43/vhRMw4fhJuI1IRoX1 4CVoF9rEy9Ny2DMcXTVB6m5QNzVUgSVQeU8CrUdH+7m8uPMTudqMvxgUqQ+P2vU6LA EJQLXCG3U7Gi3LjpQ82wyEy5whVtAGaYbjjpjDUM= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Philipp Tomsich To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] Fold some NxM vector permute networks in vect_slp_optimize_permutes (#354) X-Act-Checkin: gcc X-Git-Author: Manolis Tsamis X-Git-Refname: refs/vendors/vrull/heads/slp-improvements X-Git-Oldrev: 89f3268cde0fcd58f45311f4d28b3ea1ee13af8c X-Git-Newrev: 926fbfaa3c8b3bde520bd89f0acd3ed9f69d28ce Message-Id: <20240117191527.63D0C3858C62@sourceware.org> Date: Wed, 17 Jan 2024 19:15:27 +0000 (GMT) List-Id: https://gcc.gnu.org/g:926fbfaa3c8b3bde520bd89f0acd3ed9f69d28ce commit 926fbfaa3c8b3bde520bd89f0acd3ed9f69d28ce Author: Manolis Tsamis Date: Wed Dec 13 14:39:30 2023 +0100 Fold some NxM vector permute networks in vect_slp_optimize_permutes (#354) More specifically, we try to identify: P1 = VEC_PERM(MASK_1) P2 = VEC_PERM(MASK_2) P1 = VIEW_CONVERT<...>(P1) (optional) P2 = VIEW_CONVERT<...>(P2) (optional) R1 = VEC_PERM(MASK_3) R2 = VEC_PERM(MASK_4) ... RN = VEC_PERM(MASK_M) and convert it to: R1 = VEC_PERM(NEW_MASK_1) R2 = VEC_PERM(NEW_MASK_2) ... RN = VEC_PERM(NEW_MASK_N) if possible. Diff: --- gcc/tree-vect-slp.cc | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 3 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 4a9da25558a..67ca0ffc0da 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -8123,6 +8123,29 @@ get_tree_def (tree name, bool single_use_only) return dyn_cast (def_stmt); } +static bool +get_perm_def (tree name, gassign **perm, gassign **def) +{ + gassign *stmt1 = get_tree_def(name, false); + + if (!stmt1) + return false; + + gassign *stmt2 = stmt1; + if (gimple_assign_rhs_code (stmt1) == VIEW_CONVERT_EXPR) + stmt2 = get_tree_def(TREE_OPERAND (gimple_assign_rhs1 (stmt1), 0), false); + + if (!stmt2) + return false; + + if (gimple_assign_rhs_code (stmt2) != VEC_PERM_EXPR) + return false; + + *perm = stmt2; + *def = (stmt1 != stmt2) ? stmt1 : stmt2; + return true; +} + /* Helper function for vect_slp_optimize_permutes. Return true if STMT is an expression of the form: @@ -8141,9 +8164,7 @@ recognise_perm_binop_perm_pattern (gassign *stmt, gassign **bop1_out, gassign **bop2_out, gassign **perm1_out, gassign **perm2_out) { - enum tree_code code = gimple_assign_rhs_code (stmt); - - if (code != VEC_PERM_EXPR) + if (gimple_assign_rhs_code (stmt) != VEC_PERM_EXPR) return false; gassign *bop1, *bop2; @@ -8342,6 +8363,131 @@ vect_slp_optimize_permutes (function *fun) gsi_insert_before (&gsi_stmt1, bop1_1, GSI_SAME_STMT); gsi_insert_before (&gsi_stmt1, bop1_2, GSI_SAME_STMT); } + + /* Try to identify: + + P1 = VEC_PERM(MASK_1) + P2 = VEC_PERM(MASK_2) + P1 = VIEW_CONVERT<...>(P1) ? + P2 = VIEW_CONVERT<...>(P2) ? + R1 = VEC_PERM(MASK_3) + R2 = VEC_PERM(MASK_4) + ... + RN = VEC_PERM(MASK_M) + + and convert it to: + + R1 = VEC_PERM(NEW_MASK_1) + R2 = VEC_PERM(NEW_MASK_2) + ... + RN = VEC_PERM(NEW_MASK_N) + + if possible. */ + FOR_EACH_BB_FN (bb, fun) + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gassign *stmt = dyn_cast (gsi_stmt (gsi)); + + if (!stmt + || gimple_assign_rhs_code (stmt) != VEC_PERM_EXPR + || !VECTOR_CST_NELTS (gimple_assign_rhs3 (stmt)).is_constant ()) + continue; + + /* STMT corresponds to R1 so we look at the definitions of STMT's + rhs 1 and 2 to find P1 and P2. */ + gassign *perms[2]; + gassign *defs[2]; + if (!get_perm_def(gimple_assign_rhs1 (stmt), &perms[0], &defs[0]) + || !get_perm_def(gimple_assign_rhs2 (stmt), &perms[1], &defs[1]) + || defs[0] == defs[1]) + continue; + + if (gimple_assign_rhs1 (perms[0]) != gimple_assign_rhs1 (perms[1]) + || gimple_assign_rhs2 (perms[0]) != gimple_assign_rhs2 (perms[1]) + || !VECTOR_CST_NELTS (gimple_assign_rhs3 (perms[0])).is_constant () + || !VECTOR_CST_NELTS (gimple_assign_rhs3 (perms[1])).is_constant ()) + continue; + + hash_set all_perms; + all_perms.add (stmt); + + tree def0_name = gimple_assign_lhs (defs[0]); + tree def1_name = gimple_assign_lhs (defs[1]); + bool only_same_perms = true; + imm_use_iterator imm_iter; + use_operand_p use_p; + /* Check if all uses are also permutes. Otherwise it's not obvious + that folding is an improvement. */ + for (int i = 0; i < 2; i++) + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (defs[i])) + { + gimple *use = USE_STMT (use_p); + if (!is_a (use) + || gimple_bb (use) != bb + || gimple_assign_rhs_code (use) != VEC_PERM_EXPR + || (gimple_assign_rhs1 (use) != def0_name + && gimple_assign_rhs1 (use) != def1_name) + || (gimple_assign_rhs2 (use) != def0_name + && gimple_assign_rhs2 (use) != def1_name) + || !VECTOR_CST_NELTS (gimple_assign_rhs3 (use)) + .is_constant ()) + { + only_same_perms = false; + break; + } + + all_perms.add (use); + } + + if (!only_same_perms) + continue; + + for (hash_set ::iterator iter = all_perms.begin (); + iter != all_perms.end (); ++iter) + { + gassign *perm = dyn_cast (*iter); + tree mask = gimple_assign_rhs3 (perm); + unsigned HOST_WIDE_INT nelts + = VECTOR_CST_NELTS (mask).to_constant (); + vec_perm_builder sel (nelts, nelts, 1); + + gassign *src1 = perms[gimple_assign_rhs1 (perm) == def1_name]; + tree mask1 = gimple_assign_rhs3 (src1); + gassign *src2 = perms[gimple_assign_rhs2 (perm) == def1_name]; + tree mask2 = gimple_assign_rhs3 (src2); + + for (unsigned HOST_WIDE_INT i = 0; i < nelts; i++) + { + tree val = VECTOR_CST_ELT (mask, i); + unsigned HOST_WIDE_INT lane + = TREE_INT_CST_LOW (val) & (2 * nelts - 1); + unsigned HOST_WIDE_INT new_lane; + + if (lane < nelts) + new_lane = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask1, lane)); + else + new_lane = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask2, lane - nelts)); + + sel.quick_push (new_lane); + } + + vec_perm_indices indices (sel, 2, nelts); + tree vectype = TREE_TYPE (gimple_assign_lhs (perm)); + if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices)) + continue; + + tree new_mask = vect_gen_perm_mask_checked (vectype, indices); + gimple_assign_set_rhs1 (perm, gimple_assign_rhs1 (perms[0])); + gimple_assign_set_rhs2 (perm, gimple_assign_rhs2 (perms[0])); + gimple_assign_set_rhs3 (perm, new_mask); + update_stmt (perm); + + if (dump_file) { + fprintf(dump_file, " New "); print_gimple_stmt(dump_file, perm, 0); + } + } + } } /* Main entry for the BB vectorizer. Analyze and transform BB, returns