From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2066) id 18FD63858C27; Mon, 31 May 2021 05:55:53 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 18FD63858C27 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Jiu Fu Guo To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/guojiufu/heads/guojiufu-branch)] Split loop for at safe boundary for wrap/overflow. X-Act-Checkin: gcc X-Git-Author: Jiufu Guo X-Git-Refname: refs/users/guojiufu/heads/guojiufu-branch X-Git-Oldrev: af3824d29806e59b0f58eea465e96da68cb2fc86 X-Git-Newrev: 0e1e9f6a45dbb13fe45097772764a63c2861e1a5 Message-Id: <20210531055553.18FD63858C27@sourceware.org> Date: Mon, 31 May 2021 05:55:53 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 31 May 2021 05:55:53 -0000 https://gcc.gnu.org/g:0e1e9f6a45dbb13fe45097772764a63c2861e1a5 commit 0e1e9f6a45dbb13fe45097772764a63c2861e1a5 Author: Jiufu Guo Date: Tue May 18 13:43:06 2021 +0800 Split loop for at safe boundary for wrap/overflow. When there is the possibility that overflow/wrap may happen on the loop index, a few optimizations would not happen. For example code: foo (int *a, int *b, unsigned k, unsigned n) { while (++k != n) a[k] = b[k] + 1; } For this code, if "k > n", k would wrap. if "k < n" at begining, it could be optimized (e.g. vectorization). We can split the loop into two loops and guard them at wrap boundary to ensure one part loop are not wrap/overflow. like below: if (k < n) while (++k < n) a[k] = b[k] + 1; else while (++k != n) a[k] = b[k] + 1; then for the first loop, it could be optimized. This patch implements this to achieve better performance. Diff: --- gcc/tree-ssa-loop-split.c | 159 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 149 insertions(+), 10 deletions(-) diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 30e717c3004..4a48e27fb8b 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -234,8 +234,7 @@ easy_exit_values (class loop *loop) this. The loops need to fulfill easy_exit_values(). */ static void -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, - bool use_prev = false) +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) { basic_block rest = loop_preheader_edge (loop2)->src; gcc_assert (new_e->dest == rest); @@ -281,8 +280,7 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, gphi * newphi = create_phi_node (new_init, rest); add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); - add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e, - UNKNOWN_LOCATION); + add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); SET_USE (op, new_init); } } @@ -1597,18 +1595,122 @@ split_loop_on_cond (struct loop *loop) } /* Check if the loop is possible to wrap at index. - Return the assumption under which the wrap would happen. + Return the assumption under which the wrap will not happen. Return NULL_TREE, if overflow/wrap will not happen. */ static tree -get_wrap_assumption (class loop *loop) +get_wrap_assumption (class loop *loop, edge *exit) { int i; edge e; auto_vec edges = get_loop_exit_edges (loop); FOR_EACH_VEC_ELT (edges, i, e) { + if (e->flags & EDGE_FAKE) + continue; + + basic_block bb = e->src; + + /* Check gcond. */ + gimple *last = last_stmt (bb); + if (!last || gimple_code (last) != GIMPLE_COND) + continue; + gcond *gc = as_a (last); + + /* Check if bound is invarant. */ + tree bnd = gimple_cond_rhs (gc); + tree idx = gimple_cond_lhs (gc); + enum tree_code code = gimple_cond_code (gc); + if (expr_invariant_in_loop_p (loop, idx)) + { + std::swap (idx, bnd); + code = swap_tree_comparison (code); + } + else if (!expr_invariant_in_loop_p (loop, bnd)) + continue; + + /* Check if bound is MAX/MIN val of a integral type. */ + tree bnd_type = TREE_TYPE (bnd); + if (!(INTEGRAL_TYPE_P (bnd_type))) + continue; + if (TREE_CODE (bnd) == INTEGER_CST + && (bnd == TYPE_MAX_VALUE (bnd_type) + || bnd == TYPE_MIN_VALUE (bnd_type))) + continue; + /* Check if it is "idx != bnd" or "idx < bnd". */ + if (e->flags & EDGE_TRUE_VALUE) + code = invert_tree_comparison (code, false); + if (code != NE_EXPR && code != LT_EXPR && code != LE_EXPR) + continue; + + /* Filter conversion from idx. */ + tree cmp_idx = idx; + tree small_type = TREE_TYPE (idx); + tree large_type = small_type; + gimple *stmt = NULL; + if (TREE_CODE (idx) == SSA_NAME) + stmt = SSA_NAME_DEF_STMT (idx); + while (stmt && is_gimple_assign (stmt) + && flow_bb_inside_loop_p (loop, gimple_bb (stmt))) + { + enum tree_code code = gimple_assign_rhs_code (stmt); + if (CONVERT_EXPR_CODE_P (code)) + { + idx = gimple_assign_rhs1 (stmt); + tree type = TREE_TYPE (idx); + if (TYPE_PRECISION (small_type) > TYPE_PRECISION (type) + || (TYPE_PRECISION (small_type) == TYPE_PRECISION (type) + && TYPE_UNSIGNED (small_type) && !TYPE_UNSIGNED (type))) + small_type = type; + if (TYPE_PRECISION (large_type) < TYPE_PRECISION (type) + || (TYPE_PRECISION (large_type) == TYPE_PRECISION (type) + && !TYPE_UNSIGNED (large_type) && TYPE_UNSIGNED (type))) + large_type = type; + } + else if (PLUS_EXPR == code + && integer_onep (gimple_assign_rhs2 (stmt))) + idx = gimple_assign_rhs1 (stmt); + else + break; + + if (TREE_CODE (idx) != SSA_NAME) + break; + stmt = SSA_NAME_DEF_STMT (idx); + } + + /* Check if idx is iv with base and step. */ + affine_iv iv; + tree iv_niters = NULL_TREE; + if (!simple_iv_with_niters (loop, loop_containing_stmt (gc), idx, &iv, + &iv_niters, false)) + continue; + if (!iv.base || !iv.step || !integer_onep (iv.step)) + continue; + + tree base = iv.base; + tree maxv = TYPE_MAX_VALUE (small_type); + if (large_type != TREE_TYPE (base)) + base = fold_convert (large_type, base); + if (large_type != TREE_TYPE (bnd)) + bnd = fold_convert (large_type, bnd); + if (large_type != small_type) + maxv = fold_convert (large_type, maxv); + + /* for "< or <=", only check if bnd <= max value of idx type. */ + tree no_wrap = fold_build2 (LE_EXPR, boolean_type_node, bnd, maxv); + /* for "!=", and check if base <= bnd. */ + if (code == NE_EXPR) + no_wrap + = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, no_wrap, + fold_build2 (LE_EXPR, boolean_type_node, base, bnd)); + + /* Check if it is sure wrap or no wrap. */ + if (integer_zerop (no_wrap) || integer_onep (no_wrap)) + continue; + + *exit = e; + return no_wrap; } return NULL_TREE; @@ -1618,10 +1720,42 @@ get_wrap_assumption (class loop *loop) under the guard that WRAP_ASSUMPTION will not be true. */ static bool -split_wrap_boundary (class loop *loop, tree wrap_assumption) +split_wrap_boundary (class loop *loop, edge e, tree no_wrap_cond) { - - return false; + /* Convert the condition into a suitable gcond. */ + gimple_seq stmts = NULL; + no_wrap_cond = force_gimple_operand_1 (no_wrap_cond, &stmts, + is_gimple_condexpr, NULL_TREE); + + /* Version the loop. */ + initialize_original_copy_tables (); + basic_block cond_bb; + loop_version (loop, no_wrap_cond, &cond_bb, + profile_probability::very_likely (), + profile_probability::very_unlikely (), + profile_probability::very_likely (), + profile_probability::very_unlikely (), true); + free_original_copy_tables (); + + /* Insert the statements that feed COND. */ + if (stmts) + { + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + } + + gcond *gc = as_a (last_stmt (e->src)); + enum tree_code code = gimple_cond_code (gc); + enum tree_code new_code; + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); + new_code = inv ? GT_EXPR : LT_EXPR; + if (code == EQ_EXPR) + new_code = invert_tree_comparison (new_code, false); + + if (code == NE_EXPR || code == EQ_EXPR) + gimple_cond_set_code (gc, new_code); + + return true; } /* Split loop if there is possible wrap. @@ -1667,8 +1801,13 @@ split_loop_on_wrap (class loop *loop) free (bbs); return false; } - free (bbs); + + edge e; + tree nowrap_assumption = get_wrap_assumption (loop, &e); + if (nowrap_assumption && split_wrap_boundary (loop, e, nowrap_assumption)) + return true; + return false; }