From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1851) id 04AA73858420; Thu, 9 Dec 2021 12:55:12 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 04AA73858420 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Martin Liska To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/marxin/heads/loop-unswitch-improvement-v8)] Loop unswitching: refactoring & support for gswitch X-Act-Checkin: gcc X-Git-Author: Martin Liska X-Git-Refname: refs/users/marxin/heads/loop-unswitch-improvement-v8 X-Git-Oldrev: 5791bf7a0a7705be6c3989c445e7c739220f3290 X-Git-Newrev: 346f01f6a1812177631bce8896b57de4b1fa9c3f Message-Id: <20211209125512.04AA73858420@sourceware.org> Date: Thu, 9 Dec 2021 12:55:12 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 09 Dec 2021 12:55:12 -0000 https://gcc.gnu.org/g:346f01f6a1812177631bce8896b57de4b1fa9c3f commit 346f01f6a1812177631bce8896b57de4b1fa9c3f Author: Martin Liska Date: Mon Nov 22 13:54:20 2021 +0100 Loop unswitching: refactoring & support for gswitch gcc/ChangeLog: * dbgcnt.def (DEBUG_COUNTER): Add loop_unswitch counter. * tree-cfg.c (gimple_lv_add_condition_to_bb): Support not gimplified expressions. * tree-ssa-loop-unswitch.c (struct unswitch_predicate): New. (tree_unswitch_single_loop): Rework the function using pre-computed predicates. (tree_may_unswitch_on): Rename to ... (find_unswitching_predicates_for_bb): ... this. (clean_up_after_unswitching): New. (get_predicates_for_bb): Likewise. (set_predicates_for_bb): Likewise. (init_loop_unswitch_info): Likewise. (tree_ssa_unswitch_loops): Prepare stuff before calling tree_unswitch_single_loop. (merge_last): New. (add_predicate_to_path): Likewise. (find_range_for_lhs): Likewise. (simplify_using_entry_checks): Rename to ... (evaluate_control_stmt_using_entry_checks): ... this. (simplify_loop_version): Rework. (evaluate_insns): New. (evaluate_loop_insns_for_predicate): Likewise. (tree_unswitch_loop): Remove an assert. gcc/testsuite/ChangeLog: * gcc.dg/loop-unswitch-10.c: New test. * gcc.dg/loop-unswitch-11.c: New test. * gcc.dg/loop-unswitch-12.c: New test. * gcc.dg/loop-unswitch-13.c: New test. * gcc.dg/loop-unswitch-6.c: New test. * gcc.dg/loop-unswitch-7.c: New test. * gcc.dg/loop-unswitch-8.c: New test. * gcc.dg/loop-unswitch-9.c: New test. Diff: --- gcc/dbgcnt.def | 1 + gcc/testsuite/gcc.dg/loop-unswitch-10.c | 56 ++ gcc/testsuite/gcc.dg/loop-unswitch-11.c | 45 ++ gcc/testsuite/gcc.dg/loop-unswitch-12.c | 28 + gcc/testsuite/gcc.dg/loop-unswitch-13.c | 34 ++ gcc/testsuite/gcc.dg/loop-unswitch-6.c | 60 ++ gcc/testsuite/gcc.dg/loop-unswitch-7.c | 28 + gcc/testsuite/gcc.dg/loop-unswitch-8.c | 31 ++ gcc/testsuite/gcc.dg/loop-unswitch-9.c | 27 + gcc/tree-cfg.c | 7 +- gcc/tree-ssa-loop-unswitch.c | 935 +++++++++++++++++++++++++------- 11 files changed, 1044 insertions(+), 208 deletions(-) diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def index f8a15f3d1d1..278fb1112b3 100644 --- a/gcc/dbgcnt.def +++ b/gcc/dbgcnt.def @@ -187,6 +187,7 @@ DEBUG_COUNTER (ira_move) DEBUG_COUNTER (ivopts_loop) DEBUG_COUNTER (lim) DEBUG_COUNTER (local_alloc_for_sched) +DEBUG_COUNTER (loop_unswitch) DEBUG_COUNTER (match) DEBUG_COUNTER (merged_ipa_icf) DEBUG_COUNTER (phiopt_edge_range) diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-10.c b/gcc/testsuite/gcc.dg/loop-unswitch-10.c new file mode 100644 index 00000000000..2ab196b527f --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-10.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +__attribute__((noipa)) +foo(double *a, double *b, double *c, double *d, double *r, int size, int order) +{ + for (int i = 0; i < size; i++) + { + double tmp, tmp2; + + switch(order) + { + case 0: + tmp = -8 * a[i]; + tmp2 = 2 * b[i]; + break; + case 1: + tmp = 3 * a[i] - 2 * b[i]; + tmp2 = 5 * b[i] - 2 * c[i]; + break; + case 2: + tmp = 9 * a[i] + 2 * b[i] + c[i]; + tmp2 = 4 * b[i] + 2 * c[i] + 8 * d[i]; + break; + case 3: + tmp = 3 * a[i] + 2 * b[i] - c[i]; + tmp2 = b[i] - 2 * c[i] + 8 * d[i]; + break; + defaut: + __builtin_unreachable (); + } + + double x = 3 * tmp + d[i] + tmp; + double y = 3.4f * tmp + d[i] + tmp2; + r[i] = x + y; + } + + return 0; +} + +#define N 16 * 1024 +double aa[N], bb[N], cc[N], dd[N], rr[N]; + +int main() +{ + for (int i = 0; i < 100 * 1000; i++) + foo (aa, bb, cc, dd, rr, N, i % 4); +} + + +/* Test that we actually unswitched something. */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 0" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 1" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 2" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 3" 1 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-11.c b/gcc/testsuite/gcc.dg/loop-unswitch-11.c new file mode 100644 index 00000000000..310e4f40423 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-11.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +foo(double *a, double *b, double *c, double *d, double *r, int size, int order) +{ + for (int i = 0; i < size; i++) + { + double tmp, tmp2; + + switch(order) + { + case 5 ... 6: + case 9: + tmp = -8 * a[i]; + tmp2 = 2 * b[i]; + break; + case 11: + tmp = 3 * a[i] - 2 * b[i]; + tmp2 = 5 * b[i] - 2 * c[i]; + break; + case 22: + tmp = 9 * a[i] + 2 * b[i] + c[i]; + tmp2 = 4 * b[i] + 2 * c[i] + 8 * d[i]; + break; + case 33: + tmp = 3 * a[i] + 2 * b[i] - c[i]; + tmp2 = b[i] - 2 * c[i] + 8 * d[i]; + break; + defaut: + __builtin_unreachable (); + } + + double x = 3 * tmp + d[i] + tmp; + double y = 3.4f * tmp + d[i] + tmp2; + r[i] = x + y; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order_.* >= 5 & order_.* <= 6 \\| order_.* == 9" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 1" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 2" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 3" 1 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-12.c b/gcc/testsuite/gcc.dg/loop-unswitch-12.c new file mode 100644 index 00000000000..adf0cdae7a8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-12.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +foo(double *a, double *b, double *c, double *d, double *r, int size, int order) +{ + for (int i = 0; i < size; i++) + { + double tmp; + + if (order == 1) + tmp = -8 * a[i]; + else + tmp = -4 * b[i]; + + double x = 3 * tmp + d[i] + tmp; + + if (order == 1) + x += 2; + + double y = 3.4f * tmp + d[i]; + r[i] = x + y; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 1" 1 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-13.c b/gcc/testsuite/gcc.dg/loop-unswitch-13.c new file mode 100644 index 00000000000..db59b881247 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-13.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +foo(double *a, double *b, double *c, double *d, double *r, int size, unsigned order) +{ + for (int i = 0; i < size; i++) + { + double tmp; + + switch (order) + { + case 0 ... 4: + tmp = -8 * a[i]; + break; + default: + tmp = -4 * b[i]; + break; + } + + double x = 3 * tmp + d[i] + tmp; + + /* This should not be unswitched as it's mutually excluded with case 0 ... 4. */ + if (order >= 5) + x += 2; + + double y = 3.4f * tmp + d[i]; + r[i] = x + y; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* <= 4" 1 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-6.c b/gcc/testsuite/gcc.dg/loop-unswitch-6.c new file mode 100644 index 00000000000..ccf3c0f8978 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-6.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized --param=max-unswitch-insns=1000" } */ + +int +__attribute__((noipa)) +foo(double *a, double *b, double *c, double *d, double *r, int size, int order) +{ + for (int i = 0; i < size; i++) + { + double tmp, tmp2; + + if (order <= 0) + tmp = 123; + + switch(order) + { + case 0: + tmp += -8 * a[i]; + tmp2 = 2 * b[i]; + break; + case 1: + tmp = 3 * a[i] - 2 * b[i]; + tmp2 = 5 * b[i] - 2 * c[i]; + break; + case 2: + tmp = 9 * a[i] + 2 * b[i] + c[i]; + tmp2 = 4 * b[i] + 2 * c[i] + 8 * d[i]; + break; + case 3: + tmp = 3 * a[i] + 2 * b[i] - c[i]; + tmp2 = b[i] - 2 * c[i] + 8 * d[i]; + break; + defaut: + __builtin_unreachable (); + } + + double x = 3 * tmp + d[i] + tmp; + double y = 3.4f * tmp + d[i] + tmp2; + r[i] = x + y; + } + + return 0; +} + +#define N 16 * 1024 +double aa[N], bb[N], cc[N], dd[N], rr[N]; + +int main() +{ + for (int i = 0; i < 100 * 1000; i++) + foo (aa, bb, cc, dd, rr, N, i % 4); +} + + +/* Test that we actually unswitched something. */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* <= 0" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 0" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 1" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 2" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 3" 1 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-7.c b/gcc/testsuite/gcc.dg/loop-unswitch-7.c new file mode 100644 index 00000000000..19282cd731b --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-7.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +foo(double *a, double *b, double *c, double *d, double *r, int size, float order) +{ + for (int i = 0; i < size; i++) + { + double tmp; + + if (order == 1.f) + tmp = -8 * a[i]; + else + tmp = -4 * b[i]; + + double x = 3 * tmp + d[i] + tmp; + + if (order == 1.f) + x += 2; + + double y = 3.4f * tmp + d[i]; + r[i] = x + y; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order.* == 1.0e" 1 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-8.c b/gcc/testsuite/gcc.dg/loop-unswitch-8.c new file mode 100644 index 00000000000..a08fd813dfb --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-8.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +foo(double *a, double *b, double *c, double *d, double *r, int size, int order) +{ + for (int i = 0; i < size; i++) + { + double tmp; + + if (order < 3) + tmp = -8 * a[i]; + else + tmp = -4 * b[i]; + + double x = 3 * tmp + d[i] + tmp; + + if (5 > order) + x += 2; + + if (order == 12345) + x *= 5; + + double y = 3.4f * tmp + d[i]; + r[i] = x + y; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order" 3 "unswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-9.c b/gcc/testsuite/gcc.dg/loop-unswitch-9.c new file mode 100644 index 00000000000..2c9fb31c7b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-unswitch-9.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ + +int +foo(double *a, double *b, double *c, double *d, double *r, int size, int order) +{ + for (int i = 0; i < size; i++) + { + double tmp; + + if (order == 1) + tmp = -8 * a[i]; + else + { + if (order == 2) + tmp = -4 * b[i]; + else + tmp = a[i]; + } + + r[i] = 3.4f * tmp + d[i]; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Unswitching loop on condition: order" 2 "unswitch" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index ebbd894ae03..4eaa5cf6c97 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -9063,11 +9063,16 @@ gimple_lv_add_condition_to_bb (basic_block first_head ATTRIBUTE_UNUSED, edge e0; /* Build new conditional expr */ + gsi = gsi_last_bb (cond_bb); + + cond_expr = force_gimple_operand_gsi_1 (&gsi, cond_expr, + is_gimple_condexpr_for_cond, + NULL_TREE, false, + GSI_CONTINUE_LINKING); new_cond_expr = gimple_build_cond_from_tree (cond_expr, NULL_TREE, NULL_TREE); /* Add new cond in cond_bb. */ - gsi = gsi_last_bb (cond_bb); gsi_insert_after (&gsi, new_cond_expr, GSI_NEW_STMT); /* Adjust edges appropriately to connect new head with first head diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c index 9fae549bf71..8757982330a 100644 --- a/gcc/tree-ssa-loop-unswitch.c +++ b/gcc/tree-ssa-loop-unswitch.c @@ -38,6 +38,10 @@ along with GCC; see the file COPYING3. If not see #include "cfghooks.h" #include "tree-ssa-loop-manip.h" #include "tree-vectorizer.h" +#include "tree-pretty-print.h" +#include "gimple-range.h" +#include "dbgcnt.h" +#include "cfganal.h" /* This file implements the loop unswitching, i.e. transformation of loops like @@ -75,9 +79,76 @@ along with GCC; see the file COPYING3. If not see tree-ssa-loop-im.c ensures that all the suitable conditions are in this shape. */ +/* A tuple that holds GIMPLE condition and value range for an unswitching + predicate. */ + +struct unswitch_predicate +{ + /* Default constructor. */ + unswitch_predicate (tree cond, tree lhs_, int edge_index_ = -1) + : condition (cond), lhs (lhs_), true_range (), false_range (), + merged_true_range (), merged_false_range (), + edge_index (edge_index_), handled (false) + {} + + /* Based on true_range, compute inverted range. */ + + inline void + init_false_edge (void) + { + false_range = true_range; + + if (!false_range.varying_p () + && !false_range.undefined_p ()) + false_range.invert (); + } + + /* Copy ranges for purpose of usage in predicate path. */ + + inline void + copy_merged_ranges () + { + merged_true_range = true_range; + merged_false_range = false_range; + } + + /* Unswitching expression. */ + tree condition; + + /* LHS of the expression. */ + tree lhs; + + /* Initial ranges (when the expression is true/false) for the expression. */ + int_range_max true_range, false_range; + + /* Modified range that is part of a predicate path. */ + int_range_max merged_true_range, merged_false_range; + + /* For switch predicates, index of the edge the predicate belongs to. */ + int edge_index; + + /* True if the predicate was already used for unswitching. */ + bool handled; +}; + +/* Cache storage for unswitch_predicate belonging to a basic block. */ +static vec> *bb_predicates = NULL; + +/* Ranger instance used in the pass. */ +static gimple_ranger *ranger = NULL; + +/* The type represents a predicate path leading to a basic block. */ +typedef vec> predicate_vector; + static class loop *tree_unswitch_loop (class loop *, basic_block, tree); -static bool tree_unswitch_single_loop (class loop *, int); -static tree tree_may_unswitch_on (basic_block, class loop *); +static bool tree_unswitch_single_loop (class loop *, int, + predicate_vector &predicate_path, + unsigned budget, + const auto_edge_flag &ignored_edge_flag); +static void +find_unswitching_predicates_for_bb (basic_block bb, class loop *loop, + vec &candidates, + const auto_edge_flag &ignored_edge_flag); static bool tree_unswitch_outer_loop (class loop *); static edge find_loop_guard (class loop *); static bool empty_bb_without_guard_p (class loop *, basic_block); @@ -85,6 +156,71 @@ static bool used_outside_loop_p (class loop *, tree); static void hoist_guard (class loop *, edge); static bool check_exit_phi (class loop *); static tree get_vop_from_header (class loop *); +static void clean_up_after_unswitching (const auto_edge_flag &); + +/* Return vector of predicates that belong to a basic block. */ + +static vec & +get_predicates_for_bb (basic_block bb) +{ + gimple *last = last_stmt (bb); + return (*bb_predicates)[last == NULL ? 0 : gimple_uid (last)]; +} + +/* Save predicates that belong to a basic block. */ + +static void +set_predicates_for_bb (basic_block bb, vec predicates) +{ + gimple_set_uid (last_stmt (bb), bb_predicates->length ()); + bb_predicates->safe_push (predicates); +} + +/* Initialize LOOP information reused during the unswitching pass. + Return total number of instructions in the loop. */ + +static unsigned +init_loop_unswitch_info (class loop *loop, + const auto_edge_flag &ignored_edge_flag) +{ + unsigned total_insns = 0; + + /* Calculate instruction count. */ + basic_block *bbs = get_loop_body (loop); + for (unsigned i = 0; i < loop->num_nodes; i++) + { + unsigned insns = 0; + for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); + gsi_next (&gsi)) + insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights); + + bbs[i]->aux = (void *)(size_t)insns; + total_insns += insns; + } + + /* Find all unswitching candidates. */ + for (unsigned i = 0; i != loop->num_nodes; i++) + { + /* Find a bb to unswitch on. */ + vec candidates; + candidates.create (1); + find_unswitching_predicates_for_bb (bbs[i], loop, candidates, + ignored_edge_flag); + if (!candidates.is_empty ()) + set_predicates_for_bb (bbs[i], candidates); + else + { + candidates.release (); + gimple *last = last_stmt (bbs[i]); + if (last != NULL) + gimple_set_uid (last, 0); + } + } + + free (bbs); + + return total_insns; +} /* Main entry point. Perform loop unswitching on all suitable loops. */ @@ -92,19 +228,52 @@ unsigned int tree_ssa_unswitch_loops (void) { bool changed = false; + auto_edge_flag ignored_edge_flag (cfun); + + ranger = enable_ranger (cfun); /* Go through all loops starting from innermost. */ for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) { if (!loop->inner) - /* Unswitch innermost loop. */ - changed |= tree_unswitch_single_loop (loop, 0); + { + bb_predicates = new vec> (); + bb_predicates->safe_push (vec ()); + + /* Unswitch innermost loop. */ + unsigned int budget + = (init_loop_unswitch_info (loop, ignored_edge_flag) + + param_max_unswitch_insns); + + predicate_vector predicate_path; + predicate_path.create (8); + changed |= tree_unswitch_single_loop (loop, 0, predicate_path, + budget, ignored_edge_flag); + predicate_path.release (); + + for (auto predlist: bb_predicates) + { + for (auto predicate: predlist) + delete predicate; + predlist.release (); + } + + bb_predicates->release (); + delete bb_predicates; + bb_predicates = NULL; + } else changed |= tree_unswitch_outer_loop (loop); } + + disable_ranger (cfun); + clear_aux_for_blocks (); + clean_up_after_unswitching (ignored_edge_flag); + if (changed) return TODO_cleanup_cfg; + return 0; } @@ -183,94 +352,481 @@ is_maybe_undefined (const tree name, gimple *stmt, class loop *loop) } /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its - basic blocks (for what it means see comments below). */ + basic blocks (for what it means see comments below). + All candidates all filled to the provided vector CANDIDATES. */ -static tree -tree_may_unswitch_on (basic_block bb, class loop *loop) +static void +find_unswitching_predicates_for_bb (basic_block bb, class loop *loop, + vec &candidates, + const auto_edge_flag &ignored_edge_flag) { gimple *last, *def; - gcond *stmt; tree cond, use; basic_block def_bb; ssa_op_iter iter; /* BB must end in a simple conditional jump. */ last = last_stmt (bb); - if (!last || gimple_code (last) != GIMPLE_COND) - return NULL_TREE; - stmt = as_a (last); - - /* To keep the things simple, we do not directly remove the conditions, - but just replace tests with 0 != 0 resp. 1 != 0. Prevent the infinite - loop where we would unswitch again on such a condition. */ - if (gimple_cond_true_p (stmt) || gimple_cond_false_p (stmt)) - return NULL_TREE; - - /* Condition must be invariant. */ - FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) + if (!last) + return; + + if (gcond *stmt = safe_dyn_cast (last)) { - def = SSA_NAME_DEF_STMT (use); + /* To keep the things simple, we do not directly remove the conditions, + but just replace tests with 0 != 0 resp. 1 != 0. Prevent the infinite + loop where we would unswitch again on such a condition. */ + if (gimple_cond_true_p (stmt) || gimple_cond_false_p (stmt)) + return; + + /* Condition must be invariant. */ + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) + { + def = SSA_NAME_DEF_STMT (use); + def_bb = gimple_bb (def); + if (def_bb + && flow_bb_inside_loop_p (loop, def_bb)) + return; + /* Unswitching on undefined values would introduce undefined + behavior that the original program might never exercise. */ + if (is_maybe_undefined (use, stmt, loop)) + return; + } + + tree lhs = gimple_cond_lhs (stmt); + tree rhs = gimple_cond_rhs (stmt); + + if (TREE_CODE (lhs) != SSA_NAME + || !TREE_CONSTANT (rhs)) + return; + + cond = build2 (gimple_cond_code (stmt), boolean_type_node, lhs, rhs); + edge edge_true, edge_false; + extract_true_false_edges_from_block (bb, &edge_true, &edge_false); + + unswitch_predicate *predicate = new unswitch_predicate (cond, lhs); + + if (irange::supports_type_p (TREE_TYPE (lhs))) + { + ranger->gori ().outgoing_edge_range_p (predicate->true_range, + edge_true, lhs, + *get_global_range_query ()); + predicate->init_false_edge (); + } + + candidates.safe_push (predicate); + } + else if (gswitch *stmt = safe_dyn_cast (last)) + { + unsigned nlabels = gimple_switch_num_labels (stmt); + tree idx = gimple_switch_index (stmt); + if (TREE_CODE (idx) != SSA_NAME + || nlabels < 1) + return; + /* Index must be invariant. */ + def = SSA_NAME_DEF_STMT (idx); def_bb = gimple_bb (def); if (def_bb && flow_bb_inside_loop_p (loop, def_bb)) - return NULL_TREE; + return; /* Unswitching on undefined values would introduce undefined behavior that the original program might never exercise. */ - if (is_maybe_undefined (use, stmt, loop)) - return NULL_TREE; + if (is_maybe_undefined (idx, stmt, loop)) + return; + + edge e; + edge_iterator ei; + unsigned edge_index = 0; + FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs) + { + if (!(e->flags & ignored_edge_flag)) + { + /* Build compound expression for all cases leading + to this edge. */ + tree expr = NULL_TREE; + for (unsigned i = 1; i < gimple_switch_num_labels (stmt); ++i) + { + tree lab = gimple_switch_label (stmt, i); + basic_block dest = label_to_block (cfun, CASE_LABEL (lab)); + edge e2 = find_edge (gimple_bb (stmt), dest); + if (e == e2) + { + tree cmp; + if (CASE_HIGH (lab) != NULL_TREE) + { + tree cmp1 = build2 (GE_EXPR, boolean_type_node, idx, + CASE_LOW (lab)); + tree cmp2 = build2 (LE_EXPR, boolean_type_node, idx, + CASE_HIGH (lab)); + cmp = build2 (BIT_AND_EXPR, boolean_type_node, cmp1, + cmp2); + } + else + cmp = build2 (EQ_EXPR, boolean_type_node, idx, + CASE_LOW (lab)); + + /* Combine the expression with the existing one. */ + if (expr == NULL_TREE) + expr = cmp; + else + expr = build2 (BIT_IOR_EXPR, boolean_type_node, expr, + cmp); + } + } + + if (expr != NULL_TREE) + { + unswitch_predicate *predicate + = new unswitch_predicate (expr, idx, edge_index); + ranger->gori ().outgoing_edge_range_p (predicate->true_range, e, + idx, *get_global_range_query ()); + /* Huge switches are not supported by Ranger. */ + if (predicate->true_range.undefined_p ()) + { + delete predicate; + return; + } + predicate->init_false_edge (); + + candidates.safe_push (predicate); + } + } + edge_index++; + } } +} - cond = build2 (gimple_cond_code (stmt), boolean_type_node, - gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); +/* Merge ranges for the last item of PREDICATE_PATH with a predicate + that shared the same LHS. */ - return cond; +static void +merge_last (predicate_vector &predicate_path) +{ + unswitch_predicate *last_predicate = predicate_path.last ().first; + + for (int i = predicate_path.length () - 2; i >= 0; i--) + { + unswitch_predicate *predicate = predicate_path[i].first; + bool true_edge = predicate_path[i].second; + + if (operand_equal_p (predicate->lhs, last_predicate->lhs, 0)) + { + irange &other = (true_edge ? predicate->merged_true_range + : predicate->merged_false_range); + last_predicate->merged_true_range.intersect (other); + last_predicate->merged_false_range.intersect (other); + return; + } + } } -/* Simplifies COND using checks in front of the entry of the LOOP. Just very - simplish (sufficient to prevent us from duplicating loop in unswitching - unnecessarily). */ +/* Add PREDICATE to PREDICATE_PATH on TRUE_EDGE. */ + +static void +add_predicate_to_path (predicate_vector &predicate_path, + unswitch_predicate *predicate, bool true_edge) +{ + predicate->copy_merged_ranges (); + predicate_path.safe_push (std::make_pair (predicate, true_edge)); + merge_last (predicate_path); +} + +bool +find_range_for_lhs (predicate_vector &predicate_path, tree lhs, + int_range_max &range) +{ + for (int i = predicate_path.length () - 1; i >= 0; i--) + { + unswitch_predicate *predicate = predicate_path[i].first; + bool true_edge = predicate_path[i].second; + + if (operand_equal_p (predicate->lhs, lhs, 0)) + { + range = (true_edge ? predicate->merged_true_range + : predicate->merged_false_range); + return true; + } + } + + return false; +} + +/* Simplifies COND using checks in front of the entry of the LOOP. + Utilize both symbolic expressions and value ranges calculated by Ranger. */ static tree -simplify_using_entry_checks (class loop *loop, tree cond) +evaluate_control_stmt_using_entry_checks (gimple *stmt, + predicate_vector &predicate_path, + const auto_edge_flag &ignored_edge_flag, + hash_set *ignored_edges) { - edge e = loop_preheader_edge (loop); - gimple *stmt; + tree lhs; + + gcond *condition = dyn_cast (stmt); + gswitch *swtch = dyn_cast (stmt); - while (1) + unswitch_predicate *last_predicate = predicate_path.last ().first; + bool true_edge = predicate_path.last ().second; + + if (condition != NULL + && (lhs = gimple_cond_lhs (stmt)) + && operand_equal_p (lhs, last_predicate->lhs, 0)) { - stmt = last_stmt (e->src); - if (stmt - && gimple_code (stmt) == GIMPLE_COND - && gimple_cond_code (stmt) == TREE_CODE (cond) - && operand_equal_p (gimple_cond_lhs (stmt), - TREE_OPERAND (cond, 0), 0) + if (gimple_cond_code (stmt) == TREE_CODE (last_predicate->condition) && operand_equal_p (gimple_cond_rhs (stmt), - TREE_OPERAND (cond, 1), 0)) - return (e->flags & EDGE_TRUE_VALUE - ? boolean_true_node - : boolean_false_node); + TREE_OPERAND (last_predicate->condition, 1), 0)) + { + edge edge_true, edge_false; + extract_true_false_edges_from_block (gimple_bb (stmt), + &edge_true, &edge_false); + if (true_edge) + { + if (ignored_edges != NULL) + ignored_edges->add (edge_true); + return boolean_true_node; + } + else + { + if (ignored_edges != NULL) + ignored_edges->add (edge_false); + return boolean_false_node; + } + } + else if (irange::supports_type_p (TREE_TYPE (lhs))) + { + int_range_max r; + int_range_max path_range; + + if (find_range_for_lhs (predicate_path, lhs, path_range) + && fold_range (r, stmt, path_range) + && r.singleton_p ()) + return r.zero_p () ? boolean_false_node : boolean_true_node; + } + } + else if (swtch != NULL) + { + unsigned nlabels = gimple_switch_num_labels (swtch); + + tree idx = gimple_switch_index (swtch); + + /* Already folded switch. */ + if (TREE_CONSTANT (idx)) + return NULL_TREE; + + tree result = NULL_TREE; + for (unsigned i = 0; i < nlabels; ++i) + { + tree lab = gimple_switch_label (swtch, i); + basic_block dest = label_to_block (cfun, CASE_LABEL (lab)); + edge e = find_edge (gimple_bb (stmt), dest); + if (e->flags & ignored_edge_flag) + { + ignored_edges->add (e); + continue; + } + + int_range_max r; + int_range_max path_range; + + ranger->gori ().outgoing_edge_range_p (r, e, idx, + *get_global_range_query ()); + if (find_range_for_lhs (predicate_path, idx, path_range)) + { + r.intersect (path_range); + if (r.undefined_p ()) + ignored_edges->add (e); + else + result = CASE_LOW (lab); + } + } + + /* Only one edge from the switch is alive. */ + unsigned edge_count = EDGE_COUNT (gimple_bb (swtch)->succs); + if (ignored_edges->elements () + 1 == edge_count) + return result; + } - if (!single_pred_p (e->src)) - return cond; + return NULL_TREE; +} + +/* Simplify LOOP based on PREDICATE_PATH where dead edges are properly + marked. */ + +static bool +simplify_loop_version (class loop *loop, predicate_vector &predicate_path, + const auto_edge_flag &ignored_edge_flag) +{ + bool changed = false; + basic_block *bbs = get_loop_body (loop); + + for (unsigned i = 0; i != loop->num_nodes; i++) + { + vec &predicates = get_predicates_for_bb (bbs[i]); + if (!predicates.is_empty ()) + { + hash_set ignored_edges; + gimple *stmt = last_stmt (bbs[i]); + tree folded = evaluate_control_stmt_using_entry_checks (stmt, + predicate_path, + ignored_edge_flag, + &ignored_edges); + + gcond *cond = dyn_cast (stmt); + gswitch *swtch = dyn_cast (stmt); + + if (cond != NULL + && folded != NULL_TREE) + { + /* Remove path. */ + if (integer_nonzerop (folded)) + gimple_cond_set_condition_from_tree (cond, boolean_true_node); + else + gimple_cond_set_condition_from_tree (cond, boolean_false_node); + + gimple_set_uid (cond, 0); + update_stmt (cond); + changed = true; + } + else if (swtch != NULL) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bbs[i]->succs) + if (ignored_edges.contains (e)) + e->flags = ignored_edge_flag; + + for (unsigned j = 0; j < predicates.length (); j++) + { + edge e = EDGE_SUCC (bbs[i], predicates[j]->edge_index); + if (ignored_edges.contains (e)) + predicates[j]->handled = true; + } + + if (folded) + { + gimple_switch_set_index (swtch, folded); + update_stmt (swtch); + changed = true; + } + } + } + } + + free (bbs); + return changed; +} + +/* Evaluate how many instructions will be executed if we unswitch + LOOP (with BBS) based on PREDICATE_PATH. + REACHABLE_FLAG is used for marking of the basic blocks. */ + +static unsigned +evaluate_insns (class loop *loop, basic_block *bbs, + predicate_vector &predicate_path, + auto_bb_flag &reachable_flag, + const auto_edge_flag &ignored_edge_flag) +{ + auto_vec worklist (loop->num_nodes); + worklist.quick_push (bbs[0]); + hash_set ignored_edges; + + while (!worklist.is_empty ()) + { + edge e; + edge_iterator ei; + int flags = 0; + basic_block bb = worklist.pop (); + + gimple *last = last_stmt (bb); + gcond *cond = last != NULL ? dyn_cast (last) : NULL; + gswitch *swtch = last != NULL ? dyn_cast (last) : NULL; + + if (cond != NULL) + { + if (gimple_cond_true_p (cond)) + flags = EDGE_FALSE_VALUE; + else if (gimple_cond_false_p (cond)) + flags = EDGE_TRUE_VALUE; + else + { + if (!get_predicates_for_bb (bb).is_empty ()) + evaluate_control_stmt_using_entry_checks (cond, + predicate_path, + ignored_edge_flag, + &ignored_edges); + } + } + else if (swtch != NULL + && !get_predicates_for_bb (bb).is_empty ()) + evaluate_control_stmt_using_entry_checks (swtch, predicate_path, + ignored_edge_flag, + &ignored_edges); + + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block dest = e->dest; - e = single_pred_edge (e->src); - if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) - return cond; + if (dest->loop_father == loop + && !(dest->flags & reachable_flag) + && !(e->flags & flags) + && !ignored_edges.contains (e)) + { + worklist.safe_push (dest); + dest->flags |= reachable_flag; + } + } } + + /* Evaluate insns. */ + unsigned size = 0; + + for (unsigned i = 0; i < loop->num_nodes; i++) + if (bbs[i]->flags & reachable_flag) + size += (size_t)bbs[i]->aux; + + /* Clear the flag from basic blocks. */ + for (unsigned i = 0; i < loop->num_nodes; i++) + bbs[i]->flags &= ~reachable_flag; + + return size; +} + +/* Evaluate how many instruction will we have if we unswitch LOOP (with BBS) + based on PREDICATE predicate (using PREDICATE_PATH). */ + +static unsigned +evaluate_loop_insns_for_predicate (class loop *loop, basic_block *bbs, + predicate_vector &predicate_path, + unswitch_predicate *predicate, + const auto_edge_flag &ignored_edge_flag) +{ + auto_bb_flag reachable_flag (cfun); + + add_predicate_to_path (predicate_path, predicate, true); + unsigned true_loop_cost = evaluate_insns (loop, bbs, predicate_path, + reachable_flag, ignored_edge_flag); + predicate_path.pop (); + + add_predicate_to_path (predicate_path, predicate, false); + unsigned false_loop_cost = evaluate_insns (loop, bbs, predicate_path, + reachable_flag, ignored_edge_flag); + predicate_path.pop (); + + return true_loop_cost + false_loop_cost; } /* Unswitch single LOOP. NUM is number of unswitchings done; we do not allow it to grow too much, it is too easy to create example on that the code would - grow exponentially. */ + grow exponentially. PREDICATE_PATH contains so far used predicates + for unswitching. BUDGET is number of instruction for which we can increase + the loop. */ static bool -tree_unswitch_single_loop (class loop *loop, int num) +tree_unswitch_single_loop (class loop *loop, int num, + predicate_vector &predicate_path, unsigned budget, + const auto_edge_flag &ignored_edge_flag) { - basic_block *bbs; + basic_block *bbs = NULL; class loop *nloop; - unsigned i, found; - tree cond = NULL_TREE; - gimple *stmt; bool changed = false; HOST_WIDE_INT iterations; @@ -288,16 +844,6 @@ tree_unswitch_single_loop (class loop *loop, int num) return false; } - /* The loop should not be too large, to limit code growth. */ - if (tree_num_loop_insns (loop, &eni_size_weights) - > (unsigned) param_max_unswitch_insns) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, loc, - "Not unswitching, loop too big\n"); - return false; - } - /* If the loop is not expected to iterate, there is no need for unswitching. */ iterations = estimated_loop_iterations_int (loop); @@ -313,168 +859,97 @@ tree_unswitch_single_loop (class loop *loop, int num) } } - i = 0; + unswitch_predicate *predicate = NULL; + basic_block bb = NULL; + if (num > param_max_unswitch_level) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc, + "Not unswitching anymore, hit max level\n"); + goto exit; + } + bbs = get_loop_body (loop); - found = loop->num_nodes; - while (1) + for (unsigned i = 0; i < loop->num_nodes; i++) { - /* Find a bb to unswitch on. */ - for (; i < loop->num_nodes; i++) - if ((cond = tree_may_unswitch_on (bbs[i], loop))) - break; - - if (i == loop->num_nodes) + vec &preds = get_predicates_for_bb (bbs[i]); + for (auto pred: preds) { - if (dump_enabled_p () - && num > param_max_unswitch_level) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc, - "Not unswitching anymore, hit max level\n"); + if (pred->handled) + continue; - if (found == loop->num_nodes) + unsigned cost + = evaluate_loop_insns_for_predicate (loop, bbs, predicate_path, + pred, ignored_edge_flag); + + /* FIXME: right now we select first candidate, but we can choose + a cheapest (best) one. */ + if (cost <= budget) { - free (bbs); - return changed; + predicate = pred; + bb = bbs[i]; + budget -= cost; + break; } - break; - } - - cond = simplify_using_entry_checks (loop, cond); - stmt = last_stmt (bbs[i]); - if (integer_nonzerop (cond)) - { - /* Remove false path. */ - gimple_cond_set_condition_from_tree (as_a (stmt), - boolean_true_node); - changed = true; - } - else if (integer_zerop (cond)) - { - /* Remove true path. */ - gimple_cond_set_condition_from_tree (as_a (stmt), - boolean_false_node); - changed = true; - } - /* Do not unswitch too much. */ - else if (num > param_max_unswitch_level) - { - i++; - continue; - } - /* In nested tree_unswitch_single_loop first optimize all conditions - using entry checks, then discover still reachable blocks in the - loop and find the condition only among those still reachable bbs. */ - else if (num != 0) - { - if (found == loop->num_nodes) - found = i; - i++; - continue; - } - else - { - found = i; - break; + else if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, loc, + "Not unswitching condition, cost too big " + "(%d insns)\n", cost); } - - update_stmt (stmt); - i++; } - if (num != 0) + if (predicate != NULL) { - basic_block *tos, *worklist; - - /* When called recursively, first do a quick discovery - of reachable bbs after the above changes and only - consider conditions in still reachable bbs. */ - tos = worklist = XNEWVEC (basic_block, loop->num_nodes); - - for (i = 0; i < loop->num_nodes; i++) - bbs[i]->flags &= ~BB_REACHABLE; - - /* Start with marking header. */ - *tos++ = bbs[0]; - bbs[0]->flags |= BB_REACHABLE; + if (!dbg_cnt (loop_unswitch)) + goto exit; - /* Iterate: find everything reachable from what we've already seen - within the same innermost loop. Don't look through false edges - if condition is always true or true edges if condition is - always false. */ - while (tos != worklist) + if (dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, + "Unswitching loop on condition: %T\n", + predicate->condition); + + predicate->handled = true; + initialize_original_copy_tables (); + /* Unswitch the loop on this condition. */ + nloop = tree_unswitch_loop (loop, bb, predicate->condition); + if (!nloop) { - basic_block b = *--tos; - edge e; - edge_iterator ei; - int flags = 0; - - if (EDGE_COUNT (b->succs) == 2) - { - gimple *stmt = last_stmt (b); - if (stmt - && gimple_code (stmt) == GIMPLE_COND) - { - gcond *cond_stmt = as_a (stmt); - if (gimple_cond_true_p (cond_stmt)) - flags = EDGE_FALSE_VALUE; - else if (gimple_cond_false_p (cond_stmt)) - flags = EDGE_TRUE_VALUE; - } - } - - FOR_EACH_EDGE (e, ei, b->succs) - { - basic_block dest = e->dest; - - if (dest->loop_father == loop - && !(dest->flags & BB_REACHABLE) - && !(e->flags & flags)) - { - *tos++ = dest; - dest->flags |= BB_REACHABLE; - } - } + free_original_copy_tables (); + goto exit; } - free (worklist); + /* Copy BB costs. */ + basic_block *bbs2 = get_loop_body (nloop); + for (unsigned i = 0; i < nloop->num_nodes; i++) + bbs2[i]->aux = get_bb_original (bbs2[i])->aux; - /* Find a bb to unswitch on. */ - for (; found < loop->num_nodes; found++) - if ((bbs[found]->flags & BB_REACHABLE) - && (cond = tree_may_unswitch_on (bbs[found], loop))) - break; - - if (found == loop->num_nodes) - { - free (bbs); - return changed; - } - } + free (bbs2); - if (dump_enabled_p ()) - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - "Unswitching loop on condition: %G\n", - last_stmt (bbs[found])); - - initialize_original_copy_tables (); - /* Unswitch the loop on this condition. */ - nloop = tree_unswitch_loop (loop, bbs[found], cond); - if (!nloop) - { + /* Update the SSA form after unswitching. */ + update_ssa (TODO_update_ssa); free_original_copy_tables (); - free (bbs); - return changed; - } - /* Update the SSA form after unswitching. */ - update_ssa (TODO_update_ssa); - free_original_copy_tables (); + /* Invoke itself on modified loops. */ + add_predicate_to_path (predicate_path, predicate, false); + changed |= simplify_loop_version (nloop, predicate_path, + ignored_edge_flag); + tree_unswitch_single_loop (nloop, num + 1, predicate_path, budget, + ignored_edge_flag); + predicate_path.pop (); + + add_predicate_to_path (predicate_path, predicate, true); + changed |= simplify_loop_version (loop, predicate_path, + ignored_edge_flag); + tree_unswitch_single_loop (loop, num + 1, predicate_path, budget, + ignored_edge_flag); + predicate_path.pop (); + changed = true; + } - /* Invoke itself on modified loops. */ - tree_unswitch_single_loop (nloop, num + 1); - tree_unswitch_single_loop (loop, num + 1); +exit: free (bbs); - return true; + return changed; } /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support @@ -491,7 +966,7 @@ tree_unswitch_loop (class loop *loop, /* Some sanity checking. */ gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on)); - gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2); + gcc_assert (EDGE_COUNT (unswitch_on->succs) >= 2); gcc_assert (loop->inner == NULL); extract_true_false_edges_from_block (unswitch_on, &edge_true, &edge_false); @@ -992,6 +1467,52 @@ check_exit_phi (class loop *loop) return true; } +/* Remove all dead cases from switches that are unswitched. */ + +static void +clean_up_after_unswitching (const auto_edge_flag &ignored_edge_flag) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + gimple *last = last_stmt (bb); + if (gswitch *stmt = safe_dyn_cast (last)) + { + unsigned nlabels = gimple_switch_num_labels (stmt); + unsigned index = 1; + for (unsigned i = 1; i < nlabels; ++i) + { + tree lab = gimple_switch_label (stmt, i); + basic_block dest = label_to_block (cfun, CASE_LABEL (lab)); + edge e = find_edge (gimple_bb (stmt), dest); + if (e == NULL) + ; /* The edge is already removed. */ + else if (e->flags & ignored_edge_flag) + remove_edge (e); + else + { + gimple_switch_set_label (stmt, index, lab); + ++index; + } + } + + if (index != nlabels) + gimple_switch_set_num_labels (stmt, index); + } + } + + /* Clean up the ignored_edge_flag from edges. */ + FOR_EACH_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) + e->flags &= ~ignored_edge_flag; + } +} + /* Loop unswitching pass. */ namespace {