diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6667cd0..12f1dfd 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1912,6 +1912,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_PREDICT_DOLOOP_P #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p +#undef TARGET_HAVE_COUNT_REG_DECR_P +#define TARGET_HAVE_COUNT_REG_DECR_P rs6000_have_count_reg_decr_p + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv @@ -39437,6 +39440,14 @@ rs6000_predict_doloop_p (struct loop *loop) return true; } +/* Return true if count register for branch is supported. */ + +static bool +rs6000_have_count_reg_decr_p () +{ + return flag_branch_on_count_reg; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c2aa4d0..46e488f 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11618,6 +11618,12 @@ loops, and will help ivopts to make some decisions. The default version of this hook returns false. @end deftypefn +@deftypefn {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P (void) +Return true if the target supports hardware count register for decrement +and branch. +The default version of this hook returns false. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int @var{&iterations}, const widest_int @var{&iterations_max}, unsigned int @var{loop_depth}, bool @var{entered_at_top}) Return true if it is possible to use low-overhead loops (@code{doloop_end} and @code{doloop_begin}) for a particular loop. @var{iterations} gives the diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index b4d57b8..5f43b27 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7946,6 +7946,8 @@ to by @var{ce_info}. @hook TARGET_PREDICT_DOLOOP_P +@hook TARGET_HAVE_COUNT_REG_DECR_P + @hook TARGET_CAN_USE_DOLOOP_P @hook TARGET_INVALID_WITHIN_DOLOOP diff --git a/gcc/target.def b/gcc/target.def index 71b6972..ec15a6d 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4247,6 +4247,14 @@ The default version of this hook returns false.", default_predict_doloop_p) DEFHOOK +(have_count_reg_decr_p, + "Return true if the target supports hardware count register for decrement\n\ +and branch.\n\ +The default version of this hook returns false.", + bool, (void), + hook_bool_void_false) + +DEFHOOK (can_use_doloop_p, "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\ and @code{doloop_begin}) for a particular loop. @var{iterations} gives the\n\ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c index 7d5859b..71d7f67 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c @@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n) while (i < n); } -/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */ -/* { dg-final { scan-tree-dump-times "PHI vuses; }; @@ -612,6 +614,9 @@ struct ivopts_data /* Whether the loop body can only be exited via single exit. */ bool loop_single_exit_p; + + /* Whether the loop has doloop comparison use. */ + bool doloop_use_p; }; /* An assignment of iv candidates to uses. */ @@ -1528,6 +1533,7 @@ record_group (struct ivopts_data *data, enum use_type type) group->type = type; group->related_cands = BITMAP_ALLOC (NULL); group->vuses.create (1); + group->bind_cand = NULL; data->vgroups.safe_push (group); return group; @@ -3724,7 +3730,7 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data) Some RTL specific checks seems unable to be checked in gimple, if any new checks or easy checks _are_ missing here, please add them. */ -static bool ATTRIBUTE_UNUSED +static bool generic_predict_doloop_p (struct ivopts_data *data) { struct loop *loop = data->current_loop; @@ -5291,6 +5297,21 @@ determine_group_iv_cost_cond (struct ivopts_data *data, return !cost.infinite_cost_p (); } +/* Set no cost for pair between doloop iv use GROUP and iv cand CAND. */ + +static bool +adjust_group_iv_cost_for_doloop (struct ivopts_data *data, + struct iv_group *group, struct iv_cand *cand) +{ + struct cost_pair *cp = get_group_iv_cost (data, group, cand); + if (!cp) + set_group_iv_cost (data, group, cand, no_cost, NULL, NULL_TREE, ERROR_MARK, + NULL); + else + cp->cost = no_cost; + return true; +} + /* Determines cost of computing uses in GROUP with CAND. Returns false if USE cannot be represented with CAND. */ @@ -5308,7 +5329,12 @@ determine_group_iv_cost (struct ivopts_data *data, return determine_group_iv_cost_address (data, group, cand); case USE_COMPARE: - return determine_group_iv_cost_cond (data, group, cand); + { + bool finite_cost_p = determine_group_iv_cost_cond (data, group, cand); + if (data->doloop_use_p && group->bind_cand) + finite_cost_p = adjust_group_iv_cost_for_doloop (data, group, cand); + return finite_cost_p; + } default: gcc_unreachable (); @@ -6723,6 +6749,29 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) return set; } +/* For doloop use, if the algothrim selects some candidate which invalid for + later rewrite, fix it up with bind_cand. */ + +static void +fixup_doloop_groups (struct ivopts_data *data, struct iv_ca *set) +{ + for (unsigned i = 0; i < data->vgroups.length (); i++) + { + struct iv_group *group = data->vgroups[i]; + if (group->bind_cand) + { + struct cost_pair *cp = iv_ca_cand_for_group (set, group); + gcc_assert (cp); + if (cp->cand != group->bind_cand && cp->value == NULL_TREE) + { + struct cost_pair *bind_cp + = get_group_iv_cost (data, group, group->bind_cand); + iv_ca_set_cp (data, set, group, bind_cp); + } + } + } +} + static struct iv_ca * find_optimal_iv_set (struct ivopts_data *data) { @@ -6760,6 +6809,9 @@ find_optimal_iv_set (struct ivopts_data *data) else if (origset) iv_ca_free (&origset); + if (data->doloop_use_p) + fixup_doloop_groups (data, set); + for (i = 0; i < data->vgroups.length (); i++) { struct iv_group *group = data->vgroups[i]; @@ -7568,6 +7620,72 @@ determine_scaling_factor (struct ivopts_data *data, basic_block *body) } } +/* Find doloop comparison use and set its related bind_cand. We adjust the + doloop use group cost against various IV cands, it's possible to assign + some cost like zero rather than original inifite cost. The point is to + give more chances to consider other IV cands instead of BIV. The cost + originally given on doloop use can affect optimal decision because it can + become dead and get eliminated but considered too much here. + + So it's possible that doloop use is assigned one invalid IV cand to rewrite. + In this case, we need bind_cand to fix up. Even if the bind_cand doesn't + exist in final iv_ca set, it won't affect optimal decision since it gets + eliminated along with doloop use. */ + +static bool +find_doloop_use_and_its_bind (struct ivopts_data *data) +{ + struct loop *loop = data->current_loop; + + for (unsigned i = 0; i < data->vgroups.length (); i++) + { + struct iv_group *group = data->vgroups[i]; + if (group->type == USE_COMPARE) + { + gcc_assert (group->vuses.length () == 1); + struct iv_use *use = group->vuses[0]; + gimple *stmt = use->stmt; + if (gimple_code (stmt) == GIMPLE_COND) + { + basic_block bb = gimple_bb (stmt); + edge true_edge, false_edge; + extract_true_false_edges_from_block (bb, &true_edge, &false_edge); + /* This comparison is used for loop latch. Require latch is empty + for now. */ + if ((loop->latch == true_edge->dest + || loop->latch == false_edge->dest) + && empty_block_p (loop->latch)) + { + for (unsigned j = 0; j < data->vcands.length (); j++) + { + if (bitmap_bit_p (group->related_cands, j)) + { + struct iv_cand *cand = data->vcands[j]; + tree op = use->iv->ssa_name; + if (op == cand->var_before || op == cand->var_after) + { + group->bind_cand = cand; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Doloop cmp iv use: "); + print_gimple_stmt (dump_file, stmt, + TDF_DETAILS); + dump_cand (dump_file, cand); + } + break; + } + } + } + if (group->bind_cand) + return true; + } + } + } + } + + return false; +} + /* Optimizes the LOOP. Returns true if anything changed. */ static bool @@ -7580,6 +7698,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, basic_block *body; gcc_assert (!data->niters); + data->doloop_use_p = false; data->current_loop = loop; data->loop_loc = find_loop_location (loop).get_location_t (); data->speed = optimize_loop_for_speed_p (loop); @@ -7625,6 +7744,18 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, /* Finds candidates for the induction variables (item 2). */ find_iv_candidates (data); + if (targetm.have_count_reg_decr_p () && generic_predict_doloop_p (data)) + { + data->doloop_use_p = find_doloop_use_and_its_bind (data); + if (data->doloop_use_p && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "Predict loop %d can perform doloop optimization later.\n", + loop->num); + flow_loop_dump (loop, dump_file, NULL, 1); + } + } + /* Calculates the costs (item 3, part 1). */ determine_iv_costs (data); determine_group_iv_costs (data);