From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 26104 invoked by alias); 19 Dec 2014 10:20:15 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 26091 invoked by uid 89); 19 Dec 2014 10:20:14 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.0 required=5.0 tests=AWL,BAYES_00,SPF_PASS,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: mga11.intel.com Received: from mga11.intel.com (HELO mga11.intel.com) (192.55.52.93) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 19 Dec 2014 10:20:11 +0000 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga102.fm.intel.com with ESMTP; 19 Dec 2014 02:20:10 -0800 X-ExtLoop1: 1 Received: from irsmsx104.ger.corp.intel.com ([163.33.3.159]) by fmsmga002.fm.intel.com with ESMTP; 19 Dec 2014 02:20:09 -0800 Received: from irsmsx101.ger.corp.intel.com ([169.254.1.126]) by IRSMSX104.ger.corp.intel.com ([169.254.5.209]) with mapi id 14.03.0195.001; Fri, 19 Dec 2014 10:20:08 +0000 From: "Zamyatin, Igor" To: "GCC Patches (gcc-patches@gcc.gnu.org)" CC: "ysrumyan@gmail.com" Subject: [PATCH] Fix for PR64081 in RTL loop unroller Date: Fri, 19 Dec 2014 10:24:00 -0000 Message-ID: <0EFAB2BDD0F67E4FB6CCC8B9F87D756969CF7BFC@IRSMSX101.ger.corp.intel.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 X-IsSubscribed: yes X-SW-Source: 2014-12/txt/msg01625.txt.bz2 Hi! This is an attempt to extend RTL unroller to allow cases like mentioned in = the PR -=20 namely when loop has duplicated exit blocks and back edges. Bootstrapped and regtested on x86_64, also checking wide range of benchmark= s - spec2K, spec2006, EEMBC Is it ok for trunk in case if no testing issues? Thanks, Igor Changelog: Gcc: 2014-12-19 Igor Zamyatin PR rtl-optimization/64081 * loop-iv.c (def_pred_latch_p): New function. (latch_dominating_def): Allow specific cases with non-single definitions. (iv_get_reaching_def): Likewise. (check_complex_exit_p): New function. (check_simple_exit): Use check_complex_exit_p to allow certain cases with exits not executing on any iteration. Testsuite: 2014-12-19 Igor Zamyatin PR rtl-optimization/64081 * gcc.dg/pr64081.c: New test. diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index f55cea2..d5d48f1 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -314,34 +314,67 @@ iv_analysis_loop_init (struct loop *loop) check_iv_ref_table_size (); } =20 +/* Checks that def is in an immediate predecessor of the latch block. */ + +static bool +def_pred_latch_p (df_ref d_ref) +{ + basic_block bb =3D DF_REF_BB (d_ref); + edge_iterator ei; + edge e; + + FOR_EACH_EDGE (e, ei, current_loop->latch->preds) + { + if (e->src =3D=3D bb) + return true; + } + return false; +} + /* Finds the definition of REG that dominates loop latch and stores it to DEF. Returns false if there is not a single definition - dominating the latch. If REG has no definition in loop, DEF + dominating the latch or all defs are same and they are on different + predecessors of loop latch. If REG has no definition in loop, DEF is set to NULL and true is returned. */ =20 static bool latch_dominating_def (rtx reg, df_ref *def) { df_ref single_rd =3D NULL, adef; - unsigned regno =3D REGNO (reg); + unsigned regno =3D REGNO (reg), def_num =3D 0; struct df_rd_bb_info *bb_info =3D DF_RD_BB_INFO (current_loop->latch); =20 for (adef =3D DF_REG_DEF_CHAIN (regno); adef; adef =3D DF_REF_NEXT_REG (= adef)) { + /* Initialize this to true for the very first iteration when + SINGLE_RD is NULL. */ + bool def_pred_latch =3D true; + if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef)) || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef))) continue; =20 - /* More than one reaching definition. */ + /* More than one reaching definition is ok in case definitions are + in predecessors of latch block and those definitions are the same. + Probably this could be relaxed and check for sub-dominance instead + predecessor. */ if (single_rd) - return false; - - if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef))) - return false; + { + def_num++; + if (!(def_pred_latch =3D def_pred_latch_p (adef)) + || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)), + PATTERN (DF_REF_INSN (adef)))) + return false; + } =20 single_rd =3D adef; } =20 + /* If we have single definition it has to be excuted on each iteration. = */ + if (!def_num && single_rd + && !just_once_each_iteration_p (current_loop, DF_REF_BB (single_rd))) + return false; + *def =3D single_rd; return true; } @@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def) static enum iv_grd_result iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) { - df_ref use, adef; + df_ref use, adef =3D NULL; basic_block def_bb, use_bb; rtx_insn *def_insn; - bool dom_p; + bool dom_p, dom_latch_p =3D false; =20 *def =3D NULL; if (!simple_reg_p (reg)) @@ -369,11 +402,26 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref = *def) if (!DF_REF_CHAIN (use)) return GRD_INVARIANT; =20 - /* More than one reaching def. */ + adef =3D DF_REF_CHAIN (use)->ref; + /* Having more than one reaching def is ok once all defs are in blocks w= hich + are latch's predecessors. */ if (DF_REF_CHAIN (use)->next) - return GRD_INVALID; + { + df_link* defs; + unsigned int def_num =3D 0; =20 - adef =3D DF_REF_CHAIN (use)->ref; + for (defs =3D DF_REF_CHAIN (use); defs; defs =3D defs->next) + { + if (!def_pred_latch_p (defs->ref)) + return GRD_INVALID; + def_num++; + } + /* Make sure all preds contain definitions. */ + if (def_num !=3D EDGE_COUNT (current_loop->latch->preds)) + return GRD_INVALID; + + dom_latch_p =3D true; + } =20 /* We do not handle setting only part of the register. */ if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE) @@ -396,8 +444,8 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *d= ef) =20 /* The definition does not dominate the use. This is still OK if this may be a use of a biv, i.e. if the def_bb dominates loop - latch. */ - if (just_once_each_iteration_p (current_loop, def_bb)) + latch or all defs are in latch's predecessors. */ + if (dom_latch_p || just_once_each_iteration_p (current_loop, def_bb)) return GRD_MAYBE_BIV; =20 return GRD_INVALID; @@ -2910,6 +2958,49 @@ fail: return; } =20 +/* Check whether exit is not simple but still good for further analysis. + Looks like such loops mostly are a result of jump threading. */ + +static bool +check_complex_exit_p (struct loop* loop, basic_block bb) +{ + edge e; + basic_block pred, exit; + + if (EDGE_COUNT (bb->preds) > 1) + return false; + + e =3D EDGE_PRED (bb, 0); + + pred =3D e->src; + if (EDGE_COUNT (pred->succs) !=3D 2) + return false; + + /* Predecessor must be tested (at least) once during any iteration. */ + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, pred)) + return false; + + if (EDGE_SUCC (pred, 0)->dest =3D=3D bb) + exit =3D EDGE_SUCC (pred, 1)->dest; + else + exit =3D EDGE_SUCC (pred, 0)->dest; + + /* Check that EXIT is really loop exit. */ + if (flow_bb_inside_loop_p (loop, exit)) + { + edge_iterator eei; + edge ee; + + FOR_EACH_EDGE (ee, eei, exit->succs) + { + if (!flow_bb_inside_loop_p (loop, ee->dest)) + return true; + } + } + return false; + +} + /* Checks whether E is a simple exit from LOOP and stores its description into DESC. */ =20 @@ -2929,7 +3020,8 @@ check_simple_exit (struct loop *loop, edge e, struct = niter_desc *desc) return; =20 /* It must be tested (at least) once during any iteration. */ - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)) + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb) + && !check_complex_exit_p (loop, exit_bb)) return; =20 /* It must end in a simple conditional jump. */ diff --git a/gcc/testsuite/gcc.dg/pr64081.c b/gcc/testsuite/gcc.dg/pr64081.c new file mode 100644 index 0000000..6151d00 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr64081.c @@ -0,0 +1,41 @@ +/* PR rtl-optimization/64081 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll" } */ + +long token; +long *data; +long *arr1; +long *arr2; + +int main (int argc, const char* argv[]) +{ + unsigned long i; + static long pos, start, count; + static int dir; + + pos =3D 0; + dir =3D 1; + + for (i =3D 0; i < argc; i++) + { + start =3D pos; + for (count =3D 0; count <=3D 400; count++) + { + if (token =3D=3D data[pos]) + break; + if (dir =3D=3D 1) + pos =3D arr1[pos]; + else + pos =3D arr2[pos]; + if (pos =3D=3D -1) + { + pos =3D start; + dir =3D !dir; + } + } + } + return pos + dir; +} + +/* { dg-final { scan-rtl-dump "loop unrolled 7 times" "loop2_unroll" } } */ +/* { dg-final { cleanup-rtl-dump "loop*" } } */