From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2136) id 904A7385840B; Tue, 5 Oct 2021 16:25:32 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 904A7385840B MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Aldy Hernandez To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-4195] Loosen loop crossing restriction in threader. X-Act-Checkin: gcc X-Git-Author: Aldy Hernandez X-Git-Refname: refs/heads/master X-Git-Oldrev: 1f51e9af7b615838424214e6aaea0de793cb10fe X-Git-Newrev: ec0124e0acb556cdf5dba0e8d0ca6b69d9537fcc Message-Id: <20211005162532.904A7385840B@sourceware.org> Date: Tue, 5 Oct 2021 16:25:32 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 05 Oct 2021 16:25:32 -0000 https://gcc.gnu.org/g:ec0124e0acb556cdf5dba0e8d0ca6b69d9537fcc commit r12-4195-gec0124e0acb556cdf5dba0e8d0ca6b69d9537fcc Author: Aldy Hernandez Date: Tue Oct 5 15:03:34 2021 +0200 Loosen loop crossing restriction in threader. Crossing loops is generally discouraged from the threader, but we can make an exception when we don't cross the latch or enter another loop, since this is just an early exit out of the loop. In fact, the whole threaded path is logically outside the loop. This has nice secondary effects. For example, objects on the threaded path will no longer necessarily be live throughout the loop, so we can get register allocation improvements. The threaded path can physically move outside the loop resulting in better icache efficiency, etc. Tested on x86-64 Linux, and on a visium-elf cross making sure that the following tests do not have an abort in the final assembly: gcc.c-torture/execute/960218-1.c gcc.c-torture/execute/visium-pending-4.c gcc.c-torture/execute/pr58209.c gcc/ChangeLog: * tree-ssa-threadupdate.c (jt_path_registry::cancel_invalid_paths): Loosen restrictions gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/ssa-thread-valid.c: New test. Diff: --- gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-valid.c | 39 +++++++++++++++++++++++ gcc/tree-ssa-threadupdate.c | 40 +++++++++++++++++------- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-valid.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-valid.c new file mode 100644 index 00000000000..7adca97cc2b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-valid.c @@ -0,0 +1,39 @@ +// { dg-do compile } +// { dg-options "-O2 -fgimple -fdump-statistics" } + +// This is a collection of threadable paths. To simplify maintenance, +// there should only be one threadable path per function. + +int global; + +// The thread from 3->4->5 crosses loops but is allowed because it +// never crosses the latch (BB3) and is just an early exit out of the +// loop. +int __GIMPLE (ssa) +foo1 (int x) +{ + int D_1420; + int a; + + __BB(2): + a_4 = ~x_3(D); + goto __BB4; + + // Latch. + __BB(3): + global = a_1; + goto __BB4; + + __BB(4,loop_header(1)): + a_1 = __PHI (__BB2: a_4, __BB3: 0); + if (a_1 != 0) + goto __BB3; + else + goto __BB5; + + __BB(5): + return; + +} + +// { dg-final { scan-tree-dump "Jumps threaded\" \"foo1\" 1" "statistics" } } diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index dcabfdb30d2..32ce1e3af40 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -2766,10 +2766,17 @@ bool jt_path_registry::cancel_invalid_paths (vec &path) { gcc_checking_assert (!path.is_empty ()); - edge taken_edge = path[path.length () - 1]->e; - loop_p loop = taken_edge->src->loop_father; + edge entry = path[0]->e; + edge exit = path[path.length () - 1]->e; bool seen_latch = false; - bool path_crosses_loops = false; + int loops_crossed = 0; + bool crossed_latch = false; + // Use ->dest here instead of ->src to ignore the first block. The + // first block is allowed to be in a different loop, since it'll be + // redirected. See similar comment in profitable_path_p: "we don't + // care about that block...". + loop_p loop = entry->dest->loop_father; + loop_p curr_loop = loop; for (unsigned int i = 0; i < path.length (); i++) { @@ -2784,19 +2791,30 @@ jt_path_registry::cancel_invalid_paths (vec &path) } if (loop->latch == e->src || loop->latch == e->dest) - seen_latch = true; + { + seen_latch = true; + // Like seen_latch, but excludes the first block. + if (e->src != entry->src) + crossed_latch = true; + } - // The first entry represents the block with an outgoing edge - // that we will redirect to the jump threading path. Thus we - // don't care about that block's loop father. - if ((i > 0 && e->src->loop_father != loop) - || e->dest->loop_father != loop) - path_crosses_loops = true; + if (e->dest->loop_father != curr_loop) + { + curr_loop = e->dest->loop_father; + ++loops_crossed; + } if (flag_checking && !m_backedge_threads) gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0); } + // If we crossed a loop into an outer loop without crossing the + // latch, this is just an early exit from the loop. + if (loops_crossed == 1 + && !crossed_latch + && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father)) + return false; + if (cfun->curr_properties & PROP_loop_opts_done) return false; @@ -2806,7 +2824,7 @@ jt_path_registry::cancel_invalid_paths (vec &path) "would create non-empty latch"); return true; } - if (path_crosses_loops) + if (loops_crossed) { cancel_thread (&path, "Path crosses loops"); return true;