From: Christophe Lyon <christophe.lyon@linaro.org>
To: Richard Biener <rguenther@suse.de>
Cc: gcc-patches@gcc.gnu.org,
"richard.sandiford@arm.com" <richard.sandiford@arm.com>
Subject: Re: [PATCH] tree-optimization/109849 - missed code hoisting
Date: Wed, 24 May 2023 12:07:48 +0200 [thread overview]
Message-ID: <CAPS5khbo9a19byiXLhKyNL2-EY7FjDnRvqbPtWCriUBdnNY9_w@mail.gmail.com> (raw)
In-Reply-To: <20230523095533.B1C0713588@imap2.suse-dmz.suse.de>
[-- Attachment #1: Type: text/plain, Size: 6660 bytes --]
Hi Richard,
On Tue, 23 May 2023 at 11:55, Richard Biener via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:
> The following fixes code hoisting to properly consider ANTIC_OUT instead
> of ANTIC_IN. That's a bit expensive to re-compute but since we no
> longer iterate we're doing this only once per BB which should be
> acceptable. This avoids missing hoistings to the end of blocks where
> something in the block clobbers the hoisted value.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
>
> PR tree-optimization/109849
> * tree-ssa-pre.cc (do_hoist_insertion): Compute ANTIC_OUT
> and use that to determine what to hoist.
>
> * gcc.dg/tree-ssa/ssa-hoist-8.c: New testcase.
>
This patch causes a regression on aarch64:
gcc.target/aarch64/sve/fmla_2.c: \\tst1d found 3 times
FAIL: gcc.target/aarch64/sve/fmla_2.c scan-assembler-times \\tst1d 2
We used to generate:
mov x6, 0
mov w7, 55
whilelo p7.d, wzr, w7
.p2align 3,,7
.L2:
ld1d z30.d, p7/z, [x5, x6, lsl 3]
ld1d z31.d, p7/z, [x4, x6, lsl 3]
cmpne p6.d, p7/z, z30.d, #0
ld1d z30.d, p7/z, [x3, x6, lsl 3]
ld1d z29.d, p6/z, [x2, x6, lsl 3]
movprfx z28, z30
fmla z28.d, p6/m, z31.d, z29.d
fmla z31.d, p6/m, z30.d, z29.d
st1d z28.d, p7, [x1, x6, lsl 3]
st1d z31.d, p7, [x0, x6, lsl 3]
incd x6
whilelo p7.d, w6, w7
b.any .L2
But now:
mov x6, 0
mov w7, 55
ptrue p4.b, all
whilelo p7.d, wzr, w7
.p2align 3,,7
.L2:
ld1d z30.d, p7/z, [x5, x6, lsl 3]
ld1d z31.d, p7/z, [x4, x6, lsl 3]
cmpne p6.d, p7/z, z30.d, #0
cmpeq p5.d, p7/z, z30.d, #0
ld1d z29.d, p6/z, [x2, x6, lsl 3]
ld1d z28.d, p6/z, [x3, x6, lsl 3]
ld1d z30.d, p5/z, [x3, x6, lsl 3]
movprfx z27, z31
fmla z27.d, p4/m, z29.d, z28.d
movprfx z30.d, p6/m, z28.d
fmla z30.d, p6/m, z31.d, z29.d
st1d z27.d, p6, [x0, x6, lsl 3]
st1d z30.d, p7, [x1, x6, lsl 3]
st1d z31.d, p5, [x0, x6, lsl 3]
incd x6
whilelo p7.d, w6, w7
b.any .L2
Christophe
---
> gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c | 22 ++++++++++
> gcc/tree-ssa-pre.cc | 48 ++++++++++++++++++---
> 2 files changed, 64 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
> new file mode 100644
> index 00000000000..66bb48e0dc1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-pre-stats" } */
> +
> +int mem;
> +void foo ();
> +int bar (int flag)
> +{
> + int res;
> + foo ();
> + /* Hoist the load of mem here even though foo () clobbers it. */
> + if (flag)
> + res = mem;
> + else
> + {
> + res = mem;
> + mem = 2;
> + }
> + return res;
> +}
> +
> +/* { dg-final { scan-tree-dump "HOIST inserted: 1" "pre" } } */
> +/* { dg-final { scan-tree-dump-times " = mem;" 1 "pre" } } */
> diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
> index 1f7eea93c16..d56431b4145 100644
> --- a/gcc/tree-ssa-pre.cc
> +++ b/gcc/tree-ssa-pre.cc
> @@ -3622,19 +3622,51 @@ do_hoist_insertion (basic_block block)
> && stmt_ends_bb_p (gsi_stmt (last)))
> return false;
>
> - /* Compute the set of hoistable expressions from ANTIC_IN. First
> compute
> + /* We have multiple successors, compute ANTIC_OUT by taking the
> intersection
> + of all of ANTIC_IN translating through PHI nodes. Note we do not
> have to
> + worry about iteration stability here so just intersect the
> expression sets
> + as well. This is a simplification of what we do in
> compute_antic_aux. */
> + bitmap_set_t ANTIC_OUT = bitmap_set_new ();
> + bool first = true;
> + FOR_EACH_EDGE (e, ei, block->succs)
> + {
> + if (first)
> + {
> + phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
> + first = false;
> + }
> + else if (!gimple_seq_empty_p (phi_nodes (e->dest)))
> + {
> + bitmap_set_t tmp = bitmap_set_new ();
> + phi_translate_set (tmp, ANTIC_IN (e->dest), e);
> + bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
> + bitmap_and_into (&ANTIC_OUT->expressions, &tmp->expressions);
> + bitmap_set_free (tmp);
> + }
> + else
> + {
> + bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN
> (e->dest)->values);
> + bitmap_and_into (&ANTIC_OUT->expressions,
> + &ANTIC_IN (e->dest)->expressions);
> + }
> + }
> +
> + /* Compute the set of hoistable expressions from ANTIC_OUT. First
> compute
> hoistable values. */
> bitmap_set hoistable_set;
>
> - /* A hoistable value must be in ANTIC_IN(block)
> + /* A hoistable value must be in ANTIC_OUT(block)
> but not in AVAIL_OUT(BLOCK). */
> bitmap_initialize (&hoistable_set.values, &grand_bitmap_obstack);
> bitmap_and_compl (&hoistable_set.values,
> - &ANTIC_IN (block)->values, &AVAIL_OUT (block)->values);
> + &ANTIC_OUT->values, &AVAIL_OUT (block)->values);
>
> /* Short-cut for a common case: hoistable_set is empty. */
> if (bitmap_empty_p (&hoistable_set.values))
> - return false;
> + {
> + bitmap_set_free (ANTIC_OUT);
> + return false;
> + }
>
> /* Compute which of the hoistable values is in AVAIL_OUT of
> at least one of the successors of BLOCK. */
> @@ -3652,11 +3684,14 @@ do_hoist_insertion (basic_block block)
>
> /* Short-cut for a common case: availout_in_some is empty. */
> if (bitmap_empty_p (&availout_in_some))
> - return false;
> + {
> + bitmap_set_free (ANTIC_OUT);
> + return false;
> + }
>
> /* Hack hoitable_set in-place so we can use
> sorted_array_from_bitmap_set. */
> bitmap_move (&hoistable_set.values, &availout_in_some);
> - hoistable_set.expressions = ANTIC_IN (block)->expressions;
> + hoistable_set.expressions = ANTIC_OUT->expressions;
>
> /* Now finally construct the topological-ordered expression set. */
> vec<pre_expr> exprs = sorted_array_from_bitmap_set (&hoistable_set);
> @@ -3731,6 +3766,7 @@ do_hoist_insertion (basic_block block)
> }
>
> exprs.release ();
> + bitmap_set_free (ANTIC_OUT);
>
> return new_stuff;
> }
> --
> 2.35.3
>
next prev parent reply other threads:[~2023-05-24 10:07 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-23 9:55 Richard Biener
2023-05-24 10:07 ` Christophe Lyon [this message]
2023-05-24 10:41 ` Richard Biener
2023-05-24 12:45 ` Christophe Lyon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAPS5khbo9a19byiXLhKyNL2-EY7FjDnRvqbPtWCriUBdnNY9_w@mail.gmail.com \
--to=christophe.lyon@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
--cc=rguenther@suse.de \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).