* [PATCH] rtl-optimization/96812 - remap dependence info on RTL loop unrolling
@ 2020-08-27 12:37 Richard Biener
2020-09-01 7:09 ` Richard Biener
0 siblings, 1 reply; 2+ messages in thread
From: Richard Biener @ 2020-08-27 12:37 UTC (permalink / raw)
To: gcc-patches
This carries over the PR87609 fix also to RTL loop unrolling. The
gcc.dg/torture/pr90328.c testcase otherwise is miscompiled with
the tree-ssa-address.c hunk (or alternatively with -fno-ivopts
on master). I've tried to find the correct abstraction and
adjusted two other duplicate_insn_chain users for which I do not
have testcases. There may be other insn-chain copying routines
that could be affected but hopefully most appropriately go through
CFG hooks.
Bootstrap / regtest running on x86_64-unknown-linux-gnu.
Any comments?
Thanks,
Richard.
2020-08-27 Richard Biener <rguenther@suse.de>
PR rtl-optimization/96812
* tree-ssa-address.c (copy_ref_info): Also copy dependence info.
* cfgrtl.h (duplicate_insn_chain): Adjust prototype.
* cfgrtl.c (duplicate_insn_chain): Remap dependence info
if requested.
(cfg_layout_duplicate_bb): Make sure we remap dependence info.
* modulo-sched.c (duplicate_insns_of_cycles): Remap dependence
info.
(generate_prolog_epilog): Adjust.
* config/c6x/c6x.c (hwloop_optimize): Remap dependence info.
---
gcc/cfgrtl.c | 60 ++++++++++++++++++++++++++++++++++++++----
gcc/cfgrtl.h | 3 ++-
gcc/config/c6x/c6x.c | 3 ++-
gcc/modulo-sched.c | 10 ++++---
gcc/tree-ssa-address.c | 10 +++++++
5 files changed, 75 insertions(+), 11 deletions(-)
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 03fa688fed6..eb5ccd42ed7 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -61,6 +61,8 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "tree-pass.h"
#include "print-rtl.h"
+#include "rtl-iter.h"
+#include "gimplify.h"
/* Disable warnings about missing quoting in GCC diagnostics. */
#if __GNUC__ >= 10
@@ -4199,7 +4201,8 @@ cfg_layout_can_duplicate_bb_p (const_basic_block bb)
}
rtx_insn *
-duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
+duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
+ class loop *loop, copy_bb_data *id)
{
rtx_insn *insn, *next, *copy;
rtx_note *last;
@@ -4228,6 +4231,51 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
&& ANY_RETURN_P (JUMP_LABEL (insn)))
JUMP_LABEL (copy) = JUMP_LABEL (insn);
maybe_copy_prologue_epilogue_insn (insn, copy);
+ /* If requested remap dependence info of cliques brought in
+ via inlining. */
+ if (id)
+ {
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+ if (MEM_P (*iter) && MEM_EXPR (*iter))
+ {
+ tree op = MEM_EXPR (*iter);
+ if (TREE_CODE (op) == WITH_SIZE_EXPR)
+ op = TREE_OPERAND (op, 0);
+ while (handled_component_p (op))
+ op = TREE_OPERAND (op, 0);
+ if ((TREE_CODE (op) == MEM_REF
+ || TREE_CODE (op) == TARGET_MEM_REF)
+ && MR_DEPENDENCE_CLIQUE (op) > 1
+ && (!loop
+ || (MR_DEPENDENCE_CLIQUE (op)
+ != loop->owned_clique)))
+ {
+ if (!id->dependence_map)
+ id->dependence_map = new hash_map<dependence_hash,
+ unsigned short>;
+ bool existed;
+ unsigned short &newc = id->dependence_map->get_or_insert
+ (MR_DEPENDENCE_CLIQUE (op), &existed);
+ if (!existed)
+ {
+ gcc_assert
+ (MR_DEPENDENCE_CLIQUE (op) <= cfun->last_clique);
+ newc = ++cfun->last_clique;
+ }
+ /* We cannot adjust MR_DEPENDENCE_CLIQUE in-place
+ since MEM_EXPR is shared so make a copy and
+ walk to the subtree again. */
+ tree new_expr = unshare_expr (MEM_EXPR (*iter));
+ if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
+ new_expr = TREE_OPERAND (new_expr, 0);
+ while (handled_component_p (new_expr))
+ new_expr = TREE_OPERAND (new_expr, 0);
+ MR_DEPENDENCE_CLIQUE (new_expr) = newc;
+ set_mem_expr (const_cast <rtx> (*iter), new_expr);
+ }
+ }
+ }
break;
case JUMP_TABLE_DATA:
@@ -4292,12 +4340,14 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
/* Create a duplicate of the basic block BB. */
static basic_block
-cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
+cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *id)
{
rtx_insn *insn;
basic_block new_bb;
- insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb));
+ class loop *loop = (id && current_loops) ? bb->loop_father : NULL;
+
+ insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb), loop, id);
new_bb = create_basic_block (insn,
insn ? get_last_insn () : NULL,
EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
@@ -4308,7 +4358,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
insn = BB_HEADER (bb);
while (NEXT_INSN (insn))
insn = NEXT_INSN (insn);
- insn = duplicate_insn_chain (BB_HEADER (bb), insn);
+ insn = duplicate_insn_chain (BB_HEADER (bb), insn, loop, id);
if (insn)
BB_HEADER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
}
@@ -4318,7 +4368,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
insn = BB_FOOTER (bb);
while (NEXT_INSN (insn))
insn = NEXT_INSN (insn);
- insn = duplicate_insn_chain (BB_FOOTER (bb), insn);
+ insn = duplicate_insn_chain (BB_FOOTER (bb), insn, loop, id);
if (insn)
BB_FOOTER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
}
diff --git a/gcc/cfgrtl.h b/gcc/cfgrtl.h
index ae03e82650d..ae62d6cf05c 100644
--- a/gcc/cfgrtl.h
+++ b/gcc/cfgrtl.h
@@ -49,7 +49,8 @@ extern bool purge_all_dead_edges (void);
extern bool fixup_abnormal_edges (void);
extern rtx_insn *unlink_insn_chain (rtx_insn *, rtx_insn *);
extern void relink_block_chain (bool);
-extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *);
+extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *,
+ class loop *, class copy_bb_data *);
extern void cfg_layout_initialize (int);
extern void cfg_layout_finalize (void);
extern void break_superblocks (void);
diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
index 6bd793bdd21..9aa7ef0620c 100644
--- a/gcc/config/c6x/c6x.c
+++ b/gcc/config/c6x/c6x.c
@@ -5600,7 +5600,8 @@ hwloop_optimize (hwloop_info loop)
int j;
rtx_insn *this_iter;
- this_iter = duplicate_insn_chain (head_insn, tail_insn);
+ copy_bb_data id;
+ this_iter = duplicate_insn_chain (head_insn, tail_insn, NULL, &id);
j = 0;
while (this_iter)
{
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 86701e05550..6f699a874e3 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -1085,10 +1085,11 @@ optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
- int to_stage, rtx count_reg)
+ int to_stage, rtx count_reg, class loop *loop)
{
int row;
ps_insn_ptr ps_ij;
+ copy_bb_data id;
for (row = 0; row < ps->ii; row++)
for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
@@ -1113,7 +1114,8 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
if (from_stage <= last_u && to_stage >= first_u)
{
if (u < ps->g->num_nodes)
- duplicate_insn_chain (ps_first_note (ps, u), u_insn);
+ duplicate_insn_chain (ps_first_note (ps, u), u_insn,
+ loop, &id);
else
emit_insn (copy_rtx (PATTERN (u_insn)));
}
@@ -1151,7 +1153,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
}
for (i = 0; i < last_stage; i++)
- duplicate_insns_of_cycles (ps, 0, i, count_reg);
+ duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
/* Put the prolog on the entry edge. */
e = loop_preheader_edge (loop);
@@ -1165,7 +1167,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
start_sequence ();
for (i = 0; i < last_stage; i++)
- duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
+ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
/* Put the epilogue on the exit edge. */
gcc_assert (single_exit (loop));
diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 93eae192212..baf495a00bc 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -1044,6 +1044,16 @@ copy_ref_info (tree new_ref, tree old_ref)
}
}
+ /* We can transfer dependence info. */
+ if (!MR_DEPENDENCE_CLIQUE (new_ref)
+ && (TREE_CODE (base) == MEM_REF
+ || TREE_CODE (base) == TARGET_MEM_REF)
+ && MR_DEPENDENCE_CLIQUE (base))
+ {
+ MR_DEPENDENCE_CLIQUE (new_ref) = MR_DEPENDENCE_CLIQUE (base);
+ MR_DEPENDENCE_BASE (new_ref) = MR_DEPENDENCE_BASE (base);
+ }
+
/* And alignment info. Note we cannot transfer misalignment info
since that sits on the SSA name but this is flow-sensitive info
which we cannot transfer in this generic routine. */
--
2.26.2
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] rtl-optimization/96812 - remap dependence info on RTL loop unrolling
2020-08-27 12:37 [PATCH] rtl-optimization/96812 - remap dependence info on RTL loop unrolling Richard Biener
@ 2020-09-01 7:09 ` Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2020-09-01 7:09 UTC (permalink / raw)
To: gcc-patches
On Thu, 27 Aug 2020, Richard Biener wrote:
> This carries over the PR87609 fix also to RTL loop unrolling. The
> gcc.dg/torture/pr90328.c testcase otherwise is miscompiled with
> the tree-ssa-address.c hunk (or alternatively with -fno-ivopts
> on master). I've tried to find the correct abstraction and
> adjusted two other duplicate_insn_chain users for which I do not
> have testcases. There may be other insn-chain copying routines
> that could be affected but hopefully most appropriately go through
> CFG hooks.
>
> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
>
> Any comments?
None, so I'll push this later today after re-boostrap/test.
Richard.
> Thanks,
> Richard.
>
> 2020-08-27 Richard Biener <rguenther@suse.de>
>
> PR rtl-optimization/96812
> * tree-ssa-address.c (copy_ref_info): Also copy dependence info.
> * cfgrtl.h (duplicate_insn_chain): Adjust prototype.
> * cfgrtl.c (duplicate_insn_chain): Remap dependence info
> if requested.
> (cfg_layout_duplicate_bb): Make sure we remap dependence info.
> * modulo-sched.c (duplicate_insns_of_cycles): Remap dependence
> info.
> (generate_prolog_epilog): Adjust.
> * config/c6x/c6x.c (hwloop_optimize): Remap dependence info.
> ---
> gcc/cfgrtl.c | 60 ++++++++++++++++++++++++++++++++++++++----
> gcc/cfgrtl.h | 3 ++-
> gcc/config/c6x/c6x.c | 3 ++-
> gcc/modulo-sched.c | 10 ++++---
> gcc/tree-ssa-address.c | 10 +++++++
> 5 files changed, 75 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
> index 03fa688fed6..eb5ccd42ed7 100644
> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3. If not see
> #include "cfgloop.h"
> #include "tree-pass.h"
> #include "print-rtl.h"
> +#include "rtl-iter.h"
> +#include "gimplify.h"
>
> /* Disable warnings about missing quoting in GCC diagnostics. */
> #if __GNUC__ >= 10
> @@ -4199,7 +4201,8 @@ cfg_layout_can_duplicate_bb_p (const_basic_block bb)
> }
>
> rtx_insn *
> -duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
> +duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
> + class loop *loop, copy_bb_data *id)
> {
> rtx_insn *insn, *next, *copy;
> rtx_note *last;
> @@ -4228,6 +4231,51 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
> && ANY_RETURN_P (JUMP_LABEL (insn)))
> JUMP_LABEL (copy) = JUMP_LABEL (insn);
> maybe_copy_prologue_epilogue_insn (insn, copy);
> + /* If requested remap dependence info of cliques brought in
> + via inlining. */
> + if (id)
> + {
> + subrtx_iterator::array_type array;
> + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> + if (MEM_P (*iter) && MEM_EXPR (*iter))
> + {
> + tree op = MEM_EXPR (*iter);
> + if (TREE_CODE (op) == WITH_SIZE_EXPR)
> + op = TREE_OPERAND (op, 0);
> + while (handled_component_p (op))
> + op = TREE_OPERAND (op, 0);
> + if ((TREE_CODE (op) == MEM_REF
> + || TREE_CODE (op) == TARGET_MEM_REF)
> + && MR_DEPENDENCE_CLIQUE (op) > 1
> + && (!loop
> + || (MR_DEPENDENCE_CLIQUE (op)
> + != loop->owned_clique)))
> + {
> + if (!id->dependence_map)
> + id->dependence_map = new hash_map<dependence_hash,
> + unsigned short>;
> + bool existed;
> + unsigned short &newc = id->dependence_map->get_or_insert
> + (MR_DEPENDENCE_CLIQUE (op), &existed);
> + if (!existed)
> + {
> + gcc_assert
> + (MR_DEPENDENCE_CLIQUE (op) <= cfun->last_clique);
> + newc = ++cfun->last_clique;
> + }
> + /* We cannot adjust MR_DEPENDENCE_CLIQUE in-place
> + since MEM_EXPR is shared so make a copy and
> + walk to the subtree again. */
> + tree new_expr = unshare_expr (MEM_EXPR (*iter));
> + if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
> + new_expr = TREE_OPERAND (new_expr, 0);
> + while (handled_component_p (new_expr))
> + new_expr = TREE_OPERAND (new_expr, 0);
> + MR_DEPENDENCE_CLIQUE (new_expr) = newc;
> + set_mem_expr (const_cast <rtx> (*iter), new_expr);
> + }
> + }
> + }
> break;
>
> case JUMP_TABLE_DATA:
> @@ -4292,12 +4340,14 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
> /* Create a duplicate of the basic block BB. */
>
> static basic_block
> -cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
> +cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *id)
> {
> rtx_insn *insn;
> basic_block new_bb;
>
> - insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb));
> + class loop *loop = (id && current_loops) ? bb->loop_father : NULL;
> +
> + insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb), loop, id);
> new_bb = create_basic_block (insn,
> insn ? get_last_insn () : NULL,
> EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
> @@ -4308,7 +4358,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
> insn = BB_HEADER (bb);
> while (NEXT_INSN (insn))
> insn = NEXT_INSN (insn);
> - insn = duplicate_insn_chain (BB_HEADER (bb), insn);
> + insn = duplicate_insn_chain (BB_HEADER (bb), insn, loop, id);
> if (insn)
> BB_HEADER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
> }
> @@ -4318,7 +4368,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
> insn = BB_FOOTER (bb);
> while (NEXT_INSN (insn))
> insn = NEXT_INSN (insn);
> - insn = duplicate_insn_chain (BB_FOOTER (bb), insn);
> + insn = duplicate_insn_chain (BB_FOOTER (bb), insn, loop, id);
> if (insn)
> BB_FOOTER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
> }
> diff --git a/gcc/cfgrtl.h b/gcc/cfgrtl.h
> index ae03e82650d..ae62d6cf05c 100644
> --- a/gcc/cfgrtl.h
> +++ b/gcc/cfgrtl.h
> @@ -49,7 +49,8 @@ extern bool purge_all_dead_edges (void);
> extern bool fixup_abnormal_edges (void);
> extern rtx_insn *unlink_insn_chain (rtx_insn *, rtx_insn *);
> extern void relink_block_chain (bool);
> -extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *);
> +extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *,
> + class loop *, class copy_bb_data *);
> extern void cfg_layout_initialize (int);
> extern void cfg_layout_finalize (void);
> extern void break_superblocks (void);
> diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
> index 6bd793bdd21..9aa7ef0620c 100644
> --- a/gcc/config/c6x/c6x.c
> +++ b/gcc/config/c6x/c6x.c
> @@ -5600,7 +5600,8 @@ hwloop_optimize (hwloop_info loop)
> int j;
> rtx_insn *this_iter;
>
> - this_iter = duplicate_insn_chain (head_insn, tail_insn);
> + copy_bb_data id;
> + this_iter = duplicate_insn_chain (head_insn, tail_insn, NULL, &id);
> j = 0;
> while (this_iter)
> {
> diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
> index 86701e05550..6f699a874e3 100644
> --- a/gcc/modulo-sched.c
> +++ b/gcc/modulo-sched.c
> @@ -1085,10 +1085,11 @@ optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
>
> static void
> duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
> - int to_stage, rtx count_reg)
> + int to_stage, rtx count_reg, class loop *loop)
> {
> int row;
> ps_insn_ptr ps_ij;
> + copy_bb_data id;
>
> for (row = 0; row < ps->ii; row++)
> for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
> @@ -1113,7 +1114,8 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
> if (from_stage <= last_u && to_stage >= first_u)
> {
> if (u < ps->g->num_nodes)
> - duplicate_insn_chain (ps_first_note (ps, u), u_insn);
> + duplicate_insn_chain (ps_first_note (ps, u), u_insn,
> + loop, &id);
> else
> emit_insn (copy_rtx (PATTERN (u_insn)));
> }
> @@ -1151,7 +1153,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
> }
>
> for (i = 0; i < last_stage; i++)
> - duplicate_insns_of_cycles (ps, 0, i, count_reg);
> + duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
>
> /* Put the prolog on the entry edge. */
> e = loop_preheader_edge (loop);
> @@ -1165,7 +1167,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
> start_sequence ();
>
> for (i = 0; i < last_stage; i++)
> - duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
> + duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
>
> /* Put the epilogue on the exit edge. */
> gcc_assert (single_exit (loop));
> diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
> index 93eae192212..baf495a00bc 100644
> --- a/gcc/tree-ssa-address.c
> +++ b/gcc/tree-ssa-address.c
> @@ -1044,6 +1044,16 @@ copy_ref_info (tree new_ref, tree old_ref)
> }
> }
>
> + /* We can transfer dependence info. */
> + if (!MR_DEPENDENCE_CLIQUE (new_ref)
> + && (TREE_CODE (base) == MEM_REF
> + || TREE_CODE (base) == TARGET_MEM_REF)
> + && MR_DEPENDENCE_CLIQUE (base))
> + {
> + MR_DEPENDENCE_CLIQUE (new_ref) = MR_DEPENDENCE_CLIQUE (base);
> + MR_DEPENDENCE_BASE (new_ref) = MR_DEPENDENCE_BASE (base);
> + }
> +
> /* And alignment info. Note we cannot transfer misalignment info
> since that sits on the SSA name but this is flow-sensitive info
> which we cannot transfer in this generic routine. */
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-09-01 7:09 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-27 12:37 [PATCH] rtl-optimization/96812 - remap dependence info on RTL loop unrolling Richard Biener
2020-09-01 7:09 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).