* [PATCH] DSE: Enhance dse with def-ref analysis
@ 2022-09-22 7:06 juzhe.zhong
2022-09-22 7:32 ` Richard Biener
0 siblings, 1 reply; 10+ messages in thread
From: juzhe.zhong @ 2022-09-22 7:06 UTC (permalink / raw)
To: gcc-patches; +Cc: richard.sandiford, rguenther, Ju-Zhe Zhong
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
This patch fix issue: PR 99407
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99407
The enhancement implementation is simple:
1.Search gimple statement in program reverse order.
2.Queue the store statement which may be possible kill the def
of previous store statement.
3.Perform dse_def_ref_analysis to remove stores will not kill
any def.
For example:
a[i_18] = _5;
...
foo (&a);
a[i_18] = _7;
a[i_18] = _7 is queued at the begining and will be removed
in dse_def_ref_analysis.
4.Remove the store if the def is confirmed to be killed.
I have fully tested it in RISC-V foundation downstream port (RVV):
https://github.com/riscv-collab/riscv-gcc/tree/riscv-gcc-rvv-next
Are you willing to review this patch and test it in ARM/x86?
gcc/ChangeLog:
* tree-ssa-dse.cc (dse_search_def_stores): New function.
(dse_can_def_ref_p): Ditto.
(dse_def_ref_analysis): Add a new argument.
(dse_optimize_stmt): Pass through stores_queue.
(pass_dse::execute): Add dse_def_ref_analysis and stores_queue.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr99407.c: New test.
---
gcc/testsuite/gcc.dg/tree-ssa/pr99407.c | 30 ++++
gcc/tree-ssa-dse.cc | 209 +++++++++++++++++++++++-
2 files changed, 236 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
new file mode 100644
index 00000000000..57cea77da7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details" } */
+typedef float real_t;
+
+#define iterations 100000
+#define LEN_1D 32000
+#define LEN_2D 256
+real_t flat_2d_array[LEN_2D*LEN_2D];
+
+real_t x[LEN_1D];
+
+real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D],
+bb[LEN_2D][LEN_2D],cc[LEN_2D][LEN_2D],tt[LEN_2D][LEN_2D];
+
+int indx[LEN_1D];
+
+real_t* __restrict__ xx;
+real_t* yy;
+real_t s243(void)
+{
+ for (int nl = 0; nl < iterations; nl++) {
+ for (int i = 0; i < LEN_1D-1; i++) {
+ a[i] = b[i] + c[i ] * d[i];
+ b[i] = a[i] + d[i ] * e[i];
+ a[i] = b[i] + a[i+1] * d[i];
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump "Deleted dead store" "dse1" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index 34cfd1a8802..a8ca3672da2 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -1332,6 +1332,186 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
return true;
}
+/* Search the stores_queue to see whether there is a store has a same vdef
+ as the stmt. */
+
+static bool
+dse_search_def_stores (function *fun, auto_vec<gimple *> &stores_queue,
+ gimple *stmt)
+{
+ /* Consider the following sequcence:
+ a[i_18] = _5;
+ _8 = e[i_18];
+ _9 = _3 * _8;
+ _10 = _5 + _9;
+ b[i_18] = _10;
+ _12 = i_18 + 1;
+ _13 = a[_12];
+ _15 = _3 * _13;
+ _16 = _10 + _15;
+ a[i_18] = _16
+
+ We should be able to remove a[i_18] = _5. */
+ for (unsigned int i = 0; i < stores_queue.length (); ++i)
+ {
+ if (!stores_queue[i])
+ continue;
+ tree lhs1 = gimple_assign_lhs (stores_queue[i]);
+ tree lhs2 = gimple_assign_lhs (stmt);
+
+ if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
+ continue;
+ if (operand_equal_p (gimple_assign_lhs (stores_queue[i]),
+ gimple_assign_lhs (stmt), OEP_ADDRESS_OF))
+ {
+ /* No matter it can be eliminated or not, remove it
+ in the worklist. */
+ stores_queue[i] = NULL;
+ if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
+ && !is_ctrl_altering_stmt (stmt)
+ && (!stmt_could_throw_p (fun, stmt)
+ || fun->can_delete_dead_exceptions))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Return true if the TREE_CODE of the mem op is allowed to do dse
+ according to def-ref analysis. */
+
+static bool
+dse_can_def_ref_p (gimple *stmt)
+{
+ /*TODO: For now, we only support dse according to
+ def-ref analysis for ARRAY_REF. */
+ return TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF;
+}
+
+/* Perform def-ref analysis on all the stores of stores_queue worklist.
+ Since dse is running on reverse program order walk, the stores in
+ stores_queue are always after stmt, clear the store in the stores_queue
+ if the address of store lhs is changed or the lhs of store is used
+ in stmt. */
+
+static void
+dse_def_ref_analysis (gimple *stmt, auto_vec<gimple *> &stores_queue)
+{
+ for (unsigned int i = 0; i < stores_queue.length (); ++i)
+ {
+ if (!stores_queue[i])
+ continue;
+
+ /* If we meet non-call or non-assign statement, we disable the possible
+ * dse. */
+ if (gimple_code (stmt) != GIMPLE_CALL
+ && gimple_code (stmt) != GIMPLE_ASSIGN)
+ {
+ stores_queue[i] = NULL;
+ continue;
+ }
+
+ tree lhs = gimple_get_lhs (stores_queue[i]);
+ if (!lhs)
+ continue;
+ switch (TREE_CODE (lhs))
+ {
+ /* Handle the array like a[i_18]:
+ 1.if i_18 is changed in stmt which means lhs of stmt == i_18,
+ we remove the store from the stores_queue.
+
+ 2.a or a[i_18] is used in stmt, we remove the store from the
+ stores_queue. */
+ case ARRAY_REF: {
+ if (gimple_get_lhs (stmt)
+ && operand_equal_p (gimple_get_lhs (stmt),
+ TREE_OPERAND (lhs, 1), 0))
+ {
+ stores_queue[i] = NULL;
+ continue;
+ }
+
+ for (unsigned int j = 0; j < gimple_num_ops (stmt); ++j)
+ {
+ tree op = gimple_op (stmt, j);
+ if (!op)
+ continue;
+
+ /* We only check the use op. */
+ if (op == gimple_get_lhs (stmt))
+ continue;
+
+ if (TREE_OPERAND_LENGTH (op) == 0)
+ {
+ if (operand_equal_p (op, lhs, 0)
+ || operand_equal_p (op, TREE_OPERAND (lhs, 0), 0))
+ stores_queue[i] = NULL;
+ }
+ else
+ {
+ for (int k = 0; k < TREE_OPERAND_LENGTH (op); ++k)
+ {
+ if (!TREE_OPERAND (op, k))
+ continue;
+
+ if (TREE_CODE (op) == ARRAY_REF)
+ {
+ /*
+ Disable optimization like this:
+ a[i_18] = _5;
+ ...
+ foo (a[i_18]).
+
+ Don't disable optimization like this:
+ a[i_18] = _5;
+ ...
+ foo (a[i_12]).
+ */
+ if (operand_equal_p (op, lhs, OEP_ADDRESS_OF))
+ {
+ stores_queue[i] = NULL;
+ break;
+ }
+ }
+ else
+ {
+ /* To be conservative, if TREE_OPERAND (op, k)
+ length > 1. Disable the possible dse
+ optimization.
+ Disable optimization like this:
+ a[i_18] = _5;
+ ...
+ foo (&a).
+
+ Or
+ a[i_18] = _5;
+ ...
+ foo (test (&a)).
+ */
+ if (TREE_OPERAND_LENGTH (TREE_OPERAND (op, k)) > 0
+ || operand_equal_p (TREE_OPERAND (op, k), lhs,
+ 0)
+ || operand_equal_p (TREE_OPERAND (op, k),
+ TREE_OPERAND (lhs, 0), 0))
+ {
+ stores_queue[i] = NULL;
+ break;
+ }
+ }
+ }
+ }
+ if (!stores_queue[i])
+ break;
+ }
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+}
+
/* Attempt to eliminate dead stores in the statement referenced by BSI.
A dead store is a store into a memory location which will later be
@@ -1344,7 +1524,8 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
post dominates the first store, then the first store is dead. */
static void
-dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
+dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes,
+ auto_vec<gimple *> &stores_queue)
{
gimple *stmt = gsi_stmt (*gsi);
@@ -1469,7 +1650,25 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
byte_tracking_enabled,
live_bytes, &by_clobber_p);
if (store_status == DSE_STORE_LIVE)
- return;
+ {
+ if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
+ && !is_ctrl_altering_stmt (stmt)
+ && (!stmt_could_throw_p (fun, stmt)
+ || fun->can_delete_dead_exceptions))
+ {
+ if (dse_search_def_stores (fun, stores_queue, stmt))
+ ;
+ else if (dse_can_def_ref_p (stmt))
+ {
+ stores_queue.safe_push (stmt);
+ return;
+ }
+ else
+ return;
+ }
+ else
+ return;
+ }
if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
{
@@ -1566,13 +1765,17 @@ pass_dse::execute (function *fun)
{
basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]);
gimple_stmt_iterator gsi;
+ /* Queue the stores which potentially updates mem of a previous
+ redundant store. We only do the optimization within a basic block. */
+ auto_vec<gimple *> stores_queue;
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
{
gimple *stmt = gsi_stmt (gsi);
+ dse_def_ref_analysis (stmt, stores_queue);
if (gimple_vdef (stmt))
- dse_optimize_stmt (fun, &gsi, live_bytes);
+ dse_optimize_stmt (fun, &gsi, live_bytes, stores_queue);
else if (def_operand_p
def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
{
--
2.36.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DSE: Enhance dse with def-ref analysis
2022-09-22 7:06 [PATCH] DSE: Enhance dse with def-ref analysis juzhe.zhong
@ 2022-09-22 7:32 ` Richard Biener
2022-09-22 7:40 ` juzhe.zhong
2022-09-22 7:44 ` Richard Biener
0 siblings, 2 replies; 10+ messages in thread
From: Richard Biener @ 2022-09-22 7:32 UTC (permalink / raw)
To: Ju-Zhe Zhong; +Cc: gcc-patches, richard.sandiford
On Thu, 22 Sep 2022, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> This patch fix issue: PR 99407
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99407
>
> The enhancement implementation is simple:
> 1.Search gimple statement in program reverse order.
> 2.Queue the store statement which may be possible kill the def
> of previous store statement.
> 3.Perform dse_def_ref_analysis to remove stores will not kill
> any def.
> For example:
> a[i_18] = _5;
> ...
> foo (&a);
> a[i_18] = _7;
>
> a[i_18] = _7 is queued at the begining and will be removed
> in dse_def_ref_analysis.
> 4.Remove the store if the def is confirmed to be killed.
But we already do the very same thing in dse_classify_store, I fail
to see why we need to have an alternate implementation? It also
seems to be quadratic in the size of a basic-block?
The issue with dse_classify_store is that it relies on
ref_maybe_used_by_stmt_p but that doesn't handle
a[i] = ..;
.. = a[i+1];
but when seeing a[_1] vs. a[_2] (two variable offsets), it gives
up, asserting may-aliasing. We do have infrastructure to catch
such cases with data reference analysis. If we want to catch
these cases we should use that instead. Given we have a
DSE/DCE pass pair right before loop optimizations we could even
move those inside of the loop pipeline and perform this more
expensive checks conditional on loop/scev availability.
Richard.
> I have fully tested it in RISC-V foundation downstream port (RVV):
> https://github.com/riscv-collab/riscv-gcc/tree/riscv-gcc-rvv-next
>
> Are you willing to review this patch and test it in ARM/x86?
>
> gcc/ChangeLog:
>
> * tree-ssa-dse.cc (dse_search_def_stores): New function.
> (dse_can_def_ref_p): Ditto.
> (dse_def_ref_analysis): Add a new argument.
> (dse_optimize_stmt): Pass through stores_queue.
> (pass_dse::execute): Add dse_def_ref_analysis and stores_queue.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/pr99407.c: New test.
>
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr99407.c | 30 ++++
> gcc/tree-ssa-dse.cc | 209 +++++++++++++++++++++++-
> 2 files changed, 236 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
> new file mode 100644
> index 00000000000..57cea77da7c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details" } */
> +typedef float real_t;
> +
> +#define iterations 100000
> +#define LEN_1D 32000
> +#define LEN_2D 256
> +real_t flat_2d_array[LEN_2D*LEN_2D];
> +
> +real_t x[LEN_1D];
> +
> +real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D],
> +bb[LEN_2D][LEN_2D],cc[LEN_2D][LEN_2D],tt[LEN_2D][LEN_2D];
> +
> +int indx[LEN_1D];
> +
> +real_t* __restrict__ xx;
> +real_t* yy;
> +real_t s243(void)
> +{
> + for (int nl = 0; nl < iterations; nl++) {
> + for (int i = 0; i < LEN_1D-1; i++) {
> + a[i] = b[i] + c[i ] * d[i];
> + b[i] = a[i] + d[i ] * e[i];
> + a[i] = b[i] + a[i+1] * d[i];
> + }
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump "Deleted dead store" "dse1" } } */
> \ No newline at end of file
> diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
> index 34cfd1a8802..a8ca3672da2 100644
> --- a/gcc/tree-ssa-dse.cc
> +++ b/gcc/tree-ssa-dse.cc
> @@ -1332,6 +1332,186 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
> return true;
> }
>
> +/* Search the stores_queue to see whether there is a store has a same vdef
> + as the stmt. */
> +
> +static bool
> +dse_search_def_stores (function *fun, auto_vec<gimple *> &stores_queue,
> + gimple *stmt)
> +{
> + /* Consider the following sequcence:
> + a[i_18] = _5;
> + _8 = e[i_18];
> + _9 = _3 * _8;
> + _10 = _5 + _9;
> + b[i_18] = _10;
> + _12 = i_18 + 1;
> + _13 = a[_12];
> + _15 = _3 * _13;
> + _16 = _10 + _15;
> + a[i_18] = _16
> +
> + We should be able to remove a[i_18] = _5. */
> + for (unsigned int i = 0; i < stores_queue.length (); ++i)
> + {
> + if (!stores_queue[i])
> + continue;
> + tree lhs1 = gimple_assign_lhs (stores_queue[i]);
> + tree lhs2 = gimple_assign_lhs (stmt);
> +
> + if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
> + continue;
> + if (operand_equal_p (gimple_assign_lhs (stores_queue[i]),
> + gimple_assign_lhs (stmt), OEP_ADDRESS_OF))
> + {
> + /* No matter it can be eliminated or not, remove it
> + in the worklist. */
> + stores_queue[i] = NULL;
> + if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
> + && !is_ctrl_altering_stmt (stmt)
> + && (!stmt_could_throw_p (fun, stmt)
> + || fun->can_delete_dead_exceptions))
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +/* Return true if the TREE_CODE of the mem op is allowed to do dse
> + according to def-ref analysis. */
> +
> +static bool
> +dse_can_def_ref_p (gimple *stmt)
> +{
> + /*TODO: For now, we only support dse according to
> + def-ref analysis for ARRAY_REF. */
> + return TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF;
> +}
> +
> +/* Perform def-ref analysis on all the stores of stores_queue worklist.
> + Since dse is running on reverse program order walk, the stores in
> + stores_queue are always after stmt, clear the store in the stores_queue
> + if the address of store lhs is changed or the lhs of store is used
> + in stmt. */
> +
> +static void
> +dse_def_ref_analysis (gimple *stmt, auto_vec<gimple *> &stores_queue)
> +{
> + for (unsigned int i = 0; i < stores_queue.length (); ++i)
> + {
> + if (!stores_queue[i])
> + continue;
> +
> + /* If we meet non-call or non-assign statement, we disable the possible
> + * dse. */
> + if (gimple_code (stmt) != GIMPLE_CALL
> + && gimple_code (stmt) != GIMPLE_ASSIGN)
> + {
> + stores_queue[i] = NULL;
> + continue;
> + }
> +
> + tree lhs = gimple_get_lhs (stores_queue[i]);
> + if (!lhs)
> + continue;
> + switch (TREE_CODE (lhs))
> + {
> + /* Handle the array like a[i_18]:
> + 1.if i_18 is changed in stmt which means lhs of stmt == i_18,
> + we remove the store from the stores_queue.
> +
> + 2.a or a[i_18] is used in stmt, we remove the store from the
> + stores_queue. */
> + case ARRAY_REF: {
> + if (gimple_get_lhs (stmt)
> + && operand_equal_p (gimple_get_lhs (stmt),
> + TREE_OPERAND (lhs, 1), 0))
> + {
> + stores_queue[i] = NULL;
> + continue;
> + }
> +
> + for (unsigned int j = 0; j < gimple_num_ops (stmt); ++j)
> + {
> + tree op = gimple_op (stmt, j);
> + if (!op)
> + continue;
> +
> + /* We only check the use op. */
> + if (op == gimple_get_lhs (stmt))
> + continue;
> +
> + if (TREE_OPERAND_LENGTH (op) == 0)
> + {
> + if (operand_equal_p (op, lhs, 0)
> + || operand_equal_p (op, TREE_OPERAND (lhs, 0), 0))
> + stores_queue[i] = NULL;
> + }
> + else
> + {
> + for (int k = 0; k < TREE_OPERAND_LENGTH (op); ++k)
> + {
> + if (!TREE_OPERAND (op, k))
> + continue;
> +
> + if (TREE_CODE (op) == ARRAY_REF)
> + {
> + /*
> + Disable optimization like this:
> + a[i_18] = _5;
> + ...
> + foo (a[i_18]).
> +
> + Don't disable optimization like this:
> + a[i_18] = _5;
> + ...
> + foo (a[i_12]).
> + */
> + if (operand_equal_p (op, lhs, OEP_ADDRESS_OF))
> + {
> + stores_queue[i] = NULL;
> + break;
> + }
> + }
> + else
> + {
> + /* To be conservative, if TREE_OPERAND (op, k)
> + length > 1. Disable the possible dse
> + optimization.
> + Disable optimization like this:
> + a[i_18] = _5;
> + ...
> + foo (&a).
> +
> + Or
> + a[i_18] = _5;
> + ...
> + foo (test (&a)).
> + */
> + if (TREE_OPERAND_LENGTH (TREE_OPERAND (op, k)) > 0
> + || operand_equal_p (TREE_OPERAND (op, k), lhs,
> + 0)
> + || operand_equal_p (TREE_OPERAND (op, k),
> + TREE_OPERAND (lhs, 0), 0))
> + {
> + stores_queue[i] = NULL;
> + break;
> + }
> + }
> + }
> + }
> + if (!stores_queue[i])
> + break;
> + }
> + }
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + }
> +}
> +
> /* Attempt to eliminate dead stores in the statement referenced by BSI.
>
> A dead store is a store into a memory location which will later be
> @@ -1344,7 +1524,8 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
> post dominates the first store, then the first store is dead. */
>
> static void
> -dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
> +dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes,
> + auto_vec<gimple *> &stores_queue)
> {
> gimple *stmt = gsi_stmt (*gsi);
>
> @@ -1469,7 +1650,25 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
> byte_tracking_enabled,
> live_bytes, &by_clobber_p);
> if (store_status == DSE_STORE_LIVE)
> - return;
> + {
> + if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
> + && !is_ctrl_altering_stmt (stmt)
> + && (!stmt_could_throw_p (fun, stmt)
> + || fun->can_delete_dead_exceptions))
> + {
> + if (dse_search_def_stores (fun, stores_queue, stmt))
> + ;
> + else if (dse_can_def_ref_p (stmt))
> + {
> + stores_queue.safe_push (stmt);
> + return;
> + }
> + else
> + return;
> + }
> + else
> + return;
> + }
>
> if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
> {
> @@ -1566,13 +1765,17 @@ pass_dse::execute (function *fun)
> {
> basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]);
> gimple_stmt_iterator gsi;
> + /* Queue the stores which potentially updates mem of a previous
> + redundant store. We only do the optimization within a basic block. */
> + auto_vec<gimple *> stores_queue;
>
> for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
> {
> gimple *stmt = gsi_stmt (gsi);
> + dse_def_ref_analysis (stmt, stores_queue);
>
> if (gimple_vdef (stmt))
> - dse_optimize_stmt (fun, &gsi, live_bytes);
> + dse_optimize_stmt (fun, &gsi, live_bytes, stores_queue);
> else if (def_operand_p
> def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
> {
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: Re: [PATCH] DSE: Enhance dse with def-ref analysis
2022-09-22 7:32 ` Richard Biener
@ 2022-09-22 7:40 ` juzhe.zhong
2022-09-22 7:44 ` Richard Biener
1 sibling, 0 replies; 10+ messages in thread
From: juzhe.zhong @ 2022-09-22 7:40 UTC (permalink / raw)
To: rguenther; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 11464 bytes --]
OK. You mean we should check why if fails in ref_maybe_used_by_stmt_p
instead of doing the data-ref analysis outside dse_classify_store ?
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2022-09-22 15:32
To: Ju-Zhe Zhong
CC: gcc-patches; richard.sandiford
Subject: Re: [PATCH] DSE: Enhance dse with def-ref analysis
On Thu, 22 Sep 2022, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> This patch fix issue: PR 99407
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99407
>
> The enhancement implementation is simple:
> 1.Search gimple statement in program reverse order.
> 2.Queue the store statement which may be possible kill the def
> of previous store statement.
> 3.Perform dse_def_ref_analysis to remove stores will not kill
> any def.
> For example:
> a[i_18] = _5;
> ...
> foo (&a);
> a[i_18] = _7;
>
> a[i_18] = _7 is queued at the begining and will be removed
> in dse_def_ref_analysis.
> 4.Remove the store if the def is confirmed to be killed.
But we already do the very same thing in dse_classify_store, I fail
to see why we need to have an alternate implementation? It also
seems to be quadratic in the size of a basic-block?
The issue with dse_classify_store is that it relies on
ref_maybe_used_by_stmt_p but that doesn't handle
a[i] = ..;
.. = a[i+1];
but when seeing a[_1] vs. a[_2] (two variable offsets), it gives
up, asserting may-aliasing. We do have infrastructure to catch
such cases with data reference analysis. If we want to catch
these cases we should use that instead. Given we have a
DSE/DCE pass pair right before loop optimizations we could even
move those inside of the loop pipeline and perform this more
expensive checks conditional on loop/scev availability.
Richard.
> I have fully tested it in RISC-V foundation downstream port (RVV):
> https://github.com/riscv-collab/riscv-gcc/tree/riscv-gcc-rvv-next
>
> Are you willing to review this patch and test it in ARM/x86?
>
> gcc/ChangeLog:
>
> * tree-ssa-dse.cc (dse_search_def_stores): New function.
> (dse_can_def_ref_p): Ditto.
> (dse_def_ref_analysis): Add a new argument.
> (dse_optimize_stmt): Pass through stores_queue.
> (pass_dse::execute): Add dse_def_ref_analysis and stores_queue.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/pr99407.c: New test.
>
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr99407.c | 30 ++++
> gcc/tree-ssa-dse.cc | 209 +++++++++++++++++++++++-
> 2 files changed, 236 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
> new file mode 100644
> index 00000000000..57cea77da7c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details" } */
> +typedef float real_t;
> +
> +#define iterations 100000
> +#define LEN_1D 32000
> +#define LEN_2D 256
> +real_t flat_2d_array[LEN_2D*LEN_2D];
> +
> +real_t x[LEN_1D];
> +
> +real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D],
> +bb[LEN_2D][LEN_2D],cc[LEN_2D][LEN_2D],tt[LEN_2D][LEN_2D];
> +
> +int indx[LEN_1D];
> +
> +real_t* __restrict__ xx;
> +real_t* yy;
> +real_t s243(void)
> +{
> + for (int nl = 0; nl < iterations; nl++) {
> + for (int i = 0; i < LEN_1D-1; i++) {
> + a[i] = b[i] + c[i ] * d[i];
> + b[i] = a[i] + d[i ] * e[i];
> + a[i] = b[i] + a[i+1] * d[i];
> + }
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump "Deleted dead store" "dse1" } } */
> \ No newline at end of file
> diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
> index 34cfd1a8802..a8ca3672da2 100644
> --- a/gcc/tree-ssa-dse.cc
> +++ b/gcc/tree-ssa-dse.cc
> @@ -1332,6 +1332,186 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
> return true;
> }
>
> +/* Search the stores_queue to see whether there is a store has a same vdef
> + as the stmt. */
> +
> +static bool
> +dse_search_def_stores (function *fun, auto_vec<gimple *> &stores_queue,
> + gimple *stmt)
> +{
> + /* Consider the following sequcence:
> + a[i_18] = _5;
> + _8 = e[i_18];
> + _9 = _3 * _8;
> + _10 = _5 + _9;
> + b[i_18] = _10;
> + _12 = i_18 + 1;
> + _13 = a[_12];
> + _15 = _3 * _13;
> + _16 = _10 + _15;
> + a[i_18] = _16
> +
> + We should be able to remove a[i_18] = _5. */
> + for (unsigned int i = 0; i < stores_queue.length (); ++i)
> + {
> + if (!stores_queue[i])
> + continue;
> + tree lhs1 = gimple_assign_lhs (stores_queue[i]);
> + tree lhs2 = gimple_assign_lhs (stmt);
> +
> + if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
> + continue;
> + if (operand_equal_p (gimple_assign_lhs (stores_queue[i]),
> + gimple_assign_lhs (stmt), OEP_ADDRESS_OF))
> + {
> + /* No matter it can be eliminated or not, remove it
> + in the worklist. */
> + stores_queue[i] = NULL;
> + if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
> + && !is_ctrl_altering_stmt (stmt)
> + && (!stmt_could_throw_p (fun, stmt)
> + || fun->can_delete_dead_exceptions))
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +/* Return true if the TREE_CODE of the mem op is allowed to do dse
> + according to def-ref analysis. */
> +
> +static bool
> +dse_can_def_ref_p (gimple *stmt)
> +{
> + /*TODO: For now, we only support dse according to
> + def-ref analysis for ARRAY_REF. */
> + return TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF;
> +}
> +
> +/* Perform def-ref analysis on all the stores of stores_queue worklist.
> + Since dse is running on reverse program order walk, the stores in
> + stores_queue are always after stmt, clear the store in the stores_queue
> + if the address of store lhs is changed or the lhs of store is used
> + in stmt. */
> +
> +static void
> +dse_def_ref_analysis (gimple *stmt, auto_vec<gimple *> &stores_queue)
> +{
> + for (unsigned int i = 0; i < stores_queue.length (); ++i)
> + {
> + if (!stores_queue[i])
> + continue;
> +
> + /* If we meet non-call or non-assign statement, we disable the possible
> + * dse. */
> + if (gimple_code (stmt) != GIMPLE_CALL
> + && gimple_code (stmt) != GIMPLE_ASSIGN)
> + {
> + stores_queue[i] = NULL;
> + continue;
> + }
> +
> + tree lhs = gimple_get_lhs (stores_queue[i]);
> + if (!lhs)
> + continue;
> + switch (TREE_CODE (lhs))
> + {
> + /* Handle the array like a[i_18]:
> + 1.if i_18 is changed in stmt which means lhs of stmt == i_18,
> + we remove the store from the stores_queue.
> +
> + 2.a or a[i_18] is used in stmt, we remove the store from the
> + stores_queue. */
> + case ARRAY_REF: {
> + if (gimple_get_lhs (stmt)
> + && operand_equal_p (gimple_get_lhs (stmt),
> + TREE_OPERAND (lhs, 1), 0))
> + {
> + stores_queue[i] = NULL;
> + continue;
> + }
> +
> + for (unsigned int j = 0; j < gimple_num_ops (stmt); ++j)
> + {
> + tree op = gimple_op (stmt, j);
> + if (!op)
> + continue;
> +
> + /* We only check the use op. */
> + if (op == gimple_get_lhs (stmt))
> + continue;
> +
> + if (TREE_OPERAND_LENGTH (op) == 0)
> + {
> + if (operand_equal_p (op, lhs, 0)
> + || operand_equal_p (op, TREE_OPERAND (lhs, 0), 0))
> + stores_queue[i] = NULL;
> + }
> + else
> + {
> + for (int k = 0; k < TREE_OPERAND_LENGTH (op); ++k)
> + {
> + if (!TREE_OPERAND (op, k))
> + continue;
> +
> + if (TREE_CODE (op) == ARRAY_REF)
> + {
> + /*
> + Disable optimization like this:
> + a[i_18] = _5;
> + ...
> + foo (a[i_18]).
> +
> + Don't disable optimization like this:
> + a[i_18] = _5;
> + ...
> + foo (a[i_12]).
> + */
> + if (operand_equal_p (op, lhs, OEP_ADDRESS_OF))
> + {
> + stores_queue[i] = NULL;
> + break;
> + }
> + }
> + else
> + {
> + /* To be conservative, if TREE_OPERAND (op, k)
> + length > 1. Disable the possible dse
> + optimization.
> + Disable optimization like this:
> + a[i_18] = _5;
> + ...
> + foo (&a).
> +
> + Or
> + a[i_18] = _5;
> + ...
> + foo (test (&a)).
> + */
> + if (TREE_OPERAND_LENGTH (TREE_OPERAND (op, k)) > 0
> + || operand_equal_p (TREE_OPERAND (op, k), lhs,
> + 0)
> + || operand_equal_p (TREE_OPERAND (op, k),
> + TREE_OPERAND (lhs, 0), 0))
> + {
> + stores_queue[i] = NULL;
> + break;
> + }
> + }
> + }
> + }
> + if (!stores_queue[i])
> + break;
> + }
> + }
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + }
> +}
> +
> /* Attempt to eliminate dead stores in the statement referenced by BSI.
>
> A dead store is a store into a memory location which will later be
> @@ -1344,7 +1524,8 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
> post dominates the first store, then the first store is dead. */
>
> static void
> -dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
> +dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes,
> + auto_vec<gimple *> &stores_queue)
> {
> gimple *stmt = gsi_stmt (*gsi);
>
> @@ -1469,7 +1650,25 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
> byte_tracking_enabled,
> live_bytes, &by_clobber_p);
> if (store_status == DSE_STORE_LIVE)
> - return;
> + {
> + if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
> + && !is_ctrl_altering_stmt (stmt)
> + && (!stmt_could_throw_p (fun, stmt)
> + || fun->can_delete_dead_exceptions))
> + {
> + if (dse_search_def_stores (fun, stores_queue, stmt))
> + ;
> + else if (dse_can_def_ref_p (stmt))
> + {
> + stores_queue.safe_push (stmt);
> + return;
> + }
> + else
> + return;
> + }
> + else
> + return;
> + }
>
> if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
> {
> @@ -1566,13 +1765,17 @@ pass_dse::execute (function *fun)
> {
> basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]);
> gimple_stmt_iterator gsi;
> + /* Queue the stores which potentially updates mem of a previous
> + redundant store. We only do the optimization within a basic block. */
> + auto_vec<gimple *> stores_queue;
>
> for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
> {
> gimple *stmt = gsi_stmt (gsi);
> + dse_def_ref_analysis (stmt, stores_queue);
>
> if (gimple_vdef (stmt))
> - dse_optimize_stmt (fun, &gsi, live_bytes);
> + dse_optimize_stmt (fun, &gsi, live_bytes, stores_queue);
> else if (def_operand_p
> def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
> {
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DSE: Enhance dse with def-ref analysis
2022-09-22 7:32 ` Richard Biener
2022-09-22 7:40 ` juzhe.zhong
@ 2022-09-22 7:44 ` Richard Biener
[not found] ` <2794D55ECC21EB61+2022092215574700429612@rivai.ai>
1 sibling, 1 reply; 10+ messages in thread
From: Richard Biener @ 2022-09-22 7:44 UTC (permalink / raw)
To: Ju-Zhe Zhong; +Cc: gcc-patches, richard.sandiford
On Thu, 22 Sep 2022, Richard Biener wrote:
> On Thu, 22 Sep 2022, juzhe.zhong@rivai.ai wrote:
>
> > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> >
> > This patch fix issue: PR 99407
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99407
> >
> > The enhancement implementation is simple:
> > 1.Search gimple statement in program reverse order.
> > 2.Queue the store statement which may be possible kill the def
> > of previous store statement.
> > 3.Perform dse_def_ref_analysis to remove stores will not kill
> > any def.
> > For example:
> > a[i_18] = _5;
> > ...
> > foo (&a);
> > a[i_18] = _7;
> >
> > a[i_18] = _7 is queued at the begining and will be removed
> > in dse_def_ref_analysis.
> > 4.Remove the store if the def is confirmed to be killed.
>
> But we already do the very same thing in dse_classify_store, I fail
> to see why we need to have an alternate implementation? It also
> seems to be quadratic in the size of a basic-block?
>
> The issue with dse_classify_store is that it relies on
> ref_maybe_used_by_stmt_p but that doesn't handle
>
> a[i] = ..;
> .. = a[i+1];
>
> but when seeing a[_1] vs. a[_2] (two variable offsets), it gives
> up, asserting may-aliasing. We do have infrastructure to catch
> such cases with data reference analysis. If we want to catch
> these cases we should use that instead. Given we have a
> DSE/DCE pass pair right before loop optimizations we could even
> move those inside of the loop pipeline and perform this more
> expensive checks conditional on loop/scev availability.
Oh, and when doing non-loop aware analysis we don't need SCEV. The
following optimizes the testcase but as said I don't think we want
to perform this for each of the DSE passes since it can be somewhat
expensive, at least without doing more caching (we could keep a
stmt -> data-ref hash-map and compute data-refs at most once for each
statement, that would make it more acceptable).
Richard.
From 515b213e9d06c2bd36160e66728f57e48095bb84 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 22 Sep 2022 09:40:40 +0200
Subject: [PATCH] tree-optimization/99407 - DSE with data-ref analysis
To: gcc-patches@gcc.gnu.org
* tree-ssa-dse.c (dse_classify_store): Use data-ref analysis
to disambiguate more uses.
---
gcc/tree-ssa-dse.cc | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index 34cfd1a8802..340a54f4105 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -45,6 +45,8 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-modref.h"
#include "target.h"
#include "tree-ssa-loop-niter.h"
+#include "cfgloop.h"
+#include "tree-data-ref.h"
/* This file implements dead store elimination.
@@ -1019,6 +1021,25 @@ dse_classify_store (ao_ref *ref, gimple *stmt,
/* If the statement is a use the store is not dead. */
else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
{
+ if (is_gimple_assign (use_stmt))
+ {
+ data_reference_p dra, drb;
+ dra = create_data_ref (NULL, NULL, ref->ref, stmt,
+ false, false);
+ drb = create_data_ref (NULL, NULL,
+ gimple_assign_rhs1 (use_stmt),
+ use_stmt, false, false);
+ bool alias_p = dr_may_alias_p (dra, drb, NULL);
+ free_data_ref (dra);
+ free_data_ref (drb);
+ if (!alias_p)
+ {
+ if (gimple_vdef (use_stmt))
+ defs.safe_push (use_stmt);
+ continue;
+ }
+ }
+
/* Handle common cases where we can easily build an ao_ref
structure for USE_STMT and in doing so we find that the
references hit non-live bytes and thus can be ignored.
--
2.35.3
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] DSE: Enhance dse with def-ref analysis
@ 2022-09-22 6:58 juzhe.zhong
0 siblings, 0 replies; 10+ messages in thread
From: juzhe.zhong @ 2022-09-22 6:58 UTC (permalink / raw)
To: gcc-patches; +Cc: richard.sandiford, rguenther, Ju-Zhe Zhong
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
This patch fix issue: PR 99407
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99407
The enhancement implementation is simple:
1.Search gimple statement in program reverse order.
2.Queue the store statement which may be possible kill the def
of previous store statement.
3.Perform dse_def_ref_analysis to remove stores will not kill
any def.
For example:
a[i_18] = _5;
...
foo (&a);
a[i_18] = _7;
a[i_18] = _7 is queued at the begining and will be removed
in dse_def_ref_analysis.
4.Remove the store if the def is confirmed to be killed.
I have fully tested it in RISC-V foundation downstream port (RVV):
https://github.com/riscv-collab/riscv-gcc/tree/riscv-gcc-rvv-next
Are you willing to review this patch and test it in ARM/x86?
gcc/ChangeLog:
* tree-ssa-dse.cc (dse_search_def_stores):
(dse_can_def_ref_p):
(dse_def_ref_analysis):
(dse_optimize_stmt):
(pass_dse::execute):
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr99407.c: New test.
---
gcc/testsuite/gcc.dg/tree-ssa/pr99407.c | 30 ++++
gcc/tree-ssa-dse.cc | 209 +++++++++++++++++++++++-
2 files changed, 236 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
new file mode 100644
index 00000000000..57cea77da7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details" } */
+typedef float real_t;
+
+#define iterations 100000
+#define LEN_1D 32000
+#define LEN_2D 256
+real_t flat_2d_array[LEN_2D*LEN_2D];
+
+real_t x[LEN_1D];
+
+real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D],
+bb[LEN_2D][LEN_2D],cc[LEN_2D][LEN_2D],tt[LEN_2D][LEN_2D];
+
+int indx[LEN_1D];
+
+real_t* __restrict__ xx;
+real_t* yy;
+real_t s243(void)
+{
+ for (int nl = 0; nl < iterations; nl++) {
+ for (int i = 0; i < LEN_1D-1; i++) {
+ a[i] = b[i] + c[i ] * d[i];
+ b[i] = a[i] + d[i ] * e[i];
+ a[i] = b[i] + a[i+1] * d[i];
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump "Deleted dead store" "dse1" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index 34cfd1a8802..a8ca3672da2 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -1332,6 +1332,186 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
return true;
}
+/* Search the stores_queue to see whether there is a store has a same vdef
+ as the stmt. */
+
+static bool
+dse_search_def_stores (function *fun, auto_vec<gimple *> &stores_queue,
+ gimple *stmt)
+{
+ /* Consider the following sequcence:
+ a[i_18] = _5;
+ _8 = e[i_18];
+ _9 = _3 * _8;
+ _10 = _5 + _9;
+ b[i_18] = _10;
+ _12 = i_18 + 1;
+ _13 = a[_12];
+ _15 = _3 * _13;
+ _16 = _10 + _15;
+ a[i_18] = _16
+
+ We should be able to remove a[i_18] = _5. */
+ for (unsigned int i = 0; i < stores_queue.length (); ++i)
+ {
+ if (!stores_queue[i])
+ continue;
+ tree lhs1 = gimple_assign_lhs (stores_queue[i]);
+ tree lhs2 = gimple_assign_lhs (stmt);
+
+ if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
+ continue;
+ if (operand_equal_p (gimple_assign_lhs (stores_queue[i]),
+ gimple_assign_lhs (stmt), OEP_ADDRESS_OF))
+ {
+ /* No matter it can be eliminated or not, remove it
+ in the worklist. */
+ stores_queue[i] = NULL;
+ if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
+ && !is_ctrl_altering_stmt (stmt)
+ && (!stmt_could_throw_p (fun, stmt)
+ || fun->can_delete_dead_exceptions))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Return true if the TREE_CODE of the mem op is allowed to do dse
+ according to def-ref analysis. */
+
+static bool
+dse_can_def_ref_p (gimple *stmt)
+{
+ /*TODO: For now, we only support dse according to
+ def-ref analysis for ARRAY_REF. */
+ return TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF;
+}
+
+/* Perform def-ref analysis on all the stores of stores_queue worklist.
+ Since dse is running on reverse program order walk, the stores in
+ stores_queue are always after stmt, clear the store in the stores_queue
+ if the address of store lhs is changed or the lhs of store is used
+ in stmt. */
+
+static void
+dse_def_ref_analysis (gimple *stmt, auto_vec<gimple *> &stores_queue)
+{
+ for (unsigned int i = 0; i < stores_queue.length (); ++i)
+ {
+ if (!stores_queue[i])
+ continue;
+
+ /* If we meet non-call or non-assign statement, we disable the possible
+ * dse. */
+ if (gimple_code (stmt) != GIMPLE_CALL
+ && gimple_code (stmt) != GIMPLE_ASSIGN)
+ {
+ stores_queue[i] = NULL;
+ continue;
+ }
+
+ tree lhs = gimple_get_lhs (stores_queue[i]);
+ if (!lhs)
+ continue;
+ switch (TREE_CODE (lhs))
+ {
+ /* Handle the array like a[i_18]:
+ 1.if i_18 is changed in stmt which means lhs of stmt == i_18,
+ we remove the store from the stores_queue.
+
+ 2.a or a[i_18] is used in stmt, we remove the store from the
+ stores_queue. */
+ case ARRAY_REF: {
+ if (gimple_get_lhs (stmt)
+ && operand_equal_p (gimple_get_lhs (stmt),
+ TREE_OPERAND (lhs, 1), 0))
+ {
+ stores_queue[i] = NULL;
+ continue;
+ }
+
+ for (unsigned int j = 0; j < gimple_num_ops (stmt); ++j)
+ {
+ tree op = gimple_op (stmt, j);
+ if (!op)
+ continue;
+
+ /* We only check the use op. */
+ if (op == gimple_get_lhs (stmt))
+ continue;
+
+ if (TREE_OPERAND_LENGTH (op) == 0)
+ {
+ if (operand_equal_p (op, lhs, 0)
+ || operand_equal_p (op, TREE_OPERAND (lhs, 0), 0))
+ stores_queue[i] = NULL;
+ }
+ else
+ {
+ for (int k = 0; k < TREE_OPERAND_LENGTH (op); ++k)
+ {
+ if (!TREE_OPERAND (op, k))
+ continue;
+
+ if (TREE_CODE (op) == ARRAY_REF)
+ {
+ /*
+ Disable optimization like this:
+ a[i_18] = _5;
+ ...
+ foo (a[i_18]).
+
+ Don't disable optimization like this:
+ a[i_18] = _5;
+ ...
+ foo (a[i_12]).
+ */
+ if (operand_equal_p (op, lhs, OEP_ADDRESS_OF))
+ {
+ stores_queue[i] = NULL;
+ break;
+ }
+ }
+ else
+ {
+ /* To be conservative, if TREE_OPERAND (op, k)
+ length > 1. Disable the possible dse
+ optimization.
+ Disable optimization like this:
+ a[i_18] = _5;
+ ...
+ foo (&a).
+
+ Or
+ a[i_18] = _5;
+ ...
+ foo (test (&a)).
+ */
+ if (TREE_OPERAND_LENGTH (TREE_OPERAND (op, k)) > 0
+ || operand_equal_p (TREE_OPERAND (op, k), lhs,
+ 0)
+ || operand_equal_p (TREE_OPERAND (op, k),
+ TREE_OPERAND (lhs, 0), 0))
+ {
+ stores_queue[i] = NULL;
+ break;
+ }
+ }
+ }
+ }
+ if (!stores_queue[i])
+ break;
+ }
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+}
+
/* Attempt to eliminate dead stores in the statement referenced by BSI.
A dead store is a store into a memory location which will later be
@@ -1344,7 +1524,8 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
post dominates the first store, then the first store is dead. */
static void
-dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
+dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes,
+ auto_vec<gimple *> &stores_queue)
{
gimple *stmt = gsi_stmt (*gsi);
@@ -1469,7 +1650,25 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
byte_tracking_enabled,
live_bytes, &by_clobber_p);
if (store_status == DSE_STORE_LIVE)
- return;
+ {
+ if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
+ && !is_ctrl_altering_stmt (stmt)
+ && (!stmt_could_throw_p (fun, stmt)
+ || fun->can_delete_dead_exceptions))
+ {
+ if (dse_search_def_stores (fun, stores_queue, stmt))
+ ;
+ else if (dse_can_def_ref_p (stmt))
+ {
+ stores_queue.safe_push (stmt);
+ return;
+ }
+ else
+ return;
+ }
+ else
+ return;
+ }
if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
{
@@ -1566,13 +1765,17 @@ pass_dse::execute (function *fun)
{
basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]);
gimple_stmt_iterator gsi;
+ /* Queue the stores which potentially updates mem of a previous
+ redundant store. We only do the optimization within a basic block. */
+ auto_vec<gimple *> stores_queue;
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
{
gimple *stmt = gsi_stmt (gsi);
+ dse_def_ref_analysis (stmt, stores_queue);
if (gimple_vdef (stmt))
- dse_optimize_stmt (fun, &gsi, live_bytes);
+ dse_optimize_stmt (fun, &gsi, live_bytes, stores_queue);
else if (def_operand_p
def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
{
--
2.36.1
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2022-09-22 9:29 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-22 7:06 [PATCH] DSE: Enhance dse with def-ref analysis juzhe.zhong
2022-09-22 7:32 ` Richard Biener
2022-09-22 7:40 ` juzhe.zhong
2022-09-22 7:44 ` Richard Biener
[not found] ` <2794D55ECC21EB61+2022092215574700429612@rivai.ai>
[not found] ` <nycvar.YFH.7.77.849.2209220800020.6652@jbgna.fhfr.qr>
2022-09-22 8:08 ` juzhe.zhong
2022-09-22 8:48 ` Richard Biener
2022-09-22 8:51 ` juzhe.zhong
2022-09-22 9:12 ` juzhe.zhong
2022-09-22 9:29 ` Richard Biener
-- strict thread matches above, loose matches on Subject: below --
2022-09-22 6:58 juzhe.zhong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).