public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 9:22 Alexandre Oliva
From: Alexandre Oliva @ 2023-07-27 9:22 UTC
To: gcc-cvs
https://gcc.gnu.org/g:8b7710da628edb522b9fd8f8b62d622d9a86f915
commit 8b7710da628edb522b9fd8f8b62d622d9a86f915
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 219 insertions(+), 34 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..72814f902b1 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable = 0, r_mergeable = 0;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,9 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || (ll_load && rl_load
+ && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7869,9 @@ fold_truth_andor_maybe_separate (location_t loc,
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || (lr_load && rr_load
+ && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
return 0;
else
l_const = r_const = 0;
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
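For context, here is a minimal, illustrative C sketch (not part of the patch; struct and field names are made up) of the kind of source the transformation targets: two compares of fields that live in the same memory word, which fold_truth_andor_maybe_separate can combine into a single wider load plus mask test. What the patch adds is that, at the GIMPLE level, each compare operand is traced back to its load statement (follow_load), the two loads are checked for mergeability (mergeable_loads_p), and the merged load is materialized at a chosen insertion point (make_bit_field_load).

struct s { unsigned a : 4; unsigned b : 4; unsigned c : 8; };

int
f (struct s *p)
{
  /* Both fields are read from the same word, so the two loads feeding
     the compares are candidates for replacement by one wider load.  */
  return p->a == 2 && p->c == 7;
}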
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-08-18 3:55 Alexandre Oliva
From: Alexandre Oliva @ 2023-08-18 3:55 UTC
To: gcc-cvs
https://gcc.gnu.org/g:a58547af9f6aa1a9b3c5c4e32f899880fcc5abf5
commit a58547af9f6aa1a9b3c5c4e32f899880fcc5abf5
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 219 insertions(+), 34 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index a12cea549a65..31156e23e862 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7077,7 +7078,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7158,9 +7159,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7197,8 +7199,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7206,7 +7207,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7251,6 +7252,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7277,6 +7298,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7284,7 +7308,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7294,11 +7319,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7308,7 +7335,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7326,16 +7353,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7484,6 +7513,28 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, unshare_expr (inner),
+ unshare_expr (orig_inner),
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ return force_gimple_operand_gsi (&gsi, ref,
+ true, NULL_TREE,
+ true, GSI_SAME_STMT);
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7503,7 +7554,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7512,9 +7564,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7575,6 +7627,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7626,6 +7759,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable = 0, r_mergeable = 0;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7690,20 +7825,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7712,7 +7851,9 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || (ll_load && rl_load
+ && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,7 +7864,9 @@ fold_truth_andor_maybe_separate (location_t loc,
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || (lr_load && rr_load
+ && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
return 0;
else
l_const = r_const = 0;
@@ -8032,18 +8175,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8115,18 +8278,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
@@ -8141,6 +8324,8 @@ fold_truth_andor_maybe_separate (location_t loc,
for (int j = 0; j < 2; j++)
{
+ op[j] = unshare_expr (op[j]);
+
/* Mask out the bits belonging to the other part. */
if (xmask[j][i])
mask[j] = int_const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
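Conversely, a hypothetical shape (again not from the patch) that the vdef walk in mergeable_loads_p is meant to reject: a store between the two loads could change the underlying word, so the function returns 0 and the two compares keep their separate loads.

struct s { unsigned a : 4; unsigned b : 4; };
int counter;

int
g (struct s *p)
{
  int first = (p->a == 1);
  counter++;   /* Intervening store: a vdef between the two loads,
                  so merging them would not be safe.  */
  return first & (p->b == 3);
}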
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 12:20 Alexandre Oliva
From: Alexandre Oliva @ 2023-07-27 12:20 UTC
To: gcc-cvs
https://gcc.gnu.org/g:c5f6afb8f094b35311036f224ccbdc6f5d0f5328
commit c5f6afb8f094b35311036f224ccbdc6f5d0f5328
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 219 insertions(+), 34 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..41fb802803f 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,28 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, unshare_expr (inner),
+ unshare_expr (orig_inner),
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ return force_gimple_operand_gsi (&gsi, ref,
+ true, NULL_TREE,
+ true, GSI_SAME_STMT);
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7557,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7567,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7630,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7762,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable = 0, r_mergeable = 0;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7828,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7854,9 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || (ll_load && rl_load
+ && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7867,9 @@ fold_truth_andor_maybe_separate (location_t loc,
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || (lr_load && rr_load
+ && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
return 0;
else
l_const = r_const = 0;
@@ -8035,18 +8178,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8281,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
@@ -8144,6 +8327,8 @@ fold_truth_andor_maybe_separate (location_t loc,
for (int j = 0; j < 2; j++)
{
+ op[j] = unshare_expr (op[j]);
+
/* Mask out the bits belonging to the other part. */
if (xmask[j][i])
mask[j] = int_const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
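The nested conditional expressions that pick the insertion point are easier to read when flattened; the fragment below is only an illustration of the same logic, spelling out the sign convention of mergeable_loads_p for the right-hand pair of loads (lr_load and rr_load).

  gimple *point;
  if (r_mergeable == 0)
    point = NULL;        /* Not mergeable: emit a plain BIT_FIELD_REF.  */
  else if (r_mergeable < 0)
    point = lr_load;     /* lr_load dominates rr_load: insert there.  */
  else
    point = rr_load;     /* rr_load dominates lr_load: insert there.  */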
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 11:52 Alexandre Oliva
From: Alexandre Oliva @ 2023-07-27 11:52 UTC
To: gcc-cvs
https://gcc.gnu.org/g:865c0dab37e34a394617a8260fd7ad520f49b73a
commit 865c0dab37e34a394617a8260fd7ad520f49b73a
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 256 ++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 222 insertions(+), 34 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..160ab2c75fe 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,31 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, unshare_expr (inner),
+ unshare_expr (orig_inner),
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7560,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7570,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7633,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7765,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable = 0, r_mergeable = 0;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7831,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7857,9 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || (ll_load && rl_load
+ && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7870,9 @@ fold_truth_andor_maybe_separate (location_t loc,
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || (lr_load && rr_load
+ && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
return 0;
else
l_const = r_const = 0;
@@ -8035,18 +8181,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8284,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
@@ -8144,6 +8330,8 @@ fold_truth_andor_maybe_separate (location_t loc,
for (int j = 0; j < 2; j++)
{
+ op[j] = unshare_expr (op[j]);
+
/* Mask out the bits belonging to the other part. */
if (xmask[j][i])
mask[j] = int_const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
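One more hypothetical example (not from the patch) of where the cross-block walk in mergeable_loads_p matters: with short-circuit evaluation the load feeding the second compare may end up in a block dominated by the first load, so the function walks the single-predecessor chain between the two blocks looking for vdefs before allowing the merge, and returns -1 to request insertion before the dominating load.

struct s { unsigned int a; unsigned int b; };

int
k (struct s *p, int x)
{
  /* If the load of p->b sits in a block dominated by the load of p->a
     and no vdef intervenes, the merged wide load is inserted before the
     load of p->a.  */
  if (p->a == 1 && p->b == 2)
    return x;
  return 0;
}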
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 11:08 Alexandre Oliva
From: Alexandre Oliva @ 2023-07-27 11:08 UTC
To: gcc-cvs
https://gcc.gnu.org/g:a26f9f3bba9e51e2cac71bfb1837f9e3b0a4f91a
commit a26f9f3bba9e51e2cac71bfb1837f9e3b0a4f91a
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 219 insertions(+), 34 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..71fc728467d 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bb);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable = 0, r_mergeable = 0;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,9 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || (ll_load && rl_load
+ && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7869,9 @@ fold_truth_andor_maybe_separate (location_t loc,
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || (lr_load && rr_load
+ && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
return 0;
else
l_const = r_const = 0;
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
^ permalink raw reply [flat|nested] 9+ messages in thread
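A rough illustration of why mergeable_loads_p reports which of the two loads dominates (a hypothetical example, not from the patch; it only shows the control-flow shape the dominance test is about): when the second compare executes only under the first, the merged wide load has to be placed at the dominating load so it is available on every path that needs it.

    struct s { unsigned char a; unsigned char b; };

    int
    g (struct s *p)
    {
      if (p->a != 1)    /* this load dominates the one below...  */
        return 0;
      if (p->b != 2)    /* ...so a merged load covering both fields would be
                           inserted before the first compare (the -1 return
                           value, in the patch's convention).  */
        return 0;
      return 1;
    }

For split loads, the point[] array passed to build_split_load carries the same choice separately for each half.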
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 9:05 Alexandre Oliva
0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 9:05 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:399a05175f229f54eb10a61b1450c12ea228e438
commit 399a05175f229f54eb10a61b1450c12ea228e438
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 257 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 222 insertions(+), 35 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..477b5854485 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bb);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable, r_mergeable;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,8 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7865,18 @@ fold_truth_andor_maybe_separate (location_t loc,
{
l_const = lr_arg, r_const = rr_arg;
lr_reversep = ll_reversep;
+ r_mergeable = 0;
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
return 0;
else
- l_const = r_const = 0;
+ {
+ l_const = r_const = 0;
+ r_mergeable = 0;
+ }
if (lsignbit)
{
@@ -8035,18 +8182,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8285,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
^ permalink raw reply [flat|nested] 9+ messages in thread
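Conversely, a sketch of the hazard the vdef scans in mergeable_loads_p guard against (again a hypothetical example, not from the patch): a store between the two loads makes it unsafe to replace them with one wide load issued at the earlier point, so the function returns 0 and the fold gives up.

    struct s { unsigned char a; unsigned char b; };

    int
    h (struct s *p)
    {
      if (p->a != 1)
        return 0;
      p->b = 2;          /* intervening store (a vdef)...  */
      return p->b == 2;  /* ...so the two loads must not be merged into one
                            wide load placed before the store.  */
    }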
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 8:59 Alexandre Oliva
0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 8:59 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:97f45a4cd2b6ea461c878401ab5a8c544b5d22b9
commit 97f45a4cd2b6ea461c878401ab5a8c544b5d22b9
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 259 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 224 insertions(+), 35 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..a3665c04d50 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,32 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7561,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7571,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7634,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bb);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7766,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable, r_mergeable;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7832,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7858,8 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7867,18 @@ fold_truth_andor_maybe_separate (location_t loc,
{
l_const = lr_arg, r_const = rr_arg;
lr_reversep = ll_reversep;
+ r_mergeable = 0;
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
return 0;
else
- l_const = r_const = 0;
+ {
+ l_const = r_const = 0;
+ r_mergeable = 0;
+ }
if (lsignbit)
{
@@ -8035,18 +8184,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8287,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
^ permalink raw reply [flat|nested] 9+ messages in thread
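Finally, a sketch of the situation build_split_load is for (layout and alignment here are illustrative assumptions): when the compared bits straddle a boundary that a single aligned access cannot cover, the fold emits two loads, one per side of the boundary, and the point[] argument added by this patch lets each of them be inserted at its own mergeable load.

    struct s2
    {
      unsigned char pad[3];
      unsigned char a;   /* last byte of the first 4-byte word   */
      unsigned char b;   /* first byte of the second 4-byte word */
    };

    int
    k (struct s2 *p)
    {
      /* a and b are adjacent but straddle the 4-byte boundary, so instead
         of one unaligned 16-bit access the fold may emit two aligned word
         loads, masking the relevant byte out of each.  */
      return p->a == 3 && p->b == 4;
    }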
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 8:28 Alexandre Oliva
0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 8:28 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:ae97b6fb7279db3dd83041e6ea9d9be296d5a3aa
commit ae97b6fb7279db3dd83041e6ea9d9be296d5a3aa
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 259 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 224 insertions(+), 35 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..03fd402a99f 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see
#include "varasm.h"
#include "internal-fn.h"
#include "gimple-range.h"
+#include "tree-ssa-loop-inter.h" // stmt_dominats_stmt_p
/* ??? Move this to some header, it's defined in fold-const.c. */
extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7311,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,32 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7561,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7571,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7634,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0]; )
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bb);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7766,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable, r_mergeable;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7832,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7858,8 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7867,18 @@ fold_truth_andor_maybe_separate (location_t loc,
{
l_const = lr_arg, r_const = rr_arg;
lr_reversep = ll_reversep;
+ r_mergeable = 0;
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
return 0;
else
- l_const = r_const = 0;
+ {
+ l_const = r_const = 0;
+ r_mergeable = 0;
+ }
if (lsignbit)
{
@@ -8035,18 +8184,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8287,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
^ permalink raw reply [flat|nested] 9+ messages in thread
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 8:20 Alexandre Oliva
0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 8:20 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:f8d184038b84e264f8c756348617a8992721d66f
commit f8d184038b84e264f8c756348617a8992721d66f
Author: Alexandre Oliva <oliva@gnu.org>
Date: Thu Jul 27 05:15:20 2023 -0300
check for mergeable loads, choose insertion points accordingly
Diff:
---
gcc/gimple-fold.cc | 255 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 220 insertions(+), 35 deletions(-)
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..f1bb8d6e758 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7080,7 +7080,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use. */
-bool
+static bool
ssa_is_substitutable_p (gimple *stmt)
{
#if 0
@@ -7161,9 +7161,10 @@ is_cast_p (tree *name)
if (gimple_num_ops (def) != 2)
break;
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+ break;
*name = gimple_assign_rhs1 (def);
continue;
}
@@ -7200,8 +7201,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
*name = gimple_assign_rhs1 (def);
continue;
@@ -7209,7 +7209,7 @@ is_binop_p (enum tree_code code, tree *name)
return 0;
case 3:
- ;
+ break;
}
if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7254,26 @@ prepare_xor (tree l_arg, tree *r_arg)
return ret;
}
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+ return its RHS, otherwise return EXP. */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+ if (TREE_CODE (exp) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (exp))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (exp);
+ if (gimple_assign_load_p (def))
+ {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+ }
+ }
+
+ return exp;
+}
+
/* Subroutine for fold_truth_andor_1: decode a field reference.
If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7300,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero. We're to take the first or second
operand thereof if so. It should be zero otherwise.
+ *LOAD is set to the load stmt of the innermost reference, if any,
+ and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with. */
@@ -7287,7 +7310,8 @@ static tree
decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
- tree *pmask, tree *pand_mask, int xor_which)
+ tree *pmask, tree *pand_mask, int xor_which,
+ gimple **load)
{
tree exp = *exp_;
tree outer_type = 0;
@@ -7297,11 +7321,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
unsigned int precision;
HOST_WIDE_INT shiftrt = 0;
+ *load = NULL;
+
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
below can fail for, e.g., XFmode. */
if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
- return 0;
+ return NULL_TREE;
/* We are interested in the bare arrangement of bits, so strip everything
that doesn't affect the machine mode. However, record the type of the
@@ -7311,7 +7337,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
{
if (TREE_CODE (and_mask) != INTEGER_CST)
- return 0;
+ return NULL_TREE;
}
if (xor_which)
@@ -7329,16 +7355,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
{
if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
- return 0;
+ return NULL_TREE;
shiftrt = tree_to_shwi (shift);
if (shiftrt <= 0)
- return 0;
+ return NULL_TREE;
}
if (tree t = is_cast_p (&exp))
if (!outer_type)
outer_type = t;
+ exp = follow_load (exp, load);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7515,32 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Make a bit_field_ref. If POINT is NULL, return the BIT_FIELD_REF.
+ Otherwise, build and insert a load stmt before POINT, and return
+ the SSA_NAME. ??? Rewrite LOAD in terms of the bitfield? */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+ HOST_WIDE_INT bitsize, poly_int64 bitpos,
+ int unsignedp, int reversep, gimple *point)
+{
+ tree ref = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsize, bitpos,
+ unsignedp, reversep);
+ if (!point)
+ return ref;
+
+ tree tmp = make_ssa_name (TREE_TYPE (ref));
+ gimple_stmt_iterator gsi = gsi_for_stmt (point);
+ gsi_insert_before (&gsi,
+ gimple_build_assign (tmp, ref),
+ GSI_NEW_STMT);
+
+ return tmp;
+}
+
/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7560,8 @@ build_split_load (tree /* out */ ln_arg[2],
HOST_WIDE_INT /* out */ shifted[2],
location_t loc, tree inner, tree orig_inner,
scalar_int_mode mode, scalar_int_mode mode2,
- HOST_WIDE_INT bit_pos, bool reversep)
+ HOST_WIDE_INT bit_pos, bool reversep,
+ gimple *point[2])
{
bitsiz[0] = GET_MODE_BITSIZE (mode);
bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7570,9 @@ build_split_load (tree /* out */ ln_arg[2],
{
tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
bitpos[i] = bit_pos;
- ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
type, bitsiz[i],
- bit_pos, 1, reversep);
+ bit_pos, 1, reversep, point[i]);
bit_pos += bitsiz[i];
}
@@ -7578,6 +7633,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
}
}
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+ words, can be safely merged into a single load. Return zero if
+ there are intervening stores, or if neither stmt dominates the
+ other. If they are mergeable, return -1 if the merged load should
+ be inserted before LSTMT to dominate both, otherwise return +1. */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+ gimple *stmts[2] = { lstmt, rstmt };
+ int ret;
+
+ if (stmt_dominates_stmt_p (lstmt, rstmt))
+ {
+ ret = -1;
+ stmts[0] = lstmt;
+ stmts[1] = rstmt;
+ }
+ else if (stmt_dominates_stmt_p (rstmt, lstmt))
+ {
+ ret = 1;
+ stmts[1] = lstmt;
+ stmts[0] = rstmt;
+ }
+ else
+ return 0;
+
+ basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+ if (bbs[0] == bbs[1])
+ {
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ return ret;
+ }
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+ !gsi_end_p (gsi); gsi_prev (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+
+ for (basic_block bb = bbs[1]; bb != bbs[0];)
+ {
+ /* ??? For now, stop if any visited block has more than one
+ predecessor. */
+ if (!single_pred_p (bb))
+ return 0;
+
+ bb = single_pred (bb);
+
+ for (gphi_iterator gpi = gsi_start_phis (bb);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gpi)))
+ return 0;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ /* ??? For now, stop if we find any intervening vdefs. */
+ if (gimple_vdef (gsi_stmt (gsi)))
+ return 0;
+ }
+
+ return ret;
+}
+
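Illustrative aside, not part of the patch: a hypothetical C example of the situation mergeable_loads_p is meant to decide.  With short-circuit evaluation the two compares below typically end up in separate basic blocks, so p->a and p->b are read by two distinct load statements from the same word.  The helper's sign says which of the two original loads the merged, wider load has to be inserted before so that it dominates both uses.

    struct pair { unsigned char a, b; };

    int
    g (struct pair *p)
    {
      /* The load feeding the second compare is dominated by the one
         feeding the first; with no intervening stores the helper
         returns -1, i.e. insert the merged load before the first
         (dominating) load.  */
      return p->a == 1 && p->b == 2;
    }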
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7765,8 @@ fold_truth_andor_maybe_separate (location_t loc,
enum tree_code orig_code = code;
enum tree_code wanted_code;
tree ll_inner, lr_inner, rl_inner, rr_inner;
+ gimple *ll_load, *lr_load, *rl_load, *rr_load;
+ int l_mergeable, r_mergeable;
HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7831,24 @@ fold_truth_andor_maybe_separate (location_t loc,
ll_inner = decode_field_reference (loc, &ll_arg,
&ll_bitsize, &ll_bitpos, &ll_mode,
&ll_unsignedp, &ll_reversep, &volatilep,
- &ll_mask, &ll_and_mask, l_xor);
+ &ll_mask, &ll_and_mask, l_xor,
+ &ll_load);
lr_inner = decode_field_reference (loc, &lr_arg,
&lr_bitsize, &lr_bitpos, &lr_mode,
&lr_unsignedp, &lr_reversep, &volatilep,
- &lr_mask, &lr_and_mask, 2 * l_xor);
+ &lr_mask, &lr_and_mask, 2 * l_xor,
+ &lr_load);
int r_xor = prepare_xor (rl_arg, &rr_arg);
rl_inner = decode_field_reference (loc, &rl_arg,
&rl_bitsize, &rl_bitpos, &rl_mode,
&rl_unsignedp, &rl_reversep, &volatilep,
- &rl_mask, &rl_and_mask, r_xor);
+ &rl_mask, &rl_and_mask, r_xor,
+ &rl_load);
rr_inner = decode_field_reference (loc, &rr_arg,
&rr_bitsize, &rr_bitpos, &rr_mode,
&rr_unsignedp, &rr_reversep, &volatilep,
- &rr_mask, &rr_and_mask, 2 * r_xor);
+ &rr_mask, &rr_and_mask, 2 * r_xor,
+ &rr_load);
/* It must be true that the inner operation on the lhs of each
comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7857,8 @@ fold_truth_andor_maybe_separate (location_t loc,
if (volatilep
|| ll_reversep != rl_reversep
|| ll_inner == 0 || rl_inner == 0
- || ! operand_equal_p (ll_inner, rl_inner, 0))
+ || ! operand_equal_p (ll_inner, rl_inner, 0)
+ || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
return 0;
if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7866,15 @@ fold_truth_andor_maybe_separate (location_t loc,
{
l_const = lr_arg, r_const = rr_arg;
lr_reversep = ll_reversep;
+ r_mergeable = 0;
}
else if (lr_reversep != rr_reversep
|| lr_inner == 0 || rr_inner == 0
- || ! operand_equal_p (lr_inner, rr_inner, 0))
+ || ! operand_equal_p (lr_inner, rr_inner, 0)
+ || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
return 0;
else
- l_const = r_const = 0;
+ l_const = r_const = 0, r_mergeable = 0;
if (lsignbit)
{
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[1][0] = rnbitpos;
bitsiz[1][0] = rnbitsize;
- ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp,
- lr_reversep);
+ ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep,
+ r_mergeable == 0
+ ? NULL
+ : r_mergeable < 0
+ ? lr_load
+ : rr_load);
}
if (parts > 1)
{
if (r_split_load)
- build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
- shifted[1], loc, lr_inner, lr_arg,
- rnmode, rnmode2, rnbitpos, lr_reversep);
+ {
+ gimple *point[2];
+ if (r_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (r_mergeable < 0)
+ {
+ point[0] = lr_load;
+ point[1] = rr_load;
+ }
+ else
+ {
+ point[0] = rr_load;
+ point[1] = lr_load;
+ }
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep, point);
+ }
else
reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
{
bitpos[0][0] = lnbitpos;
bitsiz[0][0] = lnbitsize;
- ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp,
- ll_reversep);
+ ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep,
+ l_mergeable == 0
+ ? NULL
+ : l_mergeable < 0
+ ? ll_load
+ : rl_load);
}
if (parts > 1)
{
if (l_split_load)
- build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
- shifted[0], loc, ll_inner, ll_arg,
- lnmode, lnmode2, lnbitpos, ll_reversep);
+ {
+ gimple *point[2];
+ if (l_mergeable == 0)
+ point[0] = point[1] = NULL;
+ else if (l_mergeable < 0)
+ {
+ point[0] = ll_load;
+ point[1] = rl_load;
+ }
+ else
+ {
+ point[0] = rl_load;
+ point[1] = ll_load;
+ }
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep, point);
+ }
else
reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
shifted[0], xmask[0],
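Illustrative aside, not part of the patch: assuming a little-endian target and the hypothetical struct pair from the sketch above, the net effect of choosing a dominating insertion point and merging the loads is conceptually equivalent to the source below; the real transformation is done on GIMPLE with BIT_FIELD_REFs, not by rewriting the source.

    struct pair { unsigned char a, b; };

    int
    g_folded (struct pair *p)
    {
      unsigned short w;
      /* One 16-bit load replaces the two byte loads; on little-endian,
         a == 1 is the low byte and b == 2 the high byte, so the pair
         compares equal to 0x0201.  */
      __builtin_memcpy (&w, p, sizeof w);
      return w == 0x0201;
    }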
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-08-18 3:55 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-27 9:22 [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly Alexandre Oliva
-- strict thread matches above, loose matches on Subject: below --
2023-08-18 3:55 Alexandre Oliva
2023-07-27 12:20 Alexandre Oliva
2023-07-27 11:52 Alexandre Oliva
2023-07-27 11:08 Alexandre Oliva
2023-07-27 9:05 Alexandre Oliva
2023-07-27 8:59 Alexandre Oliva
2023-07-27 8:28 Alexandre Oliva
2023-07-27 8:20 Alexandre Oliva
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).