public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-15 16:44 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-15 16:44 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2375252d9c19dff1638efdd27064c2e5402290e2
commit 2375252d9c19dff1638efdd27064c2e5402290e2
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
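As a rough illustration of the kind of code the change targets (this sketch is editorial, not part of the commit; the struct layout, field names, and constants are made up), the improved folding aims to merge a chain of narrow field compares like the one below into fewer word-sized loads, even though b is reached through a shift-and-mask and straddles a 32-bit alignment boundary of the packed struct:

/* Hypothetical example, compile with -O; not taken from the patch.  */
struct S {
  unsigned char a;
  unsigned int b;   /* bits 8..39: crosses the 32-bit alignment boundary */
  unsigned char c;
} __attribute__ ((packed, aligned (4)));

extern struct S s;

int
all_match_p (void)
{
  /* Each compare reads a narrow field; after the change these may be
     folded into a couple of aligned word loads and wide compares.  */
  return s.a == 0xaa
         && ((s.b >> 8) & 0xffff) == 0x3456   /* shift-and-mask form */
         && s.c == 0x55;
}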
Diff:
---
gcc/fold-const.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 178 insertions(+), 25 deletions(-)
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..e91e7778999 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of lhs, and from there to
+ each leaf of rhs, but don't bother recursing into lhs if rhs
+ is neither a comparison nor a compound expr, nor into rhs if
+ the lhs leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of lhs and rhs, and the non-recursing code below will run no
+ more times than the product of the leaf counts of lhs and
+ rhs. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 0))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 1));
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 1))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 0));
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each crosses a different alignment boundary, so as to load
+ the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary,
+ we have nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,77 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ /* ??? Handle ll_reversep and endianness below? */
+ HOST_WIDE_INT lnlcbitpos = 0;
+ HOST_WIDE_INT lnrcbitpos = lnlbitsize;
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ }
+ else
+ {
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
+
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
+
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9327,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
return tem;
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-23 23:23 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-23 23:23 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:de4bb02889b888a7a99758ef63f08fbb9d2d57de
commit de4bb02889b888a7a99758ef63f08fbb9d2d57de
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/config/rs6000/t-rs6000 | 4 +
gcc/fold-const.c | 341 ++++++++++++++++++++++++++++++++---
gcc/testsuite/gcc.dg/field-merge-1.c | 64 +++++++
gcc/testsuite/gcc.dg/field-merge-2.c | 31 ++++
4 files changed, 413 insertions(+), 27 deletions(-)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
$(srcdir)/config/rs6000/rs6000-tables.opt
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
# The rs6000 backend doesn't cause warnings in these files.
insn-conditions.o-warn =
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..081c41f7fbe 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6120,6 +6142,50 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
return NULL_TREE;
}
+/* Return the one bitpos within bit extents L or R that is at an
+ ALIGN-bit alignment boundary, or -1 if there is more than one such
+ boundary, if there isn't any, or if there is any such boundary
+ between the extents. L and R are given by bitpos and bitsize. If
+ it doesn't return -1, there are two consecutive ALIGN-bit words
+ that contain both extents, and at least one of the extents
+ straddles across the returned alignment boundary. */
+static inline HOST_WIDE_INT
+compute_split_boundary_from_align (HOST_WIDE_INT align,
+ HOST_WIDE_INT l_bitpos,
+ HOST_WIDE_INT l_bitsize,
+ HOST_WIDE_INT r_bitpos,
+ HOST_WIDE_INT r_bitsize)
+{
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT first_bit = MIN (l_bitpos, r_bitpos);
+ HOST_WIDE_INT end_bit = MAX (l_bitpos + l_bitsize, r_bitpos + r_bitsize);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+
+ /* Make sure we're crossing no more than one alignment boundary.
+
+ ??? We don't have logic to recombine loads of two adjacent
+ fields that each crosses a different alignment boundary, so
+ as to load the middle word only once, if other words can't be
+ otherwise recombined. */
+ if (boundary - first_bit > align)
+ return -1;
+
+ HOST_WIDE_INT l_start_word = l_bitpos & amask;
+ HOST_WIDE_INT l_end_word = (l_bitpos + l_bitsize - 1) & amask;
+
+ HOST_WIDE_INT r_start_word = r_bitpos & amask;
+ HOST_WIDE_INT r_end_word = (r_bitpos + r_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary, it's no
+ use to even try to merge them. */
+ if (l_start_word == l_end_word && r_start_word == r_end_word)
+ return -1;
+
+ return boundary;
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6142,11 +6208,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
two operands.
+ SEPARATEP should be NULL if LHS and RHS are adjacent in
+ CODE-chained compares, and a non-NULL pointer to NULL_TREE
+ otherwise. If the "words" accessed by RHS are already accessed by
+ LHS, this won't matter, but if RHS accesses "words" that LHS
+ doesn't, then *SEPARATEP will be set to the compares that should
+ take RHS's place. By "words" we mean contiguous bits that do not
+ cross a TYPE_ALIGN boundary of the accessed object's type.
+
We return the simplified tree or 0 if no optimization is possible. */
static tree
fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
- tree lhs, tree rhs)
+ tree lhs, tree rhs, tree *separatep)
{
/* If this is the "or" of two comparisons, we can do something if
the comparisons are NE_EXPR. If this is the "and", we can do something
@@ -6157,6 +6231,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6243,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool l_split_load;
+
+ gcc_checking_assert (!separatep || !*separatep);
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6280,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ tree separate = NULL;
+
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of LHS, and from there to
+ each leaf of RHS, but don't bother recursing into LHS if RHS
+ is neither a comparison nor a compound expr, nor into RHS if
+ the LHS leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of LHS and RHS, and the non-recursing code below will run no
+ more times than the product of the leaf counts of LHS and
+ RHS. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs,
+ separatep
+ ? separatep
+ : NULL)) != 0)
+ {
+ /* We have combined the latter part of LHS with RHS. If
+ they were separate, the recursion already placed any
+ remains of RHS in *SEPARATEP, otherwise they are all
+ in RESULT, so we just have to prepend to result the
+ former part of LHS. */
+ result = fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ return result;
+ }
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs,
+ separatep
+ ? separatep
+ : &separate)) != 0)
+ {
+ /* We have combined the former part of LHS with RHS. If
+ they were separate, the recursive call will have
+ placed remnants of RHS in *SEPARATEP. If they
+ weren't, they will be in SEPARATE instead. Append
+ the latter part of LHS to the result, and then any
+ remnants of RHS that we haven't passed on to the
+ caller. */
+ result = fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ if (separate)
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ lhs, TREE_OPERAND (rhs, 0),
+ separatep
+ ? &separate
+ : NULL)) != 0)
+ {
+ /* We have combined LHS with the former part of RHS. If
+ they were separate, have any remnants of RHS placed
+ in separate, so that we can combine them with the
+ latter part of RHS, and then send them back for the
+ caller to handle. If they were adjacent, we can just
+ append the latter part of RHS to the RESULT. */
+ if (!separate)
+ separate = TREE_OPERAND (rhs, 1);
+ else
+ separate = fold_build2_loc (loc, code, truth_type,
+ separate, TREE_OPERAND (rhs, 1));
+ if (separatep)
+ *separatep = separate;
+ else
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ lhs, TREE_OPERAND (rhs, 1),
+ &separate)) != 0)
+ {
+ /* We have combined LHS with the latter part of RHS.
+ They're definitely not adjacent, so we get the
+ remains of RHS in SEPARATE, and then prepend the
+ former part of RHS to it. If LHS and RHS were
+ already separate to begin with, we leave the remnants
+ of RHS for the caller to deal with, otherwise we
+ append them to the RESULT. */
+ if (!separate)
+ separate = TREE_OPERAND (rhs, 0);
+ else
+ separate = fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (rhs, 0), separate);
+ if (separatep)
+ *separatep = separate;
+ else
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6544,30 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT boundary = compute_split_boundary_from_align
+ (align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+
+ if (boundary < 0
+ || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ l_split_load = true;
+ }
+ else
+ l_split_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (l_split_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6621,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || l_split_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6763,107 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (l_split_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ HOST_WIDE_INT lnlcbitpos;
+ HOST_WIDE_INT lnrcbitpos;
+ /* The bit field ref constant folding below already takes care
+ of default target endianness. */
+ if (!ll_reversep)
+ {
+ lnlcbitpos = 0;
+ lnrcbitpos = lnlbitsize;
+ }
+ else
+ {
+ lnlcbitpos = lnrbitsize;
+ lnrcbitpos = 0;
+ }
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+
+ /* If the accesses were adjacent in the compare chain, or if LHS
+ already accessed both "words", then we can have both words
+ accessed in the RESULT, leaving no remains of *RHS for
+ *SEPARATEP. Otherwise, figure out which part of the object
+ was already accessed by LHS, and put it in the RESULT,
+ returning the bits newly-accessed by RHS in *SEPARATEP. */
+ if (!separatep
+ || (ll_bitpos < lnrbitpos && ll_bitpos + ll_bitsize > lnrbitpos))
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ else if (ll_bitpos >= lnrbitpos)
+ {
+ result = llr_result;
+ *separatep = lll_result;
+ }
+ else
+ {
+ result = lll_result;
+ *separatep = llr_result;
+ }
+ }
+ else
+ {
+
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
+
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9461,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
- if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+ if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
return tem;
bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..1818e104437
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+ tested out of order, and when fields straddle across alignment
+ boundaries. */
+
+struct TL {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+ /* Which words of | vL | vB | */
+ /* are accessed by |0123|0123| */
+ if (0 /* the tests? | | | */
+ || vL.p != vc /* |* | | */
+ || vB.p != vc /* | |* | */
+ || vL.s != vc /* | *| | */
+ || vB.q != vc /* | | * | */
+ || vL.a != vi /* |^* | | */
+ || vB.b != vi /* | | ^* | */
+ || vL.c != vi /* | *^| | */
+ || vB.c != vi /* | | ^*| */
+ || vL.b != vi /* | ^^ | | */
+ || vL.q != vc /* | ^ | | */
+ || vB.a != vi /* | |^^ | */
+ || vB.r != vc /* | | ^ | */
+ || vB.s != vc /* | | ^| */
+ || vL.r != vc /* | ^ | | */
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-2.c b/gcc/testsuite/gcc.dg/field-merge-2.c
new file mode 100644
index 00000000000..80c573b31dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct TL {
+ unsigned short a;
+ unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+struct TB {
+ unsigned char p;
+ unsigned short a;
+ unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+#define vc 0xaa
+
+struct TL vL = { vc, vc };
+struct TB vB = { vc, vc, vc };
+
+void f (void) {
+ if (0
+ || vL.b != vB.b
+ || vL.a != vB.a
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17 11:31 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-17 11:31 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:0dc8eabc87885d6eb5b395ac2c43aafec59fdcc9
commit 0dc8eabc87885d6eb5b395ac2c43aafec59fdcc9
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/config/rs6000/t-rs6000 | 4 +
gcc/fold-const.c | 315 ++++++++++++++++++++++++++++++++---
gcc/testsuite/gcc.dg/field-merge-1.c | 64 +++++++
gcc/testsuite/gcc.dg/field-merge-2.c | 31 ++++
4 files changed, 387 insertions(+), 27 deletions(-)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
$(srcdir)/config/rs6000/rs6000-tables.opt
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
# The rs6000 backend doesn't cause warnings in these files.
insn-conditions.o-warn =
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..c835327dac9 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6142,11 +6164,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
two operands.
+ SEPARATEP should be NULL if LHS and RHS are adjacent in
+ CODE-chained compares, and a non-NULL pointer to NULL_TREE
+ otherwise. If the "words" accessed by RHS are already accessed by
+ LHS, this won't matter, but if RHS accesses "words" that LHS
+ doesn't, then *SEPARATEP will be set to the compares that should
+ take RHS's place. By "words" we mean contiguous bits that do not
+ cross a TYPE_ALIGN boundary of the accessed object's type.
+
We return the simplified tree or 0 if no optimization is possible. */
static tree
fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
- tree lhs, tree rhs)
+ tree lhs, tree rhs, tree *separatep)
{
/* If this is the "or" of two comparisons, we can do something if
the comparisons are NE_EXPR. If this is the "and", we can do something
@@ -6157,6 +6187,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6199,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
+
+ gcc_checking_assert (!separatep || !*separatep);
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6236,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ tree separate = NULL;
+
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of LHS, and from there to
+ each leaf of RHS, but don't bother recursing into LHS if RHS
+ is neither a comparison nor a compound expr, nor into RHS if
+ the LHS leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of LHS and RHS, and the non-recursing code below will run no
+ more times than the product of the leaf counts of LHS and
+ RHS. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs,
+ separatep
+ ? separatep
+ : NULL)) != 0)
+ {
+ /* We have combined the latter part of LHS with RHS. If
+ they were separate, the recursion already placed any
+ remains of RHS in *SEPARATEP, otherwise they are all
+ in RESULT, so we just have to prepend to result the
+ former part of LHS. */
+ result = fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ return result;
+ }
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs,
+ separatep
+ ? separatep
+ : &separate)) != 0)
+ {
+ /* We have combined the former part of LHS with RHS. If
+ they were separate, the recursive call will have
+ placed remnants of RHS in *SEPARATEP. If they
+ weren't, they will be in SEPARATE instead. Append
+ the latter part of LHS to the result, and then any
+ remnants of RHS that we haven't passed on to the
+ caller. */
+ result = fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ if (separate)
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ lhs, TREE_OPERAND (rhs, 0),
+ separatep
+ ? &separate
+ : NULL)) != 0)
+ {
+ /* We have combined LHS with the former part of RHS. If
+ they were separate, have any remnants of RHS placed
+ in separate, so that we can combine them with the
+ latter part of RHS, and then send them back for the
+ caller to handle. If they were adjacent, we can just
+ append the latter part of RHS to the RESULT. */
+ if (!separate)
+ separate = TREE_OPERAND (rhs, 1);
+ else
+ separate = fold_build2_loc (loc, code, truth_type,
+ separate, TREE_OPERAND (rhs, 1));
+ if (separatep)
+ *separatep = separate;
+ else
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ lhs, TREE_OPERAND (rhs, 1),
+ &separate)) != 0)
+ {
+ /* We have combined LHS with the latter part of RHS.
+ They're definitely not adjacent, so we get the
+ remains of RHS in SEPARATE, and then prepend the
+ former part of RHS to it. If LHS and RHS were
+ already separate to begin with, we leave the remnants
+ of RHS for the caller to deal with, otherwise we
+ append them to the RESULT. */
+ if (!separate)
+ separate = TREE_OPERAND (rhs, 0);
+ else
+ separate = fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (rhs, 0), separate);
+ if (separatep)
+ *separatep = separate;
+ else
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6500,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each crosses a different alignment boundary, so as to load
+ the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary,
+ we have nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6595,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6737,107 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ HOST_WIDE_INT lnlcbitpos;
+ HOST_WIDE_INT lnrcbitpos;
+ /* The bit field ref constant folding below already takes care
+ of default target endianness. */
+ if (!ll_reversep)
+ {
+ lnlcbitpos = 0;
+ lnrcbitpos = lnlbitsize;
+ }
+ else
+ {
+ lnlcbitpos = lnrbitsize;
+ lnrcbitpos = 0;
+ }
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+
+ /* If the accesses were adjacent in the compare chain, or if LHS
+ already accessed both "words", then we can have both words
+ accessed in the RESULT, leaving no remains of *RHS for
+ *SEPARATEP. Otherwise, figure out which part of the object
+ was already accessed by LHS, and put it in the RESULT,
+ returning the bits newly-accessed by RHS in *SEPARATEP. */
+ if (!separatep
+ || (ll_bitpos < lnrbitpos && ll_bitpos + ll_bitsize > lnrbitpos))
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ else if (ll_bitpos >= lnrbitpos)
+ {
+ result = llr_result;
+ *separatep = lll_result;
+ }
+ else
+ {
+ result = lll_result;
+ *separatep = llr_result;
+ }
+ }
+ else
+ {
+
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
+
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9435,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
- if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+ if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
return tem;
bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..1818e104437
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+ tested out of order, and when fields straddle across alignment
+ boundaries. */
+
+struct TL {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+ /* Which words of | vL | vB | */
+ /* are accessed by |0123|0123| */
+ if (0 /* the tests? | | | */
+ || vL.p != vc /* |* | | */
+ || vB.p != vc /* | |* | */
+ || vL.s != vc /* | *| | */
+ || vB.q != vc /* | | * | */
+ || vL.a != vi /* |^* | | */
+ || vB.b != vi /* | | ^* | */
+ || vL.c != vi /* | *^| | */
+ || vB.c != vi /* | | ^*| */
+ || vL.b != vi /* | ^^ | | */
+ || vL.q != vc /* | ^ | | */
+ || vB.a != vi /* | |^^ | */
+ || vB.r != vc /* | | ^ | */
+ || vB.s != vc /* | | ^| */
+ || vL.r != vc /* | ^ | | */
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-2.c b/gcc/testsuite/gcc.dg/field-merge-2.c
new file mode 100644
index 00000000000..80c573b31dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct TL {
+ unsigned short a;
+ unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+struct TB {
+ unsigned char p;
+ unsigned short a;
+ unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+#define vc 0xaa
+
+struct TL vL = { vc, vc };
+struct TB vB = { vc, vc, vc };
+
+void f (void) {
+ if (0
+ || vL.b != vB.b
+ || vL.a != vB.a
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17 8:22 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-17 8:22 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:a8de178570781759ed6e7791d80752a8bad63463
commit a8de178570781759ed6e7791d80752a8bad63463
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/config/rs6000/t-rs6000 | 4 +
gcc/fold-const.c | 315 ++++++++++++++++++++++++++++++++---
gcc/testsuite/gcc.dg/field-merge-1.c | 64 +++++++
3 files changed, 356 insertions(+), 27 deletions(-)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
$(srcdir)/config/rs6000/rs6000-tables.opt
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
# The rs6000 backend doesn't cause warnings in these files.
insn-conditions.o-warn =
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..c835327dac9 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6142,11 +6164,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
two operands.
+ SEPARATEP should be NULL if LHS and RHS are adjacent in
+ CODE-chained compares, and a non-NULL pointer to NULL_TREE
+ otherwise. If the "words" accessed by RHS are already accessed by
+ LHS, this won't matter, but if RHS accesses "words" that LHS
+ doesn't, then *SEPARATEP will be set to the compares that should
+ take RHS's place. By "words" we mean contiguous bits that do not
+ cross a TYPE_ALIGN boundary of the accessed object's type.
+
We return the simplified tree or 0 if no optimization is possible. */
static tree
fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
- tree lhs, tree rhs)
+ tree lhs, tree rhs, tree *separatep)
{
/* If this is the "or" of two comparisons, we can do something if
the comparisons are NE_EXPR. If this is the "and", we can do something
@@ -6157,6 +6187,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6199,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
+
+ gcc_checking_assert (!separatep || !*separatep);
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6236,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ tree separate = NULL;
+
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of LHS, and from there to
+ each leaf of RHS, but don't bother recursing into LHS if RHS
+ is neither a comparison nor a compound expr, nor into RHS if
+ the LHS leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of LHS and RHS, and the non-recursing code below will run no
+ more times than the product of the leaf counts of LHS and
+ RHS. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs,
+ separatep
+ ? separatep
+ : NULL)) != 0)
+ {
+ /* We have combined the latter part of LHS with RHS. If
+ they were separate, the recursion already placed any
+ remains of RHS in *SEPARATEP, otherwise they are all
+ in RESULT, so we just have to prepend to result the
+ former part of LHS. */
+ result = fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ return result;
+ }
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs,
+ separatep
+ ? separatep
+ : &separate)) != 0)
+ {
+ /* We have combined the former part of LHS with RHS. If
+ they were separate, the recursive call will have
+ placed remnants of RHS in *SEPARATEP. If they
+ weren't, they will be in SEPARATE instead. Append
+ the latter part of LHS to the result, and then any
+ remnants of RHS that we haven't passed on to the
+ caller. */
+ result = fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ if (separate)
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ lhs, TREE_OPERAND (rhs, 0),
+ separatep
+ ? &separate
+ : NULL)) != 0)
+ {
+ /* We have combined LHS with the former part of RHS. If
+ they were separate, have any remnants of RHS placed
+ in separate, so that we can combine them with the
+ latter part of RHS, and then send them back for the
+ caller to handle. If they were adjacent, we can just
+ append the latter part of RHS to the RESULT. */
+ if (!separate)
+ separate = TREE_OPERAND (rhs, 1);
+ else
+ separate = fold_build2_loc (loc, code, truth_type,
+ separate, TREE_OPERAND (rhs, 1));
+ if (separatep)
+ *separatep = separate;
+ else
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ lhs, TREE_OPERAND (rhs, 1),
+ &separate)) != 0)
+ {
+ /* We have combined LHS with the latter part of RHS.
+ They're definitely not adjacent, so we get the
+ remains of RHS in SEPARATE, and then prepend the
+ former part of RHS to it. If LHS and RHS were
+ already separate to begin with, we leave the remnants
+ of RHS for the caller to deal with, otherwise we
+ append them to the RESULT. */
+ if (!separate)
+ separate = TREE_OPERAND (rhs, 0);
+ else
+ separate = fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (rhs, 0), separate);
+ if (separatep)
+ *separatep = separate;
+ else
+ result = fold_build2_loc (loc, code, truth_type,
+ result, separate);
+ return result;
+ }
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6500,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each crosses a different alignment boundary, so as to load
+ the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary,
+ there is nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6595,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6737,107 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ HOST_WIDE_INT lnlcbitpos;
+ HOST_WIDE_INT lnrcbitpos;
+ /* The bit field ref constant folding below already takes care
+ of default target endianness. */
+ if (!ll_reversep)
+ {
+ lnlcbitpos = 0;
+ lnrcbitpos = lnlbitsize;
+ }
+ else
+ {
+ lnlcbitpos = lnrbitsize;
+ lnrcbitpos = 0;
+ }
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+
+ /* If the accesses were adjacent in the compare chain, or if LHS
+ already accessed both "words", then we can have both words
+ accessed in the RESULT, leaving no remains of *RHS for
+ *SEPARATEP. Otherwise, figure out which part of the object
+ was already accessed by LHS, and put it in the RESULT,
+ returning the bits newly-accessed by RHS in *SEPARATEP. */
+ if (!separatep
+ || (ll_bitpos < lnrbitpos && ll_bitpos + ll_bitsize > lnrbitpos))
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ else if (ll_bitpos >= lnrbitpos)
+ {
+ result = llr_result;
+ *separatep = lll_result;
+ }
+ else
+ {
+ result = lll_result;
+ *separatep = llr_result;
+ }
+ }
+ else
+ {
+
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
+
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9435,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
- if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+ if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
return tem;
bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..1818e104437
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+ tested out of order, and when fields straddle across alignment
+ boundaries. */
+
+struct TL {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+ /* Which words of | vL | vB | */
+ /* are accessed by |0123|0123| */
+ if (0 /* the tests? | | | */
+ || vL.p != vc /* |* | | */
+ || vB.p != vc /* | |* | */
+ || vL.s != vc /* | *| | */
+ || vB.q != vc /* | | * | */
+ || vL.a != vi /* |^* | | */
+ || vB.b != vi /* | | ^* | */
+ || vL.c != vi /* | *^| | */
+ || vB.c != vi /* | | ^*| */
+ || vL.b != vi /* | ^^ | | */
+ || vL.q != vc /* | ^ | | */
+ || vB.a != vi /* | |^^ | */
+ || vB.r != vc /* | | ^ | */
+ || vB.s != vc /* | | ^| */
+ || vL.r != vc /* | ^ | | */
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17 3:20 Alexandre Oliva
0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-17 3:20 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:c5108baf0f0b1943976c40c6ede4d4af97b4a5b1
commit c5108baf0f0b1943976c40c6ede4d4af97b4a5b1
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/config/rs6000/t-rs6000 | 4 +
gcc/fold-const.c | 214 +++++++++++++++++++++++++++++++----
gcc/testsuite/gcc.dg/field-merge-1.c | 50 ++++++++
3 files changed, 243 insertions(+), 25 deletions(-)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
$(srcdir)/config/rs6000/rs6000-tables.opt
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
# The rs6000 backend doesn't cause warnings in these files.
insn-conditions.o-warn =
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..b163950f2c3 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of lhs, and from there to
+ each leaf of rhs, but don't bother recursing into lhs if rhs
+ is neither a comparison nor a compound expr, nor into rhs if
+ the lhs leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of lhs and rhs, and the non-recursing code below will run no
+ more times than the product of the leaf counts of lhs and
+ rhs. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 0))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 1));
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 1))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 0));
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each crosses a different alignment boundary, so as to load
+ the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary,
+ there is nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,88 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ HOST_WIDE_INT lnlcbitpos;
+ HOST_WIDE_INT lnrcbitpos;
+ /* The bit field ref constant folding below already takes care
+ of default target endianness. */
+ if (!ll_reversep)
+ {
+ lnlcbitpos = 0;
+ lnrcbitpos = lnlbitsize;
+ }
+ else
+ {
+ lnlcbitpos = lnrbitsize;
+ lnrcbitpos = 0;
+ }
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ }
+ else
+ {
+
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9338,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
return tem;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..41d173f445c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+ tested out of order, and when fields straddle across alignment
+ boundaries. */
+
+struct TL {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+ if (vL.a != vi || vL.b != vi || vL.c != vi
+ || vB.a != vi || vB.b != vi || vB.c != vi
+ || vL.p != vc || vL.q != vc || vL.r != vc || vL.s != vc
+ || vB.p != vc || vB.q != vc || vB.r != vc || vB.s != vc)
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17 2:52 Alexandre Oliva
0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-17 2:52 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:8e6137d4c9d9cba7dc7ea5ec2cfada53f40f6b6b
commit 8e6137d4c9d9cba7dc7ea5ec2cfada53f40f6b6b
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/config/rs6000/t-rs6000 | 4 +
gcc/fold-const.c | 214 +++++++++++++++++++++++++++++++----
gcc/testsuite/gcc.dg/field-merge-1.c | 48 ++++++++
3 files changed, 241 insertions(+), 25 deletions(-)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
$(srcdir)/config/rs6000/rs6000-tables.opt
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
# The rs6000 backend doesn't cause warnings in these files.
insn-conditions.o-warn =
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..b163950f2c3 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of lhs, and from there to
+ each leaf of rhs, but don't bother recursing into lhs if rhs
+ is neither a comparison nor a compound expr, nor into rhs if
+ the lhs leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of lhs and rhs, and the non-recursing code below will run no
+ more times than the product of the leaf counts of lhs and
+ rhs. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 0))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 1));
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 1))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 0));
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each crosses a different alignment boundary, so as to load
+ the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary,
+ there is nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,88 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ HOST_WIDE_INT lnlcbitpos;
+ HOST_WIDE_INT lnrcbitpos;
+ /* The bit field ref constant folding below already takes care
+ of default target endianness. */
+ if (!ll_reversep)
+ {
+ lnlcbitpos = 0;
+ lnrcbitpos = lnlbitsize;
+ }
+ else
+ {
+ lnlcbitpos = lnrbitsize;
+ lnrcbitpos = 0;
+ }
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ }
+ else
+ {
+
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9338,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
return tem;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..491ec2936f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps" } */
+
+/* Check that field loads compared with constants are merged, even if
+ tested out of order, and when fields straddle across alignment
+ boundaries. */
+
+struct TL {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+ unsigned char p;
+ unsigned int a;
+ unsigned char q;
+ unsigned int b;
+ unsigned char r;
+ unsigned int c;
+ unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+ if (vL.a != vi || vL.b != vi || vL.c != vi
+ || vB.a != vi || vB.b != vi || vB.c != vi
+ || vL.p != vc || vL.q != vc || vL.r != vc || vL.s != vc
+ || vB.p != vc || vB.q != vc || vB.r != vc || vB.s != vc)
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-16 21:38 Alexandre Oliva
0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-16 21:38 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:012db34a07d657fa9da81c116736396b2bc96b74
commit 012db34a07d657fa9da81c116736396b2bc96b74
Author: Alexandre Oliva <oliva@adacore.com>
Date: Tue Sep 15 13:32:07 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/config/rs6000/t-rs6000 | 4 +
gcc/fold-const.c | 203 +++++++++++++++++++++++++++++++++++++++------
2 files changed, 182 insertions(+), 25 deletions(-)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
$(srcdir)/config/rs6000/rs6000-tables.opt
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
# The rs6000 backend doesn't cause warnings in these files.
insn-conditions.o-warn =
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..e91e7778999 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of lhs, and from there to
+ each leaf of rhs, but don't bother recursing into lhs if rhs
+ is neither a comparison nor a compound expr, nor into rhs if
+ the lhs leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of lhs and rhs, and the non-recursing code below will run no
+ more times than the product of the leaf counts of lhs and
+ rhs. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 0))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 1));
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 1))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 0));
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each crosses a different alignment boundary, so as to load
+ the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles across an alignment boundary,
+ there is nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,77 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ /* ??? Handle ll_reversep and endianness below? */
+ HOST_WIDE_INT lnlcbitpos = 0;
+ HOST_WIDE_INT lnrcbitpos = lnlbitsize;
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ }
+ else
+ {
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
+
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
+
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9327,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
return tem;
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-15 15:55 Alexandre Oliva
0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-15 15:55 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:54381337e28e63a33c29b1352f061b5d51842d18
commit 54381337e28e63a33c29b1352f061b5d51842d18
Author: Alexandre Oliva <oliva@adacore.com>
Date: Fri Sep 11 16:44:52 2020 -0300
improvements for fold_truth_andor_1
Enable combining with the rightmost non-and/or, however deeply-nested.
Handle shift-and-mask.
Handle fields that cross alignment boundaries, when either part can be
combined.
Diff:
---
gcc/fold-const.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 186 insertions(+), 25 deletions(-)
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 1f861630225..8a43e959a8e 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
tree mask, inner, offset;
tree unsigned_type;
unsigned int precision;
+ HOST_WIDE_INT shiftrt = 0;
/* All the optimizations using this function assume integer fields.
There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
return NULL_TREE;
}
+ if (TREE_CODE (exp) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+ && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+ {
+ shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+ if (shiftrt > 0)
+ exp = TREE_OPERAND (exp, 0);
+ else
+ shiftrt = 0;
+ }
+
+ if (TREE_CODE (exp) == NOP_EXPR)
+ exp = TREE_OPERAND (exp, 0);
+
poly_int64 poly_bitsize, poly_bitpos;
inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
pmode, punsignedp, preversep, pvolatilep);
+
if ((inner == exp && and_mask == 0)
|| !poly_bitsize.is_constant (pbitsize)
|| !poly_bitpos.is_constant (pbitpos)
- || *pbitsize < 0
+ || *pbitsize < shiftrt
|| offset != 0
|| TREE_CODE (inner) == PLACEHOLDER_EXPR
/* Reject out-of-bound accesses (PR79731). */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
*pbitpos + *pbitsize) < 0))
return NULL_TREE;
+ if (shiftrt)
+ {
+ *pbitpos += shiftrt;
+ *pbitsize -= shiftrt;
+ }
+
unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
if (unsigned_type == NULL_TREE)
return NULL_TREE;
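The new RSHIFT_EXPR handling lets a shift-and-mask access be decoded as a plain bit-field reference: the shift count is added to *pbitpos and subtracted from *pbitsize. A hypothetical source-level example (field name and constants invented):

  struct T { unsigned int w; };

  int g (struct T *p)
  {
    /* (p->w >> 8) & 0xff reads bits 8..15 of w; with the shift folded
       into the bit position it looks like an ordinary byte-sized field
       and can be merged with the compare of the low byte of w.  */
    return ((p->w >> 8) & 0xff) == 0x12 && (p->w & 0xff) == 0x34;
  }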
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
comparison for one-bit fields. */
+ enum tree_code orig_code = code;
enum tree_code wanted_code;
enum tree_code lcode, rcode;
tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
- scalar_int_mode lnmode, rnmode;
+ scalar_int_mode lnmode, lnmode2, rnmode;
tree ll_mask, lr_mask, rl_mask, rr_mask;
tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
tree l_const, r_const;
tree lntype, rntype, result;
HOST_WIDE_INT first_bit, end_bit;
int volatilep;
+ bool resplit_load;
/* Start by getting the comparison codes. Fail if anything is volatile.
If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6183,6 +6207,10 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_SIDE_EFFECTS (lhs) || TREE_SIDE_EFFECTS (rhs))
return 0;
+#if 0
+ inform (loc, "trying to merge %qT and %qT", lhs, rhs);
+#endif
+
lcode = TREE_CODE (lhs);
rcode = TREE_CODE (rhs);
@@ -6202,7 +6230,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (TREE_CODE_CLASS (lcode) != tcc_comparison
|| TREE_CODE_CLASS (rcode) != tcc_comparison)
- return 0;
+ {
+ /* Check for the possibility of merging component references.
+ If any of our operands is another similar operation, recurse
+ to try to merge individual operands, but avoiding double
+ recursion: recurse to each leaf of lhs, and from there to
+ each leaf of rhs, but don't bother recursing into lhs if rhs
+ is neither a comparison nor a compound expr, nor into rhs if
+ the lhs leaf isn't a comparison. In case of no successful
+ merging, recursion depth is limited to the sum of the depths
+ of lhs and rhs, and the non-recursing code below will run no
+ more times than the product of the leaf counts of lhs and
+ rhs. If there is a successful merge, we (recursively)
+ further attempt to fold the result, so recursion depth and
+ merge attempts are harder to compute. */
+ if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+ && (TREE_CODE_CLASS (rcode) == tcc_comparison
+ || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 1), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), result);
+ if ((result = fold_truth_andor_1 (loc, code, truth_type,
+ TREE_OPERAND (lhs, 0), rhs)) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (lhs, 1));
+ }
+ else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+ && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+ {
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 0))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 1));
+ if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+ TREE_OPERAND (rhs, 1))) != 0)
+ return fold_build2_loc (loc, code, truth_type,
+ result, TREE_OPERAND (rhs, 0));
+ }
+
+ return 0;
+ }
ll_arg = TREE_OPERAND (lhs, 0);
lr_arg = TREE_OPERAND (lhs, 1);
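The practical effect of the recursion added above is that a mergeable compare no longer has to sit next to its partner in the and/or chain; the removed check in fold_truth_andor only tried the rightmost operand of the lhs, one level deep. A made-up example:

  struct U { unsigned char x, z; };

  int h (struct U *p, int other)
  {
    /* Parsed as ((p->x == 1 && other) && p->z == 3).  The recursion
       tries p->z == 3 against each leaf of the left operand, so it can
       still be merged with p->x == 1 even though `other' sits in
       between.  */
    return p->x == 1 && other && p->z == 3;
  }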
@@ -6357,10 +6426,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
volatilep, &lnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT amask = ~(align - 1);
+
+ HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+ /* Make sure we're only crossing one alignment boundary.
+
+ ??? This won't recombine loads of two adjacent fields that
+ each cross a different alignment boundary in a way that would
+ load the middle word only once. */
+ if (boundary - first_bit > align)
+ return 0;
+
+ HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+ HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+ HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+ HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+ /* If neither field straddles an alignment boundary,
+ there is nothing further to do. */
+ if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+ return 0;
+
+ if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &lnmode2))
+ return 0;
+
+ resplit_load = true;
+ }
+ else
+ resplit_load = false;
lnbitsize = GET_MODE_BITSIZE (lnmode);
lnbitpos = first_bit & ~ (lnbitsize - 1);
+ if (resplit_load)
+ lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
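A standalone sketch of the boundary test in the hunk above, with made-up numbers: align = 32, one field at bits 8..15 (fits in word 0) and another at bits 24..39 (crosses into word 1), so the combined load gets split at bit 32:

  #include <stdio.h>

  int main (void)
  {
    long align = 32, amask = ~(align - 1);
    long ll_bitpos = 8, ll_bitsize = 8, rl_bitpos = 24, rl_bitsize = 16;
    long first_bit = 8, end_bit = 40;

    long boundary = (end_bit - 1) & amask;                    /* 32 */
    long ll_start_word = ll_bitpos & amask;                   /* 0  */
    long ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;  /* 0  */
    long rl_start_word = rl_bitpos & amask;                   /* 0  */
    long rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;  /* 32 */

    /* The second field straddles the boundary, so resplit_load would be
       set and the load split into a (boundary - first_bit)-bit part and
       an (end_bit - boundary)-bit part.  */
    printf ("boundary=%ld parts=%ld+%ld straddle=%d\n",
            boundary, boundary - first_bit, end_bit - boundary,
            ll_start_word != ll_end_word || rl_start_word != rl_end_word);
    return 0;
  }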
@@ -6414,7 +6521,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
|| ll_reversep != lr_reversep
/* Make sure the two fields on the right
correspond to the left without being swapped. */
- || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+ || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+ || resplit_load)
return 0;
first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6663,81 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (lnbitpos < 0)
return 0;
- /* Construct the expression we will return. First get the component
- reference we will make. Unless the mask is all ones the width of
- that field, perform the mask operation. Then compare with the
- merged constant. */
- result = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
+ if (resplit_load)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+ HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+ HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+ HOST_WIDE_INT midpoint = lnlbitsize;
+ HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+ HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+ gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+ tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+ tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+ tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnltype, lnlbitsize, lnlbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+ tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lnrtype, lnrbitsize, lnrbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
+
+ /* ??? Handle ll_reversep and endianness below? */
+ HOST_WIDE_INT lnlcbitpos = 0;
+ HOST_WIDE_INT lnrcbitpos = lnlbitsize;
+
+ tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ if (! integer_all_onesp (lll_mask))
+ lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+ lll_arg, lll_mask);
+ tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ if (! integer_all_onesp (llr_mask))
+ llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+ llr_arg, llr_mask);
+ tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+ tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+ bitsize_int (lnlbitsize),
+ bitsize_int (lnlcbitpos));
+ tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+ lll_arg, lll_const);
+ tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+ bitsize_int (lnrbitsize),
+ bitsize_int (lnrcbitpos));
+ tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+ llr_arg, llr_const);
+ result = build2_loc (loc, orig_code, truth_type,
+ lll_result, llr_result);
+ }
+ else
+ {
+
+ /* Construct the expression we will return. First get the component
+ reference we will make. Unless the mask is all ones the width of
+ that field, perform the mask operation. Then compare with the
+ merged constant. */
+ result = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp, ll_reversep);
- ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
- if (! all_ones_mask_p (ll_mask, lnbitsize))
- result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+ ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+ if (! integer_all_onesp (ll_mask))
+ result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+
+ result = build2_loc (loc, wanted_code, truth_type, result,
+ const_binop (BIT_IOR_EXPR, l_const, r_const));
+ }
- return build2_loc (loc, wanted_code, truth_type, result,
- const_binop (BIT_IOR_EXPR, l_const, r_const));
+#if 0
+ inform (loc, "merged %qT and %qT into %qT", lhs, rhs, result);
+#endif
+
+ return result;
}
\f
/* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9335,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, code, type, arg0, tem);
}
- /* Check for the possibility of merging component references. If our
- lhs is another similar operation, try to merge its rhs with our
- rhs. Then try to merge our lhs and rhs. */
- if (TREE_CODE (arg0) == code
- && (tem = fold_truth_andor_1 (loc, code, type,
- TREE_OPERAND (arg0, 1), arg1)) != 0)
- return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
return tem;
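For the resplit_load branch earlier in the patch, the rewrite produced is roughly equivalent to the hand-written form below: instead of a single wide, misaligned load-mask-compare, the two naturally aligned halves are reloaded and each is compared against its slice of the merged mask and constant. The identifiers and the 32+16 bit split are illustrative, and the slicing shown assumes a little-endian layout (cf. the ??? endianness note in the patch):

  #include <stdint.h>
  #include <string.h>

  /* Hand-written equivalent of the split compare, for a hypothetical
     48-bit combined field that crosses a 32-bit alignment boundary.  */
  int split_compare (const unsigned char *base, uint64_t mask, uint64_t val)
  {
    uint32_t lo;
    uint16_t hi;
    memcpy (&lo, base, sizeof lo);        /* aligned low 32 bits  */
    memcpy (&hi, base + 4, sizeof hi);    /* aligned high 16 bits */

    uint32_t mask_lo = (uint32_t) mask, val_lo = (uint32_t) val;
    uint16_t mask_hi = (uint16_t) (mask >> 32), val_hi = (uint16_t) (val >> 32);

    return (lo & mask_lo) == val_lo && (hi & mask_hi) == val_hi;
  }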
Thread overview: 8+ messages
2020-09-15 16:44 [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1 Alexandre Oliva
2020-09-23 23:23 Alexandre Oliva
2020-09-17 11:31 Alexandre Oliva
2020-09-17 8:22 Alexandre Oliva
2020-09-17 3:20 Alexandre Oliva
2020-09-17 2:52 Alexandre Oliva
2020-09-16 21:38 Alexandre Oliva
2020-09-15 15:55 Alexandre Oliva