public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/aoliva/heads/testme)] support split loads of rhs too
@ 2020-09-23 23:24 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-23 23:24 UTC
To: gcc-cvs
https://gcc.gnu.org/g:14b296e02473900af08e388408f382283303bdef
commit 14b296e02473900af08e388408f382283303bdef
Author: Alexandre Oliva <oliva@adacore.com>
Date: Mon Sep 21 21:20:25 2020 -0300
support split loads of rhs too
Diff:
---
gcc/fold-const.c | 319 ++++++++++++++++++++++++++++++-----
gcc/testsuite/gcc.dg/field-merge-4.c | 38 +++++
2 files changed, 318 insertions(+), 39 deletions(-)
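At the source level, the fold being extended here merges comparisons of
adjacent fields into a single wider masked compare; this commit lets the
right-hand-side load be split in two when its field straddles an
alignment boundary, instead of giving up on the merge. A minimal,
self-contained C sketch of the basic merge (hypothetical struct S,
little-endian byte order assumed; an illustration of the idea, not the
compiler's internal code):

#include <stdint.h>
#include <string.h>
#include <assert.h>

struct S { uint8_t p; uint8_t q; uint16_t a; }; /* 4 bytes, no padding */

/* Unmerged form: two separate field comparisons.  */
static int cmp_fields (const struct S *x, const struct S *y)
{
  return x->p == y->p && x->a == y->a;
}

/* Merged form the fold aims for: one 32-bit load per side, with the
   uncompared byte q masked out.  Mask value assumes little endian.  */
static int cmp_merged (const struct S *x, const struct S *y)
{
  uint32_t wx, wy;
  memcpy (&wx, x, sizeof wx);
  memcpy (&wy, y, sizeof wy);
  const uint32_t mask = 0xffff00ffu; /* keep p and a, drop q */
  return (wx & mask) == (wy & mask);
}

int main (void)
{
  struct S a = { 0xaa, 1, 0xccdd }, b = { 0xaa, 2, 0xccdd };
  assert (cmp_fields (&a, &b) == cmp_merged (&a, &b));
  return 0;
}

The new testcase below exercises the split-load case: in struct T2 the
16-bit field a starts at byte offset 3, so a single combined access
covering it has to be split at the 32-bit alignment boundary.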
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 961e8954128..db082b9b7e0 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6217,6 +6217,90 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
+ LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
+ respectively, starting at BIT_POS, using reversed endianness if
+ REVERSEP. Also initialize BITPOS (the starting position of each
+ part into INNER), BITSIZ (the bit count starting at BITPOS),
+ TOSHIFT[1] (the amount by which the part and its mask are to be
+ shifted right to bring its least-significant bit to bit zero) and
+ SHIFTED (the amount by which the part, by separate loading, has
+ already been shifted right, but that the mask needs shifting to
+ match). */
+static inline void
+build_split_load (tree /* out */ ln_arg[2],
+ HOST_WIDE_INT /* out */ bitpos[2],
+ HOST_WIDE_INT /* out */ bitsiz[2],
+ HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+ HOST_WIDE_INT /* out */ shifted[2],
+ location_t loc, tree inner, tree orig_inner,
+ scalar_int_mode mode, scalar_int_mode mode2,
+ HOST_WIDE_INT bit_pos, bool reversep)
+{
+ bitsiz[0] = GET_MODE_BITSIZE (mode);
+ bitsiz[1] = GET_MODE_BITSIZE (mode2);
+
+ for (int i = 0; i < 2; i++)
+ {
+ tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
+ bitpos[i] = bit_pos;
+ ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsiz[i],
+ bit_pos, 1, reversep);
+ bit_pos += bitsiz[i];
+ }
+
+ toshift[1] = toshift[0];
+ if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+ {
+ shifted[0] = bitsiz[1];
+ shifted[1] = 0;
+ toshift[0] = 0;
+ }
+ else
+ {
+ shifted[1] = bitsiz[0];
+ shifted[0] = 0;
+ toshift[1] = 0;
+ }
+}
+
+/* Make arrangements to split at bit BOUNDARY a single loaded word
+ (with REVERSEP bit order) LN_ARG[0], to be shifted right by
+ TOSHIFT[0] to bring the field of interest to the least-significant
+ bit. The expectation is that the same loaded word will be
+ propagated from part 0 to part 1, with just different shifting and
+ masking to extract both parts. MASK is not expected to do more
+ than masking out the bits that belong to the other part. See
+ build_split_load for more information on the other fields. */
+static inline void
+reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
+ HOST_WIDE_INT /* in[0] out[1] */ bitpos[2],
+ HOST_WIDE_INT /* in[0] out[1] */ bitsiz[2],
+ HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+ HOST_WIDE_INT /* out */ shifted[2],
+ tree /* out */ mask[2],
+ HOST_WIDE_INT boundary, bool reversep)
+{
+ ln_arg[1] = ln_arg[0];
+ bitpos[1] = bitpos[0];
+ bitsiz[1] = bitsiz[0];
+ shifted[1] = shifted[0] = 0;
+
+ tree basemask = build_int_cst_type (TREE_TYPE (ln_arg[0]), -1);
+ mask[1] = const_binop (LSHIFT_EXPR, basemask, bitsize_int (boundary));
+ mask[0] = const_binop (BIT_XOR_EXPR, basemask, mask[1]);
+
+ if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+ {
+ toshift[1] = toshift[0];
+ toshift[0] = bitpos[0] + bitsiz[0] - boundary;
+ std::swap (mask[0], mask[1]);
+ }
+ else
+ toshift[1] = boundary - bitpos[1];
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6616,6 +6700,11 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (l_split_load)
lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
+ if (!lntype)
+ {
+ gcc_checking_assert (l_split_load);
+ lntype = build_nonstandard_integer_type (lnbitsize, 1);
+ }
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
if (ll_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6669,20 +6758,52 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
/* Make sure the two fields on the right
correspond to the left without being swapped. */
|| ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
- || lnbitpos < 0
- || l_split_load)
+ || lnbitpos < 0)
return 0;
+ bool r_split_load;
+ scalar_int_mode rnmode2;
+
first_bit = MIN (lr_bitpos, rr_bitpos);
end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
volatilep, &rnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+ HOST_WIDE_INT boundary = compute_split_boundary_from_align
+ (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+
+ if (boundary < 0
+ /* If we're to split both, make sure the split point is
+ the same. */
+ || (l_split_load
+ && (boundary - lr_bitpos
+ != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
+ || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &rnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &rnmode2))
+ return 0;
+
+ r_split_load = true;
+ }
+ else
+ r_split_load = false;
rnbitsize = GET_MODE_BITSIZE (rnmode);
rnbitpos = first_bit & ~ (rnbitsize - 1);
+ if (r_split_load)
+ rnbitsize += GET_MODE_BITSIZE (rnmode2);
rntype = lang_hooks.types.type_for_size (rnbitsize, 1);
+ if (!rntype)
+ {
+ gcc_checking_assert (r_split_load);
+ rntype = build_nonstandard_integer_type (rnbitsize, 1);
+ }
xlr_bitpos = lr_bitpos - rnbitpos, xrr_bitpos = rr_bitpos - rnbitpos;
if (lr_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6702,7 +6823,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
lr_mask = const_binop (BIT_IOR_EXPR, lr_mask, rr_mask);
bool report
- = (l_split_load
+ = (l_split_load || r_split_load
|| (!(lnbitsize == rnbitsize
&& xll_bitpos == xlr_bitpos
&& lnbitpos >= 0
@@ -6717,56 +6838,176 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
&& rr_bitpos >= 0)));
tree orig_lhs = lhs, orig_rhs = rhs;
- lhs = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
- rhs = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp, lr_reversep);
- HOST_WIDE_INT shift = (MIN (xll_bitpos, xrl_bitpos)
- - MIN (xlr_bitpos, xrr_bitpos));
+ int parts = 1;
+ tree ld_arg[2][2];
+ HOST_WIDE_INT bitpos[2][2];
+ HOST_WIDE_INT bitsiz[2][2];
+ HOST_WIDE_INT shifted[2][2];
+ HOST_WIDE_INT toshift[2][2];
+ tree xmask[2][2] = {};
+
+ /* Consider we're comparing two non-contiguous fields of packed
+ structs, both aligned at 32-bit boundaries:
+
+ ll_arg: an 8-bit field at offset 0
+ lr_arg: a 16-bit field at offset 2
+
+ rl_arg: an 8-bit field at offset 1
+ rr_arg: a 16-bit field at offset 3
+
+ We'll have r_split_load, because rr_arg straddles across an
+ alignment boundary.
+
+ We'll want to have:
+
+ bitpos = { { 0, 0 }, { 0, 32 } }
+ bitsiz = { { 32, 32 }, { 32, 8 } }
+
+ And, for little-endian:
+
+ shifted = { { 0, 0 }, { 0, 32 } }
+ toshift = { { 0, 24 }, { 0, 0 } }
+
+ Or, for big-endian:
- if (shift > 0)
+ shifted = { { 0, 0 }, { 8, 0 } }
+ toshift = { { 8, 0 }, { 0, 0 } }
+ */
+
+ toshift[0][0] = MIN (xll_bitpos, xrl_bitpos);
+ shifted[0][0] = 0;
+
+ if (!l_split_load)
{
- tree shiftsz = bitsize_int (shift);
- lhs = fold_build2_loc (loc, RSHIFT_EXPR, lntype,
- lhs, shiftsz);
- ll_mask = const_binop (RSHIFT_EXPR, ll_mask, shiftsz);
+ bitpos[0][0] = lnbitpos;
+ bitsiz[0][0] = lnbitsize;
+ ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
}
- else if (shift < 0)
+
+ toshift[1][0] = MIN (xlr_bitpos, xrr_bitpos);
+ shifted[1][0] = 0;
+
+ if (!r_split_load)
{
- tree shiftsz = bitsize_int (-shift);
- rhs = fold_build2_loc (loc, RSHIFT_EXPR, rntype,
- rhs, shiftsz);
- lr_mask = const_binop (RSHIFT_EXPR, lr_mask, shiftsz);
+ bitpos[1][0] = rnbitpos;
+ bitsiz[1][0] = rnbitsize;
+ ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep);
}
- /* Convert to the smaller type before masking out unwanted bits. */
- tree type = lntype;
- if (lntype != rntype)
+ if (l_split_load || r_split_load)
{
- if (lnbitsize > rnbitsize)
+ parts = 2;
+
+ if (l_split_load)
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep);
+ else
+ reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], xmask[0],
+ rnbitpos + GET_MODE_BITSIZE (rnmode)
+ - lr_bitpos + ll_bitpos, ll_reversep);
+
+ if (r_split_load)
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep);
+ else
+ reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], xmask[1],
+ lnbitpos + GET_MODE_BITSIZE (lnmode)
+ - ll_bitpos + lr_bitpos, lr_reversep);
+ }
+
+ tree cmp[2];
+
+ for (int i = 0; i < parts; i++)
+ {
+ tree op[2] = { ld_arg[0][i], ld_arg[1][i] };
+ tree mask[2] = { ll_mask, lr_mask };
+
+ for (int j = 0; j < 2; j++)
{
- type = rntype;
- lhs = fold_convert_loc (loc, type, lhs);
- ll_mask = fold_convert_loc (loc, type, ll_mask);
+ /* Mask out the bits belonging to the other part. */
+ if (xmask[j][i])
+ mask[j] = const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
+
+ if (shifted[j][i])
+ {
+ tree shiftsz = bitsize_int (shifted[j][i]);
+ mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
+ }
+ mask[j] = fold_convert_loc (loc, TREE_TYPE (op[j]), mask[j]);
}
- else if (lnbitsize < rnbitsize)
+
+ HOST_WIDE_INT shift = (toshift[0][i] - toshift[1][i]);
+
+ if (shift)
{
- type = lntype;
- rhs = fold_convert_loc (loc, type, rhs);
- lr_mask = fold_convert_loc (loc, type, lr_mask);
+ int j;
+ if (shift > 0)
+ j = 0;
+ else
+ {
+ j = 1;
+ shift = -shift;
+ }
+
+ tree shiftsz = bitsize_int (shift);
+ op[j] = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (op[j]),
+ op[j], shiftsz);
+ mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
}
- }
- if (! integer_all_onesp (ll_mask))
- lhs = build2_loc (loc, BIT_AND_EXPR, type, lhs, ll_mask);
+ /* Convert to the smaller type before masking out unwanted
+ bits. */
+ tree type = TREE_TYPE (op[0]);
+ if (type != TREE_TYPE (op[1]))
+ {
+ int j = (TYPE_PRECISION (type)
+ < TYPE_PRECISION (TREE_TYPE (op[1])));
+ if (!j)
+ type = TREE_TYPE (op[1]);
+ op[j] = fold_convert_loc (loc, type, op[j]);
+ mask[j] = fold_convert_loc (loc, type, mask[j]);
+ }
+
+ for (int j = 0; j < 2; j++)
+ if (! integer_all_onesp (mask[j]))
+ op[j] = build2_loc (loc, BIT_AND_EXPR, type,
+ op[j], mask[j]);
- if (! integer_all_onesp (lr_mask))
- rhs = build2_loc (loc, BIT_AND_EXPR, type, rhs, lr_mask);
+ cmp[i] = build2_loc (loc, wanted_code, truth_type, op[0], op[1]);
+ }
- result = build2_loc (loc, wanted_code, truth_type, lhs, rhs);
+ if (parts == 1)
+ result = cmp[0];
+ else if (!separatep
+ || ((!l_split_load
+ || (ll_bitpos < bitpos[0][1]
+ && ll_bitpos + ll_bitsize > bitpos[0][1]))
+ && (!r_split_load
+ || (lr_bitpos < bitpos[1][1]
+ && lr_bitpos + lr_bitsize > bitpos[1][1]))))
+ result = build2_loc (loc, orig_code, truth_type, cmp[0], cmp[1]);
+ else if ((l_split_load && ll_bitpos >= bitpos[0][1])
+ || (r_split_load && lr_bitpos >= bitpos[1][1]))
+ {
+ result = cmp[1];
+ *separatep = cmp[0];
+ }
+ else
+ {
+ result = cmp[0];
+ *separatep = cmp[1];
+ }
if (report)
inform (loc, "merged nc extra %qE and %qE into %qE",
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c
new file mode 100644
index 00000000000..dea4f9c2914
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+ unsigned char p;
+ unsigned char qn;
+ unsigned short a;
+ unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+struct T2 {
+ unsigned char rn;
+ unsigned char p;
+ unsigned char qn;
+ unsigned short a;
+ unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { vc, 1, vs, vi };
+struct T2 v2 = { 0, vc, 1, vs, vi };
+
+void f (void) {
+ if (0
+ || v1.p != v2.p
+ || v1.a != v2.a
+ || v1.z != v2.z
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
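The mask computation in reuse_split_load above is what lets both parts
share a single loaded word: an all-ones mask is cut in two at the split
point with a left shift and an XOR, so each part's comparison keeps only
its own bits. A standalone sketch of that arithmetic, with a uint64_t
standing in for the tree constants and an arbitrary boundary of 24
(illustration only):

#include <stdint.h>
#include <stdio.h>

/* Cut an all-ones mask in two at bit BOUNDARY, mirroring the
   basemask/LSHIFT_EXPR/BIT_XOR_EXPR sequence above: mask1 keeps the
   bits at and above the boundary, mask0 the bits below it.  */
static void split_mask (unsigned boundary, uint64_t *mask0, uint64_t *mask1)
{
  uint64_t basemask = ~(uint64_t) 0;
  *mask1 = basemask << boundary;
  *mask0 = basemask ^ *mask1;
}

int main (void)
{
  uint64_t lo, hi;
  split_mask (24, &lo, &hi);
  printf ("%016llx %016llx\n",
          (unsigned long long) lo, (unsigned long long) hi);
  /* Prints 0000000000ffffff ffffffffff000000: the two parts select
     disjoint bit ranges of the shared word.  */
  return 0;
}

Note that the revision of this patch in the next message moves the mask
computation into the endianness branches and shifts by the
boundary-relative toshift amounts rather than by boundary itself.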
* [gcc(refs/users/aoliva/heads/testme)] support split loads of rhs too
@ 2020-09-24 14:43 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-24 14:43 UTC
To: gcc-cvs
https://gcc.gnu.org/g:4a00e09c155303faf76a3d21f9ab89e0540d9b23
commit 4a00e09c155303faf76a3d21f9ab89e0540d9b23
Author: Alexandre Oliva <oliva@adacore.com>
Date: Mon Sep 21 21:20:25 2020 -0300
support split loads of rhs too
Diff:
---
gcc/fold-const.c | 324 ++++++++++++++++++++++++++++++-----
gcc/testsuite/gcc.dg/field-merge-4.c | 40 +++++
2 files changed, 325 insertions(+), 39 deletions(-)
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 961e8954128..817d4f9010d 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6217,6 +6217,95 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
return boundary;
}
+/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
+ LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
+ respectively, starting at BIT_POS, using reversed endianness if
+ REVERSEP. Also initialize BITPOS (the starting position of each
+ part into INNER), BITSIZ (the bit count starting at BITPOS),
+ TOSHIFT[1] (the amount by which the part and its mask are to be
+ shifted right to bring its least-significant bit to bit zero) and
+ SHIFTED (the amount by which the part, by separate loading, has
+ already been shifted right, but that the mask needs shifting to
+ match). */
+static inline void
+build_split_load (tree /* out */ ln_arg[2],
+ HOST_WIDE_INT /* out */ bitpos[2],
+ HOST_WIDE_INT /* out */ bitsiz[2],
+ HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+ HOST_WIDE_INT /* out */ shifted[2],
+ location_t loc, tree inner, tree orig_inner,
+ scalar_int_mode mode, scalar_int_mode mode2,
+ HOST_WIDE_INT bit_pos, bool reversep)
+{
+ bitsiz[0] = GET_MODE_BITSIZE (mode);
+ bitsiz[1] = GET_MODE_BITSIZE (mode2);
+
+ for (int i = 0; i < 2; i++)
+ {
+ tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
+ bitpos[i] = bit_pos;
+ ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+ type, bitsiz[i],
+ bit_pos, 1, reversep);
+ bit_pos += bitsiz[i];
+ }
+
+ toshift[1] = toshift[0];
+ if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+ {
+ shifted[0] = bitsiz[1];
+ shifted[1] = 0;
+ toshift[0] = 0;
+ }
+ else
+ {
+ shifted[1] = bitsiz[0];
+ shifted[0] = 0;
+ toshift[1] = 0;
+ }
+}
+
+/* Make arrangements to split at bit BOUNDARY a single loaded word
+ (with REVERSEP bit order) LN_ARG[0], to be shifted right by
+ TOSHIFT[0] to bring the field of interest to the least-significant
+ bit. The expectation is that the same loaded word will be
+ propagated from part 0 to part 1, with just different shifting and
+ masking to extract both parts. MASK is not expected to do more
+ than masking out the bits that belong to the other part. See
+ build_split_load for more information on the other fields. */
+static inline void
+reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
+ HOST_WIDE_INT /* in[0] out[1] */ bitpos[2],
+ HOST_WIDE_INT /* in[0] out[1] */ bitsiz[2],
+ HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+ HOST_WIDE_INT /* out */ shifted[2],
+ tree /* out */ mask[2],
+ HOST_WIDE_INT boundary, bool reversep)
+{
+ ln_arg[1] = ln_arg[0];
+ bitpos[1] = bitpos[0];
+ bitsiz[1] = bitsiz[0];
+ shifted[1] = shifted[0] = 0;
+
+ tree basemask = build_int_cst_type (TREE_TYPE (ln_arg[0]), -1);
+
+ if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+ {
+ toshift[1] = toshift[0];
+ toshift[0] = bitpos[0] + bitsiz[0] - boundary;
+ mask[0] = const_binop (LSHIFT_EXPR, basemask,
+ bitsize_int (toshift[0]));
+ mask[1] = const_binop (BIT_XOR_EXPR, basemask, mask[0]);
+ }
+ else
+ {
+ toshift[1] = boundary - bitpos[1];
+ mask[1] = const_binop (LSHIFT_EXPR, basemask,
+ bitsize_int (toshift[1]));
+ mask[0] = const_binop (BIT_XOR_EXPR, basemask, mask[1]);
+ }
+}
+
/* Find ways of folding logical expressions of LHS and RHS:
Try to merge two comparisons to the same innermost item.
Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6616,6 +6705,11 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
if (l_split_load)
lnbitsize += GET_MODE_BITSIZE (lnmode2);
lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
+ if (!lntype)
+ {
+ gcc_checking_assert (l_split_load);
+ lntype = build_nonstandard_integer_type (lnbitsize, 1);
+ }
xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
if (ll_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6669,20 +6763,52 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
/* Make sure the two fields on the right
correspond to the left without being swapped. */
|| ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
- || lnbitpos < 0
- || l_split_load)
+ || lnbitpos < 0)
return 0;
+ bool r_split_load;
+ scalar_int_mode rnmode2;
+
first_bit = MIN (lr_bitpos, rr_bitpos);
end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
volatilep, &rnmode))
- return 0;
+ {
+ /* Consider the possibility of recombining loads if any of the
+ fields straddles across an alignment boundary, so that either
+ part can be loaded along with the other field. */
+ HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+ HOST_WIDE_INT boundary = compute_split_boundary_from_align
+ (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+
+ if (boundary < 0
+ /* If we're to split both, make sure the split point is
+ the same. */
+ || (l_split_load
+ && (boundary - lr_bitpos
+ != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
+ || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+ align, BITS_PER_WORD, volatilep, &rnmode)
+ || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+ align, BITS_PER_WORD, volatilep, &rnmode2))
+ return 0;
+
+ r_split_load = true;
+ }
+ else
+ r_split_load = false;
rnbitsize = GET_MODE_BITSIZE (rnmode);
rnbitpos = first_bit & ~ (rnbitsize - 1);
+ if (r_split_load)
+ rnbitsize += GET_MODE_BITSIZE (rnmode2);
rntype = lang_hooks.types.type_for_size (rnbitsize, 1);
+ if (!rntype)
+ {
+ gcc_checking_assert (r_split_load);
+ rntype = build_nonstandard_integer_type (rnbitsize, 1);
+ }
xlr_bitpos = lr_bitpos - rnbitpos, xrr_bitpos = rr_bitpos - rnbitpos;
if (lr_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6702,7 +6828,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
lr_mask = const_binop (BIT_IOR_EXPR, lr_mask, rr_mask);
bool report
- = (l_split_load
+ = (l_split_load || r_split_load
|| (!(lnbitsize == rnbitsize
&& xll_bitpos == xlr_bitpos
&& lnbitpos >= 0
@@ -6717,56 +6843,176 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
&& rr_bitpos >= 0)));
tree orig_lhs = lhs, orig_rhs = rhs;
- lhs = make_bit_field_ref (loc, ll_inner, ll_arg,
- lntype, lnbitsize, lnbitpos,
- ll_unsignedp || rl_unsignedp, ll_reversep);
- rhs = make_bit_field_ref (loc, lr_inner, lr_arg,
- rntype, rnbitsize, rnbitpos,
- lr_unsignedp || rr_unsignedp, lr_reversep);
- HOST_WIDE_INT shift = (MIN (xll_bitpos, xrl_bitpos)
- - MIN (xlr_bitpos, xrr_bitpos));
+ int parts = 1;
+ tree ld_arg[2][2];
+ HOST_WIDE_INT bitpos[2][2];
+ HOST_WIDE_INT bitsiz[2][2];
+ HOST_WIDE_INT shifted[2][2];
+ HOST_WIDE_INT toshift[2][2];
+ tree xmask[2][2] = {};
+
+ /* Consider we're comparing two non-contiguous fields of packed
+ structs, both aligned at 32-bit boundaries:
+
+ ll_arg: an 8-bit field at offset 0
+ lr_arg: a 16-bit field at offset 2
+
+ rl_arg: an 8-bit field at offset 1
+ rr_arg: a 16-bit field at offset 3
+
+ We'll have r_split_load, because rr_arg straddles across an
+ alignment boundary.
+
+ We'll want to have:
- if (shift > 0)
+ bitpos = { { 0, 0 }, { 0, 32 } }
+ bitsiz = { { 32, 32 }, { 32, 8 } }
+
+ And, for little-endian:
+
+ shifted = { { 0, 0 }, { 0, 32 } }
+ toshift = { { 0, 24 }, { 0, 0 } }
+
+ Or, for big-endian:
+
+ shifted = { { 0, 0 }, { 8, 0 } }
+ toshift = { { 8, 0 }, { 0, 0 } }
+ */
+
+ toshift[0][0] = MIN (xll_bitpos, xrl_bitpos);
+ shifted[0][0] = 0;
+
+ if (!l_split_load)
{
- tree shiftsz = bitsize_int (shift);
- lhs = fold_build2_loc (loc, RSHIFT_EXPR, lntype,
- lhs, shiftsz);
- ll_mask = const_binop (RSHIFT_EXPR, ll_mask, shiftsz);
+ bitpos[0][0] = lnbitpos;
+ bitsiz[0][0] = lnbitsize;
+ ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
+ lntype, lnbitsize, lnbitpos,
+ ll_unsignedp || rl_unsignedp,
+ ll_reversep);
}
- else if (shift < 0)
+
+ toshift[1][0] = MIN (xlr_bitpos, xrr_bitpos);
+ shifted[1][0] = 0;
+
+ if (!r_split_load)
{
- tree shiftsz = bitsize_int (-shift);
- rhs = fold_build2_loc (loc, RSHIFT_EXPR, rntype,
- rhs, shiftsz);
- lr_mask = const_binop (RSHIFT_EXPR, lr_mask, shiftsz);
+ bitpos[1][0] = rnbitpos;
+ bitsiz[1][0] = rnbitsize;
+ ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
+ rntype, rnbitsize, rnbitpos,
+ lr_unsignedp || rr_unsignedp,
+ lr_reversep);
}
- /* Convert to the smaller type before masking out unwanted bits. */
- tree type = lntype;
- if (lntype != rntype)
+ if (l_split_load || r_split_load)
{
- if (lnbitsize > rnbitsize)
+ parts = 2;
+
+ if (l_split_load)
+ build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], loc, ll_inner, ll_arg,
+ lnmode, lnmode2, lnbitpos, ll_reversep);
+ else
+ reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+ shifted[0], xmask[0],
+ rnbitpos + GET_MODE_BITSIZE (rnmode)
+ - lr_bitpos + ll_bitpos, ll_reversep);
+
+ if (r_split_load)
+ build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], loc, lr_inner, lr_arg,
+ rnmode, rnmode2, rnbitpos, lr_reversep);
+ else
+ reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+ shifted[1], xmask[1],
+ lnbitpos + GET_MODE_BITSIZE (lnmode)
+ - ll_bitpos + lr_bitpos, lr_reversep);
+ }
+
+ tree cmp[2];
+
+ for (int i = 0; i < parts; i++)
+ {
+ tree op[2] = { ld_arg[0][i], ld_arg[1][i] };
+ tree mask[2] = { ll_mask, lr_mask };
+
+ for (int j = 0; j < 2; j++)
{
- type = rntype;
- lhs = fold_convert_loc (loc, type, lhs);
- ll_mask = fold_convert_loc (loc, type, ll_mask);
+ /* Mask out the bits belonging to the other part. */
+ if (xmask[j][i])
+ mask[j] = const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
+
+ if (shifted[j][i])
+ {
+ tree shiftsz = bitsize_int (shifted[j][i]);
+ mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
+ }
+ mask[j] = fold_convert_loc (loc, TREE_TYPE (op[j]), mask[j]);
}
- else if (lnbitsize < rnbitsize)
+
+ HOST_WIDE_INT shift = (toshift[0][i] - toshift[1][i]);
+
+ if (shift)
{
- type = lntype;
- rhs = fold_convert_loc (loc, type, rhs);
- lr_mask = fold_convert_loc (loc, type, lr_mask);
+ int j;
+ if (shift > 0)
+ j = 0;
+ else
+ {
+ j = 1;
+ shift = -shift;
+ }
+
+ tree shiftsz = bitsize_int (shift);
+ op[j] = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (op[j]),
+ op[j], shiftsz);
+ mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
}
- }
- if (! integer_all_onesp (ll_mask))
- lhs = build2_loc (loc, BIT_AND_EXPR, type, lhs, ll_mask);
+ /* Convert to the smaller type before masking out unwanted
+ bits. */
+ tree type = TREE_TYPE (op[0]);
+ if (type != TREE_TYPE (op[1]))
+ {
+ int j = (TYPE_PRECISION (type)
+ < TYPE_PRECISION (TREE_TYPE (op[1])));
+ if (!j)
+ type = TREE_TYPE (op[1]);
+ op[j] = fold_convert_loc (loc, type, op[j]);
+ mask[j] = fold_convert_loc (loc, type, mask[j]);
+ }
- if (! integer_all_onesp (lr_mask))
- rhs = build2_loc (loc, BIT_AND_EXPR, type, rhs, lr_mask);
+ for (int j = 0; j < 2; j++)
+ if (! integer_all_onesp (mask[j]))
+ op[j] = build2_loc (loc, BIT_AND_EXPR, type,
+ op[j], mask[j]);
- result = build2_loc (loc, wanted_code, truth_type, lhs, rhs);
+ cmp[i] = build2_loc (loc, wanted_code, truth_type, op[0], op[1]);
+ }
+
+ if (parts == 1)
+ result = cmp[0];
+ else if (!separatep
+ || ((!l_split_load
+ || (ll_bitpos < bitpos[0][1]
+ && ll_bitpos + ll_bitsize > bitpos[0][1]))
+ && (!r_split_load
+ || (lr_bitpos < bitpos[1][1]
+ && lr_bitpos + lr_bitsize > bitpos[1][1]))))
+ result = build2_loc (loc, orig_code, truth_type, cmp[0], cmp[1]);
+ else if ((l_split_load && ll_bitpos >= bitpos[0][1])
+ || (r_split_load && lr_bitpos >= bitpos[1][1]))
+ {
+ result = cmp[1];
+ *separatep = cmp[0];
+ }
+ else
+ {
+ result = cmp[0];
+ *separatep = cmp[1];
+ }
if (report)
inform (loc, "merged nc extra %qE and %qE into %qE",
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c
new file mode 100644
index 00000000000..c629069e52b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+ unsigned int zn;
+ unsigned char p;
+ unsigned char qn;
+ unsigned short a;
+ unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+struct T2 {
+ unsigned int zn;
+ unsigned char rn;
+ unsigned char p;
+ unsigned char qn;
+ unsigned short a;
+ unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { -1, vc, 1, vs, vi };
+struct T2 v2 = { -1, 0, vc, 1, vs, vi };
+
+void f (void) {
+ if (0
+ || v1.p != v2.p
+ || v1.a != v2.a
+ || v1.z != v2.z
+ )
+ __builtin_abort ();
+}
+
+int main () {
+ f ();
+ return 0;
+}
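The packed-struct walkthrough in the comment above can be made concrete.
The fold compares the two parts of a split load separately rather than
reassembling the field, but the same shift amounts show where a
straddling field's bits land across the two loads. A little-endian
sketch for a 16-bit field at byte offset 3 of a 4-byte-aligned record,
as rr_arg is in the comment's example (hypothetical helper, illustration
only):

#include <stdint.h>
#include <string.h>
#include <assert.h>

/* The field's low byte is the top byte of the 32-bit part loaded at
   bit 0 (hence the shift right by 24); its high byte is the 8-bit
   part loaded at bit 32.  */
static uint16_t straddling_field (const unsigned char *rec)
{
  uint32_t part0;
  uint8_t part1;
  memcpy (&part0, rec, 4);      /* bits 0..31 of the record */
  memcpy (&part1, rec + 4, 1);  /* bits 32..39 */
  return (uint16_t) ((part0 >> 24) | ((uint16_t) part1 << 8));
}

int main (void)
{
  unsigned char rec[8] = { 0 };
  rec[3] = 0xdd;                /* low byte of 0xccdd */
  rec[4] = 0xcc;                /* high byte */
  assert (straddling_field (rec) == 0xccdd);
  return 0;
}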