From: Alexandre Oliva
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/aoliva/heads/testme)] support split loads of rhs too
X-Act-Checkin: gcc
X-Git-Author: Alexandre Oliva
X-Git-Refname: refs/users/aoliva/heads/testme
X-Git-Oldrev: b90d7a7ffa1fc32af74fdf09ba89e16a4def59ee
X-Git-Newrev: 4a00e09c155303faf76a3d21f9ab89e0540d9b23
Message-Id: <20200924144330.6023B396ECB0@sourceware.org>
Date: Thu, 24 Sep 2020 14:43:30 +0000 (GMT)

https://gcc.gnu.org/g:4a00e09c155303faf76a3d21f9ab89e0540d9b23

commit 4a00e09c155303faf76a3d21f9ab89e0540d9b23
Author: Alexandre Oliva
Date:   Mon Sep 21 21:20:25 2020 -0300

    support split loads of rhs too

Diff:
---
 gcc/fold-const.c                     | 324 ++++++++++++++++++++++++++++++-----
 gcc/testsuite/gcc.dg/field-merge-4.c |  40 +++++
 2 files changed, 325 insertions(+), 39 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 961e8954128..817d4f9010d 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6217,6 +6217,95 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
+   LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
+   respectively, starting at BIT_POS, using reversed endianness if
+   REVERSEP.  Also initialize BITPOS (the starting position of each
+   part into INNER), BITSIZ (the bit count starting at BITPOS),
+   TOSHIFT[1] (the amount by which the part and its mask are to be
+   shifted right to bring its least-significant bit to bit zero) and
+   SHIFTED (the amount by which the part, by separate loading, has
+   already been shifted right, but that the mask needs shifting to
+   match).  */
+static inline void
+build_split_load (tree /* out */ ln_arg[2],
+                  HOST_WIDE_INT /* out */ bitpos[2],
+                  HOST_WIDE_INT /* out */ bitsiz[2],
+                  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+                  HOST_WIDE_INT /* out */ shifted[2],
+                  location_t loc, tree inner, tree orig_inner,
+                  scalar_int_mode mode, scalar_int_mode mode2,
+                  HOST_WIDE_INT bit_pos, bool reversep)
+{
+  bitsiz[0] = GET_MODE_BITSIZE (mode);
+  bitsiz[1] = GET_MODE_BITSIZE (mode2);
+
+  for (int i = 0; i < 2; i++)
+    {
+      tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
+      bitpos[i] = bit_pos;
+      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+                                      type, bitsiz[i],
+                                      bit_pos, 1, reversep);
+      bit_pos += bitsiz[i];
+    }
+
+  toshift[1] = toshift[0];
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      shifted[0] = bitsiz[1];
+      shifted[1] = 0;
+      toshift[0] = 0;
+    }
+  else
+    {
+      shifted[1] = bitsiz[0];
+      shifted[0] = 0;
+      toshift[1] = 0;
+    }
+}
+
+/* Make arrangements to split at bit BOUNDARY a single loaded word
+   (with REVERSEP bit order) LN_ARG[0], to be shifted right by
+   TOSHIFT[0] to bring the field of interest to the least-significant
+   bit.  The expectation is that the same loaded word will be
+   propagated from part 0 to part 1, with just different shifting and
+   masking to extract both parts.  MASK is not expected to do more
+   than masking out the bits that belong to the other part.  See
+   build_split_load for more information on the other fields.  */
+static inline void
+reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
+                  HOST_WIDE_INT /* in[0] out[1] */ bitpos[2],
+                  HOST_WIDE_INT /* in[0] out[1] */ bitsiz[2],
+                  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+                  HOST_WIDE_INT /* out */ shifted[2],
+                  tree /* out */ mask[2],
+                  HOST_WIDE_INT boundary, bool reversep)
+{
+  ln_arg[1] = ln_arg[0];
+  bitpos[1] = bitpos[0];
+  bitsiz[1] = bitsiz[0];
+  shifted[1] = shifted[0] = 0;
+
+  tree basemask = build_int_cst_type (TREE_TYPE (ln_arg[0]), -1);
+
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      toshift[1] = toshift[0];
+      toshift[0] = bitpos[0] + bitsiz[0] - boundary;
+      mask[0] = const_binop (LSHIFT_EXPR, basemask,
+                             bitsize_int (toshift[0]));
+      mask[1] = const_binop (BIT_XOR_EXPR, basemask, mask[0]);
+    }
+  else
+    {
+      toshift[1] = boundary - bitpos[1];
+      mask[1] = const_binop (LSHIFT_EXPR, basemask,
+                             bitsize_int (toshift[1]));
+      mask[0] = const_binop (BIT_XOR_EXPR, basemask, mask[1]);
+    }
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6616,6 +6705,11 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (l_split_load)
     lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
+  if (!lntype)
+    {
+      gcc_checking_assert (l_split_load);
+      lntype = build_nonstandard_integer_type (lnbitsize, 1);
+    }
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
   if (ll_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6669,20 +6763,52 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
           /* Make sure the two fields on the right correspond to the left
              without being swapped.  */
           || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
-          || lnbitpos < 0
-          || l_split_load)
+          || lnbitpos < 0)
         return 0;
 
+      bool r_split_load;
+      scalar_int_mode rnmode2;
+
       first_bit = MIN (lr_bitpos, rr_bitpos);
       end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
       if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
                           TYPE_ALIGN (TREE_TYPE (lr_inner)),
                           BITS_PER_WORD, volatilep, &rnmode))
-        return 0;
+        {
+          /* Consider the possibility of recombining loads if any of the
+             fields straddles across an alignment boundary, so that either
+             part can be loaded along with the other field.  */
+          HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+          HOST_WIDE_INT boundary = compute_split_boundary_from_align
+            (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+
+          if (boundary < 0
+              /* If we're to split both, make sure the split point is
+                 the same.  */
+              || (l_split_load
+                  && (boundary - lr_bitpos
+                      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
+              || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+                                 align, BITS_PER_WORD, volatilep, &rnmode)
+              || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+                                 align, BITS_PER_WORD, volatilep, &rnmode2))
+            return 0;
+
+          r_split_load = true;
+        }
+      else
+        r_split_load = false;
 
       rnbitsize = GET_MODE_BITSIZE (rnmode);
       rnbitpos = first_bit & ~ (rnbitsize - 1);
+      if (r_split_load)
+        rnbitsize += GET_MODE_BITSIZE (rnmode2);
       rntype = lang_hooks.types.type_for_size (rnbitsize, 1);
+      if (!rntype)
+        {
+          gcc_checking_assert (r_split_load);
+          rntype = build_nonstandard_integer_type (rnbitsize, 1);
+        }
       xlr_bitpos = lr_bitpos - rnbitpos, xrr_bitpos = rr_bitpos - rnbitpos;
 
       if (lr_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6702,7 +6828,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
       lr_mask = const_binop (BIT_IOR_EXPR, lr_mask, rr_mask);
 
       bool report
-        = (l_split_load
+        = (l_split_load || r_split_load
            || (!(lnbitsize == rnbitsize
                  && xll_bitpos == xlr_bitpos
                  && lnbitpos >= 0
@@ -6717,56 +6843,176 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
                  && rr_bitpos >= 0)));
 
       tree orig_lhs = lhs, orig_rhs = rhs;
-      lhs = make_bit_field_ref (loc, ll_inner, ll_arg,
-                                lntype, lnbitsize, lnbitpos,
-                                ll_unsignedp || rl_unsignedp, ll_reversep);
-      rhs = make_bit_field_ref (loc, lr_inner, lr_arg,
-                                rntype, rnbitsize, rnbitpos,
-                                lr_unsignedp || rr_unsignedp, lr_reversep);
-      HOST_WIDE_INT shift = (MIN (xll_bitpos, xrl_bitpos)
-                             - MIN (xlr_bitpos, xrr_bitpos));
+      int parts = 1;
+      tree ld_arg[2][2];
+      HOST_WIDE_INT bitpos[2][2];
+      HOST_WIDE_INT bitsiz[2][2];
+      HOST_WIDE_INT shifted[2][2];
+      HOST_WIDE_INT toshift[2][2];
+      tree xmask[2][2] = {};
+
+      /* Consider we're comparing two non-contiguous fields of packed
+         structs, both aligned at 32-bit boundaries:
+
+           ll_arg: an 8-bit field at offset 0
+           lr_arg: a 16-bit field at offset 2
+
+           rl_arg: an 8-bit field at offset 1
+           rr_arg: a 16-bit field at offset 3
+
+         We'll have r_split_load, because rr_arg straddles across an
+         alignment boundary.
+
+         We'll want to have:
 
-      if (shift > 0)
+           bitpos = { {  0,  0 }, {  0, 32 } }
+           bitsiz = { { 32, 32 }, { 32,  8 } }
+
+         And, for little-endian:
+
+           shifted = { { 0,  0 }, { 0, 32 } }
+           toshift = { { 0, 24 }, { 0,  0 } }
+
+         Or, for big-endian:
+
+           shifted = { { 0, 0 }, { 8, 0 } }
+           toshift = { { 8, 0 }, { 0, 0 } }
+         */
+
+      toshift[0][0] = MIN (xll_bitpos, xrl_bitpos);
+      shifted[0][0] = 0;
+
+      if (!l_split_load)
         {
-          tree shiftsz = bitsize_int (shift);
-          lhs = fold_build2_loc (loc, RSHIFT_EXPR, lntype,
-                                 lhs, shiftsz);
-          ll_mask = const_binop (RSHIFT_EXPR, ll_mask, shiftsz);
+          bitpos[0][0] = lnbitpos;
+          bitsiz[0][0] = lnbitsize;
+          ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
+                                             lntype, lnbitsize, lnbitpos,
+                                             ll_unsignedp || rl_unsignedp,
+                                             ll_reversep);
         }
-      else if (shift < 0)
+
+      toshift[1][0] = MIN (xlr_bitpos, xrr_bitpos);
+      shifted[1][0] = 0;
+
+      if (!r_split_load)
         {
-          tree shiftsz = bitsize_int (-shift);
-          rhs = fold_build2_loc (loc, RSHIFT_EXPR, rntype,
-                                 rhs, shiftsz);
-          lr_mask = const_binop (RSHIFT_EXPR, lr_mask, shiftsz);
+          bitpos[1][0] = rnbitpos;
+          bitsiz[1][0] = rnbitsize;
+          ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
+                                             rntype, rnbitsize, rnbitpos,
+                                             lr_unsignedp || rr_unsignedp,
+                                             lr_reversep);
         }
 
-      /* Convert to the smaller type before masking out unwanted bits.  */
-      tree type = lntype;
-      if (lntype != rntype)
+      if (l_split_load || r_split_load)
         {
-          if (lnbitsize > rnbitsize)
+          parts = 2;
+
+          if (l_split_load)
+            build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+                              shifted[0], loc, ll_inner, ll_arg,
+                              lnmode, lnmode2, lnbitpos, ll_reversep);
+          else
+            reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+                              shifted[0], xmask[0],
+                              rnbitpos + GET_MODE_BITSIZE (rnmode)
+                              - lr_bitpos + ll_bitpos, ll_reversep);
+
+          if (r_split_load)
+            build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+                              shifted[1], loc, lr_inner, lr_arg,
+                              rnmode, rnmode2, rnbitpos, lr_reversep);
+          else
+            reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+                              shifted[1], xmask[1],
+                              lnbitpos + GET_MODE_BITSIZE (lnmode)
+                              - ll_bitpos + lr_bitpos, lr_reversep);
+        }
+
+      tree cmp[2];
+
+      for (int i = 0; i < parts; i++)
+        {
+          tree op[2] = { ld_arg[0][i], ld_arg[1][i] };
+          tree mask[2] = { ll_mask, lr_mask };
+
+          for (int j = 0; j < 2; j++)
             {
-              type = rntype;
-              lhs = fold_convert_loc (loc, type, lhs);
-              ll_mask = fold_convert_loc (loc, type, ll_mask);
+              /* Mask out the bits belonging to the other part.  */
+              if (xmask[j][i])
+                mask[j] = const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
+
+              if (shifted[j][i])
+                {
+                  tree shiftsz = bitsize_int (shifted[j][i]);
+                  mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
+                }
+              mask[j] = fold_convert_loc (loc, TREE_TYPE (op[j]), mask[j]);
             }
-          else if (lnbitsize < rnbitsize)
+
+          HOST_WIDE_INT shift = (toshift[0][i] - toshift[1][i]);
+
+          if (shift)
             {
-              type = lntype;
-              rhs = fold_convert_loc (loc, type, rhs);
-              lr_mask = fold_convert_loc (loc, type, lr_mask);
+              int j;
+              if (shift > 0)
+                j = 0;
+              else
+                {
+                  j = 1;
+                  shift = -shift;
+                }
+
+              tree shiftsz = bitsize_int (shift);
+              op[j] = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (op[j]),
+                                       op[j], shiftsz);
+              mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
             }
-        }
 
-      if (! integer_all_onesp (ll_mask))
-        lhs = build2_loc (loc, BIT_AND_EXPR, type, lhs, ll_mask);
+          /* Convert to the smaller type before masking out unwanted
+             bits.  */
+          tree type = TREE_TYPE (op[0]);
+          if (type != TREE_TYPE (op[1]))
+            {
+              int j = (TYPE_PRECISION (type)
+                       < TYPE_PRECISION (TREE_TYPE (op[1])));
+              if (!j)
+                type = TREE_TYPE (op[1]);
+              op[j] = fold_convert_loc (loc, type, op[j]);
+              mask[j] = fold_convert_loc (loc, type, mask[j]);
+            }
 
-      if (! integer_all_onesp (lr_mask))
-        rhs = build2_loc (loc, BIT_AND_EXPR, type, rhs, lr_mask);
+          for (int j = 0; j < 2; j++)
+            if (! integer_all_onesp (mask[j]))
+              op[j] = build2_loc (loc, BIT_AND_EXPR, type,
+                                  op[j], mask[j]);
 
-      result = build2_loc (loc, wanted_code, truth_type, lhs, rhs);
+          cmp[i] = build2_loc (loc, wanted_code, truth_type, op[0], op[1]);
+        }
+
+      if (parts == 1)
+        result = cmp[0];
+      else if (!separatep
+               || ((!l_split_load
+                    || (ll_bitpos < bitpos[0][1]
+                        && ll_bitpos + ll_bitsize > bitpos[0][1]))
+                   && (!r_split_load
+                       || (lr_bitpos < bitpos[1][1]
+                           && lr_bitpos + lr_bitsize > bitpos[1][1]))))
+        result = build2_loc (loc, orig_code, truth_type, cmp[0], cmp[1]);
+      else if ((l_split_load && ll_bitpos >= bitpos[0][1])
+               || (r_split_load && lr_bitpos >= bitpos[1][1]))
+        {
+          result = cmp[1];
+          *separatep = cmp[0];
+        }
+      else
+        {
+          result = cmp[0];
+          *separatep = cmp[1];
+        }
 
       if (report)
         inform (loc, "merged nc extra %qE and %qE into %qE",
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c
new file mode 100644
index 00000000000..c629069e52b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+  unsigned int zn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+struct T2 {
+  unsigned int zn;
+  unsigned char rn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { -1, vc, 1, vs, vi };
+struct T2 v2 = { -1, 0, vc, 1, vs, vi };
+
+void f (void) {
+  if (0
+      || v1.p != v2.p
+      || v1.a != v2.a
+      || v1.z != v2.z
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
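
As an illustration of what the split loads above buy (a hand-written sketch, not output of the pass): a field that straddles a 32-bit alignment boundary, such as v2.a and v2.z in the new testcase, can still be compared using only aligned word loads, by loading the two words that cover it and masking out the bits of the neighbouring fields; that is the bookkeeping build_split_load and reuse_split_load arrange at the tree level, and the two masked word comparisons correspond to the cmp[0] and cmp[1] the patch builds.  The function name, the byte offset 3, the masks, and the little-endian layout below are assumptions chosen for the example, not taken from the patch.

/* Hand-written sketch, not GCC output: compare a 16-bit field stored
   unaligned at byte offset 3 of two 4-byte-aligned buffers using only
   aligned 32-bit loads.  The field covers bytes 3 and 4, so it
   straddles a 32-bit boundary and needs two word loads per side.  */
#include <stdint.h>
#include <string.h>

static int
equal_straddling_u16 (const unsigned char *p, const unsigned char *q)
{
  uint32_t pw0, pw1, qw0, qw1;

  /* Aligned word loads covering the two parts of the field.  */
  memcpy (&pw0, p, 4);
  memcpy (&pw1, p + 4, 4);
  memcpy (&qw0, q, 4);
  memcpy (&qw1, q + 4, 4);

  /* Little-endian: the low byte of the field sits in bits 24..31 of
     word 0, the high byte in bits 0..7 of word 1.  Mask away the
     unrelated bytes and compare whole words, analogous to the
     mask/shift bookkeeping kept in toshift[], shifted[] and mask[].  */
  return (pw0 & 0xff000000u) == (qw0 & 0xff000000u)
         && (pw1 & 0x000000ffu) == (qw1 & 0x000000ffu);
}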