public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/aoliva/heads/testme)] support split loads of rhs too
@ 2020-09-23 23:24 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-23 23:24 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:14b296e02473900af08e388408f382283303bdef

commit 14b296e02473900af08e388408f382283303bdef
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Mon Sep 21 21:20:25 2020 -0300

    support split loads of rhs too

Diff:
---
 gcc/fold-const.c                     | 319 ++++++++++++++++++++++++++++++-----
 gcc/testsuite/gcc.dg/field-merge-4.c |  38 +++++
 2 files changed, 318 insertions(+), 39 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 961e8954128..db082b9b7e0 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6217,6 +6217,90 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
+   LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
+   respectively, starting at BIT_POS, using reversed endianness if
+   REVERSEP.  Also initialize BITPOS (the starting position of each
+   part into INNER), BITSIZ (the bit count starting at BITPOS),
+   TOSHIFT[1] (the amount by which the part and its mask are to be
+   shifted right to bring its least-significant bit to bit zero) and
+   SHIFTED (the amount by which the part, by separate loading, has
+   already been shifted right, but that the mask needs shifting to
+   match).  */
+static inline void
+build_split_load (tree /* out */ ln_arg[2],
+		  HOST_WIDE_INT /* out */ bitpos[2],
+		  HOST_WIDE_INT /* out */ bitsiz[2],
+		  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+		  HOST_WIDE_INT /* out */ shifted[2],
+		  location_t loc, tree inner, tree orig_inner,
+		  scalar_int_mode mode, scalar_int_mode mode2,
+		  HOST_WIDE_INT bit_pos, bool reversep)
+{
+  bitsiz[0] = GET_MODE_BITSIZE (mode);
+  bitsiz[1] = GET_MODE_BITSIZE (mode2);
+
+  for (int i = 0; i < 2; i++)
+    {
+      tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
+      bitpos[i] = bit_pos;
+      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+				      type, bitsiz[i],
+				      bit_pos, 1, reversep);
+      bit_pos += bitsiz[i];
+    }
+
+  toshift[1] = toshift[0];
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      shifted[0] = bitsiz[1];
+      shifted[1] = 0;
+      toshift[0] = 0;
+    }
+  else
+    {
+      shifted[1] = bitsiz[0];
+      shifted[0] = 0;
+      toshift[1] = 0;
+    }
+}
+
+/* Make arrangements to split at bit BOUNDARY a single loaded word
+   (with REVERSEP bit order) LN_ARG[0], to be shifted right by
+   TOSHIFT[0] to bring the field of interest to the least-significant
+   bit.  The expectation is that the same loaded word will be
+   propagated from part 0 to part 1, with just different shifting and
+   masking to extract both parts.  MASK is not expected to do more
+   than masking out the bits that belong to the other part.  See
+   build_split_load for more information on the other fields.  */
+static inline void
+reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
+		  HOST_WIDE_INT /* in[0] out[1] */ bitpos[2],
+		  HOST_WIDE_INT /* in[0] out[1] */ bitsiz[2],
+		  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+		  HOST_WIDE_INT /* out */ shifted[2],
+		  tree /* out */ mask[2],
+		  HOST_WIDE_INT boundary, bool reversep)
+{
+  ln_arg[1] = ln_arg[0];
+  bitpos[1] = bitpos[0];
+  bitsiz[1] = bitsiz[0];
+  shifted[1] = shifted[0] = 0;
+
+  tree basemask = build_int_cst_type (TREE_TYPE (ln_arg[0]), -1);
+  mask[1] = const_binop (LSHIFT_EXPR, basemask, bitsize_int (boundary));
+  mask[0] = const_binop (BIT_XOR_EXPR, basemask, mask[1]);
+
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      toshift[1] = toshift[0];
+      toshift[0] = bitpos[0] + bitsiz[0] - boundary;
+      std::swap (mask[0], mask[1]);
+    }
+  else
+    toshift[1] = boundary - bitpos[1];
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6616,6 +6700,11 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (l_split_load)
     lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
+  if (!lntype)
+    {
+      gcc_checking_assert (l_split_load);
+      lntype = build_nonstandard_integer_type (lnbitsize, 1);
+    }
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
   if (ll_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6669,20 +6758,52 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
 	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
-	  || lnbitpos < 0
-	  || l_split_load)
+	  || lnbitpos < 0)
 	return 0;
 
+      bool r_split_load;
+      scalar_int_mode rnmode2;
+
       first_bit = MIN (lr_bitpos, rr_bitpos);
       end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
       if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 			  TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
 			  volatilep, &rnmode))
-	return 0;
+	{
+	  /* Consider the possibility of recombining loads if any of the
+	     fields straddles across an alignment boundary, so that either
+	     part can be loaded along with the other field.  */
+	  HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+	  HOST_WIDE_INT boundary = compute_split_boundary_from_align
+	    (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+
+	  if (boundary < 0
+	      /* If we're to split both, make sure the split point is
+		 the same.  */
+	      || (l_split_load
+		  && (boundary - lr_bitpos
+		      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
+	      || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+				 align, BITS_PER_WORD, volatilep, &rnmode)
+	      || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+				 align, BITS_PER_WORD, volatilep, &rnmode2))
+	    return 0;
+
+	  r_split_load = true;
+	}
+      else
+	r_split_load = false;
 
       rnbitsize = GET_MODE_BITSIZE (rnmode);
       rnbitpos = first_bit & ~ (rnbitsize - 1);
+      if (r_split_load)
+	rnbitsize += GET_MODE_BITSIZE (rnmode2);
       rntype = lang_hooks.types.type_for_size (rnbitsize, 1);
+      if (!rntype)
+	{
+	  gcc_checking_assert (r_split_load);
+	  rntype = build_nonstandard_integer_type (rnbitsize, 1);
+	}
       xlr_bitpos = lr_bitpos - rnbitpos, xrr_bitpos = rr_bitpos - rnbitpos;
 
       if (lr_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6702,7 +6823,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
       lr_mask = const_binop (BIT_IOR_EXPR, lr_mask, rr_mask);
 
       bool report
-	= (l_split_load
+	= (l_split_load || r_split_load
 	   || (!(lnbitsize == rnbitsize
 		 && xll_bitpos == xlr_bitpos
 		 && lnbitpos >= 0
@@ -6717,56 +6838,176 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 		    && rr_bitpos >= 0)));
 
       tree orig_lhs = lhs, orig_rhs = rhs;
-      lhs = make_bit_field_ref (loc, ll_inner, ll_arg,
-				lntype, lnbitsize, lnbitpos,
-				ll_unsignedp || rl_unsignedp, ll_reversep);
-      rhs = make_bit_field_ref (loc, lr_inner, lr_arg,
-				rntype, rnbitsize, rnbitpos,
-				lr_unsignedp || rr_unsignedp, lr_reversep);
 
-      HOST_WIDE_INT shift = (MIN (xll_bitpos, xrl_bitpos)
-			     - MIN (xlr_bitpos, xrr_bitpos));
+      int parts = 1;
+      tree ld_arg[2][2];
+      HOST_WIDE_INT bitpos[2][2];
+      HOST_WIDE_INT bitsiz[2][2];
+      HOST_WIDE_INT shifted[2][2];
+      HOST_WIDE_INT toshift[2][2];
+      tree xmask[2][2] = {};
+
+      /* Consider we're comparing two non-contiguous fields of packed
+	 structs, both aligned at 32-bit boundaries:
+
+	 ll_arg: an 8-bit field at offset 0
+	 lr_arg: a 16-bit field at offset 2
+
+	 rl_arg: an 8-bit field at offset 1
+	 rr_arg: a 16-bit field at offset 3
+
+	 We'll have r_split_load, because rr_arg straddles across an
+	 alignment boundary.
+
+	 We'll want to have:
+
+	 bitpos  = { {  0,  0 }, {  0, 32 } }
+	 bitsiz  = { { 32, 32 }, { 32,  8 } }
+
+	 And, for little-endian:
+
+	 shifted = { {  0,  0 }, {  0, 32 } }
+	 toshift = { {  0, 24 }, {  0,  0 } }
+
+	 Or, for big-endian:
 
-      if (shift > 0)
+	 shifted = { {  0,  0 }, {  8,  0 } }
+	 toshift = { {  8,  0 }, {  0,  0 } }
+      */
+
+      toshift[0][0] = MIN (xll_bitpos, xrl_bitpos);
+      shifted[0][0] = 0;
+
+      if (!l_split_load)
 	{
-	  tree shiftsz = bitsize_int (shift);
-	  lhs = fold_build2_loc (loc, RSHIFT_EXPR, lntype,
-				 lhs, shiftsz);
-	  ll_mask = const_binop (RSHIFT_EXPR, ll_mask, shiftsz);
+	  bitpos[0][0] = lnbitpos;
+	  bitsiz[0][0] = lnbitsize;
+	  ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
+					     lntype, lnbitsize, lnbitpos,
+					     ll_unsignedp || rl_unsignedp,
+					     ll_reversep);
 	}
-      else if (shift < 0)
+
+      toshift[1][0] = MIN (xlr_bitpos, xrr_bitpos);
+      shifted[1][0] = 0;
+
+      if (!r_split_load)
 	{
-	  tree shiftsz = bitsize_int (-shift);
-	  rhs = fold_build2_loc (loc, RSHIFT_EXPR, rntype,
-				 rhs, shiftsz);
-	  lr_mask = const_binop (RSHIFT_EXPR, lr_mask, shiftsz);
+	  bitpos[1][0] = rnbitpos;
+	  bitsiz[1][0] = rnbitsize;
+	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
+					     rntype, rnbitsize, rnbitpos,
+					     lr_unsignedp || rr_unsignedp,
+					     lr_reversep);
 	}
 
-      /* Convert to the smaller type before masking out unwanted bits.  */
-      tree type = lntype;
-      if (lntype != rntype)
+      if (l_split_load || r_split_load)
 	{
-	  if (lnbitsize > rnbitsize)
+	  parts = 2;
+
+	  if (l_split_load)
+	    build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			      shifted[0], loc, ll_inner, ll_arg,
+			      lnmode, lnmode2, lnbitpos, ll_reversep);
+	  else
+	    reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			      shifted[0], xmask[0],
+			      rnbitpos + GET_MODE_BITSIZE (rnmode)
+			      - lr_bitpos + ll_bitpos, ll_reversep);
+
+	  if (r_split_load)
+	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			      shifted[1], loc, lr_inner, lr_arg,
+			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	  else
+	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			      shifted[1], xmask[1],
+			      lnbitpos + GET_MODE_BITSIZE (lnmode)
+			      - ll_bitpos + lr_bitpos, lr_reversep);
+	}
+
+      tree cmp[2];
+
+      for (int i = 0; i < parts; i++)
+	{
+	  tree op[2] = { ld_arg[0][i], ld_arg[1][i] };
+	  tree mask[2] = { ll_mask, lr_mask };
+
+	  for (int j = 0; j < 2; j++)
 	    {
-	      type = rntype;
-	      lhs = fold_convert_loc (loc, type, lhs);
-	      ll_mask = fold_convert_loc (loc, type, ll_mask);
+	      /* Mask out the bits belonging to the other part.  */
+	      if (xmask[j][i])
+		mask[j] = const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
+
+	      if (shifted[j][i])
+		{
+		  tree shiftsz = bitsize_int (shifted[j][i]);
+		  mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
+		}
+	      mask[j] = fold_convert_loc (loc, TREE_TYPE (op[j]), mask[j]);
 	    }
-	  else if (lnbitsize < rnbitsize)
+
+	  HOST_WIDE_INT shift = (toshift[0][i] - toshift[1][i]);
+
+	  if (shift)
 	    {
-	      type = lntype;
-	      rhs = fold_convert_loc (loc, type, rhs);
-	      lr_mask = fold_convert_loc (loc, type, lr_mask);
+	      int j;
+	      if (shift > 0)
+		j = 0;
+	      else
+		{
+		  j = 1;
+		  shift = -shift;
+		}
+
+	      tree shiftsz = bitsize_int (shift);
+	      op[j] = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (op[j]),
+				       op[j], shiftsz);
+	      mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
 	    }
-	}
 
-      if (! integer_all_onesp (ll_mask))
-	lhs = build2_loc (loc, BIT_AND_EXPR, type, lhs, ll_mask);
+	  /* Convert to the smaller type before masking out unwanted
+	     bits.  */
+	  tree type = TREE_TYPE (op[0]);
+	  if (type != TREE_TYPE (op[1]))
+	    {
+	      int j = (TYPE_PRECISION (type)
+		       < TYPE_PRECISION (TREE_TYPE (op[1])));
+	      if (!j)
+		type = TREE_TYPE (op[1]);
+	      op[j] = fold_convert_loc (loc, type, op[j]);
+	      mask[j] = fold_convert_loc (loc, type, mask[j]);
+	    }
+
+	  for (int j = 0; j < 2; j++)
+	    if (! integer_all_onesp (mask[j]))
+	      op[j] = build2_loc (loc, BIT_AND_EXPR, type,
+				  op[j], mask[j]);
 
-      if (! integer_all_onesp (lr_mask))
-	rhs = build2_loc (loc, BIT_AND_EXPR, type, rhs, lr_mask);
+	  cmp[i] = build2_loc (loc, wanted_code, truth_type, op[0], op[1]);
+	}
 
-      result = build2_loc (loc, wanted_code, truth_type, lhs, rhs);
+      if (parts == 1)
+	result = cmp[0];
+      else if (!separatep
+	       || ((!l_split_load
+		    || (ll_bitpos < bitpos[0][1]
+			&& ll_bitpos + ll_bitsize > bitpos[0][1]))
+		   && (!r_split_load
+		       || (lr_bitpos < bitpos[1][1]
+			   && lr_bitpos + lr_bitsize > bitpos[1][1]))))
+	result = build2_loc (loc, orig_code, truth_type, cmp[0], cmp[1]);
+      else if ((l_split_load && ll_bitpos >= bitpos[0][1])
+	       || (r_split_load && lr_bitpos >= bitpos[1][1]))
+	{
+	  result = cmp[1];
+	  *separatep = cmp[0];
+	}
+      else
+	{
+	  result = cmp[0];
+	  *separatep = cmp[1];
+	}
 
       if (report)
 	inform (loc, "merged nc extra %qE and %qE into %qE",
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c
new file mode 100644
index 00000000000..dea4f9c2914
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+struct T2 {
+  unsigned char rn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { vc, 1, vs, vi };
+struct T2 v2 = { 0, vc, 1, vs, vi };
+
+void f (void) {
+  if (0
+      || v1.p != v2.p
+      || v1.a != v2.a
+      || v1.z != v2.z
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
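
A minimal standalone sketch (not part of the commit) to sanity-check the
worked example in the new comment block: split_boundary below is a
hypothetical stand-in for compute_split_boundary_from_align, whose body is
outside this hunk, applied to the rhs fields of the example (a 16-bit field
at bit 16 compared against a 16-bit field at bit 24, under 32-bit
alignment).

  #include <stdio.h>

  /* Hypothetical helper: return the ALIGN-bit boundary crossed by either
     field, or -1 if neither field crosses one.  All positions in bits.  */
  static long
  split_boundary (long align, long lpos, long lsize, long rpos, long rsize)
  {
    long lb = (lpos + lsize - 1) / align * align;
    long rb = (rpos + rsize - 1) / align * align;
    if (lb > lpos && lb < lpos + lsize)
      return lb;
    if (rb > rpos && rb < rpos + rsize)
      return rb;
    return -1;
  }

  int
  main (void)
  {
    long first_bit = 16, end_bit = 40;
    long b = split_boundary (32, 16, 16, 24, 16);
    /* Expect a split at bit 32, i.e. field ranges of 16 and 8 bits;
       get_best_mode then widens the first range to a full 32-bit load,
       which is how bitsiz ends up as { { 32, 32 }, { 32, 8 } } in the
       comment's example.  */
    printf ("split at bit %ld: %ld-bit and %ld-bit ranges\n",
            b, b - first_bit, end_bit - b);
    return 0;
  }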



* [gcc(refs/users/aoliva/heads/testme)] support split loads of rhs too
@ 2020-09-24 14:43 Alexandre Oliva
From: Alexandre Oliva @ 2020-09-24 14:43 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:4a00e09c155303faf76a3d21f9ab89e0540d9b23

commit 4a00e09c155303faf76a3d21f9ab89e0540d9b23
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Mon Sep 21 21:20:25 2020 -0300

    support split loads of rhs too

Diff:
---
 gcc/fold-const.c                     | 324 ++++++++++++++++++++++++++++++-----
 gcc/testsuite/gcc.dg/field-merge-4.c |  40 +++++
 2 files changed, 325 insertions(+), 39 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 961e8954128..817d4f9010d 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6217,6 +6217,95 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
+   LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
+   respectively, starting at BIT_POS, using reversed endianness if
+   REVERSEP.  Also initialize BITPOS (the starting position of each
+   part into INNER), BITSIZ (the bit count starting at BITPOS),
+   TOSHIFT[1] (the amount by which the part and its mask are to be
+   shifted right to bring its least-significant bit to bit zero) and
+   SHIFTED (the amount by which the part, by separate loading, has
+   already been shifted right, but that the mask needs shifting to
+   match).  */
+static inline void
+build_split_load (tree /* out */ ln_arg[2],
+		  HOST_WIDE_INT /* out */ bitpos[2],
+		  HOST_WIDE_INT /* out */ bitsiz[2],
+		  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+		  HOST_WIDE_INT /* out */ shifted[2],
+		  location_t loc, tree inner, tree orig_inner,
+		  scalar_int_mode mode, scalar_int_mode mode2,
+		  HOST_WIDE_INT bit_pos, bool reversep)
+{
+  bitsiz[0] = GET_MODE_BITSIZE (mode);
+  bitsiz[1] = GET_MODE_BITSIZE (mode2);
+
+  for (int i = 0; i < 2; i++)
+    {
+      tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
+      bitpos[i] = bit_pos;
+      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+				      type, bitsiz[i],
+				      bit_pos, 1, reversep);
+      bit_pos += bitsiz[i];
+    }
+
+  toshift[1] = toshift[0];
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      shifted[0] = bitsiz[1];
+      shifted[1] = 0;
+      toshift[0] = 0;
+    }
+  else
+    {
+      shifted[1] = bitsiz[0];
+      shifted[0] = 0;
+      toshift[1] = 0;
+    }
+}
+
+/* Make arrangements to split at bit BOUNDARY a single loaded word
+   (with REVERSEP bit order) LN_ARG[0], to be shifted right by
+   TOSHIFT[0] to bring the field of interest to the least-significant
+   bit.  The expectation is that the same loaded word will be
+   propagated from part 0 to part 1, with just different shifting and
+   masking to extract both parts.  MASK is not expected to do more
+   than masking out the bits that belong to the other part.  See
+   build_split_load for more information on the other fields.  */
+static inline void
+reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
+		  HOST_WIDE_INT /* in[0] out[1] */ bitpos[2],
+		  HOST_WIDE_INT /* in[0] out[1] */ bitsiz[2],
+		  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+		  HOST_WIDE_INT /* out */ shifted[2],
+		  tree /* out */ mask[2],
+		  HOST_WIDE_INT boundary, bool reversep)
+{
+  ln_arg[1] = ln_arg[0];
+  bitpos[1] = bitpos[0];
+  bitsiz[1] = bitsiz[0];
+  shifted[1] = shifted[0] = 0;
+
+  tree basemask = build_int_cst_type (TREE_TYPE (ln_arg[0]), -1);
+
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      toshift[1] = toshift[0];
+      toshift[0] = bitpos[0] + bitsiz[0] - boundary;
+      mask[0] = const_binop (LSHIFT_EXPR, basemask,
+			     bitsize_int (toshift[0]));
+      mask[1] = const_binop (BIT_XOR_EXPR, basemask, mask[0]);
+    }
+  else
+    {
+      toshift[1] = boundary - bitpos[1];
+      mask[1] = const_binop (LSHIFT_EXPR, basemask,
+			     bitsize_int (toshift[1]));
+      mask[0] = const_binop (BIT_XOR_EXPR, basemask, mask[1]);
+    }
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6616,6 +6705,11 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (l_split_load)
     lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
+  if (!lntype)
+    {
+      gcc_checking_assert (l_split_load);
+      lntype = build_nonstandard_integer_type (lnbitsize, 1);
+    }
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
   if (ll_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6669,20 +6763,52 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
 	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
-	  || lnbitpos < 0
-	  || l_split_load)
+	  || lnbitpos < 0)
 	return 0;
 
+      bool r_split_load;
+      scalar_int_mode rnmode2;
+
       first_bit = MIN (lr_bitpos, rr_bitpos);
       end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
       if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 			  TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
 			  volatilep, &rnmode))
-	return 0;
+	{
+	  /* Consider the possibility of recombining loads if any of the
+	     fields straddles across an alignment boundary, so that either
+	     part can be loaded along with the other field.  */
+	  HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+	  HOST_WIDE_INT boundary = compute_split_boundary_from_align
+	    (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+
+	  if (boundary < 0
+	      /* If we're to split both, make sure the split point is
+		 the same.  */
+	      || (l_split_load
+		  && (boundary - lr_bitpos
+		      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
+	      || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+				 align, BITS_PER_WORD, volatilep, &rnmode)
+	      || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+				 align, BITS_PER_WORD, volatilep, &rnmode2))
+	    return 0;
+
+	  r_split_load = true;
+	}
+      else
+	r_split_load = false;
 
       rnbitsize = GET_MODE_BITSIZE (rnmode);
       rnbitpos = first_bit & ~ (rnbitsize - 1);
+      if (r_split_load)
+	rnbitsize += GET_MODE_BITSIZE (rnmode2);
       rntype = lang_hooks.types.type_for_size (rnbitsize, 1);
+      if (!rntype)
+	{
+	  gcc_checking_assert (r_split_load);
+	  rntype = build_nonstandard_integer_type (rnbitsize, 1);
+	}
       xlr_bitpos = lr_bitpos - rnbitpos, xrr_bitpos = rr_bitpos - rnbitpos;
 
       if (lr_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6702,7 +6828,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
       lr_mask = const_binop (BIT_IOR_EXPR, lr_mask, rr_mask);
 
       bool report
-	= (l_split_load
+	= (l_split_load || r_split_load
 	   || (!(lnbitsize == rnbitsize
 		 && xll_bitpos == xlr_bitpos
 		 && lnbitpos >= 0
@@ -6717,56 +6843,176 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 		    && rr_bitpos >= 0)));
 
       tree orig_lhs = lhs, orig_rhs = rhs;
-      lhs = make_bit_field_ref (loc, ll_inner, ll_arg,
-				lntype, lnbitsize, lnbitpos,
-				ll_unsignedp || rl_unsignedp, ll_reversep);
-      rhs = make_bit_field_ref (loc, lr_inner, lr_arg,
-				rntype, rnbitsize, rnbitpos,
-				lr_unsignedp || rr_unsignedp, lr_reversep);
 
-      HOST_WIDE_INT shift = (MIN (xll_bitpos, xrl_bitpos)
-			     - MIN (xlr_bitpos, xrr_bitpos));
+      int parts = 1;
+      tree ld_arg[2][2];
+      HOST_WIDE_INT bitpos[2][2];
+      HOST_WIDE_INT bitsiz[2][2];
+      HOST_WIDE_INT shifted[2][2];
+      HOST_WIDE_INT toshift[2][2];
+      tree xmask[2][2] = {};
+
+      /* Consider we're comparing two non-contiguous fields of packed
+	 structs, both aligned at 32-bit boundaries:
+
+	 ll_arg: an 8-bit field at offset 0
+	 lr_arg: a 16-bit field at offset 2
+
+	 rl_arg: an 8-bit field at offset 1
+	 rr_arg: a 16-bit field at offset 3
+
+	 We'll have r_split_load, because rr_arg straddles across an
+	 alignment boundary.
+
+	 We'll want to have:
 
-      if (shift > 0)
+	 bitpos  = { {  0,  0 }, {  0, 32 } }
+	 bitsiz  = { { 32, 32 }, { 32,  8 } }
+
+	 And, for little-endian:
+
+	 shifted = { {  0,  0 }, {  0, 32 } }
+	 toshift = { {  0, 24 }, {  0,  0 } }
+
+	 Or, for big-endian:
+
+	 shifted = { {  0,  0 }, {  8,  0 } }
+	 toshift = { {  8,  0 }, {  0,  0 } }
+      */
+
+      toshift[0][0] = MIN (xll_bitpos, xrl_bitpos);
+      shifted[0][0] = 0;
+
+      if (!l_split_load)
 	{
-	  tree shiftsz = bitsize_int (shift);
-	  lhs = fold_build2_loc (loc, RSHIFT_EXPR, lntype,
-				 lhs, shiftsz);
-	  ll_mask = const_binop (RSHIFT_EXPR, ll_mask, shiftsz);
+	  bitpos[0][0] = lnbitpos;
+	  bitsiz[0][0] = lnbitsize;
+	  ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
+					     lntype, lnbitsize, lnbitpos,
+					     ll_unsignedp || rl_unsignedp,
+					     ll_reversep);
 	}
-      else if (shift < 0)
+
+      toshift[1][0] = MIN (xlr_bitpos, xrr_bitpos);
+      shifted[1][0] = 0;
+
+      if (!r_split_load)
 	{
-	  tree shiftsz = bitsize_int (-shift);
-	  rhs = fold_build2_loc (loc, RSHIFT_EXPR, rntype,
-				 rhs, shiftsz);
-	  lr_mask = const_binop (RSHIFT_EXPR, lr_mask, shiftsz);
+	  bitpos[1][0] = rnbitpos;
+	  bitsiz[1][0] = rnbitsize;
+	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
+					     rntype, rnbitsize, rnbitpos,
+					     lr_unsignedp || rr_unsignedp,
+					     lr_reversep);
 	}
 
-      /* Convert to the smaller type before masking out unwanted bits.  */
-      tree type = lntype;
-      if (lntype != rntype)
+      if (l_split_load || r_split_load)
 	{
-	  if (lnbitsize > rnbitsize)
+	  parts = 2;
+
+	  if (l_split_load)
+	    build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			      shifted[0], loc, ll_inner, ll_arg,
+			      lnmode, lnmode2, lnbitpos, ll_reversep);
+	  else
+	    reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			      shifted[0], xmask[0],
+			      rnbitpos + GET_MODE_BITSIZE (rnmode)
+			      - lr_bitpos + ll_bitpos, ll_reversep);
+
+	  if (r_split_load)
+	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			      shifted[1], loc, lr_inner, lr_arg,
+			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	  else
+	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			      shifted[1], xmask[1],
+			      lnbitpos + GET_MODE_BITSIZE (lnmode)
+			      - ll_bitpos + lr_bitpos, lr_reversep);
+	}
+
+      tree cmp[2];
+
+      for (int i = 0; i < parts; i++)
+	{
+	  tree op[2] = { ld_arg[0][i], ld_arg[1][i] };
+	  tree mask[2] = { ll_mask, lr_mask };
+
+	  for (int j = 0; j < 2; j++)
 	    {
-	      type = rntype;
-	      lhs = fold_convert_loc (loc, type, lhs);
-	      ll_mask = fold_convert_loc (loc, type, ll_mask);
+	      /* Mask out the bits belonging to the other part.  */
+	      if (xmask[j][i])
+		mask[j] = const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
+
+	      if (shifted[j][i])
+		{
+		  tree shiftsz = bitsize_int (shifted[j][i]);
+		  mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
+		}
+	      mask[j] = fold_convert_loc (loc, TREE_TYPE (op[j]), mask[j]);
 	    }
-	  else if (lnbitsize < rnbitsize)
+
+	  HOST_WIDE_INT shift = (toshift[0][i] - toshift[1][i]);
+
+	  if (shift)
 	    {
-	      type = lntype;
-	      rhs = fold_convert_loc (loc, type, rhs);
-	      lr_mask = fold_convert_loc (loc, type, lr_mask);
+	      int j;
+	      if (shift > 0)
+		j = 0;
+	      else
+		{
+		  j = 1;
+		  shift = -shift;
+		}
+
+	      tree shiftsz = bitsize_int (shift);
+	      op[j] = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (op[j]),
+				       op[j], shiftsz);
+	      mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
 	    }
-	}
 
-      if (! integer_all_onesp (ll_mask))
-	lhs = build2_loc (loc, BIT_AND_EXPR, type, lhs, ll_mask);
+	  /* Convert to the smaller type before masking out unwanted
+	     bits.  */
+	  tree type = TREE_TYPE (op[0]);
+	  if (type != TREE_TYPE (op[1]))
+	    {
+	      int j = (TYPE_PRECISION (type)
+		       < TYPE_PRECISION (TREE_TYPE (op[1])));
+	      if (!j)
+		type = TREE_TYPE (op[1]);
+	      op[j] = fold_convert_loc (loc, type, op[j]);
+	      mask[j] = fold_convert_loc (loc, type, mask[j]);
+	    }
 
-      if (! integer_all_onesp (lr_mask))
-	rhs = build2_loc (loc, BIT_AND_EXPR, type, rhs, lr_mask);
+	  for (int j = 0; j < 2; j++)
+	    if (! integer_all_onesp (mask[j]))
+	      op[j] = build2_loc (loc, BIT_AND_EXPR, type,
+				  op[j], mask[j]);
 
-      result = build2_loc (loc, wanted_code, truth_type, lhs, rhs);
+	  cmp[i] = build2_loc (loc, wanted_code, truth_type, op[0], op[1]);
+	}
+
+      if (parts == 1)
+	result = cmp[0];
+      else if (!separatep
+	       || ((!l_split_load
+		    || (ll_bitpos < bitpos[0][1]
+			&& ll_bitpos + ll_bitsize > bitpos[0][1]))
+		   && (!r_split_load
+		       || (lr_bitpos < bitpos[1][1]
+			   && lr_bitpos + lr_bitsize > bitpos[1][1]))))
+	result = build2_loc (loc, orig_code, truth_type, cmp[0], cmp[1]);
+      else if ((l_split_load && ll_bitpos >= bitpos[0][1])
+	       || (r_split_load && lr_bitpos >= bitpos[1][1]))
+	{
+	  result = cmp[1];
+	  *separatep = cmp[0];
+	}
+      else
+	{
+	  result = cmp[0];
+	  *separatep = cmp[1];
+	}
 
       if (report)
 	inform (loc, "merged nc extra %qE and %qE into %qE",
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c
new file mode 100644
index 00000000000..c629069e52b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+  unsigned int zn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+struct T2 {
+  unsigned int zn;
+  unsigned char rn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { -1, vc, 1, vs, vi };
+struct T2 v2 = { -1, 0, vc, 1, vs, vi };
+
+void f (void) {
+  if (0
+      || v1.p != v2.p
+      || v1.a != v2.a
+      || v1.z != v2.z
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
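
Relative to the 2020-09-23 version above, this revision of reuse_split_load
derives both part masks from the toshift amounts (the split point relative
to the reused word) rather than shifting the base mask by the absolute
boundary bit position.  A plain-C sketch of the branch taken when the
effective byte order is little-endian (not code from the patch; uint32_t
stands in for the type of the loaded word), with the split falling at bit
24 of a 32-bit word:

  #include <stdio.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint32_t basemask = ~(uint32_t) 0;       /* all-ones in the word type */
    int toshift1 = 24;                       /* boundary - bitpos[1]      */
    uint32_t mask1 = basemask << toshift1;   /* bits at and above bit 24  */
    uint32_t mask0 = basemask ^ mask1;       /* bits below bit 24         */
    /* Expect mask[1] = 0xff000000 and mask[0] = 0xffffff; each part of
       the merged comparison then keeps only its own bits of the shared
       word.  */
    printf ("mask[1] = %#x, mask[0] = %#x\n",
            (unsigned) mask1, (unsigned) mask0);
    return 0;
  }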

