public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-15 16:44 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-15 16:44 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2375252d9c19dff1638efdd27064c2e5402290e2

commit 2375252d9c19dff1638efdd27064c2e5402290e2
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.
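
For orientation, a minimal sketch (not part of the commit; the struct and
field names are made up) of the kind of chained comparison the deeper
recursion is meant to reach:

  /* The && chain is left-associated, so the s.d compare at the end is
     the right operand of the outermost TRUTH_ANDIF_EXPR, while the
     compares it could merge with are nested inside the left operand.
     Previously only the immediately preceding compare was tried; the
     recursion added below also reaches the deeper leaves, so all four
     byte tests can fold into a single wider load and compare.  */
  struct S { unsigned char a, b, c, d; } s;

  int f (void)
  {
    return s.a == 1 && s.b == 2 && s.c == 3 && s.d == 4;
  }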

Diff:
---
 gcc/fold-const.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 178 insertions(+), 25 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..e91e7778999 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of lhs, and from there to
+	 each leaf of rhs, but don't bother recursing into lhs if rhs
+	 is neither a comparison nor a compound expr, nor into rhs if
+	 the lhs leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of lhs and rhs, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of lhs and
+	 rhs.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 1), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    TREE_OPERAND (lhs, 0), result);
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 0), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (lhs, 1));
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 0))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 1));
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 1))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 0));
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each crosses a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles across an alignment boundary,
+	 there is nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,77 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      /* ??? Handle ll_reversep and endianness below?  */
+      HOST_WIDE_INT lnlcbitpos = 0;
+      HOST_WIDE_INT lnrcbitpos = lnlbitsize;
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+      result = build2_loc (loc, orig_code, truth_type,
+			   lll_result, llr_result);
+    }
+  else
+    {
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
+
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
+
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9327,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;



* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-23 23:23 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-23 23:23 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:de4bb02889b888a7a99758ef63f08fbb9d2d57de

commit de4bb02889b888a7a99758ef63f08fbb9d2d57de
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.
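
As a rough illustration of the shift-and-mask handling (a hypothetical
sketch, not taken from the patch or its testcases): decode_field_reference
now looks through a constant right shift, so compares written against
shifted words can take part in the merge.

  unsigned int w;

  /* (w >> 8) & 0xff is now decoded as a byte-sized field reference at
     bit offset 8 within w, so this compare and the (w & 0xff) one can
     be combined into a single masked compare of the containing word.  */
  int g (void)
  {
    return (w & 0xff) == 1 && ((w >> 8) & 0xff) == 2;
  }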

Diff:
---
 gcc/config/rs6000/t-rs6000           |   4 +
 gcc/fold-const.c                     | 341 ++++++++++++++++++++++++++++++++---
 gcc/testsuite/gcc.dg/field-merge-1.c |  64 +++++++
 gcc/testsuite/gcc.dg/field-merge-2.c |  31 ++++
 4 files changed, 413 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..081c41f7fbe 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6120,6 +6142,50 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
   return NULL_TREE;
 }
 
+/* Return the one bitpos within bit extents L or R that is at an
+   ALIGN-bit alignment boundary, or -1 if there is more than one such
+   boundary, if there isn't any, or if there is any such boundary
+   between the extents.  L and R are given by bitpos and bitsize.  If
+   it doesn't return -1, there are two consecutive ALIGN-bit words
+   that contain both extents, and at least one of the extents
+   straddles across the returned alignment boundary.  */
+static inline HOST_WIDE_INT
+compute_split_boundary_from_align (HOST_WIDE_INT align,
+				   HOST_WIDE_INT l_bitpos,
+				   HOST_WIDE_INT l_bitsize,
+				   HOST_WIDE_INT r_bitpos,
+				   HOST_WIDE_INT r_bitsize)
+{
+  HOST_WIDE_INT amask = ~(align - 1);
+
+  HOST_WIDE_INT first_bit = MIN (l_bitpos, r_bitpos);
+  HOST_WIDE_INT end_bit = MAX (l_bitpos + l_bitsize, r_bitpos + r_bitsize);
+
+  HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+
+  /* Make sure we're crossing no more than one alignment boundary.
+
+     ??? We don't have logic to recombine loads of two adjacent
+     fields that each crosses a different alignment boundary, so
+     as to load the middle word only once, if other words can't be
+     otherwise recombined.  */
+  if (boundary - first_bit > align)
+    return -1;
+
+  HOST_WIDE_INT l_start_word = l_bitpos & amask;
+  HOST_WIDE_INT l_end_word = (l_bitpos + l_bitsize - 1) & amask;
+
+  HOST_WIDE_INT r_start_word = r_bitpos & amask;
+  HOST_WIDE_INT r_end_word = (r_bitpos + r_bitsize - 1) & amask;
+
+  /* If neither field straddles across an alignment boundary, it's no
+     use to even try to merge them.  */
+  if (l_start_word == l_end_word && r_start_word == r_end_word)
+    return -1;
+
+  return boundary;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6142,11 +6208,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
    TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
    two operands.
 
+   SEPARATEP should be NULL if LHS and RHS are adjacent in
+   CODE-chained compares, and a non-NULL pointer to NULL_TREE
+   otherwise.  If the "words" accessed by RHS are already accessed by
+   LHS, this won't matter, but if RHS accesses "words" that LHS
+   doesn't, then *SEPARATEP will be set to the compares that should
+   take RHS's place.  By "words" we mean contiguous bits that do not
+   cross a TYPE_ALIGN boundary of the accessed object's type.
+
    We return the simplified tree or 0 if no optimization is possible.  */
 
 static tree
 fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
-		    tree lhs, tree rhs)
+		    tree lhs, tree rhs, tree *separatep)
 {
   /* If this is the "or" of two comparisons, we can do something if
      the comparisons are NE_EXPR.  If this is the "and", we can do something
@@ -6157,6 +6231,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6243,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool l_split_load;
+
+  gcc_checking_assert (!separatep || !*separatep);
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6280,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      tree separate = NULL;
+
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of LHS, and from there to
+	 each leaf of RHS, but don't bother recursing into LHS if RHS
+	 is neither a comparison nor a compound expr, nor into RHS if
+	 the LHS leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of LHS and RHS, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of LHS and
+	 RHS.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 1), rhs,
+					    separatep
+					    ? separatep
+					    : NULL)) != 0)
+	    {
+	      /* We have combined the latter part of LHS with RHS.  If
+		 they were separate, the recursion already placed any
+		 remains of RHS in *SEPARATEP, otherwise they are all
+		 in RESULT, so we just have to prepend to result the
+		 former part of LHS.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					TREE_OPERAND (lhs, 0), result);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 0), rhs,
+					    separatep
+					    ? separatep
+					    : &separate)) != 0)
+	    {
+	      /* We have combined the former part of LHS with RHS.  If
+		 they were separate, the recursive call will have
+		 placed remnants of RHS in *SEPARATEP.  If they
+		 weren't, they will be in SEPARATE instead.  Append
+		 the latter part of LHS to the result, and then any
+		 remnants of RHS that we haven't passed on to the
+		 caller.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					result, TREE_OPERAND (lhs, 1));
+	      if (separate)
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 0),
+					    separatep
+					    ? &separate
+					    : NULL)) != 0)
+	    {
+	      /* We have combined LHS with the former part of RHS.  If
+		 they were separate, have any remnants of RHS placed
+		 in separate, so that we can combine them with the
+		 latter part of RHS, and then send them back for the
+		 caller to handle.  If they were adjacent, we can just
+		 append the latter part of RHS to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 1);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    separate, TREE_OPERAND (rhs, 1));
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 1),
+					    &separate)) != 0)
+	    {
+	      /* We have combined LHS with the latter part of RHS.
+		 They're definitely not adjacent, so we get the
+		 remains of RHS in SEPARATE, and then prepend the
+		 former part of RHS to it.  If LHS and RHS were
+		 already separate to begin with, we leave the remnants
+		 of RHS for the caller to deal with, otherwise we
+		 append them to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 0);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    TREE_OPERAND (rhs, 0), separate);
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6544,30 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT boundary = compute_split_boundary_from_align
+	(align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+
+      if (boundary < 0
+	  || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      l_split_load = true;
+    }
+  else
+    l_split_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (l_split_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6621,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || l_split_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6763,107 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (l_split_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      HOST_WIDE_INT lnlcbitpos;
+      HOST_WIDE_INT lnrcbitpos;
+      /* The bit field ref constant folding below already takes care
+	 of default target endianness.  */
+      if (!ll_reversep)
+	{
+	  lnlcbitpos = 0;
+	  lnrcbitpos = lnlbitsize;
+	}
+      else
+	{
+	  lnlcbitpos = lnrbitsize;
+	  lnrcbitpos = 0;
+	}
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+
+      /* If the accesses were adjacent in the compare chain, or if LHS
+	 already accessed both "words", then we can have both words
+	 accessed in the RESULT, leaving no remains of *RHS for
+	 *SEPARATEP.  Otherwise, figure out which part of the object
+	 was already accessed by LHS, and put it in the RESULT,
+	 returning the bits newly-accessed by RHS in *SEPARATEP.  */
+      if (!separatep
+	  || (ll_bitpos < lnrbitpos && ll_bitpos + ll_bitsize > lnrbitpos))
+	result = build2_loc (loc, orig_code, truth_type,
+			     lll_result, llr_result);
+      else if (ll_bitpos >= lnrbitpos)
+	{
+	  result = llr_result;
+	  *separatep = lll_result;
+	}
+      else
+	{
+	  result = lll_result;
+	  *separatep = llr_result;
+	}
+    }
+  else
+    {
+
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
+
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9461,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
-  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
     return tem;
 
   bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..1818e104437
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+   tested out of order, and when fields straddle across alignment
+   boundaries.  */
+
+struct TL {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+  /* Which words of    | vL | vB | */
+  /* are accessed by   |0123|0123| */
+  if (0 /* the tests?  |    |    | */
+      || vL.p != vc /* |*   |    | */
+      || vB.p != vc /* |    |*   | */
+      || vL.s != vc /* |   *|    | */
+      || vB.q != vc /* |    | *  | */
+      || vL.a != vi /* |^*  |    | */
+      || vB.b != vi /* |    | ^* | */
+      || vL.c != vi /* |  *^|    | */
+      || vB.c != vi /* |    |  ^*| */
+      || vL.b != vi /* | ^^ |    | */
+      || vL.q != vc /* | ^  |    | */
+      || vB.a != vi /* |    |^^  | */
+      || vB.r != vc /* |    |  ^ | */
+      || vB.s != vc /* |    |   ^| */
+      || vL.r != vc /* |  ^ |    | */
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-2.c b/gcc/testsuite/gcc.dg/field-merge-2.c
new file mode 100644
index 00000000000..80c573b31dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct TL {
+  unsigned short a;
+  unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+struct TB {
+  unsigned char p;
+  unsigned short a;
+  unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+#define vc 0xaa
+
+struct TL vL = { vc, vc };
+struct TB vB = { vc, vc, vc };
+
+void f (void) {
+  if (0
+      || vL.b != vB.b
+      || vL.a != vB.a
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}



* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17 11:31 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-17 11:31 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0dc8eabc87885d6eb5b395ac2c43aafec59fdcc9

commit 0dc8eabc87885d6eb5b395ac2c43aafec59fdcc9
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.
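
A condensed sketch of the crossing-field case, modeled on the
gcc.dg/field-merge-1.c test added in this revision (whether the merge
actually happens still depends on what get_best_mode allows for the
target):

  struct P {
    unsigned char p;  /* byte 0 */
    unsigned int a;   /* bytes 1-4: straddles the 4-byte boundary */
    unsigned char q;  /* byte 5 */
  } __attribute__ ((packed, aligned (4)));

  struct P v = { 0xaa, 0x12345678, 0xaa };

  /* Because v.a crosses an alignment boundary, the merge used to be
     given up on; with the split-load support the combined access can
     instead be emitted as two aligned word loads, each compared with
     the corresponding part of the merged constant and mask.  */
  int h (void)
  {
    return v.p == 0xaa && v.a == 0x12345678 && v.q == 0xaa;
  }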

Diff:
---
 gcc/config/rs6000/t-rs6000           |   4 +
 gcc/fold-const.c                     | 315 ++++++++++++++++++++++++++++++++---
 gcc/testsuite/gcc.dg/field-merge-1.c |  64 +++++++
 gcc/testsuite/gcc.dg/field-merge-2.c |  31 ++++
 4 files changed, 387 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..c835327dac9 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6142,11 +6164,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
    TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
    two operands.
 
+   SEPARATEP should be NULL if LHS and RHS are adjacent in
+   CODE-chained compares, and a non-NULL pointer to NULL_TREE
+   otherwise.  If the "words" accessed by RHS are already accessed by
+   LHS, this won't matter, but if RHS accesses "words" that LHS
+   doesn't, then *SEPARATEP will be set to the compares that should
+   take RHS's place.  By "words" we mean contiguous bits that do not
+   cross a TYPE_ALIGN boundary of the accessed object's type.
+
    We return the simplified tree or 0 if no optimization is possible.  */
 
 static tree
 fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
-		    tree lhs, tree rhs)
+		    tree lhs, tree rhs, tree *separatep)
 {
   /* If this is the "or" of two comparisons, we can do something if
      the comparisons are NE_EXPR.  If this is the "and", we can do something
@@ -6157,6 +6187,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6199,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
+
+  gcc_checking_assert (!separatep || !*separatep);
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6236,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      tree separate = NULL;
+
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of LHS, and from there to
+	 each leaf of RHS, but don't bother recursing into LHS if RHS
+	 is neither a comparison nor a compound expr, nor into RHS if
+	 the LHS leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of LHS and RHS, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of LHS and
+	 RHS.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 1), rhs,
+					    separatep
+					    ? separatep
+					    : NULL)) != 0)
+	    {
+	      /* We have combined the latter part of LHS with RHS.  If
+		 they were separate, the recursion already placed any
+		 remains of RHS in *SEPARATEP, otherwise they are all
+		 in RESULT, so we just have to prepend to result the
+		 former part of LHS.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					TREE_OPERAND (lhs, 0), result);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 0), rhs,
+					    separatep
+					    ? separatep
+					    : &separate)) != 0)
+	    {
+	      /* We have combined the former part of LHS with RHS.  If
+		 they were separate, the recursive call will have
+		 placed remnants of RHS in *SEPARATEP.  If they
+		 weren't, they will be in SEPARATE instead.  Append
+		 the latter part of LHS to the result, and then any
+		 remnants of RHS that we haven't passed on to the
+		 caller.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					result, TREE_OPERAND (lhs, 1));
+	      if (separate)
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 0),
+					    separatep
+					    ? &separate
+					    : NULL)) != 0)
+	    {
+	      /* We have combined LHS with the former part of RHS.  If
+		 they were separate, have any remnants of RHS placed
+		 in separate, so that we can combine them with the
+		 latter part of RHS, and then send them back for the
+		 caller to handle.  If they were adjacent, we can just
+		 append the latter part of RHS to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 1);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    separate, TREE_OPERAND (rhs, 1));
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 1),
+					    &separate)) != 0)
+	    {
+	      /* We have combined LHS with the latter part of RHS.
+		 They're definitely not adjacent, so we get the
+		 remains of RHS in SEPARATE, and then prepend the
+		 former part of RHS to it.  If LHS and RHS were
+		 already separate to begin with, we leave the remnants
+		 of RHS for the caller to deal with, otherwise we
+		 append them to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 0);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    TREE_OPERAND (rhs, 0), separate);
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6500,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each crosses a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles across an alignment boundary,
+	 there is nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6595,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6737,107 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      HOST_WIDE_INT lnlcbitpos;
+      HOST_WIDE_INT lnrcbitpos;
+      /* The bit field ref constant folding below already takes care
+	 of default target endianness.  */
+      if (!ll_reversep)
+	{
+	  lnlcbitpos = 0;
+	  lnrcbitpos = lnlbitsize;
+	}
+      else
+	{
+	  lnlcbitpos = lnrbitsize;
+	  lnrcbitpos = 0;
+	}
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+
+      /* If the accesses were adjacent in the compare chain, or if LHS
+	 already accessed both "words", then we can have both words
+	 accessed in the RESULT, leaving no remains of *RHS for
+	 *SEPARATEP.  Otherwise, figure out which part of the object
+	 was already accessed by LHS, and put it in the RESULT,
+	 returning the bits newly-accessed by RHS in *SEPARATEP.  */
+      if (!separatep
+	  || (ll_bitpos < lnrbitpos && ll_bitpos + ll_bitsize > lnrbitpos))
+	result = build2_loc (loc, orig_code, truth_type,
+			     lll_result, llr_result);
+      else if (ll_bitpos >= lnrbitpos)
+	{
+	  result = llr_result;
+	  *separatep = lll_result;
+	}
+      else
+	{
+	  result = lll_result;
+	  *separatep = llr_result;
+	}
+    }
+  else
+    {
+
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
+
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9435,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
-  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
     return tem;
 
   bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..1818e104437
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+   tested out of order, and when fields straddle across alignment
+   boundaries.  */
+
+struct TL {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+  /* Which words of    | vL | vB | */
+  /* are accessed by   |0123|0123| */
+  if (0 /* the tests?  |    |    | */
+      || vL.p != vc /* |*   |    | */
+      || vB.p != vc /* |    |*   | */
+      || vL.s != vc /* |   *|    | */
+      || vB.q != vc /* |    | *  | */
+      || vL.a != vi /* |^*  |    | */
+      || vB.b != vi /* |    | ^* | */
+      || vL.c != vi /* |  *^|    | */
+      || vB.c != vi /* |    |  ^*| */
+      || vL.b != vi /* | ^^ |    | */
+      || vL.q != vc /* | ^  |    | */
+      || vB.a != vi /* |    |^^  | */
+      || vB.r != vc /* |    |  ^ | */
+      || vB.s != vc /* |    |   ^| */
+      || vL.r != vc /* |  ^ |    | */
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-2.c b/gcc/testsuite/gcc.dg/field-merge-2.c
new file mode 100644
index 00000000000..80c573b31dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct TL {
+  unsigned short a;
+  unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+struct TB {
+  unsigned char p;
+  unsigned short a;
+  unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+#define vc 0xaa
+
+struct TL vL = { vc, vc };
+struct TB vB = { vc, vc, vc };
+
+void f (void) {
+  if (0
+      || vL.b != vB.b
+      || vL.a != vB.a
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}



* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17  8:22 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-17  8:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a8de178570781759ed6e7791d80752a8bad63463

commit a8de178570781759ed6e7791d80752a8bad63463
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.

Diff:
---
 gcc/config/rs6000/t-rs6000           |   4 +
 gcc/fold-const.c                     | 315 ++++++++++++++++++++++++++++++++---
 gcc/testsuite/gcc.dg/field-merge-1.c |  64 +++++++
 3 files changed, 356 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..c835327dac9 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6142,11 +6164,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
    TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
    two operands.
 
+   SEPARATEP should be NULL if LHS and RHS are adjacent in
+   CODE-chained compares, and a non-NULL pointer to NULL_TREE
+   otherwise.  If the "words" accessed by RHS are already accessed by
+   LHS, this won't matter, but if RHS accesses "words" that LHS
+   doesn't, then *SEPARATEP will be set to the compares that should
+   take RHS's place.  By "words" we mean contiguous bits that do not
+   cross a TYPE_ALIGN boundary of the accessed object's type.
+
    We return the simplified tree or 0 if no optimization is possible.  */
 
 static tree
 fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
-		    tree lhs, tree rhs)
+		    tree lhs, tree rhs, tree *separatep)
 {
   /* If this is the "or" of two comparisons, we can do something if
      the comparisons are NE_EXPR.  If this is the "and", we can do something
@@ -6157,6 +6187,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6199,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
+
+  gcc_checking_assert (!separatep || !*separatep);
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6236,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      tree separate = NULL;
+
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of LHS, and from there to
+	 each leaf of RHS, but don't bother recursing into LHS if RHS
+	 is neither a comparison nor a compound expr, nor into RHS if
+	 the LHS leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of LHS and RHS, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of LHS and
+	 RHS.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 1), rhs,
+					    separatep
+					    ? separatep
+					    : NULL)) != 0)
+	    {
+	      /* We have combined the latter part of LHS with RHS.  If
+		 they were separate, the recursion already placed any
+		 remains of RHS in *SEPARATEP, otherwise they are all
+		 in RESULT, so we just have to prepend to result the
+		 former part of LHS.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					TREE_OPERAND (lhs, 0), result);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 0), rhs,
+					    separatep
+					    ? separatep
+					    : &separate)) != 0)
+	    {
+	      /* We have combined the former part of LHS with RHS.  If
+		 they were separate, the recursive call will have
+		 placed remnants of RHS in *SEPARATEP.  If they
+		 weren't, they will be in SEPARATE instead.  Append
+		 the latter part of LHS to the result, and then any
+		 remnants of RHS that we haven't passed on to the
+		 caller.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					result, TREE_OPERAND (lhs, 1));
+	      if (separate)
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 0),
+					    separatep
+					    ? &separate
+					    : NULL)) != 0)
+	    {
+	      /* We have combined LHS with the former part of RHS.  If
+		 they were separate, have any remnants of RHS placed
+		 in separate, so that we can combine them with the
+		 latter part of RHS, and then send them back for the
+		 caller to handle.  If they were adjacent, we can just
+		 append the latter part of RHS to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 1);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    separate, TREE_OPERAND (rhs, 1));
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 1),
+					    &separate)) != 0)
+	    {
+	      /* We have combined LHS with the latter part of RHS.
+		 They're definitely not adjacent, so we get the
+		 remains of RHS in SEPARATE, and then prepend the
+		 former part of RHS to it.  If LHS and RHS were
+		 already separate to begin with, we leave the remnants
+		 of RHS for the caller to deal with, otherwise we
+		 append them to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 0);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    TREE_OPERAND (rhs, 0), separate);
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6500,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each crosses a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles across an alignment boundary,
+	 there is nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6595,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6737,107 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      HOST_WIDE_INT lnlcbitpos;
+      HOST_WIDE_INT lnrcbitpos;
+      /* The bit field ref constant folding below already takes care
+	 of default target endianness.  */
+      if (!ll_reversep)
+	{
+	  lnlcbitpos = 0;
+	  lnrcbitpos = lnlbitsize;
+	}
+      else
+	{
+	  lnlcbitpos = lnrbitsize;
+	  lnrcbitpos = 0;
+	}
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+
+      /* If the accesses were adjacent in the compare chain, or if LHS
+	 already accessed both "words", then we can have both words
+	 accessed in the RESULT, leaving no remains of *RHS for
+	 *SEPARATEP.  Otherwise, figure out which part of the object
+	 was already accessed by LHS, and put it in the RESULT,
+	 returning the bits newly-accessed by RHS in *SEPARATEP.  */
+      if (!separatep
+	  || (ll_bitpos < lnrbitpos && ll_bitpos + ll_bitsize > lnrbitpos))
+	result = build2_loc (loc, orig_code, truth_type,
+			     lll_result, llr_result);
+      else if (ll_bitpos >= lnrbitpos)
+	{
+	  result = llr_result;
+	  *separatep = lll_result;
+	}
+      else
+	{
+	  result = lll_result;
+	  *separatep = llr_result;
+	}
+    }
+  else
+    {
+
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
+
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9435,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
-  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
     return tem;
 
   bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..1818e104437
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+   tested out of order, and when fields straddle across alignment
+   boundaries.  */
+
+struct TL {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+  /* Which words of    | vL | vB | */
+  /* are accessed by   |0123|0123| */
+  if (0 /* the tests?  |    |    | */
+      || vL.p != vc /* |*   |    | */
+      || vB.p != vc /* |    |*   | */
+      || vL.s != vc /* |   *|    | */
+      || vB.q != vc /* |    | *  | */
+      || vL.a != vi /* |^*  |    | */
+      || vB.b != vi /* |    | ^* | */
+      || vL.c != vi /* |  *^|    | */
+      || vB.c != vi /* |    |  ^*| */
+      || vL.b != vi /* | ^^ |    | */
+      || vL.q != vc /* | ^  |    | */
+      || vB.a != vi /* |    |^^  | */
+      || vB.r != vc /* |    |  ^ | */
+      || vB.s != vc /* |    |   ^| */
+      || vL.r != vc /* |  ^ |    | */
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
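
To make the SEPARATEP protocol documented above a bit more concrete, here is a hypothetical pair of source-level cases using the packed layout from field-merge-1.c.  The struct and variable names are illustrative, and the comments describe the intended behaviour as I read the new code, not guaranteed output.

struct R {
  unsigned char p;   /* bits 0..7, entirely in the first 32-bit word  */
  unsigned int a;    /* bits 8..39, straddles the boundary at bit 32  */
} __attribute__ ((packed, aligned (4)));

struct R v = { 0xaa, 0x12345678 };
int flag = 1;

/* Adjacent case: the straddling field 'a' is resplit into its word-0
   and word-1 halves, and both halves go straight into the combined
   RESULT; *SEPARATEP is not involved.  */
int f1 (void)
{
  return v.p == 0xaa && v.a == 0x12345678;
}

/* Non-adjacent case: the word-0 half of 'a' can piggyback on the load
   the p test already performs and take p's place, but the word-1 half
   would access a word LHS never touched, so it is handed back through
   *SEPARATEP and kept in the position of the original a test, after
   the flag check; conceptually:

     (32-bit masked compare of word 0) && flag != 0
       && (8-bit compare of the low byte of word 1)  */
int f2 (void)
{
  return v.p == 0xaa && flag != 0 && v.a == 0x12345678;
}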


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17  3:20 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-17  3:20 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c5108baf0f0b1943976c40c6ede4d4af97b4a5b1

commit c5108baf0f0b1943976c40c6ede4d4af97b4a5b1
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.

Diff:
---
 gcc/config/rs6000/t-rs6000           |   4 +
 gcc/fold-const.c                     | 214 +++++++++++++++++++++++++++++++----
 gcc/testsuite/gcc.dg/field-merge-1.c |  50 ++++++++
 3 files changed, 243 insertions(+), 25 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..b163950f2c3 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of lhs, and from there to
+	 each leaf of rhs, but don't bother recursing into lhs if rhs
+	 is neither a comparison nor a compound expr, nor into rhs if
+	 the lhs leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of lhs and rhs, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of lhs and
+	 rhs.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 1), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    TREE_OPERAND (lhs, 0), result);
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 0), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (lhs, 1));
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 0))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 1));
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 1))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 0));
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each crosses a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles across an alignment boundary,
+	 there is nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,88 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      HOST_WIDE_INT lnlcbitpos;
+      HOST_WIDE_INT lnrcbitpos;
+      /* The bit field ref constant folding below already takes care
+	 of default target endianness.  */
+      if (!ll_reversep)
+	{
+	  lnlcbitpos = 0;
+	  lnrcbitpos = lnlbitsize;
+	}
+      else
+	{
+	  lnlcbitpos = lnrbitsize;
+	  lnrcbitpos = 0;
+	}
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+      result = build2_loc (loc, orig_code, truth_type,
+			   lll_result, llr_result);
+    }
+  else
+    {
+
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9338,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;
 
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..41d173f445c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+   tested out of order, and when fields straddle across alignment
+   boundaries.  */
+
+struct TL {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+  if (vL.a != vi || vL.b != vi || vL.c != vi
+      || vB.a != vi || vB.b != vi || vB.c != vi
+      || vL.p != vc || vL.q != vc || vL.r != vc || vL.s != vc
+      || vB.p != vc || vB.q != vc || vB.r != vc || vB.s != vc)
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
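
The RSHIFT_EXPR handling added to decode_field_reference above is what lets a hand-written shift-and-mask byte extraction participate in the merging.  Below is a hypothetical example of the kind of tree it now recognizes; whether the two tests then actually merge into one wider compare also depends on the rest of fold_truth_andor_1, so treat this as a sketch rather than a guarantee.

struct W {
  unsigned int w;
};

struct W u = { 0x12345678 };

int f (void)
{
  /* The first operand is BIT_AND_EXPR (RSHIFT_EXPR (u.w, 8), 0xff):
     after stripping the mask and the shift, decode_field_reference
     sees the underlying 32-bit word, adds the shift count to the bit
     position and trims it from the size, and the 0xff mask then
     narrows the field to one byte at bit offset 8.  The second
     operand is the plain masked byte at offset 0 of the same word,
     so both become candidates for a single 32-bit compare.  */
  return ((u.w >> 8) & 0xff) == 0x56 && (u.w & 0xff) == 0x78;
}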


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-17  2:52 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-17  2:52 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8e6137d4c9d9cba7dc7ea5ec2cfada53f40f6b6b

commit 8e6137d4c9d9cba7dc7ea5ec2cfada53f40f6b6b
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.

Diff:
---
 gcc/config/rs6000/t-rs6000           |   4 +
 gcc/fold-const.c                     | 214 +++++++++++++++++++++++++++++++----
 gcc/testsuite/gcc.dg/field-merge-1.c |  48 ++++++++
 3 files changed, 241 insertions(+), 25 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..b163950f2c3 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of lhs, and from there to
+	 each leaf of rhs, but don't bother recursing into lhs if rhs
+	 is neither a comparison nor a compound expr, nor into rhs if
+	 the lhs leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of lhs and rhs, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of lhs and
+	 rhs.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 1), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    TREE_OPERAND (lhs, 0), result);
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 0), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (lhs, 1));
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 0))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 1));
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 1))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 0));
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each crosses a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles across an alignment boundary,
+	 there is nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,88 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      HOST_WIDE_INT lnlcbitpos;
+      HOST_WIDE_INT lnrcbitpos;
+      /* The bit field ref constant folding below already takes care
+	 of default target endianness.  */
+      if (!ll_reversep)
+	{
+	  lnlcbitpos = 0;
+	  lnrcbitpos = lnlbitsize;
+	}
+      else
+	{
+	  lnlcbitpos = lnrbitsize;
+	  lnrcbitpos = 0;
+	}
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+      result = build2_loc (loc, orig_code, truth_type,
+			   lll_result, llr_result);
+    }
+  else
+    {
+
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9338,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;
 
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000000..491ec2936f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps" } */
+
+/* Check that field loads compared with constants are merged, even if
+   tested out of order, and when fields straddle across alignment
+   boundaries.  */
+
+struct TL {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+  if (vL.a != vi || vL.b != vi || vL.c != vi
+      || vB.a != vi || vB.b != vi || vB.c != vi
+      || vL.p != vc || vL.q != vc || vL.r != vc || vL.s != vc
+      || vB.p != vc || vB.q != vc || vB.r != vc || vB.s != vc)
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
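
Plugging the layout of field-merge-1.c into the boundary arithmetic of the new resplit_load path may help in reading it.  The small program below is purely illustrative: it reproduces the amask/boundary/start-word computations for a compare of vL.p (bits 0..7) together with vL.a (bits 8..39), assuming the 32-bit TYPE_ALIGN implied by aligned (4).

#include <stdio.h>

int main (void)
{
  long align = 32;                           /* TYPE_ALIGN in bits   */
  long amask = ~(align - 1);

  long first_bit = 0, end_bit = 40;          /* p and a together     */
  long boundary = (end_bit - 1) & amask;     /* 32                   */

  long a_bitpos = 8, a_bitsize = 32;
  long a_start_word = a_bitpos & amask;                  /* 0   */
  long a_end_word = (a_bitpos + a_bitsize - 1) & amask;  /* 32  */

  printf ("boundary=%ld straddles=%d left=[%ld,%ld) right=[%ld,%ld)\n",
          boundary, a_start_word != a_end_word,
          first_bit, boundary, boundary, end_bit);
  /* Prints: boundary=32 straddles=1 left=[0,32) right=[32,40)
     i.e. one 32-bit load (lnmode) plus one 8-bit load (lnmode2),
     matching the two get_best_mode calls in the patch.  */
  return 0;
}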


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-16 21:38 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-16 21:38 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:012db34a07d657fa9da81c116736396b2bc96b74

commit 012db34a07d657fa9da81c116736396b2bc96b74
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Tue Sep 15 13:32:07 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.

Diff:
---
 gcc/config/rs6000/t-rs6000 |   4 +
 gcc/fold-const.c           | 203 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 182 insertions(+), 25 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..516486df9a4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..e91e7778999 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6226,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of lhs, and from there to
+	 each leaf of rhs, but don't bother recursing into lhs if rhs
+	 is neither a comparison nor a compound expr, nor into rhs if
+	 the lhs leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of lhs and rhs, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of lhs and
+	 rhs.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 1), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    TREE_OPERAND (lhs, 0), result);
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 0), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (lhs, 1));
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 0))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 1));
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 1))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 0));
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6422,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles an alignment boundary, so that either part
+	 can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each cross a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles an alignment boundary, there is
+	 nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6517,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6659,77 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      /* ??? Handle ll_reversep and endianness below?  */
+      HOST_WIDE_INT lnlcbitpos = 0;
+      HOST_WIDE_INT lnrcbitpos = lnlbitsize;
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+      result = build2_loc (loc, orig_code, truth_type,
+			   lll_result, llr_result);
+    }
+  else
+    {
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
+
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
+
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9327,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;


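A similar sketch for the new shift handling in decode_field_reference
(again illustrative only; the function g, its parameter and the
compared constants are invented):

/* Explicit shift-and-mask field extraction rather than bitfields.
   Peeling the constant RSHIFT_EXPR (and a NOP_EXPR) lets each operand
   be decoded as a bit range of *p, with the bit position advanced and
   the bit size reduced by the shift count, so the two equality tests
   become candidates for the same single-load merge.  Nothing here is
   specific to a target; endianness and available modes decide the
   outcome.  */
int
g (const unsigned int *p)
{
  return ((*p >> 8) & 0xff) == 0x12
         && ((*p >> 16) & 0xff) == 0x34;
}
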
^ permalink raw reply	[flat|nested] 8+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1
@ 2020-09-15 15:55 Alexandre Oliva
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandre Oliva @ 2020-09-15 15:55 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:54381337e28e63a33c29b1352f061b5d51842d18

commit 54381337e28e63a33c29b1352f061b5d51842d18
Author: Alexandre Oliva <oliva@adacore.com>
Date:   Fri Sep 11 16:44:52 2020 -0300

    improvements for fold_truth_andor_1
    
    Enable combining with the rightmost non-and/or, however deeply-nested.
    
    Handle shift-and-mask.
    
    Handle fields that cross alignment boundaries, when either part can be
    combined.

Diff:
---
 gcc/fold-const.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 186 insertions(+), 25 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 1f861630225..8a43e959a8e 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4640,6 +4640,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4665,28 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      shiftrt = tree_to_shwi (TREE_OPERAND (exp, 1));
+      if (shiftrt > 0)
+	exp = TREE_OPERAND (exp, 0);
+      else
+	shiftrt = 0;
+    }
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize < shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4695,12 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      *pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -6157,6 +6179,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6191,14 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool resplit_load;
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6183,6 +6207,10 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (TREE_SIDE_EFFECTS (lhs) || TREE_SIDE_EFFECTS (rhs))
     return 0;
 
+#if 0
+  inform (loc, "trying to merge %qT and %qT", lhs, rhs);
+#endif
+
   lcode = TREE_CODE (lhs);
   rcode = TREE_CODE (rhs);
 
@@ -6202,7 +6230,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      /* Check for the possibility of merging component references.
+	 If either of our operands is another similar operation,
+	 recurse to try to merge individual operands, avoiding double
+	 recursion: recurse to each leaf of lhs, and from there to
+	 each leaf of rhs, but don't bother recursing into lhs if rhs
+	 is neither a comparison nor a compound expr, nor into rhs if
+	 the lhs leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of lhs and rhs, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of lhs and
+	 rhs.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 1), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    TREE_OPERAND (lhs, 0), result);
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					 TREE_OPERAND (lhs, 0), rhs)) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (lhs, 1));
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 0))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 1));
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type, lhs,
+					 TREE_OPERAND (rhs, 1))) != 0)
+	    return fold_build2_loc (loc, code, truth_type,
+				    result, TREE_OPERAND (rhs, 0));
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6357,10 +6426,48 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles an alignment boundary, so that either part
+	 can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT amask = ~(align - 1);
+
+      HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+      /* Make sure we're only crossing one alignment boundary.
+
+	 ??? This won't recombine loads of two adjacent fields that
+	 each cross a different alignment boundary, so as to load
+	 the middle word only once.  */
+      if (boundary - first_bit > align)
+	return 0;
+
+      HOST_WIDE_INT ll_start_word = ll_bitpos & amask;
+      HOST_WIDE_INT ll_end_word = (ll_bitpos + ll_bitsize - 1) & amask;
+
+      HOST_WIDE_INT rl_start_word = rl_bitpos & amask;
+      HOST_WIDE_INT rl_end_word = (rl_bitpos + rl_bitsize - 1) & amask;
+
+      /* If neither field straddles an alignment boundary, there is
+	 nothing further to do.  */
+      if (ll_start_word == ll_end_word && rl_start_word == rl_end_word)
+	return 0;
+
+      if (!get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			  align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      resplit_load = true;
+    }
+  else
+    resplit_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (resplit_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
@@ -6414,7 +6521,8 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || resplit_load)
 	return 0;
 
       first_bit = MIN (lr_bitpos, rr_bitpos);
@@ -6555,20 +6663,81 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (lnbitpos < 0)
     return 0;
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+  if (resplit_load)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT lnlbitsize = GET_MODE_BITSIZE (lnmode);
+      HOST_WIDE_INT lnrbitsize = GET_MODE_BITSIZE (lnmode2);
+      HOST_WIDE_INT midpoint = lnlbitsize;
+      HOST_WIDE_INT lnrbitpos = lnbitpos + midpoint;
+      HOST_WIDE_INT lnlbitpos = lnrbitpos - lnlbitsize;
+      gcc_checking_assert ((lnrbitpos & (align - 1)) == 0);
+      tree lnltype = lang_hooks.types.type_for_size (lnlbitsize, 1);
+      tree lnrtype = lang_hooks.types.type_for_size (lnrbitsize, 1);
+      tree lll_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnltype, lnlbitsize, lnlbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+      tree llr_arg = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lnrtype, lnrbitsize, lnrbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+
+      /* ??? Handle ll_reversep and endianness below?  */
+      HOST_WIDE_INT lnlcbitpos = 0;
+      HOST_WIDE_INT lnrcbitpos = lnlbitsize;
+
+      tree mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      tree lll_mask = fold_build3 (BIT_FIELD_REF, lnltype, mask,
+				   bitsize_int (lnlbitsize),
+				   bitsize_int (lnlcbitpos));
+      if (! integer_all_onesp (lll_mask))
+	lll_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnltype,
+				   lll_arg, lll_mask);
+      tree llr_mask = fold_build3 (BIT_FIELD_REF, lnrtype, mask,
+				   bitsize_int (lnrbitsize),
+				   bitsize_int (lnrcbitpos));
+      if (! integer_all_onesp (llr_mask))
+	llr_arg = fold_build2_loc (loc, BIT_AND_EXPR, lnrtype,
+				   llr_arg, llr_mask);
+      tree lnr_const = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      tree lll_const = fold_build3 (BIT_FIELD_REF, lnltype, lnr_const,
+				    bitsize_int (lnlbitsize),
+				    bitsize_int (lnlcbitpos));
+      tree lll_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 lll_arg, lll_const);
+      tree llr_const = fold_build3 (BIT_FIELD_REF, lnrtype, lnr_const,
+				    bitsize_int (lnrbitsize),
+				    bitsize_int (lnrcbitpos));
+      tree llr_result = fold_build2_loc (loc, wanted_code, truth_type,
+					 llr_arg, llr_const);
+      result = build2_loc (loc, orig_code, truth_type,
+			   lll_result, llr_result);
+    }
+  else
+    {
+
+      /* Construct the expression we will return.  First get the component
+	 reference we will make.  Unless the mask is all ones the width of
+	 that field, perform the mask operation.  Then compare with the
+	 merged constant.  */
+      result = make_bit_field_ref (loc, ll_inner, ll_arg,
+				   lntype, lnbitsize, lnbitpos,
+				   ll_unsignedp || rl_unsignedp, ll_reversep);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+      if (! integer_all_onesp (ll_mask))
+	result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+
+      result = build2_loc (loc, wanted_code, truth_type, result,
+			   const_binop (BIT_IOR_EXPR, l_const, r_const));
+    }
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+#if 0
+  inform (loc, "merged %qT and %qT into %qT", lhs, rhs, result);
+#endif
+
+  return result;
 }
 \f
 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,14 +9335,6 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;
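
For the alignment-straddling fallback, a layout of the following shape
is the intended target (illustrative only: struct T, its field widths
and the compared constants are invented, and whether the split is
attempted depends on TYPE_ALIGN and on what get_best_mode accepts for
the target):

/* With 32-bit alignment for the struct, "mid" occupies bits 28..35
   and so crosses a word boundary; no single integer mode covers both
   tests without crossing it, and the first get_best_mode call fails.
   The fallback above instead asks for two aligned loads (lnmode and
   lnmode2) and, when the merge succeeds, emits one masked comparison
   per half joined by the original truth operation.  */
struct T
{
  unsigned lo  : 28;
  unsigned mid : 8;
  unsigned pad : 28;
};

int
h (const struct T *p)
{
  return p->lo == 5 && p->mid == 3;
}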


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-09-23 23:23 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-15 16:44 [gcc(refs/users/aoliva/heads/testme)] improvements for fold_truth_andor_1 Alexandre Oliva
  -- strict thread matches above, loose matches on Subject: below --
2020-09-23 23:23 Alexandre Oliva
2020-09-17 11:31 Alexandre Oliva
2020-09-17  8:22 Alexandre Oliva
2020-09-17  3:20 Alexandre Oliva
2020-09-17  2:52 Alexandre Oliva
2020-09-16 21:38 Alexandre Oliva
2020-09-15 15:55 Alexandre Oliva

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).