public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 12:20 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 12:20 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c5f6afb8f094b35311036f224ccbdc6f5d0f5328

commit c5f6afb8f094b35311036f224ccbdc6f5d0f5328
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly
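
For illustration only, a hypothetical example (not part of the commit) of
the kind of source pattern this fold targets: both comparisons read
adjacent fields of the same word, so the two narrow loads can be replaced
by one wider load, inserted at a point that dominates both original loads.

    struct s { unsigned char a, b; };

    int
    f (struct s *p)
    {
      /* Each comparison loads one byte; after the fold a single 16-bit
         load covering both fields can feed a masked compare.  */
      return p->a == 1 && p->b == 2;
    }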

Diff:
---
 gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..41fb802803f 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,28 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, unshare_expr (inner),
+				 unshare_expr (orig_inner),
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  return force_gimple_operand_gsi (&gsi, ref,
+				   true, NULL_TREE,
+				   true, GSI_SAME_STMT);
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7557,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7567,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7630,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7762,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7828,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7854,9 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || (ll_load && rl_load
+	  && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7867,9 @@ fold_truth_andor_maybe_separate (location_t loc,
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || (lr_load && rr_load
+	       && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
     return 0;
   else
     l_const = r_const = 0;
@@ -8035,18 +8178,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8281,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],
@@ -8144,6 +8327,8 @@ fold_truth_andor_maybe_separate (location_t loc,
 
       for (int j = 0; j < 2; j++)
 	{
+	  op[j] = unshare_expr (op[j]);
+
 	  /* Mask out the bits belonging to the other part.  */
 	  if (xmask[j][i])
 	    mask[j] = int_const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);


* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-08-18  3:55 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-08-18  3:55 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a58547af9f6aa1a9b3c5c4e32f899880fcc5abf5

commit a58547af9f6aa1a9b3c5c4e32f899880fcc5abf5
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly
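
A minimal usage sketch (editorial; the helper name is hypothetical) of the
-1/0/+1 contract of mergeable_loads_p added below: a negative result means
the first load dominates and the merged load goes before it, a positive
result picks the second load, and zero means merging is not safe.

    static gimple *
    merged_load_point (gimple *l_load, gimple *r_load)
    {
      int m = mergeable_loads_p (l_load, r_load);
      if (m == 0)
        return NULL;  /* Intervening store, or neither load dominates.  */
      /* The dominating load is the insertion point for the merged load.  */
      return m < 0 ? l_load : r_load;
    }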

Diff:
---
 gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index a12cea549a65..31156e23e862 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7077,7 +7078,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7158,9 +7159,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7197,8 +7199,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7206,7 +7207,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7251,6 +7252,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7277,6 +7298,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7284,7 +7308,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7294,11 +7319,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7308,7 +7335,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7326,16 +7353,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7484,6 +7513,28 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, unshare_expr (inner),
+				 unshare_expr (orig_inner),
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  return force_gimple_operand_gsi (&gsi, ref,
+				   true, NULL_TREE,
+				   true, GSI_SAME_STMT);
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7503,7 +7554,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7512,9 +7564,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7575,6 +7627,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7626,6 +7759,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7690,20 +7825,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7712,7 +7851,9 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || (ll_load && rl_load
+	  && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,7 +7864,9 @@ fold_truth_andor_maybe_separate (location_t loc,
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || (lr_load && rr_load
+	       && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
     return 0;
   else
     l_const = r_const = 0;
@@ -8032,18 +8175,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8115,18 +8278,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],
@@ -8141,6 +8324,8 @@ fold_truth_andor_maybe_separate (location_t loc,
 
       for (int j = 0; j < 2; j++)
 	{
+	  op[j] = unshare_expr (op[j]);
+
 	  /* Mask out the bits belonging to the other part.  */
 	  if (xmask[j][i])
 	    mask[j] = int_const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);


* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 11:52 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 11:52 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:865c0dab37e34a394617a8260fd7ad520f49b73a

commit 865c0dab37e34a394617a8260fd7ad520f49b73a
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly
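
A hypothetical case (editorial example) that the intervening-vdef check in
mergeable_loads_p below is meant to reject: a store between the two field
loads makes it unsafe to hoist a single wider load above it, so the
function returns 0 and the comparisons are not combined.

    struct s { unsigned char a, b; };

    int
    g (struct s *p)
    {
      int t = p->a == 1;
      p->b = 2;                /* Intervening store (a vdef).  */
      return t && p->b == 2;   /* Loads must not be merged across it.  */
    }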

Diff:
---
 gcc/gimple-fold.cc | 256 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 222 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..160ab2c75fe 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,31 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, unshare_expr (inner),
+				 unshare_expr (orig_inner),
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7560,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7570,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7633,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7765,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7831,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7857,9 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || (ll_load && rl_load
+	  && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7870,9 @@ fold_truth_andor_maybe_separate (location_t loc,
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || (lr_load && rr_load
+	       && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
     return 0;
   else
     l_const = r_const = 0;
@@ -8035,18 +8181,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8284,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],
@@ -8144,6 +8330,8 @@ fold_truth_andor_maybe_separate (location_t loc,
 
       for (int j = 0; j < 2; j++)
 	{
+	  op[j] = unshare_expr (op[j]);
+
 	  /* Mask out the bits belonging to the other part.  */
 	  if (xmask[j][i])
 	    mask[j] = int_const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);


* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27 11:08 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27 11:08 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a26f9f3bba9e51e2cac71bfb1837f9e3b0a4f91a

commit a26f9f3bba9e51e2cac71bfb1837f9e3b0a4f91a
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly
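
A minimal usage sketch (editorial; the wrapper is hypothetical) of the
make_bit_field_load helper added below: with a null POINT it only builds
the BIT_FIELD_REF tree, like make_bit_field_ref; with a non-null POINT it
also emits the load as a new GIMPLE assignment before POINT and returns
the SSA name holding the loaded value.

    static tree
    load_or_ref (location_t loc, tree inner, tree orig, tree type,
                 HOST_WIDE_INT size, poly_int64 pos, gimple *point)
    {
      return make_bit_field_load (loc, inner, orig, type, size, pos,
                                  /*unsignedp=*/1, /*reversep=*/0, point);
    }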

Diff:
---
 gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..71fc728467d 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,9 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || (ll_load && rl_load
+	  && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7869,9 @@ fold_truth_andor_maybe_separate (location_t loc,
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || (lr_load && rr_load
+	       && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
     return 0;
   else
     l_const = r_const = 0;
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],

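For illustration only (not part of the commit), a minimal C sketch of the kind of source this combined-load folding targets; the struct and field names are made up:

  /* Both comparisons load adjacent bytes of the same word of *p.  When
     mergeable_loads_p accepts the two loads, the folder can emit one
     wider load and test it against a combined mask instead of keeping
     two separate byte loads.  */
  struct s { unsigned char a, b; };

  int
  f (const struct s *p)
  {
    return p->a == 1 && p->b == 2;
  }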
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27  9:22 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27  9:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8b7710da628edb522b9fd8f8b62d622d9a86f915

commit 8b7710da628edb522b9fd8f8b62d622d9a86f915
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..72814f902b1 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,9 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || (ll_load && rl_load
+	  && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7869,9 @@ fold_truth_andor_maybe_separate (location_t loc,
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || (lr_load && rr_load
+	       && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
     return 0;
   else
     l_const = r_const = 0;
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],

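As a rough sketch of how the dominance test above picks the insertion point (hypothetical code, not from the commit):

  struct s { unsigned char a, b; };

  int
  g (const struct s *p, int cond)
  {
    int x = p->a;   /* This load's stmt dominates the one below, so
                       mergeable_loads_p returns -1 and the merged, wider
                       load would be inserted before this statement.  */
    if (cond)
      return x == 1 && p->b == 2;   /* Dominated load: with the roles
                                       swapped, the function would
                                       return +1 instead.  */
    return 0;
  }

With neither load dominating the other, or with an intervening store, the function returns 0 and no merging is attempted.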
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27  9:05 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27  9:05 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:399a05175f229f54eb10a61b1450c12ea228e438

commit 399a05175f229f54eb10a61b1450c12ea228e438
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 257 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 222 insertions(+), 35 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..477b5854485 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable, r_mergeable;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7865,18 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       l_const = lr_arg, r_const = rr_arg;
       lr_reversep = ll_reversep;
+      r_mergeable = 0;
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
     return 0;
   else
-    l_const = r_const = 0;
+    {
+      l_const = r_const = 0;
+      r_mergeable = 0;
+    }
 
   if (lsignbit)
     {
@@ -8035,18 +8182,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8285,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],

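A hypothetical example of the case the intervening-vdef checks reject (not from the commit):

  struct s { unsigned char a, b; };

  int
  h (const struct s *p, struct s *q)
  {
    int first = p->a == 1;
    q->b = 2;   /* A store between the two loads: it shows up as a vdef,
                   so mergeable_loads_p returns 0 and the comparisons are
                   left alone, since *q might alias *p.  */
    return first && p->b == 2;
  }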
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27  8:59 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27  8:59 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:97f45a4cd2b6ea461c878401ab5a8c544b5d22b9

commit 97f45a4cd2b6ea461c878401ab5a8c544b5d22b9
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 259 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 224 insertions(+), 35 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..a3665c04d50 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,32 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7561,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7571,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7634,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7766,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable, r_mergeable;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7832,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7858,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7867,18 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       l_const = lr_arg, r_const = rr_arg;
       lr_reversep = ll_reversep;
+      r_mergeable = 0;
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
     return 0;
   else
-    l_const = r_const = 0;
+    {
+      l_const = r_const = 0;
+      r_mergeable = 0;
+    }
 
   if (lsignbit)
     {
@@ -8035,18 +8184,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8287,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],

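For a sense of what decode_field_reference peels away before follow_load records the underlying memory access, another hypothetical sketch (not from the commit):

  /* In each comparison, decode_field_reference strips the BIT_AND_EXPR
     mask and the RSHIFT_EXPR, follow_load then records the SSA
     definition that loads s->word, and mergeable_loads_p decides
     whether the two comparisons may share a single load of that
     word.  */
  struct t { unsigned int word; };

  int
  k (const struct t *s)
  {
    return ((s->word >> 8) & 0x0f) == 3
           && (s->word & 0xff) == 0x21;
  }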
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27  8:28 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27  8:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ae97b6fb7279db3dd83041e6ea9d9be296d5a3aa

commit ae97b6fb7279db3dd83041e6ea9d9be296d5a3aa
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 259 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 224 insertions(+), 35 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..03fd402a99f 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,32 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7561,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7571,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7634,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7766,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable, r_mergeable;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7832,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7858,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7867,18 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       l_const = lr_arg, r_const = rr_arg;
       lr_reversep = ll_reversep;
+      r_mergeable = 0;
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
     return 0;
   else
-    l_const = r_const = 0;
+    {
+      l_const = r_const = 0;
+      r_mergeable = 0;
+    }
 
   if (lsignbit)
     {
@@ -8035,18 +8184,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8287,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],

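For readers skimming the patch, here is a minimal sketch of the kind of
source the new mergeable_loads_p check is meant to handle; the struct
and field names below are hypothetical illustrations, not taken from
the patch or its testsuite.

struct S { unsigned char a; unsigned char b; };

int
f (struct S *p)
{
  /* Each comparison reads one byte, and the two bytes are adjacent in
     memory.  When one load dominates the other and no store intervenes
     (mergeable_loads_p returns -1 or +1 rather than 0), the folder may
     replace the pair with a single wider load, inserted at the
     dominating load's position, compared against a combined mask and
     constant -- roughly (*(unsigned short *) p & 0xffff) == 0x0201 on
     a little-endian target.  */
  return p->a == 1 && p->b == 2;
}
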
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
@ 2023-07-27  8:20 Alexandre Oliva
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre Oliva @ 2023-07-27  8:20 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f8d184038b84e264f8c756348617a8992721d66f

commit f8d184038b84e264f8c756348617a8992721d66f
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 255 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 220 insertions(+), 35 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..f1bb8d6e758 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7080,7 +7080,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7161,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7201,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7209,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7254,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  exp = gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7300,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7310,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7321,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7337,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7355,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7515,32 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7560,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7570,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7633,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT to dominate both, otherwise return +1.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7765,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable, r_mergeable;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7831,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7857,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7723,13 +7866,15 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       l_const = lr_arg, r_const = rr_arg;
       lr_reversep = ll_reversep;
+      r_mergeable = 0;
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))
     return 0;
   else
-    l_const = r_const = 0;
+    {
+      l_const = r_const = 0;
+      r_mergeable = 0;
+    }
 
   if (lsignbit)
     {
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	    {
+	      gimple *point[2];
+	      if (l_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (l_mergeable < 0)
+		{
+		  point[0] = ll_load;
+		  point[1] = rl_load;
+		}
+	      else
+		{
+		  point[0] = rl_load;
+		  point[1] = ll_load;
+		}
+	      build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+				shifted[0], loc, ll_inner, ll_arg,
+				lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	    }
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],

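The nested conditional expressions passed to make_bit_field_load above
encode the insertion-point choice.  As a readability sketch only (the
helper name is made up here, and plain pointers stand in for the
patch's lr_load and rr_load statements and for the result of
mergeable_loads_p), the selection amounts to:

#include <stddef.h>

/* NULL means "do not insert anything, just build a BIT_FIELD_REF";
   otherwise return the dominating original load, which is where the
   merged wider load must be emitted so that it is available to both
   of the original uses.  */
static void *
pick_insertion_point (int mergeable, void *load_a, void *load_b)
{
  if (mergeable == 0)
    return NULL;
  return mergeable < 0 ? load_a : load_b;
}

With mergeable_loads_p's convention, a negative result means its first
argument dominates the second, so the merged load goes right before the
first argument's statement; a positive result picks the second one.
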
^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-08-18  3:55 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-27 12:20 [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly Alexandre Oliva
  -- strict thread matches above, loose matches on Subject: below --
2023-08-18  3:55 Alexandre Oliva
2023-07-27 11:52 Alexandre Oliva
2023-07-27 11:08 Alexandre Oliva
2023-07-27  9:22 Alexandre Oliva
2023-07-27  9:05 Alexandre Oliva
2023-07-27  8:59 Alexandre Oliva
2023-07-27  8:28 Alexandre Oliva
2023-07-27  8:20 Alexandre Oliva

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).