From: Alexandre Oliva <aoliva@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
Date: Thu, 27 Jul 2023 09:22:58 +0000 (GMT)
Message-ID: <20230727092258.2EFA83882025@sourceware.org>

https://gcc.gnu.org/g:8b7710da628edb522b9fd8f8b62d622d9a86f915

commit 8b7710da628edb522b9fd8f8b62d622d9a86f915
Author: Alexandre Oliva <oliva@gnu.org>
Date:   Thu Jul 27 05:15:20 2023 -0300

    check for mergeable loads, choose insertion points accordingly
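
By way of illustration (a hypothetical example, not taken from the patch or
its testsuite), fold_truth_andor_maybe_separate targets conditions like the
one below: both comparisons read fields of the same word, so they can be
combined into a single load and a masked compare.  With this change, the
decoder also records the defining load statement of each operand, and the
combined load is only built if the two original loads can be merged safely.

    /* Hypothetical example: both bitfields live in the same word, so
       "p->a == 1 && p->b == 2" can become one load of the word and one
       masked comparison.  */
    struct s
    {
      unsigned int a : 4;
      unsigned int b : 4;
    };

    int
    f (struct s *p)
    {
      return p->a == 1 && p->b == 2;
    }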

Diff:
---
 gcc/gimple-fold.cc | 253 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f65922309fb..72814f902b1 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7080,7 +7081,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
    Same as ssa_is_replaceable_p, except that we don't insist it has a
    single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7161,9 +7162,10 @@ is_cast_p (tree *name)
       if (gimple_num_ops (def) != 2)
 	break;
 
-      if (get_gimple_rhs_class (gimple_expr_code (def))
-	  == GIMPLE_SINGLE_RHS)
+      if (gimple_assign_single_p (def))
 	{
+	  if (gimple_assign_load_p (def))
+	    break;
 	  *name = gimple_assign_rhs1 (def);
 	  continue;
 	}
@@ -7200,8 +7202,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 2:
-	  if (get_gimple_rhs_class (gimple_expr_code (def))
-	      == GIMPLE_SINGLE_RHS)
+	  if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
 	    {
 	      *name = gimple_assign_rhs1 (def);
 	      continue;
@@ -7209,7 +7210,7 @@ is_binop_p (enum tree_code code, tree *name)
 	  return 0;
 
 	case 3:
-	  ;
+	  break;
 	}
 
       if (gimple_assign_rhs_code (def) != code)
@@ -7254,6 +7255,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is an SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS; otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+      && !SSA_NAME_IS_DEFAULT_DEF (exp))
+    {
+      gimple *def = SSA_NAME_DEF_STMT (exp);
+      if (gimple_assign_load_p (def))
+	{
+	  *load = def;
+	  return gimple_assign_rhs1 (def);
+	}
+    }
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -7280,6 +7301,9 @@ prepare_xor (tree l_arg, tree *r_arg)
    BIT_XOR_EXPR compared with zero.  We're to take the first or second
    operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -7287,7 +7311,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask, int xor_which)
+			tree *pmask, tree *pand_mask, int xor_which,
+			gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7297,11 +7322,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
      below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-    return 0;
+    return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
      that doesn't affect the machine mode.  However, record the type of the
@@ -7311,7 +7338,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
     {
       if (TREE_CODE (and_mask) != INTEGER_CST)
-	return 0;
+	return NULL_TREE;
     }
 
   if (xor_which)
@@ -7329,16 +7356,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
     {
       if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-	return 0;
+	return NULL_TREE;
       shiftrt = tree_to_shwi (shift);
       if (shiftrt <= 0)
-	return 0;
+	return NULL_TREE;
     }
 
   if (tree t = is_cast_p (&exp))
     if (!outer_type)
       outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
@@ -7487,6 +7516,30 @@ compute_split_boundary_from_align (HOST_WIDE_INT align,
   return boundary;
 }
 
+/* Make a bit_field_ref.  If POINT is NULL, return the BIT_FIELD_REF.
+   Otherwise, build and insert a load stmt before POINT, and return
+   the SSA_NAME.  ???  Rewrite LOAD in terms of the bitfield?  */
+
+static tree
+make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
+		     HOST_WIDE_INT bitsize, poly_int64 bitpos,
+		     int unsignedp, int reversep, gimple *point)
+{
+  tree ref = make_bit_field_ref (loc, inner, orig_inner,
+				 type, bitsize, bitpos,
+				 unsignedp, reversep);
+  if (!point)
+    return ref;
+
+  tree tmp = make_ssa_name (TREE_TYPE (ref));
+  gimple_stmt_iterator gsi = gsi_for_stmt (point);
+  gsi_insert_before (&gsi,
+		     gimple_build_assign (tmp, ref),
+		     GSI_NEW_STMT);
+
+  return tmp;
+}
+
 /* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
    LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
    respectively, starting at BIT_POS, using reversed endianness if
@@ -7506,7 +7559,8 @@ build_split_load (tree /* out */ ln_arg[2],
 		  HOST_WIDE_INT /* out */ shifted[2],
 		  location_t loc, tree inner, tree orig_inner,
 		  scalar_int_mode mode, scalar_int_mode mode2,
-		  HOST_WIDE_INT bit_pos, bool reversep)
+		  HOST_WIDE_INT bit_pos, bool reversep,
+		  gimple *point[2])
 {
   bitsiz[0] = GET_MODE_BITSIZE (mode);
   bitsiz[1] = GET_MODE_BITSIZE (mode2);
@@ -7515,9 +7569,9 @@ build_split_load (tree /* out */ ln_arg[2],
     {
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
-      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+      ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
 				      type, bitsiz[i],
-				      bit_pos, 1, reversep);
+				       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
 
@@ -7578,6 +7632,87 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
+/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
+   words, can be safely merged into a single load.  Return zero if
+   there are intervening stores, or if neither stmt dominates the
+   other.  If they are mergeable, return -1 if the merged load should
+   be inserted before LSTMT (dominating both), or +1 before RSTMT.  */
+
+static int
+mergeable_loads_p (gimple *lstmt, gimple *rstmt)
+{
+  gimple *stmts[2] = { lstmt, rstmt };
+  int ret;
+
+  if (stmt_dominates_stmt_p (lstmt, rstmt))
+    {
+      ret = -1;
+      stmts[0] = lstmt;
+      stmts[1] = rstmt;
+    }
+  else if (stmt_dominates_stmt_p (rstmt, lstmt))
+    {
+      ret = 1;
+      stmts[1] = lstmt;
+      stmts[0] = rstmt;
+    }
+  else
+    return 0;
+
+  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
+  if (bbs[0] == bbs[1])
+    {
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+	   gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+      return ret;
+    }
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gpi)))
+      return 0;
+
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
+       !gsi_end_p (gsi); gsi_prev (&gsi))
+    /* ??? For now, stop if we find any intervening vdefs.  */
+    if (gimple_vdef (gsi_stmt (gsi)))
+      return 0;
+
+  for (basic_block bb = bbs[1]; bb != bbs[0]; )
+    {
+      /* ??? For now, stop if any visited block has more than one
+	 predecessor.  */
+      if (!single_pred_p (bb))
+	return 0;
+
+      bb = single_pred (bb);
+
+      for (gphi_iterator gpi = gsi_start_phis (bb);
+	   !gsi_end_p (gpi); gsi_next (&gpi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gpi)))
+	  return 0;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	/* ??? For now, stop if we find any intervening vdefs.  */
+	if (gimple_vdef (gsi_stmt (gsi)))
+	  return 0;
+    }
+
+  return ret;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -7629,6 +7764,8 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code orig_code = code;
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
+  gimple *ll_load, *lr_load, *rl_load, *rr_load;
+  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -7693,20 +7830,24 @@ fold_truth_andor_maybe_separate (location_t loc,
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask, l_xor);
+				     &ll_mask, &ll_and_mask, l_xor,
+				     &ll_load);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask, 2 * l_xor);
+				     &lr_mask, &lr_and_mask, 2 * l_xor,
+				     &lr_load);
   int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask, r_xor);
+				     &rl_mask, &rl_and_mask, r_xor,
+				     &rl_load);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask, 2 * r_xor);
+				     &rr_mask, &rr_and_mask, 2 * r_xor,
+				     &rr_load);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -7715,7 +7856,9 @@ fold_truth_andor_maybe_separate (location_t loc,
   if (volatilep
       || ll_reversep != rl_reversep
       || ll_inner == 0 || rl_inner == 0
-      || ! operand_equal_p (ll_inner, rl_inner, 0))
+      || ! operand_equal_p (ll_inner, rl_inner, 0)
+      || (ll_load && rl_load
+	  && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -7726,7 +7869,9 @@ fold_truth_andor_maybe_separate (location_t loc,
     }
   else if (lr_reversep != rr_reversep
 	   || lr_inner == 0 || rr_inner == 0
-	   || ! operand_equal_p (lr_inner, rr_inner, 0))
+	   || ! operand_equal_p (lr_inner, rr_inner, 0)
+	   || (lr_load && rr_load
+	       && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
     return 0;
   else
     l_const = r_const = 0;
@@ -8035,18 +8180,38 @@ fold_truth_andor_maybe_separate (location_t loc,
 	{
 	  bitpos[1][0] = rnbitpos;
 	  bitsiz[1][0] = rnbitsize;
-	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
-					     rntype, rnbitsize, rnbitpos,
-					     lr_unsignedp || rr_unsignedp,
-					     lr_reversep);
+	  ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
+					      rntype, rnbitsize, rnbitpos,
+					      lr_unsignedp || rr_unsignedp,
+					      lr_reversep,
+					      r_mergeable == 0
+					      ? NULL
+					      : r_mergeable < 0
+					      ? lr_load
+					      : rr_load);
 	}
 
       if (parts > 1)
 	{
 	  if (r_split_load)
-	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
-			      shifted[1], loc, lr_inner, lr_arg,
-			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	    {
+	      gimple *point[2];
+	      if (r_mergeable == 0)
+		point[0] = point[1] = NULL;
+	      else if (r_mergeable < 0)
+		{
+		  point[0] = lr_load;
+		  point[1] = rr_load;
+		}
+	      else
+		{
+		  point[0] = rr_load;
+		  point[1] = lr_load;
+		}
+	      build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+				shifted[1], loc, lr_inner, lr_arg,
+				rnmode, rnmode2, rnbitpos, lr_reversep, point);
+	    }
 	  else
 	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
 			      shifted[1], xmask[1],
@@ -8118,18 +8283,38 @@ fold_truth_andor_maybe_separate (location_t loc,
     {
       bitpos[0][0] = lnbitpos;
       bitsiz[0][0] = lnbitsize;
-      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
-					 lntype, lnbitsize, lnbitpos,
-					 ll_unsignedp || rl_unsignedp,
-					 ll_reversep);
+      ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
+					  lntype, lnbitsize, lnbitpos,
+					  ll_unsignedp || rl_unsignedp,
+					  ll_reversep,
+					  l_mergeable == 0
+					  ? NULL
+					  : l_mergeable < 0
+					  ? ll_load
+					  : rl_load);
     }
 
   if (parts > 1)
     {
       if (l_split_load)
-	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
-			  shifted[0], loc, ll_inner, ll_arg,
-			  lnmode, lnmode2, lnbitpos, ll_reversep);
+	{
+	  gimple *point[2];
+	  if (l_mergeable == 0)
+	    point[0] = point[1] = NULL;
+	  else if (l_mergeable < 0)
+	    {
+	      point[0] = ll_load;
+	      point[1] = rl_load;
+	    }
+	  else
+	    {
+	      point[0] = rl_load;
+	      point[1] = ll_load;
+	    }
+	  build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			    shifted[0], loc, ll_inner, ll_arg,
+			    lnmode, lnmode2, lnbitpos, ll_reversep, point);
+	}
       else
 	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
 			  shifted[0], xmask[0],
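
For context, a minimal hedged sketch (mine, not part of the commit) of the
safety check behind mergeable_loads_p and the insertion-point choice: in the
plain "p->a == 1 && p->b == 2" form, the first load dominates the second, so
the combined wide load can be inserted at the first load; but if a possibly
aliasing store sits between the two loads, the helper finds an intervening
vdef and returns 0, leaving the comparisons alone.

    struct t { unsigned char a, b; };

    int
    g (struct t *p, struct t *q)
    {
      /* The store through q may alias *p and sits between the loads of
         p->a and p->b, so the two loads must not be merged here.  */
      return p->a == 1 && (q->b = 3, p->b == 2);
    }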
