* [gcc(refs/vendors/riscv/heads/ext-dce)] Adjust test in safe_for_live_propagation. Comment fixes. Fix for FUSAGE on use side. Adjust some
@ 2023-11-16 15:15 Jeff Law
From: Jeff Law @ 2023-11-16 15:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3c141081ddc95f38ee9d43e98aefed5401cade16

commit 3c141081ddc95f38ee9d43e98aefed5401cade16
Author: Jeff Law <jlaw@ventanamicro.com>
Date:   Wed Nov 15 15:50:22 2023 -0700

    Adjust test in safe_for_live_propagation.  Comment fixes.  Fix for FUSAGE on use side.  Adjust some SUBREG handling

Diff:
---
 gcc/ext-dce.cc | 92 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 28 deletions(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 64f6885d484..8a63d408494 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -40,6 +40,9 @@ along with GCC; see the file COPYING3.  If not see
    bit 16..31
    bit 32..BITS_PER_WORD-1  */
 
+/* Note this pass could be used to narrow memory loads too.  It's
+   not clear if that's profitable or not in general.  */
+
 #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
 
 /* If we know the destination of CODE only uses some low bits
@@ -50,26 +53,56 @@ along with GCC; see the file COPYING3.  If not see
 static bool
 safe_for_live_propagation (rtx_code code)
 {
+  /* First handle rtx classes which as a whole are known to
+     be either safe or unsafe.  */
+  switch (GET_RTX_CLASS (code))
+    {
+      case RTX_OBJ:
+	return true;
+
+      case RTX_COMPARE:
+      case RTX_COMM_COMPARE:
+      case RTX_TERNARY:
+	return false;
+
+      default:
+	break;
+    }
+
+  /* What's left are specific codes.  We only need to identify those
+     which are safe.   */
   switch (code)
     {
-    case REG:
+    /* These are trivially safe.  */
     case SUBREG:
-    case AND:
-    case IOR:
-    case XOR:
     case NOT:
-    case PLUS:
-    case MULT:
     case ZERO_EXTEND:
     case SIGN_EXTEND:
-
-    /* ?!? These should be double-checked.  */
-    case MINUS:
     case TRUNCATE:
+    case SS_TRUNCATE:
+    case US_TRUNCATE:
+    case PLUS:
+    case MULT:
+    case SS_MULT:
+    case US_MULT:
+    case SMUL_HIGHPART:
+    case UMUL_HIGHPART:
+    case AND:
+    case IOR:
+    case XOR:
+    case SS_PLUS:
+    case US_PLUS:
+      return true;
 
-    /* This seems wrong for the shift count.  */
+    /* We can propagate for the shifted operand, but not the shift
+       count.  The count is handled specially.  */
+    case SS_ASHIFT:
+    case US_ASHIFT:
     case ASHIFT:
       return true;
+
+    /* There may be other safe codes.  If so they can be added
+       individually when discovered.  */
     default:
       return false;
     }
@@ -226,6 +259,11 @@ ext_dce_try_optimize_insn (rtx_insn *insn, rtx set, bitmap changed_pseudos)
   rtx src = SET_SRC (set);
   rtx inner = XEXP (src, 0);
 
+  /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
+     are not useful for this optimization.  */
+  if (!REG_P (inner) && !SUBREG_P (inner))
+    return;
+
   rtx new_pattern;
   if (dump_file)
     {
@@ -309,9 +347,9 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 	bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
     }
 
- restart:
   subrtx_var_iterator::array_type array_var;
   rtx pat = PATTERN (insn);
+ restart:
   FOR_EACH_SUBRTX_VAR (iter, array_var, pat, NONCONST)
     {
       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
@@ -328,7 +366,7 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 	 is never safe as it can lead us to fail to process some of the
 	 RTL and thus not make objects live when necessary.  */
       enum rtx_code xcode = GET_CODE (x);
-      if (GET_CODE (x) == SET)
+      if (xcode == SET)
 	{
 	  const_rtx dst = SET_DEST (x);
 	  rtx src = SET_SRC (x);
@@ -395,16 +433,6 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 		  unsigned HOST_WIDE_INT src_mask
 		    = GET_MODE_MASK (GET_MODE (inner));
 
-		  /* (subreg (mem)) is technically valid RTL, but is
-		     severely discouraged.  So give up if we're about to
-		     create one.
-
-		     If this were to be loosened, then we'd still need to
-		     reject mode dependent addresses and volatile memory
-		     accesses.  */
-		  if (MEM_P (inner))
-		    continue;
-
 		  /* DST_MASK could be zero if we had something in the SET
 		     that we couldn't handle.  */
 		  if (modify && dst_mask && (dst_mask & ~src_mask) == 0)
@@ -438,8 +466,13 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 		 if we see something we don't know how to handle.  */
 	      for (;;)
 		{
-		  if (paradoxical_subreg_p (y))
-		    y = SUBREG_REG (y);
+		  /* Strip an outer STRICT_LOW_PART or paradoxical subreg.
+		     That has the effect of making the whole referenced
+		     register live.  We might be able to avoid that for
+		     STRICT_LOW_PART at some point.  */
+		  if (GET_CODE (x) == STRICT_LOW_PART
+		      || paradoxical_subreg_p (x))
+		    x = XEXP (x, 0);
 		  else if (SUBREG_P (y))
 		    {
 		      /* For anything but (subreg (reg)), break the inner loop
@@ -462,6 +495,10 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 
 		  if (REG_P (y))
 		    {
+		      /* We have found the use of a register.  We need to mark
+			 the appropriate chunks of the register live.  The mode
+			 of the REG is a starting point.  We may refine that
+			 based on what chunks in the output were live.  */
 		      rn = 4 * REGNO (y);
 		      unsigned HOST_WIDE_INT tmp_mask = dst_mask;
 
@@ -483,7 +520,7 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 		      /* Some operators imply their second operand
 			 is fully live, break this inner loop which
 			 will cause the iterator to descent into the
-			 sub-rtxs which should be safe.  */
+			 sub-rtxs outside the SET processing.  */
 		      if (binop_implies_op2_fully_live (code))
 			break;
 		    }
@@ -496,7 +533,7 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 
 		  /* If this was anything but a binary operand, break the inner
 		     loop.  This is conservatively correct as it will cause the
-		     iterator to look at the sub-rtxs.  */
+		     iterator to look at the sub-rtxs outside the SET context.  */
 		  if (!BINARY_P (src))
 		    break;
 
@@ -505,11 +542,10 @@ ext_dce_process_uses (rtx_insn *insn, bitmap livenow, bitmap live_tmp,
 		  y = XEXP (src, 1), src = pc_rtx;
 		}
 
+	      /* These are leaf nodes, no need to iterate down into them.  */
 	      if (REG_P (y) || CONSTANT_P (y))
 		iter.skip_subrtxes ();
 	    }
-	  else if (REG_P (dst))
-	    iter.substitute (src);
 	}
       /* If we are reading the low part of a SUBREG, then we can
 	 refine liveness of the input register, otherwise let the

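Two small, self-contained sketches may help when reading the patch.  Both
are illustrative only: the helper names and constants are invented for this
note and are not part of ext-dce.cc.

The first sketch shows the chunked liveness encoding the pass relies on.
Each register is tracked in four chunks (bits 0..7, 8..15, 16..31 and
32..BITS_PER_WORD-1), so chunk C of register R maps to index R * 4 + C in
the liveness bitmap; that is why the code above uses expressions such as
"rn = 4 * REGNO (y)" and
"bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4)".

  /* Hypothetical helpers, for illustration only.  */
  #include <bitset>
  #include <cstddef>

  enum chunk { CHUNK_0_7, CHUNK_8_15, CHUNK_16_31, CHUNK_32_UP, NUM_CHUNKS };

  /* Bitmap index of chunk C of register REGNO.  */
  static inline unsigned
  chunk_index (unsigned regno, chunk c)
  {
    return regno * NUM_CHUNKS + c;
  }

  /* Mark every chunk of REGNO live, as the pass does when it cannot reason
     more precisely about a use (for example after stripping a
     STRICT_LOW_PART or a paradoxical subreg).  */
  template <std::size_t N>
  static void
  mark_reg_fully_live (std::bitset<N> &livenow, unsigned regno)
  {
    for (int c = CHUNK_0_7; c < NUM_CHUNKS; c++)
      livenow.set (chunk_index (regno, static_cast<chunk> (c)));
  }

The second sketch motivates the classification in safe_for_live_propagation:
when only the low bits of a result are live, operators such as PLUS cannot
let the discarded high bits of an input leak into those low bits, whereas a
comparison (and, similarly, the count operand of a shift) depends on every
bit of its operands.  The constants below are arbitrary.

  static_assert (((0x100 + 0x001) & 0xff) == ((0x000 + 0x001) & 0xff),
                 "high input bits never reach the low byte of a sum");
  static_assert ((0x100 == 0x000) != ((0x100 & 0xff) == (0x000 & 0xff)),
                 "narrowing the operands of a comparison changes the result");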