public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/c++-modules] hppa: PR middle-end/87256: Improved hppa_rtx_costs avoids synth_mult madness.
@ 2020-08-28 16:03 Nathan Sidwell
  0 siblings, 0 replies; only message in thread
From: Nathan Sidwell @ 2020-08-28 16:03 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:050fc8b27a852007f8bb667999e1c8cfd31f90e1

commit 050fc8b27a852007f8bb667999e1c8cfd31f90e1
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Tue Aug 25 19:02:45 2020 +0100

    hppa: PR middle-end/87256: Improved hppa_rtx_costs avoids synth_mult madness.
    
    This is my proposed fix to PR middle-end/87256 where synth_mult takes an
    unreasonable amount of CPU time determining an optimal sequence of
    instructions to perform multiplication by (large) integer constants on hppa.
    One workaround proposed in bugzilla is to increase the size of the hash
    table used to cache/reuse intermediate results. This helps but is a workaround for
    the (hidden) underlying problem.
    
    The real issue is that the hppa_rtx_costs function is providing wildly
    inaccurate values (estimates) to the middle-end.  For example, (p*q)+(r*s)
    would appear to be cheaper than a single multiplication.  Another
    example is that "(ashiftrt:di regA regB)" is claimed to be only
    COSTS_N_INSNS(1) when in fact the hppa backend actually generates
    slightly more than a single instruction.
    
    It turns out that simply tightening up the logic in hppa_rtx_costs to
    return more reasonable values, dramatically reduces the number of recursive
    invocations in synth_mult for the test case in PR87256, and presumably
    also produces faster code (that should be observable in benchmarks).
    
    2020-08-25  Roger Sayle  <roger@nextmovesoftware.com>
    
    gcc/ChangeLog
            PR middle-end/87256
            * config/pa/pa.c (hppa_rtx_costs_shadd_p): New helper function
            to check for coefficients supported by shNadd and shladd,l.
            (hppa_rtx_costs):  Rewrite to avoid using estimates based upon
            FACTOR and enable recursing deeper into RTL expressions.

Diff:
---
 gcc/config/pa/pa.c | 172 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 138 insertions(+), 34 deletions(-)

diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 07d32877154..cb888528765 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -1492,6 +1492,33 @@ hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
     }
 }
 
+/* Return true if X represents a (possibly non-canonical) shNadd pattern.
+   The machine mode of X is known to be SImode or DImode.  */
+
+static bool
+hppa_rtx_costs_shadd_p (rtx x)
+{
+  if (GET_CODE (x) != PLUS
+      || !REG_P (XEXP (x, 1)))
+    return false;
+  rtx op0 = XEXP (x, 0);
+  if (GET_CODE (op0) == ASHIFT
+      && CONST_INT_P (XEXP (op0, 1))
+      && REG_P (XEXP (op0, 0)))
+    {
+      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
+      return x == 1 || x == 2 || x == 3;
+    }
+  if (GET_CODE (op0) == MULT
+      && CONST_INT_P (XEXP (op0, 1))
+      && REG_P (XEXP (op0, 0)))
+    {
+      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
+      return x == 2 || x == 4 || x == 8;
+    }
+  return false;
+}
+
 /* Compute a (partial) cost for rtx X.  Return true if the complete
    cost has been computed, and false if subexpressions should be
    scanned.  In either case, *TOTAL contains the cost result.  */
@@ -1499,15 +1526,16 @@ hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
 static bool
 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
 		int opno ATTRIBUTE_UNUSED,
-		int *total, bool speed ATTRIBUTE_UNUSED)
+		int *total, bool speed)
 {
-  int factor;
   int code = GET_CODE (x);
 
   switch (code)
     {
     case CONST_INT:
-      if (INTVAL (x) == 0)
+      if (outer_code == SET)
+	*total = COSTS_N_INSNS (1);
+      else if (INTVAL (x) == 0)
 	*total = 0;
       else if (INT_14_BITS (x))
 	*total = 1;
@@ -1530,32 +1558,35 @@ hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	  && outer_code != SET)
 	*total = 0;
       else
-        *total = 8;
+	*total = 8;
       return true;
 
     case MULT:
       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
 	{
 	  *total = COSTS_N_INSNS (3);
-	  return true;
 	}
-
-      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
-      factor = GET_MODE_SIZE (mode) / 4;
-      if (factor == 0)
-	factor = 1;
-
-      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
-	*total = factor * factor * COSTS_N_INSNS (8);
+      else if (mode == DImode)
+	{
+	  if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
+	    *total = COSTS_N_INSNS (32);
+	  else
+	    *total = COSTS_N_INSNS (80);
+	}
       else
-	*total = factor * factor * COSTS_N_INSNS (20);
-      return true;
+	{
+	  if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
+	    *total = COSTS_N_INSNS (8);
+	  else
+	    *total = COSTS_N_INSNS (20);
+	}
+      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
 
     case DIV:
       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
 	{
 	  *total = COSTS_N_INSNS (14);
-	  return true;
+	  return false;
 	}
       /* FALLTHRU */
 
@@ -1563,34 +1594,107 @@ hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case MOD:
     case UMOD:
       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
-      factor = GET_MODE_SIZE (mode) / 4;
-      if (factor == 0)
-	factor = 1;
-
-      *total = factor * factor * COSTS_N_INSNS (60);
-      return true;
+      if (mode == DImode)
+	*total = COSTS_N_INSNS (240);
+      else
+	*total = COSTS_N_INSNS (60);
+      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
 
     case PLUS: /* this includes shNadd insns */
     case MINUS:
       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	*total = COSTS_N_INSNS (3);
+      else if (mode == DImode)
 	{
-	  *total = COSTS_N_INSNS (3);
-	  return true;
+	  if (TARGET_64BIT)
+	    {
+	      *total = COSTS_N_INSNS (1);
+	      /* Handle shladd,l instructions.  */
+	      if (hppa_rtx_costs_shadd_p (x))
+		return true;
+	    }
+	  else
+	    *total = COSTS_N_INSNS (2);
 	}
-
-      /* A size N times larger than UNITS_PER_WORD needs N times as
-	 many insns, taking N times as long.  */
-      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
-      if (factor == 0)
-	factor = 1;
-      *total = factor * COSTS_N_INSNS (1);
-      return true;
+      else
+	{
+	  *total = COSTS_N_INSNS (1);
+	  /* Handle shNadd instructions.  */
+	  if (hppa_rtx_costs_shadd_p (x))
+	    return true;
+	}
+      return REG_P (XEXP (x, 0))
+	     && (REG_P (XEXP (x, 1))
+		 || CONST_INT_P (XEXP (x, 1)));
 
     case ASHIFT:
+      if (mode == DImode)
+	{
+	  if (TARGET_64BIT)
+	    *total = COSTS_N_INSNS (3);
+	  else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+	    {
+	      *total = COSTS_N_INSNS (2);
+	      return true;
+	    }
+	  else if (speed)
+	    *total = COSTS_N_INSNS (13);
+	  else
+	    *total = COSTS_N_INSNS (18);
+	}
+      else if (TARGET_64BIT)
+	*total = COSTS_N_INSNS (4);
+      else
+	*total = COSTS_N_INSNS (2);
+      return REG_P (XEXP (x, 0))
+	     && (REG_P (XEXP (x, 1))
+		 || CONST_INT_P (XEXP (x, 1)));
+
     case ASHIFTRT:
+      if (mode == DImode)
+	{
+	  if (TARGET_64BIT)
+	    *total = COSTS_N_INSNS (3);
+	  else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+	    {
+	      *total = COSTS_N_INSNS (2);
+	      return true;
+	    }
+	  else if (speed)
+	    *total = COSTS_N_INSNS (14);
+	  else
+	    *total = COSTS_N_INSNS (19);
+	}
+      else if (TARGET_64BIT)
+	*total = COSTS_N_INSNS (4);
+      else
+	*total = COSTS_N_INSNS (2);
+      return REG_P (XEXP (x, 0))
+	     && (REG_P (XEXP (x, 1))
+		 || CONST_INT_P (XEXP (x, 1)));
+
     case LSHIFTRT:
-      *total = COSTS_N_INSNS (1);
-      return true;
+      if (mode == DImode)
+	{
+	  if (TARGET_64BIT)
+	    *total = COSTS_N_INSNS (2);
+	  else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+	    {
+	      *total = COSTS_N_INSNS (2);
+	      return true;
+	    }
+	  else if (speed)
+	    *total = COSTS_N_INSNS (12);
+	  else
+	    *total = COSTS_N_INSNS (15);
+	}
+      else if (TARGET_64BIT)
+	*total = COSTS_N_INSNS (3);
+      else
+	*total = COSTS_N_INSNS (2);
+      return REG_P (XEXP (x, 0))
+	     && (REG_P (XEXP (x, 1))
+		 || CONST_INT_P (XEXP (x, 1)));
 
     default:
       return false;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-28 16:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-28 16:03 [gcc/devel/c++-modules] hppa: PR middle-end/87256: Improved hppa_rtx_costs avoids synth_mult madness Nathan Sidwell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).