public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][committed] aarch64: PR target/108840 Simplify register shift RTX costs and eliminate shift amount masking
@ 2023-04-19  8:35 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-04-19  8:35 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1286 bytes --]

Hi all,

In this PR we fail to eliminate explicit &31 operations for variable shifts such as in:
void
bar (int x[3], int y)
{
  x[0] <<= (y & 31);
  x[1] <<= (y & 31);
  x[2] <<= (y & 31);
}

The simplification is rejected because the RTX costs end up giving too high a cost for:
(set (reg:SI 96)
    (ashift:SI (reg:SI 98)
        (subreg:QI (and:SI (reg:SI 99)
                (const_int 31 [0x1f])) 0)))

There is code to handle the AND-31 case in rtx costs, but it gets confused by the subreg.
It's easy enough to fix by looking inside the subreg when costing the expression.
While doing that I noticed that the ASHIFT case and the other shift-like cases (ROTATE, ROTATERT,
LSHIFTRT and ASHIFTRT) are almost identical, so we should just merge them. This code will only be
used for valid insns anyway, so the code after this patch should do the Right Thing (TM) for all such shift cases.

With this patch there are no more "and wn, wn, 31" instructions left in the testcase.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

	PR target/108840

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_rtx_costs): Merge ASHIFT and
	ROTATE, ROTATERT, LSHIFTRT, ASHIFTRT cases.  Handle subregs in op1.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/pr108840.c: New test.

[-- Attachment #2: reg-shift-and.patch --]
[-- Type: application/octet-stream, Size: 3471 bytes --]

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f070218eca1130d5a2b8f3788ce2f2018eea22af..49caff66960f751d04e63294580d58b55678cf5d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14680,6 +14680,10 @@ cost_plus:
 	}
       return false;
 
+    case ROTATE:
+    case ROTATERT:
+    case LSHIFTRT:
+    case ASHIFTRT:
     case ASHIFT:
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
@@ -14695,8 +14699,8 @@ cost_plus:
 		}
 	      else
 		{
-		  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
-		     aliases.  */
+		  /* LSL (immediate), ASR (immediate), UBFM, UBFIZ and friends.
+		     These are all aliases.  */
 		  *cost += extra_cost->alu.shift;
 		}
 	    }
@@ -14720,9 +14724,13 @@ cost_plus:
 	  else
 	    {
 	      if (speed)
-		/* LSLV.  */
+		/* LSLV, ASRV.  */
 		*cost += extra_cost->alu.shift_reg;
 
+	       /* The register shift amount may be in a shorter mode expressed
+		  as a lowpart SUBREG.  For costing purposes just look inside.  */
+	      if (SUBREG_P (op1) && subreg_lowpart_p (op1))
+		op1 = SUBREG_REG (op1);
 	      if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
 		  && CONST_INT_P (XEXP (op1, 1))
 		  && known_eq (INTVAL (XEXP (op1, 1)),
@@ -14737,55 +14745,6 @@ cost_plus:
 	  return false;  /* All arguments need to be in registers.  */
         }
 
-    case ROTATE:
-    case ROTATERT:
-    case LSHIFTRT:
-    case ASHIFTRT:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
-
-      if (CONST_INT_P (op1))
-	{
-	  /* ASR (immediate) and friends.  */
-	  if (speed)
-	    {
-	      if (VECTOR_MODE_P (mode))
-		*cost += extra_cost->vect.alu;
-	      else
-		*cost += extra_cost->alu.shift;
-	    }
-
-	  *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
-	  return true;
-	}
-      else
-	{
-	  if (VECTOR_MODE_P (mode))
-	    {
-	      if (speed)
-		/* Vector shift (register).  */
-		*cost += extra_cost->vect.alu;
-	    }
-	  else
-	    {
-	      if (speed)
-		/* ASR (register) and friends.  */
-		*cost += extra_cost->alu.shift_reg;
-
-	      if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
-		  && CONST_INT_P (XEXP (op1, 1))
-		  && known_eq (INTVAL (XEXP (op1, 1)),
-			       GET_MODE_BITSIZE (mode) - 1))
-		{
-		  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
-		  /* We already demanded XEXP (op1, 0) to be REG_P, so
-		     don't recurse into it.  */
-		  return true;
-		}
-	    }
-	  return false;  /* All arguments need to be in registers.  */
-	}
-
     case SYMBOL_REF:
 
       if (aarch64_cmodel == AARCH64_CMODEL_LARGE
diff --git a/gcc/testsuite/gcc.target/aarch64/pr108840.c b/gcc/testsuite/gcc.target/aarch64/pr108840.c
new file mode 100644
index 0000000000000000000000000000000000000000..804c1cd915675ed3f4ad8e668548c885574fb18a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr108840.c
@@ -0,0 +1,38 @@
+/* PR target/108840.  Check that the explicit &31 is eliminated.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+int
+foo (int x, int y)
+{
+  return x << (y & 31);
+}
+
+void
+bar (int x[3], int y)
+{
+  x[0] <<= (y & 31);
+  x[1] <<= (y & 31);
+  x[2] <<= (y & 31);
+}
+
+void
+baz (int x[3], int y)
+{
+  y &= 31;
+  x[0] <<= y;
+  x[1] <<= y;
+  x[2] <<= y;
+}
+
+void corge (int, int, int);
+
+void
+qux (int x, int y, int z, int n)
+{
+  n &= 31;
+  corge (x << n, y << n, z >> n);
+}
+
+/* { dg-final { scan-assembler-not {and\tw[0-9]+, w[0-9]+, 31} } } */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-04-19  8:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-19  8:35 [PATCH][committed] aarch64: PR target/108840 Simplify register shift RTX costs and eliminate shift amount masking Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).