public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] s390: Change SET rtx_cost handling.
@ 2022-02-25 11:38 Robin Dapp
  2022-02-25 17:50 ` Andreas Krebbel
  0 siblings, 1 reply; 2+ messages in thread
From: Robin Dapp @ 2022-02-25 11:38 UTC (permalink / raw)
  To: GCC Patches, Andreas Krebbel

Hi,

the IF_THEN_ELSE detection currently prevents us from properly costing
register-register moves which causes the lower-subreg pass to assume
that a VR-VR move is as expensive as two GPR-GPR moves.

This patch adds handling for SETs containing REGs as well as MEMs and is
inspired by the aarch64 implementation.

Bootstrapped and regtested on z900 up to z15. Is it OK?

Regards
 Robin

--

gcc/ChangeLog:

	* config/s390/s390.cc (s390_address_cost): Declare.
       	(s390_hard_regno_nregs): Declare.
       	(s390_rtx_costs): Add handling for REG and MEM in SET.

gcc/testsuite/ChangeLog:

       	* gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
test.


From 8c4c6f029dbf0c9db12c2877189a5ec0ce0a9c89 Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@linux.ibm.com>
Date: Thu, 3 Feb 2022 12:50:04 +0100
Subject: [PATCH] s390: Change SET rtx_cost handling.

The IF_THEN_ELSE detection currently prevents us from properly costing
register-register moves which causes the lower-subreg pass to assume that
a VR-VR move is as expensive as two GPR-GPR moves.

This patch adds handling for SETs containing REGs as well as MEMs and is
inspired by the aarch64 implementation.

gcc/ChangeLog:

	* config/s390/s390.cc (s390_address_cost): Declare.
       	(s390_hard_regno_nregs): Declare.
       	(s390_rtx_costs): Add handling for REG and MEM in SET.

gcc/testsuite/ChangeLog:

       	* gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
test.
---
 gcc/config/s390/s390.cc                       | 140 +++++++++++++-----
 .../vector/vec-sum-across-no-lower-subreg-1.c |  17 +++
 2 files changed, 118 insertions(+), 39 deletions(-)
 create mode 100644
gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..e647c90ab29 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -429,6 +429,14 @@ struct s390_address
    bytes on a z10 (or higher) CPU.  */
 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

+static int
+s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
+		   addr_space_t as ATTRIBUTE_UNUSED,
+		   bool speed ATTRIBUTE_UNUSED);
+
+static unsigned int
+s390_hard_regno_nregs (unsigned int regno, machine_mode mode);
+
 /* Masks per jump target register indicating which thunk need to be
    generated.  */
 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
@@ -3619,50 +3627,104 @@ s390_rtx_costs (rtx x, machine_mode mode, int
outer_code,
     case MEM:
       *total = 0;
       return true;
-
     case SET:
-      {
-	/* Without this a conditional move instruction would be
-	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
-	   comparison operator).  That's a bit pessimistic.  */
+	{
+	  rtx dest = SET_DEST (x);
+	  rtx src = SET_SRC (x);

-	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
-	  return false;
+	  switch (GET_CODE (src))
+	    {
+	    case IF_THEN_ELSE:
+		{
+		  /* Without this a conditional move instruction would be
+		     accounted as 3 * COSTS_N_INSNS (set, if_then_else,
+		     comparison operator).  That's a bit pessimistic.  */
+
+		  if (!TARGET_Z196)
+		    return false;
+
+		  rtx cond = XEXP (src, 0);
+		  if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
+		    return false;
+
+		  /* It is going to be a load/store on condition.  Make it
+		     slightly more expensive than a normal load.  */
+		  *total = COSTS_N_INSNS (1) + 2;
+
+		  rtx dst = SET_DEST (src);
+		  rtx then = XEXP (src, 1);
+		  rtx els = XEXP (src, 2);
+
+		  /* It is a real IF-THEN-ELSE.  An additional move will be
+		     needed to implement that.  */
+		  if (!TARGET_Z15
+		      && reload_completed
+		      && !rtx_equal_p (dst, then)
+		      && !rtx_equal_p (dst, els))
+		    *total += COSTS_N_INSNS (1) / 2;
+
+		  /* A minor penalty for constants we cannot directly handle.  */
+		  if ((CONST_INT_P (then) || CONST_INT_P (els))
+		      && (!TARGET_Z13 || MEM_P (dst)
+			  || (CONST_INT_P (then) && !satisfies_constraint_K (then))
+			  || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
+		    *total += COSTS_N_INSNS (1) / 2;
+
+		  /* A store on condition can only handle register src operands.  */
+		  if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
+		    *total += COSTS_N_INSNS (1) / 2;
+
+		  return true;
+		}
+	    default:
+	      break;
+	    }

-	rtx cond = XEXP (SET_SRC (x), 0);
+	  switch (GET_CODE (dest))
+	    {
+	    case SUBREG:
+	      if (!REG_P (SUBREG_REG (dest)))
+		*total += rtx_cost (SUBREG_REG (src), VOIDmode, SET, 0, speed);
+	      /* fallthrough */
+	    case REG:
+	      /* If this is a VR -> VR copy, count the number of
+		 registers.  */
+	      if (VECTOR_MODE_P (GET_MODE (dest)) && REG_P (src))
+		{
+		  int nregs = s390_hard_regno_nregs (VR0_REGNUM, GET_MODE
+						     (dest));
+		  *total = COSTS_N_INSNS (nregs);
+		}
+	      /* Same for GPRs.  */
+	      else if (REG_P (src))
+		{
+		  int nregs = s390_hard_regno_nregs (GPR0_REGNUM, GET_MODE
+						     (dest));
+		  *total = COSTS_N_INSNS (nregs);
+		}
+	      else
+		/* Otherwise just cost the src.  */
+		*total += rtx_cost (src, mode, SET, 1, speed);
+	      return true;
+	    case MEM:
+		{
+		  rtx address = XEXP (dest, 0);
+		  rtx tmp;
+		  long tmp2;
+		  if (s390_loadrelative_operand_p (address, &tmp, &tmp2))
+		    *total = COSTS_N_INSNS (1);
+		  else
+		    *total = s390_address_cost (address, mode, 0, speed);
+		  return true;
+		}
+	    default:
+	      /* Not handled for now, assume default costs.  */
+	      *total = COSTS_N_INSNS (1);
+	      return false;
+	    }

-	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
 	  return false;
-
-	/* It is going to be a load/store on condition.  Make it
-	   slightly more expensive than a normal load.  */
-	*total = COSTS_N_INSNS (1) + 2;
-
-	rtx dst = SET_DEST (x);
-	rtx then = XEXP (SET_SRC (x), 1);
-	rtx els = XEXP (SET_SRC (x), 2);
-
-	/* It is a real IF-THEN-ELSE.  An additional move will be
-	   needed to implement that.  */
-	if (!TARGET_Z15
-	    && reload_completed
-	    && !rtx_equal_p (dst, then)
-	    && !rtx_equal_p (dst, els))
-	  *total += COSTS_N_INSNS (1) / 2;
-
-	/* A minor penalty for constants we cannot directly handle.  */
-	if ((CONST_INT_P (then) || CONST_INT_P (els))
-	    && (!TARGET_Z13 || MEM_P (dst)
-		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
-		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
-	  *total += COSTS_N_INSNS (1) / 2;
-
-	/* A store on condition can only handle register src operands.  */
-	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
-	  *total += COSTS_N_INSNS (1) / 2;
-
-	return true;
-      }
+	}
     case IOR:

       /* nnrk, nngrk */
diff --git
a/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c
b/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c
new file mode 100644
index 00000000000..17870f397ed
--- /dev/null
+++
b/gcc/testsuitgcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.ce/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -mzvector -march=z15 -fdump-rtl-subreg1" } */
+
+/* { dg-final { scan-rtl-dump-times "Skipping mode V2DI for copy
lowering" 2 "subreg1" } } */
+
+#include <vecintrin.h>
+
+#define STYPE long long
+#define VTYPE __attribute__((vector_size(16))) STYPE
+
+STYPE foo1 (VTYPE a)
+{
+  /* { dg-final { scan-assembler-not "vst\t.*" } } */
+  /* { dg-final { scan-assembler-not "lg\t.*" } } */
+  /* { dg-final { scan-assembler-not "lgr\t.*" } } */
+  return a[0] + a[1];
+}
-- 
2.23.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] s390: Change SET rtx_cost handling.
  2022-02-25 11:38 [PATCH] s390: Change SET rtx_cost handling Robin Dapp
@ 2022-02-25 17:50 ` Andreas Krebbel
  0 siblings, 0 replies; 2+ messages in thread
From: Andreas Krebbel @ 2022-02-25 17:50 UTC (permalink / raw)
  To: Robin Dapp, GCC Patches

On 2/25/22 12:38, Robin Dapp wrote:
> Hi,
> 
> the IF_THEN_ELSE detection currently prevents us from properly costing
> register-register moves which causes the lower-subreg pass to assume
> that a VR-VR move is as expensive as two GPR-GPR moves.
> 
> This patch adds handling for SETs containing REGs as well as MEMs and is
> inspired by the aarch64 implementation.
> 
> Bootstrapped and regtested on z900 up to z15. Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.cc (s390_address_cost): Declare.
>        	(s390_hard_regno_nregs): Declare.
>        	(s390_rtx_costs): Add handling for REG and MEM in SET.
> 
> gcc/testsuite/ChangeLog:
> 
>        	* gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
> test.

Ok. Thanks

Andreas

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-02-25 17:50 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-25 11:38 [PATCH] s390: Change SET rtx_cost handling Robin Dapp
2022-02-25 17:50 ` Andreas Krebbel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).