From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2017) id 4546A3857029; Mon, 29 Aug 2022 09:18:15 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4546A3857029 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1661764695; bh=kjBsCbpD3GDJh8hbmsTnbXzh4Mc6/0L80KCV9hFpFrI=; h=From:To:Subject:Date:From; b=u36JbK5MGkT2Le2N72dVAsTUz6AfT+wjrjgqvFlfN+bTi/RcfZWeYqVHOmmgw82M9 HpLYNU/h+Pzy4LMGU076XEyMB3kRJLYXaMGKPwz/OrY+Bqyl4k2KoCj7JKlk2gTdJ0 fIREOe6Gay+cKUaL7/BOQeIyT2ZX/fAAbmFcDlcw= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Robin Dapp To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-2251] s390: Change SET rtx_cost handling. X-Act-Checkin: gcc X-Git-Author: Robin Dapp X-Git-Refname: refs/heads/master X-Git-Oldrev: 34895b6606561a36ffded81efee2904feb9c1bfd X-Git-Newrev: 1930c5d05ceff2173a32f23e5dbd0738460f521e Message-Id: <20220829091815.4546A3857029@sourceware.org> Date: Mon, 29 Aug 2022 09:18:15 +0000 (GMT) List-Id: https://gcc.gnu.org/g:1930c5d05ceff2173a32f23e5dbd0738460f521e commit r13-2251-g1930c5d05ceff2173a32f23e5dbd0738460f521e Author: Robin Dapp Date: Thu Feb 3 12:50:04 2022 +0100 s390: Change SET rtx_cost handling. The IF_THEN_ELSE detection currently prevents us from properly costing register-register moves which causes the lower-subreg pass to assume that a VR-VR move is as expensive as two GPR-GPR moves. This patch adds handling for SETs containing REGs as well as MEMs and is inspired by the aarch64 implementation. gcc/ChangeLog: * config/s390/s390.cc (s390_address_cost): Declare. (s390_hard_regno_nregs): Declare. (s390_rtx_costs): Add handling for REG and MEM in SET. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New test. Diff: --- gcc/config/s390/s390.cc | 130 +++++++++++++++------ .../s390/vector/vec-sum-across-no-lower-subreg-1.c | 18 +++ 2 files changed, 111 insertions(+), 37 deletions(-) diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index a10e9a7b957..076c97a8b22 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -430,6 +430,14 @@ struct s390_address bytes on a z10 (or higher) CPU. */ #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048) +static int +s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED); + +static unsigned int +s390_hard_regno_nregs (unsigned int regno, machine_mode mode); + /* Masks per jump target register indicating which thunk need to be generated. */ static GTY(()) int indirect_branch_prez10thunk_mask = 0; @@ -3639,49 +3647,97 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, case MEM: *total = 0; return true; + case SET: { + rtx dest = SET_DEST (x); + rtx src = SET_SRC (x); - case SET: - { - /* Without this a conditional move instruction would be - accounted as 3 * COSTS_N_INSNS (set, if_then_else, - comparison operator). That's a bit pessimistic. */ + switch (GET_CODE (src)) + { + case IF_THEN_ELSE: { + /* Without this a conditional move instruction would be + accounted as 3 * COSTS_N_INSNS (set, if_then_else, + comparison operator). That's a bit pessimistic. */ - if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE) - return false; + if (!TARGET_Z196) + return false; - rtx cond = XEXP (SET_SRC (x), 0); + rtx cond = XEXP (src, 0); + if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1))) + return false; - if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1))) - return false; + /* It is going to be a load/store on condition. Make it + slightly more expensive than a normal load. */ + *total = COSTS_N_INSNS (1) + 2; - /* It is going to be a load/store on condition. Make it - slightly more expensive than a normal load. */ - *total = COSTS_N_INSNS (1) + 2; - - rtx dst = SET_DEST (x); - rtx then = XEXP (SET_SRC (x), 1); - rtx els = XEXP (SET_SRC (x), 2); - - /* It is a real IF-THEN-ELSE. An additional move will be - needed to implement that. */ - if (!TARGET_Z15 - && reload_completed - && !rtx_equal_p (dst, then) - && !rtx_equal_p (dst, els)) - *total += COSTS_N_INSNS (1) / 2; - - /* A minor penalty for constants we cannot directly handle. */ - if ((CONST_INT_P (then) || CONST_INT_P (els)) - && (!TARGET_Z13 || MEM_P (dst) - || (CONST_INT_P (then) && !satisfies_constraint_K (then)) - || (CONST_INT_P (els) && !satisfies_constraint_K (els)))) - *total += COSTS_N_INSNS (1) / 2; - - /* A store on condition can only handle register src operands. */ - if (MEM_P (dst) && (!REG_P (then) || !REG_P (els))) - *total += COSTS_N_INSNS (1) / 2; + rtx dst = SET_DEST (src); + rtx then = XEXP (src, 1); + rtx els = XEXP (src, 2); - return true; + /* It is a real IF-THEN-ELSE. An additional move will be + needed to implement that. */ + if (!TARGET_Z15 && reload_completed && !rtx_equal_p (dst, then) + && !rtx_equal_p (dst, els)) + *total += COSTS_N_INSNS (1) / 2; + + /* A minor penalty for constants we cannot directly handle. */ + if ((CONST_INT_P (then) || CONST_INT_P (els)) + && (!TARGET_Z13 || MEM_P (dst) + || (CONST_INT_P (then) && !satisfies_constraint_K (then)) + || (CONST_INT_P (els) && !satisfies_constraint_K (els)))) + *total += COSTS_N_INSNS (1) / 2; + + /* A store on condition can only handle register src operands. */ + if (MEM_P (dst) && (!REG_P (then) || !REG_P (els))) + *total += COSTS_N_INSNS (1) / 2; + + return true; + } + default: + break; + } + + switch (GET_CODE (dest)) + { + case SUBREG: + if (!REG_P (SUBREG_REG (dest))) + *total += rtx_cost (SUBREG_REG (src), VOIDmode, SET, 0, speed); + /* fallthrough */ + case REG: + /* If this is a VR -> VR copy, count the number of + registers. */ + if (VECTOR_MODE_P (GET_MODE (dest)) && REG_P (src)) + { + int nregs = s390_hard_regno_nregs (VR0_REGNUM, GET_MODE (dest)); + *total = COSTS_N_INSNS (nregs); + } + /* Same for GPRs. */ + else if (REG_P (src)) + { + int nregs + = s390_hard_regno_nregs (GPR0_REGNUM, GET_MODE (dest)); + *total = COSTS_N_INSNS (nregs); + } + else + /* Otherwise just cost the src. */ + *total += rtx_cost (src, mode, SET, 1, speed); + return true; + case MEM: { + rtx address = XEXP (dest, 0); + rtx tmp; + long tmp2; + if (s390_loadrelative_operand_p (address, &tmp, &tmp2)) + *total = COSTS_N_INSNS (1); + else + *total = s390_address_cost (address, mode, 0, speed); + return true; + } + default: + /* Not handled for now, assume default costs. */ + *total = COSTS_N_INSNS (1); + return false; + } + + return false; } case IOR: diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c new file mode 100644 index 00000000000..8d63e8c8595 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { s390*-*-* } } } */ +/* { dg-options "-O3 -mzarch -mzvector -march=z15 -fdump-rtl-subreg1" } */ + +/* { dg-final { scan-rtl-dump-times "Skipping mode V2DI for copy lowering" 2 "subreg1" } } */ + +#include + +#define STYPE long long +#define VTYPE __attribute__ ((vector_size (16))) STYPE + +STYPE +foo1 (VTYPE a) +{ + /* { dg-final { scan-assembler-not "vst\t.*" } } */ + /* { dg-final { scan-assembler-not "lg\t.*" } } */ + /* { dg-final { scan-assembler-not "lgr\t.*" } } */ + return a[0] + a[1]; +}