From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1895) id B510B384AB53; Fri, 26 Apr 2024 17:25:59 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B510B384AB53 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1714152359; bh=bqHvNbn7eKMISp7rcZW84TkrQPeZp5gKJ9hpXEunlZc=; h=From:To:Subject:Date:From; b=IAT9G3M04H2wS8HljYwBoLUBF1u9JwIE7Vsqb50AwfBt09qucLm6aJdJYD6F4Mxkx odHqzyzxb9DOWmpct6Ah06VzH9VsOhycBYDOjraW85pZcXu/H85ATx8TsNjNmBF17y heNLJgKpYTioYzzCXBT/9jZNCws6wpnvNGIjl9zw= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Wilco Dijkstra To: gcc-cvs@gcc.gnu.org Subject: [gcc r15-7] AArch64: Remove AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS X-Act-Checkin: gcc X-Git-Author: Wilco Dijkstra X-Git-Refname: refs/heads/master X-Git-Oldrev: 5716f8daf3f2abc54ececa61350fff0af2e7ce90 X-Git-Newrev: 768fbb56b3285b2a3cf067881e745e0f8caec215 Message-Id: <20240426172559.B510B384AB53@sourceware.org> Date: Fri, 26 Apr 2024 17:25:59 +0000 (GMT) List-Id: https://gcc.gnu.org/g:768fbb56b3285b2a3cf067881e745e0f8caec215 commit r15-7-g768fbb56b3285b2a3cf067881e745e0f8caec215 Author: Wilco Dijkstra Date: Fri Apr 26 15:09:31 2024 +0100 AArch64: Remove AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS Remove the tune AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS since it is only used by an old core and doesn't properly support -Os. SPECINT_2017 shows that removing it has no performance difference, while codesize is reduced by 0.07%. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_mode_valid_for_sched_fusion_p): Remove check for AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS. (aarch64_advsimd_ldp_stp_p): Likewise. (aarch64_stp_sequence_cost): Likewise. (aarch64_expand_cpymem): Likewise. (aarch64_expand_setmem): Likewise. * config/aarch64/aarch64-ldp-fusion.cc (ldp_operand_mode_ok_p): Likewise. * config/aarch64/aarch64-ldpstp.md: Likewise. * config/aarch64/aarch64-tuning-flags.def: Remove NO_LDP_STP_QREGS. * config/aarch64/tuning_models/emag.h: Likewise. * config/aarch64/tuning_models/xgene1.h: Likewise. gcc/testsuite: * gcc.target/aarch64/ldp_stp_q_disable.c: Remove test. Diff: --- gcc/config/aarch64/aarch64-ldp-fusion.cc | 8 ------- gcc/config/aarch64/aarch64-ldpstp.md | 8 ++----- gcc/config/aarch64/aarch64-tuning-flags.def | 3 --- gcc/config/aarch64/aarch64.cc | 22 ++++-------------- gcc/config/aarch64/tuning_models/emag.h | 2 +- gcc/config/aarch64/tuning_models/xgene1.h | 2 +- .../gcc.target/aarch64/ldp_stp_q_disable.c | 26 ---------------------- 7 files changed, 8 insertions(+), 63 deletions(-) diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc b/gcc/config/aarch64/aarch64-ldp-fusion.cc index 365dcf48b22..0bc225dae7b 100644 --- a/gcc/config/aarch64/aarch64-ldp-fusion.cc +++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc @@ -315,17 +315,9 @@ any_post_modify_p (rtx x) static bool ldp_operand_mode_ok_p (machine_mode mode) { - const bool allow_qregs - = !(aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS); - if (!aarch64_ldpstp_operand_mode_p (mode)) return false; - const auto size = GET_MODE_SIZE (mode).to_constant (); - if (size == 16 && !allow_qregs) - return false; - // We don't pair up TImode accesses before RA because TImode is // special in that it can be allocated to a pair of GPRs or a single // FPR, and the RA is best placed to make that decision. diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md index b7c0bf05cd1..7890a8cc32b 100644 --- a/gcc/config/aarch64/aarch64-ldpstp.md +++ b/gcc/config/aarch64/aarch64-ldpstp.md @@ -96,9 +96,7 @@ (set (match_operand:VQ2 2 "register_operand" "") (match_operand:VQ2 3 "memory_operand" ""))] "TARGET_FLOAT - && aarch64_operands_ok_for_ldpstp (operands, true) - && (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" + && aarch64_operands_ok_for_ldpstp (operands, true)" [(const_int 0)] { aarch64_finish_ldpstp_peephole (operands, true); @@ -111,9 +109,7 @@ (set (match_operand:VQ2 2 "memory_operand" "") (match_operand:VQ2 3 "register_operand" ""))] "TARGET_FLOAT - && aarch64_operands_ok_for_ldpstp (operands, false) - && (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" + && aarch64_operands_ok_for_ldpstp (operands, false)" [(const_int 0)] { aarch64_finish_ldpstp_peephole (operands, false); diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index d917da720b2..d5bcaebce77 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -36,9 +36,6 @@ AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS) are not considered cheap. */ AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND) -/* Disallow load/store pair instructions on Q-registers. */ -AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS) - AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS) AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1beec94629d..bf5fb129b45 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -10400,9 +10400,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode) || mode == SDmode || mode == DDmode || (aarch64_vector_mode_supported_p (mode) && (known_eq (GET_MODE_SIZE (mode), 8) - || (known_eq (GET_MODE_SIZE (mode), 16) - && (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0))); + || known_eq (GET_MODE_SIZE (mode), 16))); } /* Return true if REGNO is a virtual pointer register, or an eliminable @@ -16519,10 +16517,6 @@ aarch64_advsimd_ldp_stp_p (enum vect_cost_for_stmt kind, return false; } - if (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) - return false; - return is_gimple_assign (stmt_info->stmt); } @@ -17170,9 +17164,6 @@ aarch64_stp_sequence_cost (unsigned int count, vect_cost_for_stmt kind, /* Count 1 insn per vector if we can't form STP Q pairs. */ if (aarch64_sve_mode_p (TYPE_MODE (vectype))) return count * 2; - if (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) - return count * 2; if (stmt_info) { @@ -26625,11 +26616,9 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) return aarch64_expand_cpymem_mops (operands, is_memmove); unsigned HOST_WIDE_INT size = UINTVAL (operands[2]); - bool use_ldpq = TARGET_SIMD && !(aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS); /* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */ - unsigned max_copy_size = use_ldpq ? 256 : 128; + unsigned max_copy_size = TARGET_SIMD ? 256 : 128; unsigned mops_threshold = is_memmove ? aarch64_mops_memmove_size_threshold : aarch64_mops_memcpy_size_threshold; @@ -26834,11 +26823,8 @@ aarch64_expand_setmem (rtx *operands) /* Convert len to bits to make the rest of the code simpler. */ n = len * BITS_PER_UNIT; - /* Maximum amount to copy in one go. We allow 256-bit chunks based on the - AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter. */ - const int copy_limit = (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) - ? GET_MODE_BITSIZE (TImode) : 256; + /* Maximum amount to copy in one go. We allow 256-bit chunks. */ + const int copy_limit = 256; while (n > 0) { diff --git a/gcc/config/aarch64/tuning_models/emag.h b/gcc/config/aarch64/tuning_models/emag.h index cbaf8853ec4..b6a9c9e2eb1 100644 --- a/gcc/config/aarch64/tuning_models/emag.h +++ b/gcc/config/aarch64/tuning_models/emag.h @@ -51,7 +51,7 @@ static const struct tune_params emag_tunings = 2, /* min_div_recip_mul_df. */ 17, /* max_case_values. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ &xgene1_prefetch_tune, AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ diff --git a/gcc/config/aarch64/tuning_models/xgene1.h b/gcc/config/aarch64/tuning_models/xgene1.h index 3301f025260..432793eba9c 100644 --- a/gcc/config/aarch64/tuning_models/xgene1.h +++ b/gcc/config/aarch64/tuning_models/xgene1.h @@ -136,7 +136,7 @@ static const struct tune_params xgene1_tunings = 2, /* min_div_recip_mul_df. */ 17, /* max_case_values. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ &xgene1_prefetch_tune, AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_q_disable.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_q_disable.c deleted file mode 100644 index 38c1870c47c..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_q_disable.c +++ /dev/null @@ -1,26 +0,0 @@ -/* { dg-options "-O2 -moverride=tune=no_ldp_stp_qregs" } */ - -typedef float float32x4_t __attribute__ ((__vector_size__ ((16)))); - -float32x4_t arr[4][4]; - -void -foo (float32x4_t x, float32x4_t y) -{ - arr[0][1] = x; - arr[1][0] = y; - arr[2][0] = x; - arr[1][1] = y; - arr[0][2] = x; - arr[0][3] = y; - arr[1][2] = x; - arr[2][1] = y; - arr[3][0] = x; - arr[3][1] = y; - arr[2][2] = x; - arr[1][3] = y; - arr[2][3] = x; - arr[3][2] = y; -} - -/* { dg-final { scan-assembler-not "stp\tq\[0-9\]+, q\[0-9\]" } } */