From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7928) id C32DC385696F; Mon, 16 Oct 2023 09:19:00 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org C32DC385696F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1697447940; bh=aJCXpyj/jOGzzOxZv/eZojUKs0lSXX8MQJI0d5ikTmI=; h=From:To:Subject:Date:From; b=WZBeQs7k+Ox3o3cvJ8+JiDtprRM1M11MZ0KDUQn6r0hbtfm7wxT3nKZmbKXIoSmog bXfjmLe5bGIhGCj61/fK/8pr1i2sEc6GCfVDh9Gc/E0fHJGS6+V2d208NAII8PfNGR +s9PaLjapQkmJm/WmEHejeCVoal6QbSxEYotIUwI= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Lehua Ding To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-4656] RISC-V: Use VLS modes if the NITERS is known and smaller than VLS mode elements. X-Act-Checkin: gcc X-Git-Author: Juzhe-Zhong X-Git-Refname: refs/heads/trunk X-Git-Oldrev: b7a28c0904fa67f98d7ca7e9d828fc5fc58c7078 X-Git-Newrev: a5fe9f063de64aa2755c9ae9e90ed1ff64ee0f47 Message-Id: <20231016091900.C32DC385696F@sourceware.org> Date: Mon, 16 Oct 2023 09:19:00 +0000 (GMT) List-Id: https://gcc.gnu.org/g:a5fe9f063de64aa2755c9ae9e90ed1ff64ee0f47 commit r14-4656-ga5fe9f063de64aa2755c9ae9e90ed1ff64ee0f47 Author: Juzhe-Zhong Date: Mon Oct 16 16:25:04 2023 +0800 RISC-V: Use VLS modes if the NITERS is known and smaller than VLS mode elements. void foo8 (int64_t *restrict a) { for (int i = 0; i < 16; ++i) a[i] = a[i]-16; } We use VLS modes instead of VLA modes even if it is specified by dynamic LMUL. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (costs::preferred_new_lmul_p): Use VLS modes. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c: New test. 
Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 13 +++-- .../vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c | 64 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 878f72c85c57..0b8903965354 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -446,10 +446,6 @@ costs::preferred_new_lmul_p (const vector_costs *uncast_other) const auto other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo); class loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo); - if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (this_loop_vinfo) - && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (other_loop_vinfo)) - return false; - if (loop_autovec_infos.get (loop) && loop_autovec_infos.get (loop)->end_p) return false; else if (loop_autovec_infos.get (loop)) @@ -483,6 +479,15 @@ costs::preferred_new_lmul_p (const vector_costs *uncast_other) const machine_mode biggest_mode = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb); + /* If we can use simple VLS modes to handle NITERS element. + We don't need to use VLA modes with partial vector auto-vectorization. */ + if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo) + && known_le (tree_to_poly_int64 (LOOP_VINFO_NITERS (this_loop_vinfo)) + * GET_MODE_SIZE (biggest_mode).to_constant (), + (int) RVV_M8 * BYTES_PER_RISCV_VECTOR) + && pow2p_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo))) + return vector_costs::better_main_loop_than_p (other); + /* Update live ranges according to PHI. 
*/ update_local_live_ranges (other->m_vinfo, program_points_per_bb, live_ranges_per_bb); diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c new file mode 100644 index 000000000000..7ede148396fe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -fdump-tree-vect-details" } */ + +#include <stdint-gcc.h> + +void +foo (int8_t *restrict a) +{ + for (int i = 0; i < 4096; ++i) + a[i] = a[i]-16; +} + +void +foo2 (int16_t *restrict a) +{ + for (int i = 0; i < 2048; ++i) + a[i] = a[i]-16; +} + +void +foo3 (int32_t *restrict a) +{ + for (int i = 0; i < 1024; ++i) + a[i] = a[i]-16; +} + +void +foo4 (int64_t *restrict a) +{ + for (int i = 0; i < 512; ++i) + a[i] = a[i]-16; +} + +void +foo5 (int8_t *restrict a) +{ + for (int i = 0; i < 16; ++i) + a[i] = a[i]-16; +} + +void +foo6 (int16_t *restrict a) +{ + for (int i = 0; i < 16; ++i) + a[i] = a[i]-16; +} + +void +foo7 (int32_t *restrict a) +{ + for (int i = 0; i < 16; ++i) + a[i] = a[i]-16; +} + +void +foo8 (int64_t *restrict a) +{ + for (int i = 0; i < 16; ++i) + a[i] = a[i]-16; +} + +/* { dg-final { scan-tree-dump-not "Maximum lmul" "vect" } } */ +/* { dg-final { scan-assembler-times {vsetvli} 4 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 4 } } */