From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7924) id 945C63858D38; Mon, 31 Jul 2023 02:49:16 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 945C63858D38 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1690771756; bh=rv9uXTqPIihwz0UAOjrkVfDHLjaytCVhxp37aTdeM6w=; h=From:To:Subject:Date:From; b=YjeZJUMlwyevkB5n1j6mfh+mC+qtvcFTjNJ08P4Zd2nHs9vzDSCax+jDsM+Y1ffXW 5UIKY0Mvo1ROG7OSr3xtOdObXwlAmMp+pPLG+zZdwAaAB0RbkhBgcfEGaQgLwKXY2T /QMEGBS5UjCqGEP/8/wDGnWJPEv/nndvVG5o32Mc= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Pan Li To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-2871] RISC-V: Enable basic VLS auto-vectorization X-Act-Checkin: gcc X-Git-Author: Juzhe-Zhong X-Git-Refname: refs/heads/master X-Git-Oldrev: bf36656a14aa691ca674d27f26aba9de420041d7 X-Git-Newrev: 92a891e869d35c940d2a7b92355af02d78c9a86e Message-Id: <20230731024916.945C63858D38@sourceware.org> Date: Mon, 31 Jul 2023 02:49:16 +0000 (GMT) List-Id: https://gcc.gnu.org/g:92a891e869d35c940d2a7b92355af02d78c9a86e commit r14-2871-g92a891e869d35c940d2a7b92355af02d78c9a86e Author: Juzhe-Zhong Date: Mon Jul 31 10:13:57 2023 +0800 RISC-V: Enable basic VLS auto-vectorization Consider this following case: void foo (int8_t *in, int8_t *out, int8_t x) { for (int i = 0; i < 16; i++) in[i] = x; } Compile option: --param=riscv-autovec-preference=scalable -fno-builtin Before this patch: foo: li a5,16 csrr a4,vlenb vsetvli a3,zero,e8,m1,ta,ma vmv.v.x v1,a2 bleu a5,a4,.L2 mv a5,a4 .L2: vsetvli zero,a5,e8,m1,ta,ma vse8.v v1,0(a0) ret After this patch: foo: vsetivli zero,16,e8,mf8,ta,ma vmv.v.x v1,a2 vse8.v v1,0(a0) ret gcc/ChangeLog: * config/riscv/autovec-vls.md (@vec_duplicate): New pattern. * config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec support. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test. * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto. * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto. * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test. Diff: --- gcc/config/riscv/autovec-vls.md | 19 +++ gcc/config/riscv/riscv-v.cc | 21 ++- gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c | 2 +- .../gcc.target/riscv/rvv/autovec/vls/dup-1.c | 168 +++++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-2.c | 153 +++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-3.c | 153 +++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-4.c | 137 +++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-5.c | 137 +++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-6.c | 122 +++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-7.c | 122 +++++++++++++++ .../riscv/rvv/autovec/zve32f_zvl128b-1.c | 2 +- .../riscv/rvv/autovec/zve64d_zvl128b-1.c | 2 +- .../riscv/rvv/autovec/zve64f_zvl128b-1.c | 2 +- 13 files changed, 1034 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md index 9ece317ca4e..1a64dfdd91e 100644 --- a/gcc/config/riscv/autovec-vls.md +++ b/gcc/config/riscv/autovec-vls.md @@ -139,3 +139,22 @@ "vmv%m1r.v\t%0,%1" [(set_attr "type" "vmov") (set_attr "mode" "")]) + +;; ----------------------------------------------------------------- +;; ---- Duplicate Operations +;; ----------------------------------------------------------------- + +(define_insn_and_split "@vec_duplicate" + [(set (match_operand:VLS 0 "register_operand") + (vec_duplicate:VLS + (match_operand: 1 "reg_or_int_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (mode), + riscv_vector::RVV_UNOP, operands); + DONE; + } +) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4cfbcf79801..0a355eb3c7a 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2475,7 +2475,6 @@ autovectorize_vector_modes (vector_modes *modes, bool) { if (autovec_use_vlmax_p ()) { - /* TODO: We will support RVV VLS auto-vectorization mode in the future. */ poly_uint64 full_size = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul); @@ -2503,7 +2502,25 @@ autovectorize_vector_modes (vector_modes *modes, bool) modes->safe_push (mode); } } - return 0; + unsigned int flag = 0; + if (TARGET_VECTOR_VLS) + { + /* Enable VECT_COMPARE_COSTS between VLA modes VLS modes for scalable + auto-vectorization. */ + flag |= VECT_COMPARE_COSTS; + /* Push all VLSmodes according to TARGET_MIN_VLEN. */ + unsigned int i = 0; + unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8; + unsigned int size = base_size; + machine_mode mode; + while (size > 0 && get_vector_mode (QImode, size).exists (&mode)) + { + modes->safe_push (mode); + i++; + size = base_size / (1U << i); + } + } + return flag; } /* If the given VECTOR_MODE is an RVV mode, first get the largest number diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c index e68d05f5f48..ebbe5e210c5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c new file mode 100644 index 00000000000..1f520f2b0a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c @@ -0,0 +1,168 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} + +/* +** foo11: +** li\s+[a-x0-9]+,4096 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo11 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 4096; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c new file mode 100644 index 00000000000..1a930d059c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c new file mode 100644 index 00000000000..46fb5a525a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c new file mode 100644 index 00000000000..7e46dc42526 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c @@ -0,0 +1,137 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c new file mode 100644 index 00000000000..9b9327bdd4d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c @@ -0,0 +1,137 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (float *in, float *out, float x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (float *in, float *out, float x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (float *in, float *out, float x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (float *in, float *out, float x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (float *in, float *out, float x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (float *in, float *out, float x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (float *in, float *out, float x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (float *in, float *out, float x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (float *in, float *out, float x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c new file mode 100644 index 00000000000..52d5a65b44e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c new file mode 100644 index 00000000000..39f27ece2e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (double *in, double *out, double x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (double *in, double *out, double x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (double *in, double *out, double x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (double *in, double *out, double x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (double *in, double *out, double x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (double *in, double *out, double x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (double *in, double *out, double x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (double *in, double *out, double x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c index ecfda79e19a..345e2f963d5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c index 6b320ca6f38..e13c27dcdb0 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c index ae3f066477c..e767629ae54 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */