public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Add xfail test case for wv insn highest overlap
@ 2024-04-20 10:09 pan2.li
  2024-04-20 11:46 ` Robin Dapp
  0 siblings, 1 reply; 3+ messages in thread
From: pan2.li @ 2024-04-20 10:09 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, kito.cheng, rdapp.gcc, Pan Li

From: Pan Li <pan2.li@intel.com>

We reverted the patch below for wv insn overlap, so add the related wv
insn tests and mark them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

7e854b58084 RISC-V: Support highest overlap for wv instructions

The test suites below passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Xfail csr check.
	* gcc.target/riscv/rvv/base/pr112431-39.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-40.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-41.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 .../costmodel/riscv/rvv/dynamic-lmul8-11.c    |   2 +-
 .../gcc.target/riscv/rvv/base/pr112431-39.c   | 158 ++++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-40.c   |  94 +++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-41.c   |  62 +++++++
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
index c9e28251225..5a39f04b140 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
@@ -40,7 +40,7 @@ void foo2 (int64_t *__restrict a,
 }
 
 /* { dg-final { scan-assembler {e64,m8} } } */
-/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
new file mode 100644
index 00000000000..770b5411666
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
@@ -0,0 +1,158 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v0 = __riscv_vwsub_wv_i16m2_tu (v0, v0, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v1 = __riscv_vwsub_wv_i16m2_tu (v1, v1, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v2 = __riscv_vwsub_wv_i16m2_tu (v2, v2, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v3 = __riscv_vwsub_wv_i16m2_tu (v3, v3, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v4 = __riscv_vwsub_wv_i16m2_tu (v4, v4, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v5 = __riscv_vwsub_wv_i16m2_tu (v5, v5, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v6 = __riscv_vwsub_wv_i16m2_tu (v6, v6, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v7 = __riscv_vwsub_wv_i16m2_tu (v7, v7, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v8 = __riscv_vwsub_wv_i16m2_tu (v8, v8, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v9 = __riscv_vwsub_wv_i16m2_tu (v9, v9, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v10 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v10 = __riscv_vwsub_wv_i16m2_tu (v10, v10, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v10, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v11 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v11 = __riscv_vwsub_wv_i16m2_tu (v11, v11, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v11, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v12 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v12 = __riscv_vwsub_wv_i16m2_tu (v12, v12, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v12, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v13 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v13 = __riscv_vwsub_wv_i16m2_tu (v13, v13, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v13, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v14 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v14 = __riscv_vwsub_wv_i16m2_tu (v14, v14, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v14, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint8m1_t v15_n = __riscv_vle8_v_i8m1 (in, 4);in+=100;
+      vint16m2_t v15 = __riscv_vwcvt_x_x_v_i16m2 (v15_n, 4);
+
+      asm volatile("nop" ::: "memory");
+      __riscv_vsse16_v_i16m2 (out, 4, v0, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v1, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v2, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v3, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v4, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v5, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v6, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v7, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v8, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v9, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v10, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v11, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v12, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v13, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v14, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v15, 4);out+=100;
+    }
+}
+
+void
+foo2 (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v0 = __riscv_vwadd_wv_i16m2_tu (v0, v0, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v1 = __riscv_vwadd_wv_i16m2_tu (v1, v1, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v2 = __riscv_vwadd_wv_i16m2_tu (v2, v2, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v3 = __riscv_vwadd_wv_i16m2_tu (v3, v3, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v4 = __riscv_vwadd_wv_i16m2_tu (v4, v4, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v5 = __riscv_vwadd_wv_i16m2_tu (v5, v5, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v6 = __riscv_vwadd_wv_i16m2_tu (v6, v6, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v7 = __riscv_vwadd_wv_i16m2_tu (v7, v7, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v8 = __riscv_vwadd_wv_i16m2_tu (v8, v8, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v9 = __riscv_vwadd_wv_i16m2_tu (v9, v9, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v10 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v10 = __riscv_vwadd_wv_i16m2_tu (v10, v10, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v10, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v11 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v11 = __riscv_vwadd_wv_i16m2_tu (v11, v11, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v11, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v12 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v12 = __riscv_vwadd_wv_i16m2_tu (v12, v12, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v12, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v13 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v13 = __riscv_vwadd_wv_i16m2_tu (v13, v13, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v13, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m2_t v14 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+      v14 = __riscv_vwadd_wv_i16m2_tu (v14, v14, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v14, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint8m1_t v15_n = __riscv_vle8_v_i8m1 (in, 4);in+=100;
+      vint16m2_t v15 = __riscv_vwcvt_x_x_v_i16m2 (v15_n, 4);
+
+      asm volatile("nop" ::: "memory");
+      __riscv_vsse16_v_i16m2 (out, 4, v0, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v1, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v2, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v3, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v4, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v5, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v6, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v7, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v8, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v9, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v10, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v11, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v12, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v13, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v14, 4);out+=100;
+      __riscv_vsse16_v_i16m2 (out, 4, v15, 4);out+=100;
+    }
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c
new file mode 100644
index 00000000000..f044a504fc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v0 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v0 = __riscv_vwsub_wv_i16m4_tu (v0, v0, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v0, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v1 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v1 = __riscv_vwsub_wv_i16m4_tu (v1, v1, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v1, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v2 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v2 = __riscv_vwsub_wv_i16m4_tu (v2, v2, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v2, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v3 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v3 = __riscv_vwsub_wv_i16m4_tu (v3, v3, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v3, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v4 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v4 = __riscv_vwsub_wv_i16m4_tu (v4, v4, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v4, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v5 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v5 = __riscv_vwsub_wv_i16m4_tu (v5, v5, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v5, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v6 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v6 = __riscv_vwsub_wv_i16m4_tu (v6, v6, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v6, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint8m2_t v7_n = __riscv_vle8_v_i8m2 (in, 4);in+=100;
+      vint16m4_t v7 = __riscv_vwcvt_x_x_v_i16m4 (v7_n, 4);
+
+      asm volatile("nop" ::: "memory");
+      __riscv_vsse16_v_i16m4 (out, 4, v0, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v1, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v2, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v3, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v4, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v5, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v6, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v7, 4);out+=100;
+    }
+}
+
+void
+foo2 (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v0 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v0 = __riscv_vwadd_wv_i16m4_tu (v0, v0, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v0, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v1 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v1 = __riscv_vwadd_wv_i16m4_tu (v1, v1, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v1, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v2 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v2 = __riscv_vwadd_wv_i16m4_tu (v2, v2, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v2, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v3 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v3 = __riscv_vwadd_wv_i16m4_tu (v3, v3, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v3, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v4 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v4 = __riscv_vwadd_wv_i16m4_tu (v4, v4, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v4, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v5 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v5 = __riscv_vwadd_wv_i16m4_tu (v5, v5, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v5, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m4_t v6 = __riscv_vle16_v_i16m4 (in, 4);in+=100;
+      v6 = __riscv_vwadd_wv_i16m4_tu (v6, v6, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v6, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint8m2_t v7_n = __riscv_vle8_v_i8m2 (in, 4);in+=100;
+      vint16m4_t v7 = __riscv_vwcvt_x_x_v_i16m4 (v7_n, 4);
+
+      asm volatile("nop" ::: "memory");
+      __riscv_vsse16_v_i16m4 (out, 4, v0, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v1, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v2, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v3, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v4, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v5, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v6, 4);out+=100;
+      __riscv_vsse16_v_i16m4 (out, 4, v7, 4);out+=100;
+    }
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c
new file mode 100644
index 00000000000..6bdcac82ea8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      asm volatile("nop" ::: "memory");
+      vint16m8_t v0 = __riscv_vle16_v_i16m8 (in, 4);in+=100;
+      v0 = __riscv_vwsub_wv_i16m8_tu (v0, v0, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v0, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m8_t v1 = __riscv_vle16_v_i16m8 (in, 4);in+=100;
+      v1 = __riscv_vwsub_wv_i16m8_tu (v1, v1, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v1, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m8_t v2 = __riscv_vle16_v_i16m8 (in, 4);in+=100;
+      v2 = __riscv_vwsub_wv_i16m8_tu (v2, v2, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v2, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint8m4_t v3_n = __riscv_vle8_v_i8m4 (in, 4);in+=100;
+      vint16m8_t v3 = __riscv_vwcvt_x_x_v_i16m8 (v3_n, 4);
+
+      asm volatile("nop" ::: "memory");
+      __riscv_vsse16_v_i16m8 (out, 4, v0, 4);out+=100;
+      __riscv_vsse16_v_i16m8 (out, 4, v1, 4);out+=100;
+      __riscv_vsse16_v_i16m8 (out, 4, v2, 4);out+=100;
+      __riscv_vsse16_v_i16m8 (out, 4, v3, 4);out+=100;
+    }
+}
+
+void
+foo2 (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      asm volatile("nop" ::: "memory");
+      vint16m8_t v0 = __riscv_vle16_v_i16m8 (in, 4);in+=100;
+      v0 = __riscv_vwadd_wv_i16m8_tu (v0, v0, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v0, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m8_t v1 = __riscv_vle16_v_i16m8 (in, 4);in+=100;
+      v1 = __riscv_vwadd_wv_i16m8_tu (v1, v1, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v1, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint16m8_t v2 = __riscv_vle16_v_i16m8 (in, 4);in+=100;
+      v2 = __riscv_vwadd_wv_i16m8_tu (v2, v2, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v2, 1)), 4);
+      asm volatile("nop" ::: "memory");
+      vint8m4_t v3_n = __riscv_vle8_v_i8m4 (in, 4);in+=100;
+      vint16m8_t v3 = __riscv_vwcvt_x_x_v_i16m8 (v3_n, 4);
+
+      asm volatile("nop" ::: "memory");
+      __riscv_vsse16_v_i16m8 (out, 4, v0, 4);out+=100;
+      __riscv_vsse16_v_i16m8 (out, 4, v1, 4);out+=100;
+      __riscv_vsse16_v_i16m8 (out, 4, v2, 4);out+=100;
+      __riscv_vsse16_v_i16m8 (out, 4, v3, 4);out+=100;
+    }
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] RISC-V: Add xfail test case for wv insn highest overlap
  2024-04-20 10:09 [PATCH] RISC-V: Add xfail test case for wv insn highest overlap pan2.li
@ 2024-04-20 11:46 ` Robin Dapp
  2024-04-20 12:05   ` Li, Pan2
  0 siblings, 1 reply; 3+ messages in thread
From: Robin Dapp @ 2024-04-20 11:46 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: rdapp.gcc, juzhe.zhong, kito.cheng

LGTM.

Regards
 Robin


^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH] RISC-V: Add xfail test case for wv insn highest overlap
  2024-04-20 11:46 ` Robin Dapp
@ 2024-04-20 12:05   ` Li, Pan2
  0 siblings, 0 replies; 3+ messages in thread
From: Li, Pan2 @ 2024-04-20 12:05 UTC (permalink / raw)
  To: Robin Dapp, gcc-patches; +Cc: juzhe.zhong, kito.cheng

Committed, thanks Robin.

Pan

-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com> 
Sent: Saturday, April 20, 2024 7:46 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: rdapp.gcc@gmail.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com
Subject: Re: [PATCH] RISC-V: Add xfail test case for wv insn highest overlap

LGTM.

Regards
 Robin


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-04-20 12:05 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-20 10:09 [PATCH] RISC-V: Add xfail test case for wv insn highest overlap pan2.li
2024-04-20 11:46 ` Robin Dapp
2024-04-20 12:05   ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).