public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: juzhe.zhong@rivai.ai
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com, palmer@dabbelt.com,
	Juzhe-Zhong <juzhe.zhong@rivai.ai>
Subject: [GCC14 QUEUE PATCH] RISC-V: Fix RVV register order
Date: Fri, 24 Mar 2023 14:42:22 +0800	[thread overview]
Message-ID: <20230324064222.205360-1-juzhe.zhong@rivai.ai> (raw)

From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch fixes the issue of incorrect reigster order of RVV.
The new register order is coming from kito original RVV GCC implementation.

Consider this case:
void f (void *base,void *base2,void *out,size_t vl, int n)
{
    vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
    for (int i = 0; i < n; i++){
      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
      vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl);
      vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl);
      vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl);
      vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl);
      __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
      __riscv_vse8_v_i8m1 (out + 222*i,v4,vl);
    }
}

Before this patch:
f:
        csrr    t0,vlenb
        slli    t1,t0,3
        sub     sp,sp,t1
        addi    a5,a0,100
        vsetvli zero,a3,e64,m8,ta,ma
        vle64.v v24,0(a5)
        vs8r.v  v24,0(sp)
        ble     a4,zero,.L1
        mv      a6,a0
        add     a4,a4,a0
        mv      a5,a2
.L3:
        vsetvli zero,zero,e64,m8,ta,ma
        vl8re64.v       v24,0(sp)
        vlm.v   v0,0(a6)
        vluxei64.v      v24,(a0),v24,v0.t
        addi    a6,a6,1
        vsetvli zero,zero,e8,m1,tu,ma
        vmv8r.v v16,v24
        vluxei64.v      v8,(a0),v24,v0.t
        vle64.v v16,0(a1)
        vluxei64.v      v24,(a0),v16,v0.t
        vse8.v  v8,0(a2)
        vse8.v  v24,0(a5)
        addi    a1,a1,1
        addi    a2,a2,100
        addi    a5,a5,222
        bne     a4,a6,.L3
.L1:
        csrr    t0,vlenb
        slli    t1,t0,3
        add     sp,sp,t1
        jr      ra

After this patch:
f:
        addi    a5,a0,100
        vsetvli zero,a3,e64,m8,ta,ma
        vle64.v v24,0(a5)
        ble     a4,zero,.L1
        mv      a6,a0
        add     a4,a4,a0
        mv      a5,a2
.L3:
        vsetvli zero,zero,e64,m8,ta,ma
        vlm.v   v0,0(a6)
        addi    a6,a6,1
        vluxei64.v      v8,(a0),v24,v0.t
        vsetvli zero,zero,e8,m1,tu,ma
        vmv8r.v v16,v8
        vluxei64.v      v2,(a0),v8,v0.t
        vle64.v v16,0(a1)
        vluxei64.v      v1,(a0),v16,v0.t
        vse8.v  v2,0(a2)
        vse8.v  v1,0(a5)
        addi    a1,a1,1
        addi    a2,a2,100
        addi    a5,a5,222
        bne     a4,a6,.L3
.L1:
        ret

The redundant register spillings is eliminated.
However, there is one more issue need to be addressed which is the redundant 
move instruction "vmv8r.v". This is another story, and it will be fixed by another
patch (Fine tune RVV machine description RA constraint).

gcc/ChangeLog:

        * config/riscv/riscv.h (enum reg_class): Fix RVV register order.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/spill-4.c: Adapt testcase.
        * gcc.target/riscv/rvv/base/spill-6.c: Adapt testcase.
        * gcc.target/riscv/rvv/base/reg_order-1.c: New test.

---
 gcc/config/riscv/riscv.h                      | 13 ++++----
 .../gcc.target/riscv/rvv/base/reg_order-1.c   | 20 ++++++++++++
 .../gcc.target/riscv/rvv/base/spill-4.c       | 32 +++++++++----------
 .../gcc.target/riscv/rvv/base/spill-6.c       | 16 +++++-----
 4 files changed, 50 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 66fb07d6652..13038a39e5c 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -553,13 +553,12 @@ enum reg_class
   60, 61, 62, 63,							\
   /* Call-saved FPRs.  */						\
   40, 41, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,			\
-  /* V24 ~ V31.  */							\
-  120, 121, 122, 123, 124, 125, 126, 127,				\
-  /* V8 ~ V23.  */							\
-  104, 105, 106, 107, 108, 109, 110, 111,				\
-  112, 113, 114, 115, 116, 117, 118, 119,				\
-  /* V0 ~ V7.  */							\
-  96, 97, 98, 99, 100, 101, 102, 103,					\
+  /* v1 ~ v31 vector registers.  */					\
+  97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,	\
+  111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,	\
+  124, 125, 126, 127,							\
+  /* The vector mask register.  */					\
+  96,									\
   /* None of the remaining classes have defined call-saved		\
      registers.  */							\
   64, 65, 66, 67							\
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c
new file mode 100644
index 00000000000..b33f9141286
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl);
+      vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl);
+      vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl);
+      vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_i8m1 (out + 222*i,v4,vl);
+    }
+}
+
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
index 83c80b0b045..ad7592f30bc 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
@@ -10,7 +10,7 @@
 **  csrr\tt0,vlenb
 **  sub\tsp,sp,t0
 **  ...
-**  vs1r.v\tv24,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl1re64.v\tv2,0\(sp\)
 **  vs1r.v\tv2,0\(a1\)
@@ -34,7 +34,7 @@ spill_4 (int64_t *in, int64_t *out)
 **  slli\tt1,t0,1
 **  sub\tsp,sp,t1
 **  ...
-**  vs2r.v\tv24,0\(sp\)
+**  vs2r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl2re64.v\tv4,0\(sp\)
 **  vs2r.v\tv4,0\(a1\)
@@ -58,10 +58,10 @@ spill_5 (int64_t *in, int64_t *out)
 **  slli\tt1,t0,2
 **  sub\tsp,sp,t1
 **  ...
-**  vs4r.v\tv24,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl4re64.v\tv8,0\(sp\)
-**  vs4r.v\tv8,0\(a1\)
+**  vl4re64.v\tv[0-9]+,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -82,10 +82,10 @@ spill_6 (int64_t *in, int64_t *out)
 **  slli\tt1,t0,3
 **  sub\tsp,sp,t1
 **  ...
-**  vs8r.v\tv24,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl8re64.v\tv16,0\(sp\)
-**  vs8r.v\tv16,0\(a1\)
+**  vl8re64.v\tv[0-9]+,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -105,7 +105,7 @@ spill_7 (int64_t *in, int64_t *out)
 **  csrr\tt0,vlenb
 **  sub\tsp,sp,t0
 **  ...
-**  vs1r.v\tv24,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl1re64.v\tv2,0\(sp\)
 **  vs1r.v\tv2,0\(a1\)
@@ -129,7 +129,7 @@ spill_11 (uint64_t *in, uint64_t *out)
 **  slli\tt1,t0,1
 **  sub\tsp,sp,t1
 **  ...
-**  vs2r.v\tv24,0\(sp\)
+**  vs2r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl2re64.v\tv4,0\(sp\)
 **  vs2r.v\tv4,0\(a1\)
@@ -153,10 +153,10 @@ spill_12 (uint64_t *in, uint64_t *out)
 **  slli\tt1,t0,2
 **  sub\tsp,sp,t1
 **  ...
-**  vs4r.v\tv24,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl4re64.v\tv8,0\(sp\)
-**  vs4r.v\tv8,0\(a1\)
+**  vl4re64.v\tv[0-9]+,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -177,10 +177,10 @@ spill_13 (uint64_t *in, uint64_t *out)
 **  slli\tt1,t0,3
 **  sub\tsp,sp,t1
 **  ...
-**  vs8r.v\tv24,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl8re64.v\tv16,0\(sp\)
-**  vs8r.v\tv16,0\(a1\)
+**  vl8re64.v\tv[0-9]+,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
index 340029da88b..07eee61baa3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
@@ -10,10 +10,10 @@
 **  csrr\tt0,vlenb
 **  sub\tsp,sp,t0
 **  ...
-**  vs1r.v\tv24,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl1re64.v\tv2,0\(sp\)
-**  vs1r.v\tv2,0\(a1\)
+**  vl1re64.v\tv[0-9]+,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -34,7 +34,7 @@ spill_4 (double *in, double *out)
 **  slli\tt1,t0,1
 **  sub\tsp,sp,t1
 **  ...
-**  vs2r.v\tv24,0\(sp\)
+**  vs2r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl2re64.v\tv4,0\(sp\)
 **  vs2r.v\tv4,0\(a1\)
@@ -58,7 +58,7 @@ spill_5 (double *in, double *out)
 **  slli\tt1,t0,2
 **  sub\tsp,sp,t1
 **  ...
-**  vs4r.v\tv24,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl4re64.v\tv8,0\(sp\)
 **  vs4r.v\tv8,0\(a1\)
@@ -82,10 +82,10 @@ spill_6 (double *in, double *out)
 **  slli\tt1,t0,3
 **  sub\tsp,sp,t1
 **  ...
-**  vs8r.v\tv24,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl8re64.v\tv16,0\(sp\)
-**  vs8r.v\tv16,0\(a1\)
+**  vl8re64.v\tv[0-9]+,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
-- 
2.36.1


             reply	other threads:[~2023-03-24  6:42 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-24  6:42 juzhe.zhong [this message]
2023-03-24  6:57 juzhe.zhong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230324064222.205360-1-juzhe.zhong@rivai.ai \
    --to=juzhe.zhong@rivai.ai \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=kito.cheng@gmail.com \
    --cc=palmer@dabbelt.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).