Subject: [gcc r13-6634] RISC-V: Fine tune RA constraint for narrow instructions
From: Kito Cheng @ 2023-03-13 16:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7ff57009bcc728044ba2de339ecd16721d48aba3

commit r13-6634-g7ff57009bcc728044ba2de339ecd16721d48aba3
Author: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Date:   Fri Mar 10 11:02:05 2023 +0800

    RISC-V: Fine tune RA constraint for narrow instructions
    
    According to the RVV ISA, for narrowing instructions:
    
    The destination EEW is smaller than the source EEW and the overlap is
    in the lowest-numbered part of the source register group.
    (e.g., when LMUL=1, vnsrl.wi v0, v0, 3 is legal, but a destination of v1 is not).
    
    We should allow narrowing instructions to partially overlap their source
    operands, following the RVV ISA rule above, so that we make better use
    of the vector registers.
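    
    For example (a sketch adapted from the new tests below, using the same
    intrinsics; the function name is illustrative, and whether the overlap is
    actually chosen depends on register pressure), the narrowing conversion
    can now write its result into the lowest-numbered register of its wide
    source group, so no extra vector move is needed before the store:
    
        #include "riscv_vector.h"
    
        void
        narrow_in_place (int16_t *base, int8_t *out, size_t vl)
        {
          /* Wide source: a 2-register group at LMUL=2.  */
          vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
          /* Narrowing conversion: the destination may reuse the
             lowest-numbered register of the source group.  */
          vint8m1_t v = __riscv_vncvt_x_x_w_i8m1 (src, vl);
          __riscv_vse8_v_i8m1 (out, v, vl);
        }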
    
    Consider these cases:
    https://godbolt.org/z/o6sc4eqGj
    
    in some of these cases LLVM emits redundant move instructions,
    and in others it emits redundant register spills.
    
    After this patch, GCC produces the expected register allocation and code
    generation for these cases across different levels of register pressure.
    
    gcc/ChangeLog:
    
            * config/riscv/vector.md: Fine tune RA constraints.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/base/narrow_constraint-1.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-10.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-11.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-2.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-3.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-4.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-5.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-6.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-7.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-8.c: New test.
            * gcc.target/riscv/rvv/base/narrow_constraint-9.c: New test.

Diff:
---
 gcc/config/riscv/vector.md                         | 168 ++++-----
 .../riscv/rvv/base/narrow_constraint-1.c           | 319 +++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-10.c          | 293 +++++++++++++++
 .../riscv/rvv/base/narrow_constraint-11.c          | 293 +++++++++++++++
 .../riscv/rvv/base/narrow_constraint-2.c           | 370 +++++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-3.c           | 392 +++++++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-4.c           | 319 +++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-5.c           | 319 +++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-6.c           | 369 +++++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-7.c           | 392 +++++++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-8.c           | 319 +++++++++++++++++
 .../riscv/rvv/base/narrow_constraint-9.c           | 319 +++++++++++++++++
 12 files changed, 3788 insertions(+), 84 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index a4a68b67e24..178d2950493 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3058,42 +3058,42 @@
 ;; lowest-numbered part of the source register group
 ;; e.g, when LMUL = 1, vnsrl.wi v0,v0,3 is legal but a destination of v1 is not.
 (define_insn "@pred_narrow_<optab><mode>"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vr,  &vr, vd, vr,  &vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd,vd, vr, vr,vd, vr,  &vr,  &vr, vd, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,Wc1,vmWc1, vm,Wc1,vmWc1")
-	     (match_operand 5 "vector_length_operand"                  " rK, rK,   rK, rK, rK,   rK")
-	     (match_operand 6 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 8 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,vm,Wc1,Wc1,vm,Wc1,vmWc1,vmWc1, vm,Wc1,vmWc1,vmWc1")
+	     (match_operand 5 "vector_length_operand"                  " rK,rK, rK, rK,rK, rK,   rK,   rK, rK, rK,   rK,   rK")
+	     (match_operand 6 "const_int_operand"                      "  i, i,  i,  i, i,  i,    i,    i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                      "  i, i,  i,  i, i,  i,    i,    i,  i,  i,    i,    i")
+	     (match_operand 8 "const_int_operand"                      "  i, i,  i,  i, i,  i,    i,    i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (truncate:<V_DOUBLE_TRUNC>
 	    (any_shiftrt:VWEXTI
-	     (match_operand:VWEXTI 3 "register_operand"                "  0,  0,   vr,  0,  0,   vr")
-	     (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  " vr, vr,   vr, vk, vk,   vk")))
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     "0vu,0vu,  0vu,0vu,0vu,  0vu")))]
+	     (match_operand:VWEXTI 3 "register_operand"                " vr,vr, vr, vr, 0,  0,   vr,   vr,  0,  0,   vr,   vr")
+	     (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  "  0, 0,  0,  0,vr, vr,   vr,   vr, vk, vk,   vk,   vk")))
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     "  0,vu,  0, vu,vu, vu,   vu,    0, vu, vu,   vu,    0")))]
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnshift")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")])
 
 (define_insn "@pred_narrow_<optab><mode>_scalar"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vr,  &vr, vd, vr,  &vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,Wc1,vmWc1, vm,Wc1,vmWc1")
-	     (match_operand 5 "vector_length_operand"                  " rK, rK,   rK, rK, rK,   rK")
-	     (match_operand 6 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 8 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 5 "vector_length_operand"                  " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 6 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
+	     (match_operand 8 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (truncate:<V_DOUBLE_TRUNC>
 	    (any_shiftrt:VWEXTI
-	     (match_operand:VWEXTI 3 "register_operand"                "  0,  0,   vr,  0,  0,   vr")
-	     (match_operand 4 "pmode_reg_or_uimm5_operand"             "  r,  r,    r,  K,  K,    K")))
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     "0vu,0vu,  0vu,0vu,0vu,  0vu")))]
+	     (match_operand:VWEXTI 3 "register_operand"                "  0,  0,  0,  0,   vr,   vr")
+	     (match_operand 4 "pmode_reg_or_uimm5_operand"             " rK, rK, rK, rK,   rK,   rK")))
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnshift")
@@ -3101,19 +3101,19 @@
 
 ;; vncvt.x.x.w
 (define_insn "@pred_trunc<mode>"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vr,  &vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,Wc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"                  " rK, rK,   rK")
-	     (match_operand 5 "const_int_operand"                      "  i,  i,    i")
-	     (match_operand 6 "const_int_operand"                      "  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                      "  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"                  " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
+	     (match_operand 6 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (truncate:<V_DOUBLE_TRUNC>
-	    (match_operand:VWEXTI 3 "register_operand"                 "  0,  0,   vr"))
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     "0vu,0vu,  0vu")))]
+	    (match_operand:VWEXTI 3 "register_operand"                 "  0,  0,  0,  0,   vr,   vr"))
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vncvt.x.x.w\t%0,%3%p1"
   [(set_attr "type" "vnshift")
@@ -3496,40 +3496,40 @@
 
 ;; CLIP
 (define_insn "@pred_narrow_clip<v_su><mode>"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vr,  &vr, vd, vr,  &vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd,vd, vr, vr,vd, vr,  &vr,  &vr, vd, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,Wc1,vmWc1, vm,Wc1,vmWc1")
-	     (match_operand 5 "vector_length_operand"                  " rK, rK,   rK, rK, rK,   rK")
-	     (match_operand 6 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 8 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,vm,Wc1,Wc1,vm,Wc1,vmWc1,vmWc1, vm,Wc1,vmWc1,vmWc1")
+	     (match_operand 5 "vector_length_operand"                  " rK,rK, rK, rK,rK, rK,   rK,   rK, rK, rK,   rK,   rK")
+	     (match_operand 6 "const_int_operand"                      "  i, i,  i,  i, i,  i,    i,    i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                      "  i, i,  i,  i, i,  i,    i,    i,  i,  i,    i,    i")
+	     (match_operand 8 "const_int_operand"                      "  i, i,  i,  i, i,  i,    i,    i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:<V_DOUBLE_TRUNC>
-	    [(match_operand:VWEXTI 3 "register_operand"                 "  0,  0,   vr,  0,  0,   vr")
-	     (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"   " vr, vr,   vr, vk, vk,   vk")] VNCLIP)
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"      "0vu,0vu,  0vu,0vu,0vu,  0vu")))]
+	    [(match_operand:VWEXTI 3 "register_operand"                " vr,vr, vr, vr, 0,  0,   vr,   vr,  0,  0,   vr,   vr")
+	     (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  "  0, 0,  0,  0,vr, vr,   vr,   vr, vk, vk,   vk,   vk")] VNCLIP)
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     "  0,vu,  0, vu,vu, vu,   vu,    0, vu, vu,   vu,    0")))]
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnclip")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")])
 
 (define_insn "@pred_narrow_clip<v_su><mode>_scalar"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vr,  &vr, vd, vr,  &vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm,Wc1,vmWc1, vm,Wc1,vmWc1")
-	     (match_operand 5 "vector_length_operand"                  " rK, rK,   rK, rK, rK,   rK")
-	     (match_operand 6 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
-	     (match_operand 8 "const_int_operand"                      "  i,  i,    i,  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"               " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 5 "vector_length_operand"                  " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 6 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
+	     (match_operand 8 "const_int_operand"                      "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:<V_DOUBLE_TRUNC>
-	    [(match_operand:VWEXTI 3 "register_operand"                "  0,  0,   vr,  0,  0,   vr")
-	     (match_operand 4 "pmode_reg_or_uimm5_operand"             "  r,  r,    r,  K,  K,    K")] VNCLIP)
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     "0vu,0vu,  0vu,0vu,0vu,  0vu")))]
+	    [(match_operand:VWEXTI 3 "register_operand"                "  0,  0,  0,  0,   vr,   vr")
+	     (match_operand 4 "pmode_reg_or_uimm5_operand"             " rK, rK, rK, rK,   rK,   rK")] VNCLIP)
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand"     " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnclip")
@@ -6342,96 +6342,96 @@
 ;; -------------------------------------------------------------------------------
 
 (define_insn "@pred_narrow_fcvt_x<v_su>_f<mode>"
-  [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vr, ?&vr")
+  [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<VNCONVERT>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"       " vm,Wc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"          " rK, rK,   rK")
-	     (match_operand 5 "const_int_operand"              "  i,  i,    i")
-	     (match_operand 6 "const_int_operand"              "  i,  i,    i")
-	     (match_operand 7 "const_int_operand"              "  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"       " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"          " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"              "  i,  i,  i,  i,    i,    i")
+	     (match_operand 6 "const_int_operand"              "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"              "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:<VNCONVERT>
-	     [(match_operand:VF 3 "register_operand"           "  0,  0,   vr")] VFCVTS)
-	  (match_operand:<VNCONVERT> 2 "vector_merge_operand"  "0vu,0vu,  0vu")))]
+	     [(match_operand:VF 3 "register_operand"           "  0,  0,  0,  0,   vr,   vr")] VFCVTS)
+	  (match_operand:<VNCONVERT> 2 "vector_merge_operand"  " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfncvt.x<v_su>.f.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtftoi")
    (set_attr "mode" "<VNCONVERT>")])
 
 (define_insn "@pred_narrow_<fix_cvt><mode>"
-  [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vr, ?&vr")
+  [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<VNCONVERT>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "  vm,Wc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"         "  rK, rK,   rK")
-	     (match_operand 5 "const_int_operand"             "   i,  i,    i")
-	     (match_operand 6 "const_int_operand"             "   i,  i,    i")
-	     (match_operand 7 "const_int_operand"             "   i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"         " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"             "  i,  i,  i,  i,    i,    i")
+	     (match_operand 6 "const_int_operand"             "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"             "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (any_fix:<VNCONVERT>
-	     (match_operand:VF 3 "register_operand"           "   0,  0,   vr"))
-	  (match_operand:<VNCONVERT> 2 "vector_merge_operand" " 0vu,0vu,  0vu")))]
+	     (match_operand:VF 3 "register_operand"           "  0,  0,  0,  0,   vr,   vr"))
+	  (match_operand:<VNCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfncvt.rtz.x<u>.f.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtftoi")
    (set_attr "mode" "<VNCONVERT>")])
 
 (define_insn "@pred_narrow_<float_cvt><mode>"
-  [(set (match_operand:<VNCONVERT> 0 "register_operand"       "=vd, vr, ?&vr")
+  [(set (match_operand:<VNCONVERT> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<VNCONVERT>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      " vm,Wc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"         " rK, rK,   rK")
-	     (match_operand 5 "const_int_operand"             "  i,  i,    i")
-	     (match_operand 6 "const_int_operand"             "  i,  i,    i")
-	     (match_operand 7 "const_int_operand"             "  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"         " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"             "  i,  i,  i,  i,    i,    i")
+	     (match_operand 6 "const_int_operand"             "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"             "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (any_float:<VNCONVERT>
-	     (match_operand:VWCONVERTI 3 "register_operand"   "  0,  0,   vr"))
-	  (match_operand:<VNCONVERT> 2 "vector_merge_operand" "0vu,0vu,  0vu")))]
+	     (match_operand:VWCONVERTI 3 "register_operand"   "  0,  0,  0,  0,   vr,   vr"))
+	  (match_operand:<VNCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfncvt.f.x<u>.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtitof")
    (set_attr "mode" "<VNCONVERT>")])
 
 (define_insn "@pred_trunc<mode>"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vr, ?&vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"              " rK, rK,   rK")
-	     (match_operand 5 "const_int_operand"                  "  i,  i,    i")
-	     (match_operand 6 "const_int_operand"                  "  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                  "  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"           " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"              " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"                  "  i,  i,  i,  i,    i,    i")
+	     (match_operand 6 "const_int_operand"                  "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                  "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (float_truncate:<V_DOUBLE_TRUNC>
-	     (match_operand:VWEXTF 3 "register_operand"            "  0,  0,   vr"))
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand" "0vu,0vu,  0vu")))]
+	     (match_operand:VWEXTF 3 "register_operand"            "  0,  0,  0,  0,   vr,   vr"))
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfncvt.f.f.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtftof")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")])
 
 (define_insn "@pred_rod_trunc<mode>"
-  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vr, ?&vr")
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
 	(if_then_else:<V_DOUBLE_TRUNC>
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"              " rK, rK,   rK")
-	     (match_operand 5 "const_int_operand"                  "  i,  i,    i")
-	     (match_operand 6 "const_int_operand"                  "  i,  i,    i")
-	     (match_operand 7 "const_int_operand"                  "  i,  i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"           " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"              " rK, rK, rK, rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"                  "  i,  i,  i,  i,    i,    i")
+	     (match_operand 6 "const_int_operand"                  "  i,  i,  i,  i,    i,    i")
+	     (match_operand 7 "const_int_operand"                  "  i,  i,  i,  i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:<V_DOUBLE_TRUNC>
 	    [(float_truncate:<V_DOUBLE_TRUNC>
-	       (match_operand:VWEXTF 3 "register_operand"            "  0,  0,   vr"))] UNSPEC_ROD)
-	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand" "0vu,0vu,  0vu")))]
+	       (match_operand:VWEXTF 3 "register_operand"          "  0,  0,  0,  0,   vr,   vr"))] UNSPEC_ROD)
+	  (match_operand:<V_DOUBLE_TRUNC> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfncvt.rod.f.f.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtftof")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-1.c
new file mode 100644
index 00000000000..0cdf60cde06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-1.c
@@ -0,0 +1,319 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16mf4_t src = __riscv_vle16_v_i16mf4 (base, vl);
+    vint8mf8_t v = __riscv_vncvt_x_x_w_i8mf8(src,vl);
+    __riscv_vse8_v_i8mf8 (out,v,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16mf4_t src = __riscv_vle16_v_i16mf4 (base, vl);
+    vint8mf8_t src2 = __riscv_vle8_v_i8mf8 ((int8_t *)(base + 100), vl);
+    vint8mf8_t v = __riscv_vncvt_x_x_w_i8mf8_tu(src2,src,vl);
+    __riscv_vse8_v_i8mf8 (out,v,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16mf4_t src = __riscv_vle16_v_i16mf4 (base, vl);
+    vint8mf8_t v = __riscv_vncvt_x_x_w_i8mf8(src,vl);
+    vint16mf4_t v2 = __riscv_vadd_vv_i16mf4 (src, src,vl);
+    __riscv_vse8_v_i8mf8 (out,v,vl);
+    __riscv_vse16_v_i16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vint16mf4_t src = __riscv_vle16_v_i16mf4 (base + 100*i, vl);
+      vint8mf8_t v = __riscv_vncvt_x_x_w_i8mf8(src,vl);
+      vint16mf4_t v2 = __riscv_vadd_vv_i16mf4 (src, src,vl);
+      __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_i16mf4 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16mf4_t src = __riscv_vle16_v_i16mf4 (base, vl);
+    vint8mf8_t v = __riscv_vncvt_x_x_w_i8mf8(src,vl);
+    v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+    v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+    vint16mf4_t v2 = __riscv_vadd_vv_i16mf4 (src, src,vl);
+    __riscv_vse8_v_i8mf8 (out,v,vl);
+    __riscv_vse16_v_i16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vint16mf4_t src = __riscv_vle16_v_i16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vint8mf8_t v = __riscv_vncvt_x_x_w_i8mf8_m(m,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vle8_v_i8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
+    vint8m1_t v = __riscv_vncvt_x_x_w_i8m1(src,vl);
+    __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f7 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
+    vint8m1_t src2 = __riscv_vle8_v_i8m1 ((int8_t *)(base + 100), vl);
+    vint8m1_t v = __riscv_vncvt_x_x_w_i8m1_tu(src2,src,vl);
+    __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f8 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
+    vint8m1_t v = __riscv_vncvt_x_x_w_i8m1(src,vl);
+    vint16m2_t v2 = __riscv_vadd_vv_i16m2 (src, src,vl);
+    __riscv_vse8_v_i8m1 (out,v,vl);
+    __riscv_vse16_v_i16m2 ((int16_t *)out,v2,vl);
+}
+
+void f9 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vint16m2_t src = __riscv_vle16_v_i16m2 (base + 100*i, vl);
+      vint8m1_t v = __riscv_vncvt_x_x_w_i8m1(src,vl);
+      vint16m2_t v2 = __riscv_vadd_vv_i16m2 (src, src,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_i16m2 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
+    vint8m1_t v = __riscv_vncvt_x_x_w_i8m1(src,vl);
+    v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+    v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+    vint16m2_t v2 = __riscv_vadd_vv_i16m2 (src, src,vl);
+    __riscv_vse8_v_i8m1 (out,v,vl);
+    __riscv_vse16_v_i16m2 ((int16_t *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vint8m1_t v = __riscv_vncvt_x_x_w_i8m1_m(m,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vle8_v_i8m1_tu (v, base2, vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    vint8mf8_t v = __riscv_vle8_v_i8mf8 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vint16mf4_t src = __riscv_vle16_v_i16mf4 (base + 100*i, vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    vint8m1_t v = __riscv_vle8_v_i8m1 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vint16m2_t src = __riscv_vle16_v_i16m2 (base + 100*i, vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vint8mf8_t v = __riscv_vle8_v_i8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vint16mf4_t src = __riscv_vle16_v_i16mf4 (base + 100*i, vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src,vl);
+      __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vint8m1_t v = __riscv_vle8_v_i8m1 ((int8_t *)(base + 1000 * i), vl);
+      vint16m2_t src = __riscv_vle16_v_i16m2 (base + 100*i, vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vint8mf8_t v = __riscv_vle8_v_i8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vint16mf4_t src1 = __riscv_vle16_v_i16mf4 (base + 100*i, vl);
+      vint16mf4_t src2 = __riscv_vle16_v_i16mf4 (base + 200*i, vl);
+      vint16mf4_t src3 = __riscv_vle16_v_i16mf4 (base + 300*i, vl);
+      vint16mf4_t src4 = __riscv_vle16_v_i16mf4 (base + 400*i, vl);
+      vint16mf4_t src5 = __riscv_vle16_v_i16mf4 (base + 500*i, vl);
+      vint16mf4_t src6 = __riscv_vle16_v_i16mf4 (base + 600*i, vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src1,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src2,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src3,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src4,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src5,vl);
+      v = __riscv_vncvt_x_x_w_i8mf8_tu(v,src6,vl);
+      __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (int16_t *base,int8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vint8m1_t v = __riscv_vle8_v_i8m1 ((int8_t *)(base + 1000 * i), vl);
+      vint16m2_t src1 = __riscv_vle16_v_i16m2 (base + 100*i, vl);
+      vint16m2_t src2 = __riscv_vle16_v_i16m2 (base + 200*i, vl);
+      vint16m2_t src3 = __riscv_vle16_v_i16m2 (base + 300*i, vl);
+      vint16m2_t src4 = __riscv_vle16_v_i16m2 (base + 400*i, vl);
+      vint16m2_t src5 = __riscv_vle16_v_i16m2 (base + 500*i, vl);
+      vint16m2_t src6 = __riscv_vle16_v_i16m2 (base + 600*i, vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src1,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src2,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src3,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src4,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src5,vl);
+      v = __riscv_vncvt_x_x_w_i8m1_tu(v,src6,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f18 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vint32mf2_t src = __riscv_vle32_v_i32mf2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vint16mf4_t v = __riscv_vncvt_x_x_w_i16mf4_m(m,src,vl);
+      vint16mf4_t v2 = __riscv_vle16_v_i16mf4_tu (v, base2 + i, vl);
+      vint8mf8_t v3 = __riscv_vncvt_x_x_w_i8mf8_m(m,v2,vl);
+      __riscv_vse8_v_i8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f19 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vint32m4_t src = __riscv_vle32_v_i32m4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vint16m2_t v = __riscv_vncvt_x_x_w_i16m2_m(m,src,vl);
+      vint16m2_t v2 = __riscv_vle16_v_i16m2_tu (v, base2 + i, vl);
+      vint8m1_t v3 = __riscv_vncvt_x_x_w_i8m1_m(m,v2,vl);
+      vint8m1_t v4 = __riscv_vncvt_x_x_w_i8m1_tumu(m,v3,v2,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_i8m1 (out + 222*i,v4,vl);
+    }
+}
+
+void f20 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vint8m1_t v = __riscv_vncvt_x_x_w_i8m1(src,vl);
+    /* Only allow vncvt SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f21 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m1_t src = __riscv_vle16_v_i16m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vint8mf2_t v = __riscv_vncvt_x_x_w_i8mf2(src,vl);
+    /* Only allow vncvt SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse8_v_i8mf2 (out,v,vl);
+}
+
+void f22 (int16_t *base,int8_t *out,size_t vl)
+{
+    vint16m2_t src = __riscv_vle16_v_i16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vint8m1_t v = __riscv_vncvt_x_x_w_i8m1(src,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vadd_vv_i8m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-10.c
new file mode 100644
index 00000000000..5b371482d9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-10.c
@@ -0,0 +1,293 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+}
+
+void f1 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t src2 = __riscv_vle32_v_f32mf2 ((void *)(base + 100), vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2_tu(src2,src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    vfloat64m1_t v2 = __riscv_vfadd_vv_f64m1 (src, src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+    __riscv_vse64_v_f64m1 ((void *)out,v2,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+      vfloat64m1_t v2 = __riscv_vfadd_vv_f64m1 (src, src,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+      __riscv_vse64_v_f64m1 ((void *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+    v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+    vfloat64m1_t v2 = __riscv_vfadd_vv_f64m1 (src, src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+    __riscv_vse64_v_f64m1 ((void *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2_m(m,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vle32_v_f32mf2_tu (v, base2, vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1(src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t src2 = __riscv_vle32_v_f32m1 ((void *)(base + 100), vl);
+    vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1_tu(src2,src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1(src,vl);
+    vfloat64m2_t v2 = __riscv_vfadd_vv_f64m2 (src, src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+    __riscv_vse64_v_f64m2 ((void *)out,v2,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1(src,vl);
+      vfloat64m2_t v2 = __riscv_vfadd_vv_f64m2 (src, src,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+      __riscv_vse64_v_f64m2 ((void *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1(src,vl);
+    v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+    v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+    vfloat64m2_t v2 = __riscv_vfadd_vv_f64m2 (src, src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+    __riscv_vse64_v_f64m2 ((void *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool32_t m = __riscv_vlm_v_b32 (base + i, vl);
+      vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1_m(m,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vle32_v_f32m1_tu (v, base2, vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (void *base,void *out,size_t vl, int n)
+{
+    vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((void *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (void *base,void *out,size_t vl, int n)
+{
+    vfloat32m1_t v = __riscv_vle32_v_f32m1 ((void *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((void *)(base + 1000 * i), vl);
+      vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32m1_t v = __riscv_vle32_v_f32m1 ((void *)(base + 1000 * i), vl);
+      vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((void *)(base + 1000 * i), vl);
+      vfloat64m1_t src1 = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      vfloat64m1_t src2 = __riscv_vle64_v_f64m1 (base + 200*i, vl);
+      vfloat64m1_t src3 = __riscv_vle64_v_f64m1 (base + 300*i, vl);
+      vfloat64m1_t src4 = __riscv_vle64_v_f64m1 (base + 400*i, vl);
+      vfloat64m1_t src5 = __riscv_vle64_v_f64m1 (base + 500*i, vl);
+      vfloat64m1_t src6 = __riscv_vle64_v_f64m1 (base + 600*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src1,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src2,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src3,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src4,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src5,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src6,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32m1_t v = __riscv_vle32_v_f32m1 ((void *)(base + 1000 * i), vl);
+      vfloat64m2_t src1 = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      vfloat64m2_t src2 = __riscv_vle64_v_f64m2 (base + 200*i, vl);
+      vfloat64m2_t src3 = __riscv_vle64_v_f64m2 (base + 300*i, vl);
+      vfloat64m2_t src4 = __riscv_vle64_v_f64m2 (base + 400*i, vl);
+      vfloat64m2_t src5 = __riscv_vle64_v_f64m2 (base + 500*i, vl);
+      vfloat64m2_t src6 = __riscv_vle64_v_f64m2 (base + 600*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src1,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src2,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src3,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src4,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src5,vl);
+      v = __riscv_vfncvt_f_f_w_f32m1_tu(v,src6,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f18 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1(src,vl);
+    /* Only allow vncvt SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+void f19 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    /* Only allow vncvt SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+}
+
+void f20 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vfloat32m1_t v = __riscv_vfncvt_f_f_w_f32m1(src,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vfadd_vv_f32m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-11.c
new file mode 100644
index 00000000000..4d4e4f30e9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-11.c
@@ -0,0 +1,293 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+}
+
+void f1 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t src2 = __riscv_vle32_v_f32mf2 ((void *)(base + 100), vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2_tu(src2,src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    vfloat64m1_t v2 = __riscv_vfadd_vv_f64m1 (src, src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+    __riscv_vse64_v_f64m1 ((void *)out,v2,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+      vfloat64m1_t v2 = __riscv_vfadd_vv_f64m1 (src, src,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+      __riscv_vse64_v_f64m1 ((void *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+    v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+    vfloat64m1_t v2 = __riscv_vfadd_vv_f64m1 (src, src,vl);
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+    __riscv_vse64_v_f64m1 ((void *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2_m(m,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vle32_v_f32mf2_tu (v, base2, vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1(src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t src2 = __riscv_vle32_v_f32m1 ((void *)(base + 100), vl);
+    vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(src2,src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1(src,vl);
+    vfloat64m2_t v2 = __riscv_vfadd_vv_f64m2 (src, src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+    __riscv_vse64_v_f64m2 ((void *)out,v2,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1(src,vl);
+      vfloat64m2_t v2 = __riscv_vfadd_vv_f64m2 (src, src,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+      __riscv_vse64_v_f64m2 ((void *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1(src,vl);
+    v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+    v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+    vfloat64m2_t v2 = __riscv_vfadd_vv_f64m2 (src, src,vl);
+    __riscv_vse32_v_f32m1 (out,v,vl);
+    __riscv_vse64_v_f64m2 ((void *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool32_t m = __riscv_vlm_v_b32 (base + i, vl);
+      vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1_m(m,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vle32_v_f32m1_tu (v, base2, vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (void *base,void *out,size_t vl, int n)
+{
+    vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((void *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (void *base,void *out,size_t vl, int n)
+{
+    vfloat32m1_t v = __riscv_vle32_v_f32m1 ((void *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((void *)(base + 1000 * i), vl);
+      vfloat64m1_t src = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32m1_t v = __riscv_vle32_v_f32m1 ((void *)(base + 1000 * i), vl);
+      vfloat64m2_t src = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((void *)(base + 1000 * i), vl);
+      vfloat64m1_t src1 = __riscv_vle64_v_f64m1 (base + 100*i, vl);
+      vfloat64m1_t src2 = __riscv_vle64_v_f64m1 (base + 200*i, vl);
+      vfloat64m1_t src3 = __riscv_vle64_v_f64m1 (base + 300*i, vl);
+      vfloat64m1_t src4 = __riscv_vle64_v_f64m1 (base + 400*i, vl);
+      vfloat64m1_t src5 = __riscv_vle64_v_f64m1 (base + 500*i, vl);
+      vfloat64m1_t src6 = __riscv_vle64_v_f64m1 (base + 600*i, vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src1,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src2,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src3,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src4,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src5,vl);
+      v = __riscv_vfncvt_f_f_w_f32mf2_tu(v,src6,vl);
+      __riscv_vse32_v_f32mf2 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vfloat32m1_t v = __riscv_vle32_v_f32m1 ((void *)(base + 1000 * i), vl);
+      vfloat64m2_t src1 = __riscv_vle64_v_f64m2 (base + 100*i, vl);
+      vfloat64m2_t src2 = __riscv_vle64_v_f64m2 (base + 200*i, vl);
+      vfloat64m2_t src3 = __riscv_vle64_v_f64m2 (base + 300*i, vl);
+      vfloat64m2_t src4 = __riscv_vle64_v_f64m2 (base + 400*i, vl);
+      vfloat64m2_t src5 = __riscv_vle64_v_f64m2 (base + 500*i, vl);
+      vfloat64m2_t src6 = __riscv_vle64_v_f64m2 (base + 600*i, vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src1,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src2,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src3,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src4,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src5,vl);
+      v = __riscv_vfncvt_rod_f_f_w_f32m1_tu(v,src6,vl);
+      __riscv_vse32_v_f32m1 (out + 100*i,v,vl);
+    }
+}
+
+void f18 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1(src,vl);
+    /* Only allow vncvt SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+void f19 (void *base,void *out,size_t vl)
+{
+    vfloat64m1_t src = __riscv_vle64_v_f64m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vfloat32mf2_t v = __riscv_vfncvt_f_f_w_f32mf2(src,vl);
+    /* Only allow vncvt SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse32_v_f32mf2 (out,v,vl);
+}
+
+void f20 (void *base,void *out,size_t vl)
+{
+    vfloat64m2_t src = __riscv_vle64_v_f64m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vfloat32m1_t v = __riscv_vfncvt_rod_f_f_w_f32m1(src,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vfadd_vv_f32m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse32_v_f32m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-2.c
new file mode 100644
index 00000000000..28ea6217ce7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-2.c
@@ -0,0 +1,370 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f0 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f1 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f4 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8_tumu(m,v,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f5 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8_m(m,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f6 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8_m(m,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8 (src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8 (src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+    __riscv_vse16_v_u16mf4 (out+200,src,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+    __riscv_vse16_v_u16mf4 (out+200,src,vl);
+}
+
+void f10 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+    v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+    v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+    v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f11 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+    __riscv_vse16_v_u16mf4 (out+200,src,vl);
+}
+
+void f12 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+    v2 = __riscv_vnsrl_wv_u8mf8 (src,v2,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+}
+
+void f13 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8_m(m,src,vl,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+      vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v2,vl);
+    }
+}
+
+void f15 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+      vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v2,vl);
+    }
+}
+
+void f16 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vncvt_x_x_w_u8mf8(src,vl);
+    vuint8mf8_t v3 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse8_v_u8mf8 (out + 100,v3,vl);
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 (base + 200*i, vl);
+      vuint8mf8_t v = __riscv_vnsrl_wv_u8mf8(src,src2,vl);
+      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+      asm volatile ("":::"memory");
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 ((out + 200*i),src,vl);
+      __riscv_vse8_v_u8mf8 ((out + 300*i),src2,vl);
+    }
+}
+
+void f18 (void *base,void *out,size_t vl, int n)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f19 (void *base,void *out,size_t vl, int n)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+      v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+      v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+      v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 200*i,v2,vl);
+    }
+}
+
+void f20 (void *base,void *out,size_t vl, int n)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+      v2 = __riscv_vnsrl_wv_u8mf8(src,v2,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 200*i,v2,vl);
+    }
+}
+
+void f21 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000 * i), vl);
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 (out + 200*i,src,vl);
+    }
+}
+
+void f22 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((uint8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src1,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src2,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src3,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src4,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src5,v,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src6,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f23 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((uint8_t *)(base + 1000 * i), vl);
+      vuint8mf8_t v2 = __riscv_vle8_v_u8mf8 ((uint8_t *)(base + 2000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src1,v2,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src2,v2,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src3,v2,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src4,v2,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src5,v2,vl);
+      v = __riscv_vnsrl_wv_u8mf8_tu(v,src6,v2,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f24 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
+    vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200, vl);
+    vuint8mf8_t src3 = __riscv_vle8_v_u8mf8 (base + 300, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint16mf4_t v = __riscv_vnsrl_wv_u16mf4_m(m,src,src2,vl);
+      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
+      vuint8mf8_t v3 = __riscv_vnsrl_wv_u8mf8_m(m,v2,src3,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f25 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 100, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+}
+
+void f26 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 100, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnsrl_wv_u8mf8(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-3.c
new file mode 100644
index 00000000000..72bff02c518
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-3.c
@@ -0,0 +1,392 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f0 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f1 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f4 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1_tumu(m,v,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f5 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1_m(m,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f6 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1_m(m,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8m1 (src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8m1 (src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+    __riscv_vse16_v_u16m2 (out+200,src,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8m1_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+    __riscv_vse16_v_u16m2 (out+200,src,vl);
+}
+
+void f10 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+    v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+    v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+    v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f11 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnsrl_wv_u8m1_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+    __riscv_vse16_v_u16m2 (out+200,src,vl);
+}
+
+void f12 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+    v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+    v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+    v2 = __riscv_vnsrl_wv_u8m1 (src,v2,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+}
+
+void f13 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vnsrl_wx_u8m1_m(m,src,vl,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+      vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v2,vl);
+    }
+}
+
+void f15 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+      vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v2,vl);
+    }
+}
+
+void f16 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vncvt_x_x_w_u8m1(src,vl);
+    vuint8m1_t v3 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse8_v_u8m1 (out + 100,v3,vl);
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint8m1_t src2 = __riscv_vle8_v_u8m1 (base + 200*i, vl);
+      vuint8m1_t v = __riscv_vnsrl_wv_u8m1(src,src2,vl);
+      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+      asm volatile ("":::"memory");
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 ((out + 200*i),src,vl);
+      __riscv_vse8_v_u8m1 ((out + 300*i),src2,vl);
+    }
+}
+
+void f18 (void *base,void *out,size_t vl, int n)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f19 (void *base,void *out,size_t vl, int n)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+      v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+      v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+      v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 200*i,v2,vl);
+    }
+}
+
+void f20 (void *base,void *out,size_t vl, int n)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+      v2 = __riscv_vnsrl_wv_u8m1(src,v2,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 200*i,v2,vl);
+    }
+}
+
+void f21 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000 * i), vl);
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 (out + 200*i,src,vl);
+    }
+}
+
+void f22 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((uint8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src1,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src2,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src3,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src4,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src5,v,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src6,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f23 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((uint8_t *)(base + 1000 * i), vl);
+      vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((uint8_t *)(base + 2000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src1,v2,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src2,v2,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src3,v2,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src4,v2,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src5,v2,vl);
+      v = __riscv_vnsrl_wv_u8m1_tu(v,src6,v2,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f24 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
+    vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200, vl);
+    vuint8m1_t src3 = __riscv_vle8_v_u8m1 (base + 300, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint16m2_t v = __riscv_vnsrl_wv_u16m2_m(m,src,src2,vl);
+      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
+      vuint8m1_t v3 = __riscv_vnsrl_wv_u8m1_m(m,v2,src3,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
+    }
+}
+
+void f25 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
+    vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v31");
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v31");
+}
+
+void f26 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28");
+    vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v30", "v31");
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v30", "v31");
+}
+
+void f27 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28");
+    vuint8m1_t v2 = __riscv_vnsrl_wv_u8m1(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v29", "v28", "v30", "v31");
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v29", "v28", "v30", "v31");
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-4.c
new file mode 100644
index 00000000000..28971a0aad8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-4.c
@@ -0,0 +1,319 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8_tu(src2,src,shift,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+    v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8_m(m,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1_tu(src2,src,shift,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+    v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vnsrl_wx_u8m1_m(m,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src1,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src2,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src3,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src4,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src5,shift,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src6,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src1,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src2,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src3,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src4,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src5,shift,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src6,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint16mf4_t v = __riscv_vnsrl_wx_u16mf4_m(m,src,shift,vl);
+      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
+      vuint8mf8_t v3 = __riscv_vnsrl_wx_u8mf8_m(m,v2,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint16m2_t v = __riscv_vnsrl_wx_u16m2_m(m,src,shift,vl);
+      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
+      vuint8m1_t v3 = __riscv_vnsrl_wx_u8m1_m(m,v2,shift,vl);
+      vuint8m1_t v4 = __riscv_vnsrl_wx_u8m1_tumu(m,v3,v2,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
+    }
+}
+
+void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    /* Only allow vncvt SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m1_t src = __riscv_vle16_v_u16m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vuint8mf2_t v = __riscv_vnsrl_wx_u8mf2(src,shift,vl);
+    /* Only allow vncvt SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse8_v_u8mf2 (out,v,vl);
+}
+
+void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,shift,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vadd_vv_u8m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-5.c
new file mode 100644
index 00000000000..26675bcc87c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-5.c
@@ -0,0 +1,319 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,31,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8_tu(src2,src,31,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,31,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,31,vl);
+      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8(src,31,vl);
+    v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+    v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vnsrl_wx_u8mf8_m(m,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,31,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1_tu(src2,src,31,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,31,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,31,vl);
+      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,31,vl);
+    v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+    v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vnsrl_wx_u8m1_m(m,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src1,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src2,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src3,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src4,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src5,31,vl);
+      v = __riscv_vnsrl_wx_u8mf8_tu(v,src6,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src1,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src2,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src3,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src4,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src5,31,vl);
+      v = __riscv_vnsrl_wx_u8m1_tu(v,src6,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint16mf4_t v = __riscv_vnsrl_wx_u16mf4_m(m,src,31,vl);
+      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
+      vuint8mf8_t v3 = __riscv_vnsrl_wx_u8mf8_m(m,v2,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint16m2_t v = __riscv_vnsrl_wx_u16m2_m(m,src,31,vl);
+      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
+      vuint8m1_t v3 = __riscv_vnsrl_wx_u8m1_m(m,v2,31,vl);
+      vuint8m1_t v4 = __riscv_vnsrl_wx_u8m1_tumu(m,v3,v2,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
+    }
+}
+
+void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,31,vl);
+    /* Only allow vnsrl SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m1_t src = __riscv_vle16_v_u16m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vuint8mf2_t v = __riscv_vnsrl_wx_u8mf2(src,31,vl);
+    /* Only allow vnsrl SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse8_v_u8mf2 (out,v,vl);
+}
+
+void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnsrl_wx_u8m1(src,31,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vadd_vv_u8m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-6.c
new file mode 100644
index 00000000000..fd7ffd3c97b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-6.c
@@ -0,0 +1,369 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f0 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f1 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f4 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8_tumu(m,v,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f5 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8_m(m,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f6 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8_m(m,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8 (src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8 (src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+    __riscv_vse16_v_u16mf4 (out+200,src,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+    __riscv_vse16_v_u16mf4 (out+200,src,vl);
+}
+
+void f10 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+    v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+    v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+    v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 (out+100,src,vl);
+}
+
+void f11 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+    __riscv_vse16_v_u16mf4 (out+200,src,vl);
+}
+
+void f12 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+    v2 = __riscv_vnclipu_wv_u8mf8 (src,v2,vl);
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    __riscv_vse8_v_u8mf8 (out+100,v,vl);
+}
+
+void f13 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,vl,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+      vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v2,vl);
+    }
+}
+
+void f15 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
+      vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v2,vl);
+    }
+}
+
+void f16 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vncvt_x_x_w_u8mf8(src,vl);
+    vuint8mf8_t v3 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse8_v_u8mf8 (out + 100,v3,vl);
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 (base + 200*i, vl);
+      vuint8mf8_t v = __riscv_vnclipu_wv_u8mf8(src,src2,vl);
+      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+      asm volatile ("":::"memory");
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 ((out + 200*i),src,vl);
+      __riscv_vse8_v_u8mf8 ((out + 300*i),src2,vl);
+    }
+}
+
+void f18 (void *base,void *out,size_t vl, int n)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f19 (void *base,void *out,size_t vl, int n)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+      v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+      v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+      v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 200*i,v2,vl);
+    }
+}
+
+void f20 (void *base,void *out,size_t vl, int n)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+      v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 200*i,v2,vl);
+    }
+}
+
+void f21 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000 * i), vl);
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 (out + 200*i,src,vl);
+    }
+}
+
+void f22 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((uint8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src1,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src2,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src3,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src4,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src5,v,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src6,v,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f23 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((uint8_t *)(base + 1000 * i), vl);
+      vuint8mf8_t v2 = __riscv_vle8_v_u8mf8 ((uint8_t *)(base + 2000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src1,v2,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src2,v2,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src3,v2,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src4,v2,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src5,v2,vl);
+      v = __riscv_vnclipu_wv_u8mf8_tu(v,src6,v2,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f24 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
+    vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200, vl);
+    vuint8mf8_t src3 = __riscv_vle8_v_u8mf8 (base + 300, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint16mf4_t v = __riscv_vnclipu_wv_u16mf4_m(m,src,src2,vl);
+      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
+      vuint8mf8_t v3 = __riscv_vnclipu_wv_u8mf8_m(m,v2,src3,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f25 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 100, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+}
+
+void f26 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 100, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+    __riscv_vse8_v_u8mf8 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-7.c
new file mode 100644
index 00000000000..70ba7d7459e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-7.c
@@ -0,0 +1,392 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f0 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f1 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f4 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1_tumu(m,v,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f5 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1_m(m,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f6 (void *base,void *out,size_t vl, size_t shift)
+{
+    vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1_m(m,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8m1 (src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8m1 (src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+    __riscv_vse16_v_u16m2 (out+200,src,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+    __riscv_vse16_v_u16m2 (out+200,src,vl);
+}
+
+void f10 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+    v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+    v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+    v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 (out+100,src,vl);
+}
+
+void f11 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
+    v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+    __riscv_vse16_v_u16m2 (out+200,src,vl);
+}
+
+void f12 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+    v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+    v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+    v2 = __riscv_vnclipu_wv_u8m1 (src,v2,vl);
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    __riscv_vse8_v_u8m1 (out+100,v,vl);
+}
+
+void f13 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,vl,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+      vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v2,vl);
+    }
+}
+
+void f15 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
+      vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v2,vl);
+    }
+}
+
+void f16 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vncvt_x_x_w_u8m1(src,vl);
+    vuint8m1_t v3 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse8_v_u8m1 (out + 100,v3,vl);
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint8m1_t src2 = __riscv_vle8_v_u8m1 (base + 200*i, vl);
+      vuint8m1_t v = __riscv_vnclipu_wv_u8m1(src,src2,vl);
+      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+      asm volatile ("":::"memory");
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 ((out + 200*i),src,vl);
+      __riscv_vse8_v_u8m1 ((out + 300*i),src2,vl);
+    }
+}
+
+void f18 (void *base,void *out,size_t vl, int n)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f19 (void *base,void *out,size_t vl, int n)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+      v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+      v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+      v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 200*i,v2,vl);
+    }
+}
+
+void f20 (void *base,void *out,size_t vl, int n)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+      v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse8_v_u8m1 (out + 200*i,v2,vl);
+    }
+}
+
+void f21 (void *base,void *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000 * i), vl);
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 (out + 200*i,src,vl);
+    }
+}
+
+void f22 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((uint8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src1,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src2,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src3,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src4,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src5,v,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src6,v,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f23 (uint16_t *base,uint8_t *out,size_t vl, int n)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((uint8_t *)(base + 1000 * i), vl);
+      vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((uint8_t *)(base + 2000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src1,v2,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src2,v2,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src3,v2,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src4,v2,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src5,v2,vl);
+      v = __riscv_vnclipu_wv_u8m1_tu(v,src6,v2,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f24 (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
+    vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200, vl);
+    vuint8m1_t src3 = __riscv_vle8_v_u8m1 (base + 300, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint16m2_t v = __riscv_vnclipu_wv_u16m2_m(m,src,src2,vl);
+      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
+      vuint8m1_t v3 = __riscv_vnclipu_wv_u8m1_m(m,v2,src3,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
+    }
+}
+
+void f25 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
+    vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v31");
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v31");
+}
+
+void f26 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28");
+    vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v30", "v31");
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v30", "v31");
+}
+
+void f27 (void *base,void *out,size_t vl, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28");
+    vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v29", "v28", "v30", "v31");
+    __riscv_vse8_v_u8m1 (out,v2,vl);
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v29", "v28", "v30", "v31");
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-8.c
new file mode 100644
index 00000000000..ec8a5565bd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-8.c
@@ -0,0 +1,319 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,shift,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
+    v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+    v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,shift,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+    v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,shift,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src1,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src2,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src3,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src4,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src5,shift,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src6,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,shift,vl);
+      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
+      vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,shift,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,shift,vl);
+      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
+      vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,shift,vl);
+      vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,shift,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
+    }
+}
+
+void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    /* Only allow vnclipu SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m1_t src = __riscv_vle16_v_u16m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,shift,vl);
+    /* Only allow vnclipu SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse8_v_u8mf2 (out,v,vl);
+}
+
+void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vadd_vv_u8m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-9.c
new file mode 100644
index 00000000000..ff34749bdb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/narrow_constraint-9.c
@@ -0,0 +1,319 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,31,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
+      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
+    vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
+    v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+    v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+    vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
+    __riscv_vse8_v_u8mf8 (out,v,vl);
+    __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,31,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
+      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+      __riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
+    }
+}
+
+void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
+    v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+    v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+    vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
+    __riscv_vse8_v_u8m1 (out,v,vl);
+    __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
+    for (int i = 0; i < n; i++){
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
+void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
+      vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
+      vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
+      vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
+      vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
+      vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
+      vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,31,vl);
+      v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
+    }
+}
+
+void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
+{
+    for (int i = 0; i < n; i++){
+      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
+      vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
+      vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
+      vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
+      vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
+      vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
+      vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src1,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src2,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src3,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src4,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src5,31,vl);
+      v = __riscv_vnclipu_wx_u8m1_tu(v,src6,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
+    }
+}
+
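+/* Masked narrowing: the mask, the loop-invariant wide source and a
+   tail-undisturbed intermediate load are live across both vnclipu calls.  */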
+void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+      vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,31,vl);
+      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
+      vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,31,vl);
+      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
+    }
+}
+
+void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
+{
+    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,31,vl);
+      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
+      vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,31,vl);
+      vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,31,vl);
+      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
+    }
+}
+
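+/* The asm clobbers below leave only v30/v31 free, so the LMUL=2 source
+   must live in v30-v31 and the narrowed LMUL=1 result must overlap the
+   lowest-numbered register of the source group (v30).  */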
+void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
+    /* Only allow vnclipu SRC == DEST v30.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v29", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
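+/* Only v31 is left free, so the LMUL=1 source and the fractional-LMUL
+   result must share v31; the overlap rule allows this because the
+   destination EEW is smaller and the overlap is in the lowest-numbered part.  */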
+void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m1_t src = __riscv_vle16_v_u16m1 (base, vl);
+    /* Only allow load v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,31,vl);
+    /* Only allow vnclipu SRC == DEST v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29", "v30");
+
+    __riscv_vse8_v_u8mf2 (out,v,vl);
+}
+
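+/* As in f20, but later clobbers leave only v29 free, so the narrowed
+   result and the following vadd must both be allocated to v29 without
+   an extra vmv (see the scan-assembler-not check below).  */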
+void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
+{
+    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
+    /* Only allow load v30,v31.  */
+    asm volatile("#" ::
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
+		   "v26", "v27", "v28", "v29");
+
+    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+    v = __riscv_vadd_vv_u8m1 (v,v,vl);
+    /* Only allow v29.  */
+    asm volatile("#" ::                                                        
+		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
+		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
+		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
+		   "v26", "v27", "v28", "v30", "v31");
+
+    __riscv_vse8_v_u8m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
