From: juzhe.zhong@rivai.ai
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com, palmer@dabbelt.com, juzhe.zhong@rivai.ai
Subject: [PATCH 07/21] Add register spilling support
Date: Tue, 31 May 2022 16:49:58 +0800
Message-ID: <20220531085012.269719-8-juzhe.zhong@rivai.ai>
In-Reply-To: <20220531085012.269719-1-juzhe.zhong@rivai.ai>
From: zhongjuzhe <juzhe.zhong@rivai.ai>
gcc/ChangeLog:
* config/riscv/riscv-protos.h (rvv_expand_const_vector): Declare.
(rvv_expand_const_mask): Declare.
(rvv_const_vec_all_same_in_range_p): Declare.
* config/riscv/riscv-vector.cc (classify_vtype_field): Move earlier in file.
(get_lmulx8): Likewise.
(force_reg_for_over_uimm): Likewise.
(gen_vlx2): Likewise.
(emit_int64_to_vector_32bit): Likewise.
(rvv_expand_const_vector): New function.
(rvv_expand_const_mask): New function.
(rvv_const_vec_all_same_in_range_p): New function.
* config/riscv/riscv.cc (riscv_const_insns): Add const vector cost.
* config/riscv/vector-iterators.md (sew): New mode attribute.
(lmul): New mode attribute.
* config/riscv/vector.md (mov<mode>): New pattern.
(*mov<mode>): New pattern.
(*mov<mode>_reg): New pattern.
(@vmclr<mode>_m): New pattern.
(@vmset<mode>_m): New pattern.
---
gcc/config/riscv/riscv-protos.h | 3 +
gcc/config/riscv/riscv-vector.cc | 349 ++++++++++++++++-----------
gcc/config/riscv/riscv.cc | 67 ++++-
gcc/config/riscv/vector-iterators.md | 24 ++
gcc/config/riscv/vector.md | 201 +++++++++++++++
5 files changed, 502 insertions(+), 142 deletions(-)
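
A codegen sketch for review (illustrative only: the exact registers, the
vsetvli scratch, and the addressing come from register allocation, not
from this patch): full vector modes spill with the whole-register
patterns, partial vector modes with length-controlled vle/vse, and mask
modes with vlm/vsm.

    # full vector mode (VFULL), LMUL=1, SEW=32
    vs1r.v     v1,(sp)         # spill
    vl1re32.v  v1,(sp)         # reload

    # partial vector mode (VPARTIAL); vsetvli needs a scratch GPR and
    # clobbers VL/VTYPE, matching the clobbers in the new patterns
    vsetvli    t0,x0,e32,m1
    vse32.v    v2,(sp)         # spill
    vle32.v    v2,(sp)         # reload

    # mask mode (VB)
    vsm.v      v0,(sp)         # spill
    vlm.v      v0,(sp)         # reload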
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9a7e120854a..618eb746eaa 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -128,6 +128,9 @@ extern int rvv_regsize (machine_mode);
extern rtx rvv_gen_policy (unsigned int rvv_policy = 0);
extern opt_machine_mode rvv_get_mask_mode (machine_mode);
extern machine_mode rvv_translate_attr_mode (rtx_insn *);
+extern bool rvv_expand_const_vector (rtx, rtx);
+extern bool rvv_expand_const_mask (rtx, rtx);
+extern bool rvv_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
+					       HOST_WIDE_INT);
extern void
emit_op5 (
unsigned int unspec,
diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
index 426490945dd..4b2fe2a8d11 100644
--- a/gcc/config/riscv/riscv-vector.cc
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -71,7 +71,165 @@
#include "target-def.h"
#include <string.h>
-/* Helper functions for RVV */
+
+/* Internal helper functions for RVV.  */
+
+/* Return the vtype field for a specific machine mode. */
+static unsigned int
+classify_vtype_field (machine_mode mode)
+{
+ unsigned int vlmul = rvv_classify_vlmul_field (mode);
+ unsigned int vsew = rvv_classify_vsew_field (mode);
+ unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
+ return vtype;
+}
+
+/* Return LMUL * 8, so that fractional LMUL values stay integral.
+   The return value maps to the real LMUL as follows:
+     1  => LMUL 1/8
+     2  => LMUL 1/4
+     4  => LMUL 1/2
+     8  => LMUL 1
+     16 => LMUL 2
+     32 => LMUL 4
+     64 => LMUL 8  */
+static unsigned int
+get_lmulx8 (machine_mode mode)
+{
+ unsigned int vlmul = rvv_classify_vlmul_field (mode);
+ switch (vlmul)
+ {
+ case VLMUL_FIELD_000:
+ return 8;
+ case VLMUL_FIELD_001:
+ return 16;
+ case VLMUL_FIELD_010:
+ return 32;
+ case VLMUL_FIELD_011:
+ return 64;
+ case VLMUL_FIELD_101:
+ return 1;
+ case VLMUL_FIELD_110:
+ return 2;
+ case VLMUL_FIELD_111:
+ return 4;
+ default:
+ gcc_unreachable ();
+ }
+}
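+
+/* For example (an illustrative note, not used by the code): VLMUL_FIELD_111
+   encodes LMUL = 1/2, so get_lmulx8 returns 4 and callers can evaluate
+     VLMAX = (VLEN / SEW * get_lmulx8 (mode)) / 8
+   purely in integer arithmetic, as gen_vlx2 does below.  */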
+
+/* Helper function for handling SEW=64 on an RV32 system.  */
+static rtx
+force_reg_for_over_uimm (rtx vl)
+{
+ if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+ {
+ return force_reg (Pmode, vl);
+ }
+
+ return vl;
+}
+
+/* Helper function for handling SEW=64 on an RV32 system.  */
+static rtx
+gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+{
+ if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+ {
+ return avl;
+ }
+ rtx i32vl = NULL_RTX;
+ if (CONST_SCALAR_INT_P (avl))
+ {
+ unsigned int vlen_max;
+ unsigned int vlen_min;
+ if (riscv_vector_chunks.is_constant ())
+ {
+ vlen_max = riscv_vector_chunks.to_constant () * 64;
+ vlen_min = vlen_max;
+ }
+ else
+ {
+ /* TODO: vlen_max will be supported as 65536 in the future. */
+ vlen_max = RVV_4096;
+ vlen_min = RVV_128;
+ }
+ unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+ unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+
+ unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
+ if (avl_int <= min_vlmax)
+ {
+ i32vl = gen_int_mode (2 * avl_int, SImode);
+ }
+ else if (avl_int >= 2 * max_vlmax)
+ {
+ // Just set i32vl to VLMAX in this situation
+ i32vl = gen_reg_rtx (Pmode);
+ unsigned int vtype = classify_vtype_field (VSImode);
+ emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), GEN_INT (vtype)));
+ }
+ else
+ {
+ // For an AVL in (MinVLMAX, 2 * MaxVLMAX), the working vl depends on
+ // the hardware implementation, so let the vsetvli sequence below
+ // compute it.
+ }
+ }
+ if (!i32vl)
+ {
+ // Use a vsetvli instruction to get the actual vl, which depends on
+ // the hardware implementation.
+ rtx i64vl = gen_reg_rtx (Pmode);
+ unsigned int vtype = classify_vtype_field (Vmode);
+ emit_insn (gen_vsetvl (Pmode, i64vl, force_reg (Pmode, avl), GEN_INT (vtype)));
+ // Scale by 2 for the 32-bit element width.
+ i32vl = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (i32vl, gen_rtx_ASHIFT (Pmode, i64vl, const1_rtx)));
+ }
+
+ return force_reg_for_over_uimm (i32vl);
+}
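+
+/* Illustrative note: one SEW=64 operation on AVL elements is emulated on
+   RV32 as an SEW=32 operation on 2 * AVL elements, which is why gen_vlx2
+   above either doubles a small constant AVL directly or derives the
+   doubled vl from a vsetvli result.  */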
+
+/* Helper function for handling SEW=64 on an RV32 system.  */
+static void
+emit_int64_to_vector_32bit (machine_mode Vmode, machine_mode VSImode,
+ machine_mode VMSImode, rtx vd, rtx s, rtx vl,
+ rtx tail)
+{
+ if (CONST_SCALAR_INT_P (s))
+ {
+ s = force_reg (DImode, s);
+ }
+
+ rtx hi = gen_highpart (SImode, s);
+ rtx lo = gen_lowpart (SImode, s);
+
+ rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
+
+ /* Make a "0101..." mask vector.  */
+ rtx vm1 = gen_reg_rtx (VNx4SImode);
+ emit_insn (gen_vmv_v_x_internal (VNx4SImode, vm1, const0_rtx,
+ force_reg (SImode, GEN_INT (0x55555555)),
+ zero, rvv_gen_policy ()));
+ rtx vm2 = gen_reg_rtx (VMSImode);
+ emit_insn (gen_rtx_SET (vm2, gen_lowpart (VMSImode, vm1)));
+
+ rtx vlx2 = gen_vlx2 (vl, Vmode, VSImode);
+ rtx v2 = gen_reg_rtx (VSImode);
+ emit_insn (gen_vmv_v_x_internal (VSImode, v2, const0_rtx, hi, vlx2,
+ rvv_gen_policy ()));
+
+ rtx vd_si = gen_reg_rtx (VSImode);
+ emit_insn (gen_vmerge_vxm_internal (VSImode, vd_si, vm2, const0_rtx, v2, lo,
+ vlx2, tail));
+
+ emit_insn (gen_rtx_SET (vd, gen_lowpart (Vmode, vd_si)));
+}
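+
+/* Worked example (illustrative): broadcasting the 64-bit scalar
+   0xAAAABBBBCCCCDDDD first duplicates the high word HI = 0xAAAABBBB into
+   every 32-bit lane, then vmerge with the "0101..." mask writes the low
+   word LO = 0xCCCCDDDD into the even (little-endian low) lanes:
+     { LO, HI, LO, HI, ... }
+   which reads back as { 0xAAAABBBBCCCCDDDD, ... } at SEW=64.  */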
+
+/* Global RVV implementation.  */
/* Return true if it is a RVV mask mode. */
bool
@@ -370,159 +528,68 @@ rvv_translate_attr_mode (rtx_insn *insn)
return VOIDmode;
}
-/* Return the vtype field for a specific machine mode. */
-static unsigned int
-classify_vtype_field (machine_mode mode)
-{
- unsigned int vlmul = rvv_classify_vlmul_field (mode);
- unsigned int vsew = rvv_classify_vsew_field (mode);
- unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
- return vtype;
-}
-
-/* lmul = real_lmul * 8
- guarantee integer
- e.g.
- 1 => 1/8
- 2 => 1/4
- 4 => 1/2
- 8 => 1
- 16 => 2
- 32 => 4
- 64 => 8
- */
-static unsigned int
-get_lmulx8 (machine_mode mode)
-{
- unsigned int vlmul = rvv_classify_vlmul_field (mode);
- switch (vlmul)
- {
- case VLMUL_FIELD_000:
- return 8;
- case VLMUL_FIELD_001:
- return 16;
- case VLMUL_FIELD_010:
- return 32;
- case VLMUL_FIELD_011:
- return 64;
- case VLMUL_FIELD_101:
- return 1;
- case VLMUL_FIELD_110:
- return 2;
- case VLMUL_FIELD_111:
- return 4;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Helper functions for handling sew=64 on RV32 system. */
-static rtx
-force_reg_for_over_uimm (rtx vl)
+/* Expand a const vector move using RVV instructions.  Return true if
+   successful.  */
+bool
+rvv_expand_const_vector (rtx target, rtx src)
{
- if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+ rtx x;
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+
+ /* Case 1: Handle const duplicate vector. */
+ if (const_vec_duplicate_p (src, &x))
{
- return force_reg (Pmode, vl);
+ if (FLOAT_MODE_P (mode))
+ x = force_reg (inner_mode, x);
+ emit_insn (gen_vec_duplicate (mode, target, x));
+ return true;
}
-
- return vl;
+ /* TODO: For intrinsic support, only const duplicate vectors need to be
+ handled here.  More cases will be supported for auto-vectorization.  */
+ return false;
}
-/* Helper functions for handling sew=64 on RV32 system. */
-static rtx
-gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+/* Expand a const mask move using RVV instructions.  Return true if
+   successful.  */
+bool
+rvv_expand_const_mask (rtx target, rtx src)
{
- if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+ rtx ele;
+ rtx zero = gen_rtx_REG (Pmode, X0_REGNUM);
+ machine_mode mode = GET_MODE (target);
+ if (const_vec_duplicate_p (src, &ele))
{
- return avl;
- }
- rtx i32vl = NULL_RTX;
- if (CONST_SCALAR_INT_P (avl))
- {
- unsigned int vlen_max;
- unsigned int vlen_min;
- if (riscv_vector_chunks.is_constant ())
- {
- vlen_max = riscv_vector_chunks.to_constant () * 64;
- vlen_min = vlen_max;
- }
- else
- {
- /* TODO: vlen_max will be supported as 65536 in the future. */
- vlen_max = RVV_4096;
- vlen_min = RVV_128;
- }
- unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
- unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
-
- unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
- if (avl_int <= min_vlmax)
- {
- i32vl = gen_int_mode (2 * avl_int, SImode);
- }
- else if (avl_int >= 2 * max_vlmax)
- {
- // Just set i32vl to VLMAX in this situation
- i32vl = gen_reg_rtx (Pmode);
- unsigned int vtype = classify_vtype_field (VSImode);
- emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), GEN_INT (vtype)));
- }
- else
+ gcc_assert (CONST_SCALAR_INT_P (ele));
+ switch (INTVAL (ele))
{
- // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
- // is related to the hardware implementation.
- // So let the following code handle
+ case 0:
+ emit_insn (gen_vmclr_m (mode, target, zero,
+ rvv_gen_policy ()));
+ break;
+ case 1:
+ emit_insn (gen_vmset_m (mode, target, zero,
+ rvv_gen_policy ()));
+ break;
+ default:
+ gcc_unreachable ();
}
+ return true;
}
- if (!i32vl)
- {
- // Using vsetvli instruction to get actually used length which related to
- // the hardware implementation
- rtx i64vl = gen_reg_rtx (Pmode);
- unsigned int vtype = classify_vtype_field (Vmode);
- emit_insn (gen_vsetvl (Pmode, i64vl, force_reg (Pmode, avl), GEN_INT (vtype)));
- // scale 2 for 32-bit length
- i32vl = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (i32vl, gen_rtx_ASHIFT (Pmode, i64vl, const1_rtx)));
- }
-
- return force_reg_for_over_uimm (i32vl);
+
+ /* TODO: For intrinsic support, only const all-zeros and const all-ones
+ masks need to be handled here.  More cases will be supported for
+ auto-vectorization.  */
+ return false;
}
-/* Helper functions for handling sew=64 on RV32 system. */
-static void
-emit_int64_to_vector_32bit (machine_mode Vmode, machine_mode VSImode,
- machine_mode VMSImode, rtx vd, rtx s, rtx vl,
- rtx tail)
-{
- if (CONST_SCALAR_INT_P (s))
- {
- s = force_reg (DImode, s);
- }
-
- rtx hi = gen_highpart (SImode, s);
- rtx lo = gen_lowpart (SImode, s);
-
- rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
-
- /* make a "0101..." mask vector */
- rtx vm1 = gen_reg_rtx (VNx4SImode);
- emit_insn (gen_vmv_v_x_internal (VNx4SImode, vm1, const0_rtx,
- force_reg (SImode, GEN_INT (0x55555555)),
- zero, rvv_gen_policy ()));
- rtx vm2 = gen_reg_rtx (VMSImode);
- emit_insn (gen_rtx_SET (vm2, gen_lowpart (VMSImode, vm1)));
+/* Return true if X is a const_vector with all duplicate elements in
+   the range [MINVAL, MAXVAL].  */
- rtx vlx2 = gen_vlx2 (vl, Vmode, VSImode);
- rtx v2 = gen_reg_rtx (VSImode);
- emit_insn (gen_vmv_v_x_internal (VSImode, v2, const0_rtx, hi, vlx2,
- rvv_gen_policy ()));
-
- rtx vd_si = gen_reg_rtx (VSImode);
- emit_insn (gen_vmerge_vxm_internal (VSImode, vd_si, vm2, const0_rtx, v2, lo,
- vlx2, tail));
-
- emit_insn (gen_rtx_SET (vd, gen_lowpart (Vmode, vd_si)));
+bool
+rvv_const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
+ HOST_WIDE_INT maxval)
+{
+ rtx elt;
+ return (const_vec_duplicate_p (x, &elt)
+	 && CONST_INT_P (elt)
+	 && IN_RANGE (INTVAL (elt), minval, maxval));
}
/* Helper functions for handling sew=64 on RV32 system. */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8c78e726a19..fc27dc957dc 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1097,9 +1097,74 @@ riscv_const_insns (rtx x)
}
case CONST_DOUBLE:
- case CONST_VECTOR:
/* We can use x0 to load floating-point zero. */
return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+ case CONST_VECTOR:
+ {
+ machine_mode mode = GET_MODE (x);
+ /* For non-RVV modes, use the default handling.  */
+ if (!rvv_mode_p (mode))
+ return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+ unsigned int factor = 0;
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL)
+ {
+ /* In RVV, vmclr.m/vmset.m generate the all-0s/all-1s
+ bool vectors; any other bool vector requires a
+ load instruction.  */
+ if (x == CONST0_RTX (GET_MODE (x))
+ || x == CONSTM1_RTX (GET_MODE (x)))
+ return 1;
+ else
+ return 0;
+ }
+ else if (FLOAT_MODE_P (GET_MODE (x)))
+ {
+ /* In RVV, a floating-point constant must first be
+ loaded into a floating-point register and then
+ duplicated.  */
+ factor = 3;
+ }
+ else
+ {
+ rtx elt;
+ if (!const_vec_duplicate_p (x, &elt))
+ {
+ rtx base, step;
+ if (const_vec_series_p (x, &base, &step))
+ {
+ /* For the const series {0, 1, 2, ...}, a single
+ vid.v instruction generates the vector.  */
+ if (INTVAL (step) == 1
+ && INTVAL (base) == 0)
+ factor = 1;
+ /* We need vid.v + li + vmul.vx instructions.  */
+ else if (INTVAL (base) == 0)
+ factor = 2 + riscv_integer_cost (INTVAL (step));
+ /* We need vid.v + (li + vadd.vx)/vadd.vi instructions.  */
+ else if (INTVAL (step) == 1)
+ factor = IN_RANGE (INTVAL (base), -16, 15) ? 2
+ : 2 + riscv_integer_cost (INTVAL (base));
+ /* We need vid.v + (li + vadd.vx)/vadd.vi
+ + li + vmul.vx instructions.  */
+ else
+ factor = IN_RANGE (INTVAL (base), -16, 15) ? 4
+ : 4 + riscv_integer_cost (INTVAL (base));
+ }
+ else
+ factor = 0;
+ }
+ else
+ {
+ /* Use vmv.v.i. */
+ if (rvv_const_vec_all_same_in_range_p (x, -16, 15))
+ factor = 1;
+ /* Use li + vmv.v.x. */
+ else
+ factor = 1 + riscv_integer_cost (INTVAL (elt));
+ }
+ }
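+	/* Example (illustrative): the series {3, 4, 5, ...} (base 3, step 1)
+	   expands to vid.v + vadd.vi, so factor is 2; {0, 2, 4, ...} (base 0,
+	   step 2) expands to vid.v + li + vmul.vx, so factor is
+	   2 + riscv_integer_cost (2) = 3.  */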
+	return factor;
+      }
case CONST:
/* See if we can refer to X directly. */
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 9832d2adaa3..e01305ef3fc 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -140,6 +140,30 @@
(VNx2HI "vnx2di") (VNx4HI "vnx2di") (VNx8HI "vnx2di") (VNx16HI "vnx2di") (VNx32HI "vnx2di") (VNx64HI "vnx2di")
(VNx2SI "vnx2di") (VNx4SI "vnx2di") (VNx8SI "vnx2di") (VNx16SI "vnx2di") (VNx32SI "vnx2di")
(VNx2DI "vnx2di") (VNx4DI "vnx4di") (VNx8DI "vnx8di") (VNx16DI "vnx16di")])
+
+;; Map a vector mode to its SEW.
+(define_mode_attr sew [
+ (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8")
+ (VNx32QI "8") (VNx64QI "8") (VNx128QI "8") (VNx2HI "16")
+ (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16")
+ (VNx64HI "16") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32")
+ (VNx16SI "32") (VNx32SI "32") (VNx2DI "64") (VNx4DI "64")
+ (VNx8DI "64") (VNx16DI "64")
+ (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32")
+ (VNx32SF "32") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64")
+ (VNx16DF "64")])
+
+;; Map a vector mode to its LMUL.
+(define_mode_attr lmul [
+ (VNx2QI "1") (VNx4QI "1") (VNx8QI "1") (VNx16QI "1")
+ (VNx32QI "2") (VNx64QI "4") (VNx128QI "8") (VNx2HI "1")
+ (VNx4HI "1") (VNx8HI "1") (VNx16HI "2") (VNx32HI "4")
+ (VNx64HI "8") (VNx2SI "1") (VNx4SI "1") (VNx8SI "2")
+ (VNx16SI "4") (VNx32SI "8") (VNx2DI "1") (VNx4DI "2")
+ (VNx8DI "4") (VNx16DI "8")
+ (VNx2SF "1") (VNx4SF "1") (VNx8SF "2") (VNx16SF "4")
+ (VNx32SF "8") (VNx2DF "1") (VNx4DF "2") (VNx8DF "4")
+ (VNx16DF "8")])
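+
+;; For example, VNx16SI has <sew> = 32 and <lmul> = 4, so (assuming VNx16SI
+;; is a full-register mode in VFULL) the whole-register load in vector.md
+;; expands to "vl4re32.v".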
(define_int_iterator VMVOP [
UNSPEC_VMV
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 4a9c6769812..1731d969372 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -62,6 +62,179 @@
rvv_gen_policy ()));
DONE;
})
+
+;; =========================================================================
+;; == Vector spilling
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Move Operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - Full vector load/store/move
+;; - Partial vector load/store/move
+;; - All vector misaligned moves
+;; -------------------------------------------------------------------------
+
+;; Move pattern for full vector modes.
+(define_expand "mov<mode>"
+ [(set (match_operand:VFULL 0 "reg_or_mem_operand")
+ (match_operand:VFULL 1 "vector_move_operand"))]
+ "TARGET_VECTOR"
+{
+ /* Need to force register if mem <- !reg. */
+ if (MEM_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (GET_CODE (operands[1]) == CONST_VECTOR
+     && rvv_expand_const_vector (operands[0], operands[1]))
+ DONE;
+})
+
+;; Full vector load/store/move.
+(define_insn "*mov<mode>"
+ [(set (match_operand:VFULL 0 "reg_or_mem_operand" "=vr,m,vr")
+ (match_operand:VFULL 1 "reg_or_mem_operand" "m,vr,vr"))]
+ "TARGET_VECTOR"
+ "@
+ vl<lmul>re<sew>.v\t%0,%1
+ vs<lmul>r.v\t%1,%0
+ vmv<lmul>r.v\t%0,%1"
+ [(set_attr "type" "vload,vstore,vcopy")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "mov<mode>"
+ [(parallel [(set (match_operand:VPARTIAL 0 "reg_or_mem_operand")
+ (match_operand:VPARTIAL 1 "vector_move_operand"))
+ (clobber (scratch:SI))
+ (clobber (reg:SI VL_REGNUM))
+ (clobber (reg:SI VTYPE_REGNUM))])]
+ "TARGET_VECTOR"
+{
+ /* Need to force register if mem <- !reg. */
+ if (MEM_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (GET_CODE (operands[1]) == CONST_VECTOR
+     && rvv_expand_const_vector (operands[0], operands[1]))
+ DONE;
+})
+
+;; Partial vector load/store/move.
+(define_insn_and_split "*mov<mode>"
+ [(set (match_operand:VPARTIAL 0 "reg_or_mem_operand" "=vr,m,vr")
+ (match_operand:VPARTIAL 1 "reg_or_mem_operand" "m,vr,vr"))
+ (clobber (match_scratch:SI 2 "=&r,&r,X"))
+ (clobber (reg:SI VL_REGNUM))
+ (clobber (reg:SI VTYPE_REGNUM))]
+ "TARGET_VECTOR"
+ "@
+ vle<sew>.v\t%0,%1
+ vse<sew>.v\t%1,%0
+ #"
+ "&& (!reload_completed || (REG_P (operands[0])
+ && REG_P (operands[1])))"
+ [(const_int 0)]
+ {
+ /* Need to force register if mem <- !reg. */
+ if (MEM_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (MEM_P (operands[0]))
+ {
+ emit_insn (gen_vse (<MODE>mode, const0_rtx, XEXP (operands[0], 0),
+ operands[1], gen_rtx_REG (Pmode, X0_REGNUM),
+ rvv_gen_policy ()));
+ DONE;
+ }
+ if (MEM_P (operands[1]))
+ {
+ emit_insn (gen_vle (<MODE>mode, operands[0], const0_rtx, const0_rtx,
+ XEXP (operands[1], 0), gen_rtx_REG (Pmode, X0_REGNUM),
+ rvv_gen_policy ()));
+ DONE;
+ }
+
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ DONE;
+ }
+ [(set_attr "type" "vle,vse,vcopy")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*mov<mode>_reg"
+ [(set (match_operand:VPARTIAL 0 "register_operand" "=vr")
+ (match_operand:VPARTIAL 1 "register_operand" "vr"))]
+ "TARGET_VECTOR"
+ "vmv1r.v\t%0,%1"
+ [(set_attr "type" "vcopy")
+ (set_attr "mode" "<MODE>")])
+
+;; Move pattern for mask modes.
+(define_expand "mov<mode>"
+ [(parallel [(set (match_operand:VB 0 "reg_or_mem_operand")
+ (match_operand:VB 1 "vector_move_operand"))
+ (clobber (scratch:SI))
+ (clobber (reg:SI VL_REGNUM))
+ (clobber (reg:SI VTYPE_REGNUM))])]
+ "TARGET_VECTOR"
+{
+ /* Need to force register if mem <- !reg. */
+ if (MEM_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (GET_CODE (operands[1]) == CONST_VECTOR
+ && rvv_expand_const_mask (operands[0], operands[1]))
+ DONE;
+})
+
+;; Mask load/store/move.
+(define_insn_and_split "*mov<mode>"
+ [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr,m,vr")
+ (match_operand:VB 1 "reg_or_mem_operand" "m,vr,vr"))
+ (clobber (match_scratch:SI 2 "=&r,&r,X"))
+ (clobber (reg:SI VL_REGNUM))
+ (clobber (reg:SI VTYPE_REGNUM))]
+ "TARGET_VECTOR"
+ "@
+ vlm.v\t%0,%1
+ vsm.v\t%1,%0
+ #"
+ "&& (!reload_completed || (REG_P (operands[0])
+ && REG_P (operands[1])))"
+ [(const_int 0)]
+ {
+ /* Need to force register if mem <- !reg. */
+ if (MEM_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (MEM_P (operands[0]))
+ {
+ emit_insn (gen_vsm (<MODE>mode, XEXP (operands[0], 0), operands[1],
+ gen_rtx_REG (Pmode, X0_REGNUM),
+ rvv_gen_policy ()));
+ DONE;
+ }
+ if (MEM_P (operands[1]))
+ {
+ emit_insn (gen_vlm (<MODE>mode, operands[0], XEXP (operands[1], 0),
+ gen_rtx_REG (Pmode, X0_REGNUM),
+ rvv_gen_policy ()));
+ DONE;
+ }
+
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ DONE;
+ }
+ [(set_attr "type" "vle,vse,vcopy")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*mov<mode>_reg"
+ [(set (match_operand:VB 0 "register_operand" "=vr")
+ (match_operand:VB 1 "register_operand" "vr"))]
+ "TARGET_VECTOR"
+ "vmv1r.v\t%0,%1"
+ [(set_attr "type" "vcopy")
+ (set_attr "mode" "<MODE>")])
;; ===============================================================================
;; == Intrinsics
@@ -370,4 +543,32 @@
vmerge.vxm\t%0,%3,%4,%1
vmerge.vim\t%0,%3,%4,%1"
[(set_attr "type" "vmerge")
+ (set_attr "mode" "<MODE>")])
+
+;; vmclr.m vd -> vmxor.mm vd,vd,vd # Clear mask register
+(define_insn "@vmclr<mode>_m"
+ [(set (match_operand:VB 0 "register_operand" "=vr")
+ (unspec:VB
+ [(vec_duplicate:VB (const_int 0))
+ (match_operand 1 "p_reg_or_const_csr_operand" "rK")
+ (match_operand 2 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vmclr.m\t%0"
+ [(set_attr "type" "vmask")
+ (set_attr "mode" "<MODE>")])
+
+;; vmset.m vd -> vmxnor.mm vd,vd,vd # Set mask register
+(define_insn "@vmset<mode>_m"
+ [(set (match_operand:VB 0 "register_operand" "=vr")
+ (unspec:VB
+ [(vec_duplicate:VB (const_int 1))
+ (match_operand 1 "p_reg_or_const_csr_operand" "rK")
+ (match_operand 2 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vmset.m\t%0"
+ [(set_attr "type" "vmask")
(set_attr "mode" "<MODE>")])
\ No newline at end of file
--
2.36.1