public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r15-582] RISC-V: Implement vectorizable early exit with vcond_mask_len
@ 2024-05-16 13:41 Pan Li
0 siblings, 0 replies; only message in thread
From: Pan Li @ 2024-05-16 13:41 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:6c1de786e53a11150feb16ba990d0d6c6fd910db
commit r15-582-g6c1de786e53a11150feb16ba990d0d6c6fd910db
Author: Pan Li <pan2.li@intel.com>
Date: Thu May 16 10:02:40 2024 +0800
RISC-V: Implement vectorizable early exit with vcond_mask_len
After loop lens support was added for vectorizable early exit, we would like
to implement the feature for the RISC-V target. Given the example below:
unsigned vect_a[1923];
unsigned vect_b[1923];
unsigned test (unsigned limit, int n)
{
unsigned ret = 0;
for (int i = 0; i < n; i++)
{
vect_b[i] = limit + i;
if (vect_a[i] > limit)
{
ret = vect_b[i];
return ret;
}
vect_a[i] = limit;
}
return ret;
}
Before this patch:
...
.L8:
sw a3,0(a5)
addiw a0,a0,1
addi a4,a4,4
addi a5,a5,4
beq a1,a0,.L2
.L4:
sw a0,0(a4)
lw a2,0(a5)
bleu a2,a3,.L8
ret
After this patch:
...
.L5:
vsetvli a5,a3,e8,mf4,ta,ma
vmv1r.v v4,v2
vsetvli t4,zero,e32,m1,ta,ma
vmv.v.x v1,a5
vadd.vv v2,v2,v1
vsetvli zero,a5,e32,m1,ta,ma
vadd.vv v5,v4,v3
slli a6,a5,2
vle32.v v1,0(t1)
vmsltu.vv v1,v3,v1
vcpop.m t4,v1
beq t4,zero,.L4
vmv.x.s a4,v4
.L3:
...
The tests below passed for this patch:
1. The full RISC-V regression tests.
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_<mode>): New pattern of
vcond_mask_len for vector bool mode.
(cbranch<mode>4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount<VB:mode><P:mode>): Add VLS mode
to popcount pattern.
(@pred_popcount<VB_VLS:mode><P:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
Diff:
---
gcc/config/riscv/autovec-opt.md | 33 ++++++++++++
gcc/config/riscv/autovec.md | 61 ++++++++++++++++++++++
gcc/config/riscv/vector-iterators.md | 1 +
gcc/config/riscv/vector.md | 18 +++----
.../gcc.target/riscv/rvv/autovec/early-break-1.c | 34 ++++++++++++
.../gcc.target/riscv/rvv/autovec/early-break-2.c | 37 +++++++++++++
6 files changed, 175 insertions(+), 9 deletions(-)
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d8680..04f85d8e4553 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@
DONE;
}
[(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand")
+ (popcount:P
+ (unspec:VB_VLS [
+ (unspec:VB_VLS [
+ (match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "const_1_operand")
+ (match_operand:VB_VLS 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+ (match_operand 6 "autovec_length_operand")
+ (const_int 1)
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<VB_VLS:MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_nonvlmax_insn (
+ code_for_pred_popcount (<VB_VLS:MODE>mode, Pmode),
+ riscv_vector::CPOP_OP,
+ operands, operands[4]);
+ DONE;
+ }
+ [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075b..1ee3c8052fb4 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@
DONE;
}
)
+
+;; =========================================================================
+;; == Early break auto-vectorization patterns
+;; =========================================================================
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_<mode>"
+ [(set (match_operand:VB 0 "register_operand")
+ (unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+ GET_MODE_NUNITS (<MODE>mode)).require ();
+ rtx reg = gen_reg_rtx (mode);
+ riscv_vector::expand_vec_series (reg, const0_rtx, const1_rtx);
+ rtx dup_rtx = gen_rtx_VEC_DUPLICATE (mode, operands[4]);
+ insn_code icode = code_for_pred_cmp_scalar (mode);
+ rtx cmp = gen_rtx_fmt_ee (LTU, <MODE>mode, reg, dup_rtx);
+ rtx ops[] = {operands[0], operands[1], operands[1], cmp, reg, operands[4]};
+ emit_vlmax_insn (icode, riscv_vector::COMPARE_OP_MU, ops);
+ DONE;
+ }
+ [(set_attr "type" "vector")])
+
+;; cbranch
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "reg_or_0_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_VECTOR"
+ {
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ pred = expand_binop (<MODE>mode, xor_optab, operands[1],
+ operands[2], NULL_RTX, 0,
+ OPTAB_DIRECT);
+ rtx reg = gen_reg_rtx (Pmode);
+ rtx cpop_ops[] = {reg, pred};
+ emit_vlmax_insn (code_for_pred_popcount (<MODE>mode, Pmode),
+ riscv_vector::CPOP_OP, cpop_ops);
+ operands[1] = reg;
+ operands[2] = const0_rtx;
+ }
+)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index a24e1bf078fb..76c27035a735 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -102,6 +102,7 @@
UNSPEC_WREDUC_SUMU
UNSPEC_WREDUC_SUM_ORDERED
UNSPEC_WREDUC_SUM_UNORDERED
+ UNSPEC_SELECT_MASK
])
(define_c_enum "unspecv" [
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a7663..95451dc762b0 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -6121,21 +6121,21 @@
(set_attr "vl_op_idx" "4")
(set (attr "avl_type_idx") (const_int 5))])
-(define_insn "@pred_popcount<VB:mode><P:mode>"
- [(set (match_operand:P 0 "register_operand" "=r")
+(define_insn "@pred_popcount<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
(popcount:P
- (unspec:VB
- [(and:VB
- (match_operand:VB 1 "vector_mask_operand" "vmWc1")
- (match_operand:VB 2 "register_operand" " vr"))
- (match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ (unspec:VB_VLS
+ [(and:VB_VLS
+ (match_operand:VB_VLS 1 "vector_mask_operand" "vmWc1")
+ (match_operand:VB_VLS 2 "register_operand" " vr"))
+ (match_operand 3 "vector_length_operand" " rK")
+ (match_operand 4 "const_int_operand" " i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
"TARGET_VECTOR"
"vcpop.m\t%0,%2%p1"
[(set_attr "type" "vmpop")
- (set_attr "mode" "<VB:MODE>")])
+ (set_attr "mode" "<VB_VLS:MODE>")])
(define_insn "@pred_ffs<VB:mode><P:mode>"
[(set (match_operand:P 0 "register_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
new file mode 100644
index 000000000000..f70979e81f11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 803
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned x, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = x + i;
+
+ if (vect_a[i] > x)
+ break;
+
+ vect_a[i] = x;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
new file mode 100644
index 000000000000..d405783d2c43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1728
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned limit, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = limit + i;
+
+ if (vect_a[i] > limit)
+ {
+ ret = vect_b[i];
+ return ret;
+ }
+
+ vect_a[i] = limit;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-05-16 13:41 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-16 13:41 [gcc r15-582] RISC-V: Implement vectorizable early exit with vcond_mask_len Pan Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).