* [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
@ 2024-05-16 4:05 pan2.li
2024-05-16 4:05 ` [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len pan2.li
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: pan2.li @ 2024-05-16 4:05 UTC (permalink / raw)
To: gcc-patches
Cc: juzhe.zhong, kito.cheng, tamar.christina, richard.guenther,
Richard.Sandiford, Pan Li
From: Pan Li <pan2.li@intel.com>
This patch adds early break auto-vectorization support for targets which
use length for partial vectorization. Consider the following example:
unsigned vect_a[802];
unsigned vect_b[802];
void test (unsigned x, int n)
{
for (int i = 0; i < n; i++)
{
vect_b[i] = x + i;
if (vect_a[i] > x)
break;
vect_a[i] = x;
}
}
We use VCOND_MASK_LEN to simulate the generation of (mask && i < len + bias).
And then the IR of RVV looks like below:
...
_87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
_55 = (int) _87;
...
mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
if (vec_len_mask_72 != { 0, ... })
goto <bb 6>; [5.50%]
else
goto <bb 7>; [94.50%]
The tests below pass with this patch:
1. The riscv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.
gcc/ChangeLog:
* tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
handling for one or multiple stmt.
gcc/ChangeLog:
* tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
the loop len mask.
* tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
vect_gen_loop_len_mask for 1 or more stmt(s).
* tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
for vect_gen_loop_len_mask.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++
gcc/tree-vect-stmts.cc | 17 +++++++++++++++--
gcc/tree-vectorizer.h | 4 ++++
3 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 361aec06488..83c0544b6aa 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
return loop_len;
}
+/* Generate the tree for the loop len mask and return it. Given the lens,
+ nvectors, vectype, index and factor to gen the len mask as below.
+
+ tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+*/
+tree
+vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+ gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
+ unsigned int nvectors, tree vectype, tree stmt,
+ unsigned int index, unsigned int factor)
+{
+ tree all_one_mask = build_all_ones_cst (vectype);
+ tree all_zero_mask = build_zero_cst (vectype);
+ tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
+ factor);
+ tree bias = build_int_cst (intQI_type_node,
+ LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
+ tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
+ gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
+ all_one_mask, all_zero_mask, len,
+ bias);
+ gimple_call_set_lhs (call, len_mask);
+ gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
+
+ return len_mask;
+}
+
/* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF.
If FLAT is true, the loop we started with had unrealistically flat
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b8a71605f1b..672959501bb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
ncopies = vect_get_num_copies (loop_vinfo, vectype);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
/* Now build the new conditional. Pattern gimple_conds get dropped during
codegen so we must replace the original insn. */
@@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
{
if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
OPTIMIZE_FOR_SPEED))
- return false;
+ vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
-
return true;
}
@@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
stmts[i], &cond_gsi);
workset.quick_push (stmt_mask);
}
+ else if (len_loop_p)
+ for (unsigned i = 0; i < stmts.length (); i++)
+ {
+ tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
+ lens, ncopies, vectype,
+ stmts[i], i, 1);
+
+ workset.quick_push (len_mask);
+ }
else
workset.splice (stmts);
@@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
new_temp, &cond_gsi);
}
+ else if (len_loop_p)
+ new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
+ ncopies, vectype, new_temp, 0, 1);
}
gcc_assert (new_temp);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index db44d730b70..93bc30ef660 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
vec_loop_lens *, unsigned int, tree,
unsigned int, unsigned int);
+extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
+ gimple_stmt_iterator *, vec_loop_lens *,
+ unsigned int, tree, tree, unsigned int,
+ unsigned int);
extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len
2024-05-16 4:05 [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit pan2.li
@ 2024-05-16 4:05 ` pan2.li
2024-05-16 12:19 ` juzhe.zhong
2024-05-16 4:05 ` [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite pan2.li
2024-05-16 6:49 ` [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit Tamar Christina
2 siblings, 1 reply; 10+ messages in thread
From: pan2.li @ 2024-05-16 4:05 UTC (permalink / raw)
To: gcc-patches
Cc: juzhe.zhong, kito.cheng, tamar.christina, richard.guenther,
Richard.Sandiford, Pan Li
From: Pan Li <pan2.li@intel.com>
After adding loop len support to vectorizable early exit, we would like
to implement the feature for the RISC-V target. Given the example below:
unsigned vect_a[1923];
unsigned vect_b[1923];
unsigned test (unsigned limit, int n)
{
unsigned ret = 0;
for (int i = 0; i < n; i++)
{
vect_b[i] = limit + i;
if (vect_a[i] > limit)
{
ret = vect_b[i];
return ret;
}
vect_a[i] = limit;
}
return ret;
}
Before this patch:
...
.L8:
sw a3,0(a5)
addiw a0,a0,1
addi a4,a4,4
addi a5,a5,4
beq a1,a0,.L2
.L4:
sw a0,0(a4)
lw a2,0(a5)
bleu a2,a3,.L8
ret
After this patch:
...
.L5:
vsetvli a5,a3,e8,mf4,ta,ma
vmv1r.v v4,v2
vsetvli t4,zero,e32,m1,ta,ma
vmv.v.x v1,a5
vadd.vv v2,v2,v1
vsetvli zero,a5,e32,m1,ta,ma
vadd.vv v5,v4,v3
slli a6,a5,2
vle32.v v1,0(t1)
vmsltu.vv v1,v3,v1
vcpop.m t4,v1
beq t4,zero,.L4
vmv.x.s a4,v4
.L3:
...
The tests below pass with this patch:
1. The riscv full regression tests.
gcc/ChangeLog:
* config/riscv/autovec-opt.md
(*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_<mode>): New pattern
of vcond_mask_len for vector bool mode.
(cbranch<mode>4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec
UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount<VB:mode><P:mode>): Add
VLS mode to popcount pattern.
(@pred_popcount<VB_VLS:mode><P:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec-opt.md | 33 ++++++++++
gcc/config/riscv/autovec.md | 61 +++++++++++++++++++
gcc/config/riscv/vector-iterators.md | 1 +
gcc/config/riscv/vector.md | 18 +++---
.../riscv/rvv/autovec/early-break-1.c | 34 +++++++++++
.../riscv/rvv/autovec/early-break-2.c | 37 +++++++++++
6 files changed, 175 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d868..04f85d8e455 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@ (define_insn_and_split "*n<optab><mode>"
DONE;
}
[(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand")
+ (popcount:P
+ (unspec:VB_VLS [
+ (unspec:VB_VLS [
+ (match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "const_1_operand")
+ (match_operand:VB_VLS 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+ (match_operand 6 "autovec_length_operand")
+ (const_int 1)
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<VB_VLS:MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_nonvlmax_insn (
+ code_for_pred_popcount (<VB_VLS:MODE>mode, Pmode),
+ riscv_vector::CPOP_OP,
+ operands, operands[4]);
+ DONE;
+ }
+ [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075..1ee3c8052fb 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@ (define_expand "rawmemchr<ANYI:mode>"
DONE;
}
)
+
+;; =========================================================================
+;; == Early break auto-vectorization patterns
+;; =========================================================================
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_<mode>"
+ [(set (match_operand:VB 0 "register_operand")
+ (unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+ GET_MODE_NUNITS (<MODE>mode)).require ();
+ rtx reg = gen_reg_rtx (mode);
+ riscv_vector::expand_vec_series (reg, const0_rtx, const1_rtx);
+ rtx dup_rtx = gen_rtx_VEC_DUPLICATE (mode, operands[4]);
+ insn_code icode = code_for_pred_cmp_scalar (mode);
+ rtx cmp = gen_rtx_fmt_ee (LTU, <MODE>mode, reg, dup_rtx);
+ rtx ops[] = {operands[0], operands[1], operands[1], cmp, reg, operands[4]};
+ emit_vlmax_insn (icode, riscv_vector::COMPARE_OP_MU, ops);
+ DONE;
+ }
+ [(set_attr "type" "vector")])
+
+;; cbranch
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "reg_or_0_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_VECTOR"
+ {
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ pred = expand_binop (<MODE>mode, xor_optab, operands[1],
+ operands[2], NULL_RTX, 0,
+ OPTAB_DIRECT);
+ rtx reg = gen_reg_rtx (Pmode);
+ rtx cpop_ops[] = {reg, pred};
+ emit_vlmax_insn (code_for_pred_popcount (<MODE>mode, Pmode),
+ riscv_vector::CPOP_OP, cpop_ops);
+ operands[1] = reg;
+ operands[2] = const0_rtx;
+ }
+)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index a24e1bf078f..76c27035a73 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -102,6 +102,7 @@ (define_c_enum "unspec" [
UNSPEC_WREDUC_SUMU
UNSPEC_WREDUC_SUM_ORDERED
UNSPEC_WREDUC_SUM_UNORDERED
+ UNSPEC_SELECT_MASK
])
(define_c_enum "unspecv" [
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a766..95451dc762b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -6121,21 +6121,21 @@ (define_insn "@pred_not<mode>"
(set_attr "vl_op_idx" "4")
(set (attr "avl_type_idx") (const_int 5))])
-(define_insn "@pred_popcount<VB:mode><P:mode>"
- [(set (match_operand:P 0 "register_operand" "=r")
+(define_insn "@pred_popcount<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
(popcount:P
- (unspec:VB
- [(and:VB
- (match_operand:VB 1 "vector_mask_operand" "vmWc1")
- (match_operand:VB 2 "register_operand" " vr"))
- (match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ (unspec:VB_VLS
+ [(and:VB_VLS
+ (match_operand:VB_VLS 1 "vector_mask_operand" "vmWc1")
+ (match_operand:VB_VLS 2 "register_operand" " vr"))
+ (match_operand 3 "vector_length_operand" " rK")
+ (match_operand 4 "const_int_operand" " i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
"TARGET_VECTOR"
"vcpop.m\t%0,%2%p1"
[(set_attr "type" "vmpop")
- (set_attr "mode" "<VB:MODE>")])
+ (set_attr "mode" "<VB_VLS:MODE>")])
(define_insn "@pred_ffs<VB:mode><P:mode>"
[(set (match_operand:P 0 "register_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
new file mode 100644
index 00000000000..f70979e81f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 803
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned x, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = x + i;
+
+ if (vect_a[i] > x)
+ break;
+
+ vect_a[i] = x;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
new file mode 100644
index 00000000000..d405783d2c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1728
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned limit, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = limit + i;
+
+ if (vect_a[i] > limit)
+ {
+ ret = vect_b[i];
+ return ret;
+ }
+
+ vect_a[i] = limit;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite
2024-05-16 4:05 [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit pan2.li
2024-05-16 4:05 ` [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len pan2.li
@ 2024-05-16 4:05 ` pan2.li
2024-05-16 12:19 ` juzhe.zhong
2024-05-16 6:49 ` [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit Tamar Christina
2 siblings, 1 reply; 10+ messages in thread
From: pan2.li @ 2024-05-16 4:05 UTC (permalink / raw)
To: gcc-patches
Cc: juzhe.zhong, kito.cheng, tamar.christina, richard.guenther,
Richard.Sandiford, Pan Li
From: Pan Li <pan2.li@intel.com>
Now that vectorizable early exit is supported on RISC-V, we would like to
enable the gcc vect tests for vectorizable early exit.
The vect-early-break_124-pr114403.c test fails to vectorize for now,
because the __builtin_memcpy with 8 bytes fails to be folded into an
int64 assignment during ccp1. We will improve that first and mark
this as xfail for RISC-V.
The tests below pass with this patch:
1. The riscv full regression tests.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector, as otherwise
LOOP VECTORIZED is reported twice on RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 2 ++
gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
gcc/testsuite/lib/target-supports.exp | 2 ++
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98..2f80bf89e5e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
if (__builtin_memcmp (x, res, sizeof (x)) != 0)
abort ();
+
+#pragma GCC novector
for (int i = 0; i < 32; ++i)
if (flag[i] != 0 && flag[i] != 1)
abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb..101ae1e0eaa 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-require-effective-target vect_long_long } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } } */
#include "tree-vect.h"
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6f5d477b128..ec9baa4f32a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+ || [check_effective_target_riscv_v]
}}]
}
@@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+ || [check_effective_target_riscv_v_ok]
}}]
}
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
2024-05-16 4:05 [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit pan2.li
2024-05-16 4:05 ` [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len pan2.li
2024-05-16 4:05 ` [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite pan2.li
@ 2024-05-16 6:49 ` Tamar Christina
2024-05-16 12:13 ` Richard Biener
2 siblings, 1 reply; 10+ messages in thread
From: Tamar Christina @ 2024-05-16 6:49 UTC (permalink / raw)
To: pan2.li, gcc-patches
Cc: juzhe.zhong, kito.cheng, richard.guenther, Richard Sandiford
> -----Original Message-----
> From: pan2.li@intel.com <pan2.li@intel.com>
> Sent: Thursday, May 16, 2024 5:06 AM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> <Tamar.Christina@arm.com>; richard.guenther@gmail.com; Richard Sandiford
> <Richard.Sandiford@arm.com>; Pan Li <pan2.li@intel.com>
> Subject: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch adds early break auto-vectorization support for target which
> use length on partial vectorization. Consider this following example:
>
> unsigned vect_a[802];
> unsigned vect_b[802];
>
> void test (unsigned x, int n)
> {
> for (int i = 0; i < n; i++)
> {
> vect_b[i] = x + i;
>
> if (vect_a[i] > x)
> break;
>
> vect_a[i] = x;
> }
> }
>
> We use VCOND_MASK_LEN to simulate the generate (mask && i < len + bias).
> And then the IR of RVV looks like below:
>
> ...
> _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
> _55 = (int) _87;
> ...
> mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
> vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
> {0, ... }, _87, 0);
> if (vec_len_mask_72 != { 0, ... })
> goto <bb 6>; [5.50%]
> else
> goto <bb 7>; [94.50%]
>
> The below tests are passed for this patch:
> 1. The riscv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
>
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
> handling for one or multiple stmt.
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
> the loop len mask.
> * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
> vect_gen_loop_len_mask for 1 or more stmt(s).
> * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
> for vect_gen_loop_len_mask.
>
Thanks, this version looks good to me!
You'll need Richi's review still.
Cheers,
Tamar
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++
> gcc/tree-vect-stmts.cc | 17 +++++++++++++++--
> gcc/tree-vectorizer.h | 4 ++++
> 3 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 361aec06488..83c0544b6aa 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo,
> gimple_stmt_iterator *gsi,
> return loop_len;
> }
>
> +/* Generate the tree for the loop len mask and return it. Given the lens,
> + nvectors, vectype, index and factor to gen the len mask as below.
> +
> + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
> +*/
> +tree
> +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
> + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
> + unsigned int nvectors, tree vectype, tree stmt,
> + unsigned int index, unsigned int factor)
> +{
> + tree all_one_mask = build_all_ones_cst (vectype);
> + tree all_zero_mask = build_zero_cst (vectype);
> + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
> + factor);
> + tree bias = build_int_cst (intQI_type_node,
> + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> (loop_vinfo));
> + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL,
> "vec_len_mask");
> + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
> + all_one_mask, all_zero_mask, len,
> + bias);
> + gimple_call_set_lhs (call, len_mask);
> + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
> +
> + return len_mask;
> +}
> +
> /* Scale profiling counters by estimation for LOOP which is vectorized
> by factor VF.
> If FLAT is true, the loop we started with had unrealistically flat
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index b8a71605f1b..672959501bb 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo,
> stmt_vec_info stmt_info,
> ncopies = vect_get_num_copies (loop_vinfo, vectype);
>
> vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
>
> /* Now build the new conditional. Pattern gimple_conds get dropped during
> codegen so we must replace the original insn. */
> @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo,
> stmt_vec_info stmt_info,
> {
> if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
> OPTIMIZE_FOR_SPEED))
> - return false;
> + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
> else
> vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
> }
>
> -
> return true;
> }
>
> @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo,
> stmt_vec_info stmt_info,
> stmts[i], &cond_gsi);
> workset.quick_push (stmt_mask);
> }
> + else if (len_loop_p)
> + for (unsigned i = 0; i < stmts.length (); i++)
> + {
> + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
> + lens, ncopies, vectype,
> + stmts[i], i, 1);
> +
> + workset.quick_push (len_mask);
> + }
> else
> workset.splice (stmts);
>
> @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo,
> stmt_vec_info stmt_info,
> new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> new_temp, &cond_gsi);
> }
> + else if (len_loop_p)
> + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
> + ncopies, vectype, new_temp, 0, 1);
> }
>
> gcc_assert (new_temp);
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index db44d730b70..93bc30ef660 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info,
> vec_loop_lens *, unsigned int,
> extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
> vec_loop_lens *, unsigned int, tree,
> unsigned int, unsigned int);
> +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
> + gimple_stmt_iterator *, vec_loop_lens *,
> + unsigned int, tree, tree, unsigned int,
> + unsigned int);
> extern gimple_seq vect_gen_len (tree, tree, tree, tree);
> extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
> extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
> --
> 2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
2024-05-16 6:49 ` [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit Tamar Christina
@ 2024-05-16 12:13 ` Richard Biener
2024-05-16 12:27 ` Li, Pan2
0 siblings, 1 reply; 10+ messages in thread
From: Richard Biener @ 2024-05-16 12:13 UTC (permalink / raw)
To: Tamar Christina
Cc: pan2.li, gcc-patches, juzhe.zhong, kito.cheng, Richard Sandiford
On Thu, May 16, 2024 at 8:50 AM Tamar Christina <Tamar.Christina@arm.com> wrote:
>
> > -----Original Message-----
> > From: pan2.li@intel.com <pan2.li@intel.com>
> > Sent: Thursday, May 16, 2024 5:06 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> > <Tamar.Christina@arm.com>; richard.guenther@gmail.com; Richard Sandiford
> > <Richard.Sandiford@arm.com>; Pan Li <pan2.li@intel.com>
> > Subject: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch adds early break auto-vectorization support for target which
> > use length on partial vectorization. Consider this following example:
> >
> > unsigned vect_a[802];
> > unsigned vect_b[802];
> >
> > void test (unsigned x, int n)
> > {
> > for (int i = 0; i < n; i++)
> > {
> > vect_b[i] = x + i;
> >
> > if (vect_a[i] > x)
> > break;
> >
> > vect_a[i] = x;
> > }
> > }
> >
> > We use VCOND_MASK_LEN to simulate the generate (mask && i < len + bias).
> > And then the IR of RVV looks like below:
> >
> > ...
> > _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
> > _55 = (int) _87;
> > ...
> > mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
> > vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
> > {0, ... }, _87, 0);
> > if (vec_len_mask_72 != { 0, ... })
> > goto <bb 6>; [5.50%]
> > else
> > goto <bb 7>; [94.50%]
> >
> > The below tests are passed for this patch:
> > 1. The riscv fully regression tests.
> > 2. The x86 bootstrap tests.
> > 3. The x86 fully regression tests.
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
> > handling for one or multiple stmt.
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
> > the loop len mask.
> > * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
> > vect_gen_loop_len_mask for 1 or more stmt(s).
> > * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
> > for vect_gen_loop_len_mask.
> >
>
> Thanks, this version looks good to me!
>
> You'll need Richi's review still.
OK.
Thanks,
Richard.
> Cheers,
> Tamar
>
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> > gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++
> > gcc/tree-vect-stmts.cc | 17 +++++++++++++++--
> > gcc/tree-vectorizer.h | 4 ++++
> > 3 files changed, 46 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 361aec06488..83c0544b6aa 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo,
> > gimple_stmt_iterator *gsi,
> > return loop_len;
> > }
> >
> > +/* Generate the tree for the loop len mask and return it. Given the lens,
> > + nvectors, vectype, index and factor to gen the len mask as below.
> > +
> > + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
> > +*/
> > +tree
> > +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
> > + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
> > + unsigned int nvectors, tree vectype, tree stmt,
> > + unsigned int index, unsigned int factor)
> > +{
> > + tree all_one_mask = build_all_ones_cst (vectype);
> > + tree all_zero_mask = build_zero_cst (vectype);
> > + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
> > + factor);
> > + tree bias = build_int_cst (intQI_type_node,
> > + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > (loop_vinfo));
> > + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL,
> > "vec_len_mask");
> > + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
> > + all_one_mask, all_zero_mask, len,
> > + bias);
> > + gimple_call_set_lhs (call, len_mask);
> > + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
> > +
> > + return len_mask;
> > +}
> > +
> > /* Scale profiling counters by estimation for LOOP which is vectorized
> > by factor VF.
> > If FLAT is true, the loop we started with had unrealistically flat
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index b8a71605f1b..672959501bb 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > ncopies = vect_get_num_copies (loop_vinfo, vectype);
> >
> > vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> > + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
> >
> > /* Now build the new conditional. Pattern gimple_conds get dropped during
> > codegen so we must replace the original insn. */
> > @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > {
> > if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
> > OPTIMIZE_FOR_SPEED))
> > - return false;
> > + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
> > else
> > vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
> > }
> >
> > -
> > return true;
> > }
> >
> > @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > stmts[i], &cond_gsi);
> > workset.quick_push (stmt_mask);
> > }
> > + else if (len_loop_p)
> > + for (unsigned i = 0; i < stmts.length (); i++)
> > + {
> > + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
> > + lens, ncopies, vectype,
> > + stmts[i], i, 1);
> > +
> > + workset.quick_push (len_mask);
> > + }
> > else
> > workset.splice (stmts);
> >
> > @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> > new_temp, &cond_gsi);
> > }
> > + else if (len_loop_p)
> > + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
> > + ncopies, vectype, new_temp, 0, 1);
> > }
> >
> > gcc_assert (new_temp);
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index db44d730b70..93bc30ef660 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info,
> > vec_loop_lens *, unsigned int,
> > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
> > vec_loop_lens *, unsigned int, tree,
> > unsigned int, unsigned int);
> > +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
> > + gimple_stmt_iterator *, vec_loop_lens *,
> > + unsigned int, tree, tree, unsigned int,
> > + unsigned int);
> > extern gimple_seq vect_gen_len (tree, tree, tree, tree);
> > extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
> > extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
> > --
> > 2.34.1
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len
2024-05-16 4:05 ` [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len pan2.li
@ 2024-05-16 12:19 ` juzhe.zhong
2024-05-16 13:44 ` Li, Pan2
0 siblings, 1 reply; 10+ messages in thread
From: juzhe.zhong @ 2024-05-16 12:19 UTC (permalink / raw)
To: pan2.li, gcc-patches
Cc: kito.cheng, tamar.christina, Richard Biener, richard.sandiford, pan2.li
[-- Attachment #1: Type: text/plain, Size: 10684 bytes --]
RISC-V part LGTM.
juzhe.zhong@rivai.ai
From: pan2.li
Date: 2024-05-16 12:05
To: gcc-patches
CC: juzhe.zhong; kito.cheng; tamar.christina; richard.guenther; Richard.Sandiford; Pan Li
Subject: [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len
From: Pan Li <pan2.li@intel.com>
Now that the vectorizer supports loop lens for vectorizable early exit, we
would like to implement the feature for the RISC-V target. Given the example below:
unsigned vect_a[1923];
unsigned vect_b[1923];
unsigned test (unsigned limit, int n)
{
unsigned ret = 0;
for (int i = 0; i < n; i++)
{
vect_b[i] = limit + i;
if (vect_a[i] > limit)
{
ret = vect_b[i];
return ret;
}
vect_a[i] = limit;
}
return ret;
}
Before this patch:
...
.L8:
sw a3,0(a5)
addiw a0,a0,1
addi a4,a4,4
addi a5,a5,4
beq a1,a0,.L2
.L4:
sw a0,0(a4)
lw a2,0(a5)
bleu a2,a3,.L8
ret
After this patch:
...
.L5:
vsetvli a5,a3,e8,mf4,ta,ma
vmv1r.v v4,v2
vsetvli t4,zero,e32,m1,ta,ma
vmv.v.x v1,a5
vadd.vv v2,v2,v1
vsetvli zero,a5,e32,m1,ta,ma
vadd.vv v5,v4,v3
slli a6,a5,2
vle32.v v1,0(t1)
vmsltu.vv v1,v3,v1
vcpop.m t4,v1
beq t4,zero,.L4
vmv.x.s a4,v4
.L3:
...
The following tests pass with this patch:
1. The riscv full regression tests.
gcc/ChangeLog:
* config/riscv/autovec-opt.md
(*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_<mode>): New pattern
of vcond_mask_len for vector bool mode.
(cbranch<mode>4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec
UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount<VB:mode><P:mode>): Add
VLS mode to popcount pattern.
(@pred_popcount<VB_VLS:mode><P:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec-opt.md | 33 ++++++++++
gcc/config/riscv/autovec.md | 61 +++++++++++++++++++
gcc/config/riscv/vector-iterators.md | 1 +
gcc/config/riscv/vector.md | 18 +++---
.../riscv/rvv/autovec/early-break-1.c | 34 +++++++++++
.../riscv/rvv/autovec/early-break-2.c | 37 +++++++++++
6 files changed, 175 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d868..04f85d8e455 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@ (define_insn_and_split "*n<optab><mode>"
DONE;
}
[(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand")
+ (popcount:P
+ (unspec:VB_VLS [
+ (unspec:VB_VLS [
+ (match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "const_1_operand")
+ (match_operand:VB_VLS 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+ (match_operand 6 "autovec_length_operand")
+ (const_int 1)
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<VB_VLS:MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_nonvlmax_insn (
+ code_for_pred_popcount (<VB_VLS:MODE>mode, Pmode),
+ riscv_vector::CPOP_OP,
+ operands, operands[4]);
+ DONE;
+ }
+ [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075..1ee3c8052fb 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@ (define_expand "rawmemchr<ANYI:mode>"
DONE;
}
)
+
+;; =========================================================================
+;; == Early break auto-vectorization patterns
+;; =========================================================================
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_<mode>"
+ [(set (match_operand:VB 0 "register_operand")
+ (unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+ GET_MODE_NUNITS (<MODE>mode)).require ();
+ rtx reg = gen_reg_rtx (mode);
+ riscv_vector::expand_vec_series (reg, const0_rtx, const1_rtx);
+ rtx dup_rtx = gen_rtx_VEC_DUPLICATE (mode, operands[4]);
+ insn_code icode = code_for_pred_cmp_scalar (mode);
+ rtx cmp = gen_rtx_fmt_ee (LTU, <MODE>mode, reg, dup_rtx);
+ rtx ops[] = {operands[0], operands[1], operands[1], cmp, reg, operands[4]};
+ emit_vlmax_insn (icode, riscv_vector::COMPARE_OP_MU, ops);
+ DONE;
+ }
+ [(set_attr "type" "vector")])
+
+;; cbranch
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "reg_or_0_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_VECTOR"
+ {
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ pred = expand_binop (<MODE>mode, xor_optab, operands[1],
+ operands[2], NULL_RTX, 0,
+ OPTAB_DIRECT);
+ rtx reg = gen_reg_rtx (Pmode);
+ rtx cpop_ops[] = {reg, pred};
+ emit_vlmax_insn (code_for_pred_popcount (<MODE>mode, Pmode),
+ riscv_vector::CPOP_OP, cpop_ops);
+ operands[1] = reg;
+ operands[2] = const0_rtx;
+ }
+)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index a24e1bf078f..76c27035a73 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -102,6 +102,7 @@ (define_c_enum "unspec" [
UNSPEC_WREDUC_SUMU
UNSPEC_WREDUC_SUM_ORDERED
UNSPEC_WREDUC_SUM_UNORDERED
+ UNSPEC_SELECT_MASK
])
(define_c_enum "unspecv" [
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a766..95451dc762b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -6121,21 +6121,21 @@ (define_insn "@pred_not<mode>"
(set_attr "vl_op_idx" "4")
(set (attr "avl_type_idx") (const_int 5))])
-(define_insn "@pred_popcount<VB:mode><P:mode>"
- [(set (match_operand:P 0 "register_operand" "=r")
+(define_insn "@pred_popcount<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
(popcount:P
- (unspec:VB
- [(and:VB
- (match_operand:VB 1 "vector_mask_operand" "vmWc1")
- (match_operand:VB 2 "register_operand" " vr"))
- (match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ (unspec:VB_VLS
+ [(and:VB_VLS
+ (match_operand:VB_VLS 1 "vector_mask_operand" "vmWc1")
+ (match_operand:VB_VLS 2 "register_operand" " vr"))
+ (match_operand 3 "vector_length_operand" " rK")
+ (match_operand 4 "const_int_operand" " i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
"TARGET_VECTOR"
"vcpop.m\t%0,%2%p1"
[(set_attr "type" "vmpop")
- (set_attr "mode" "<VB:MODE>")])
+ (set_attr "mode" "<VB_VLS:MODE>")])
(define_insn "@pred_ffs<VB:mode><P:mode>"
[(set (match_operand:P 0 "register_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
new file mode 100644
index 00000000000..f70979e81f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 803
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned x, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = x + i;
+
+ if (vect_a[i] > x)
+ break;
+
+ vect_a[i] = x;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
new file mode 100644
index 00000000000..d405783d2c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1728
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned limit, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = limit + i;
+
+ if (vect_a[i] > limit)
+ {
+ ret = vect_b[i];
+ return ret;
+ }
+
+ vect_a[i] = limit;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite
2024-05-16 4:05 ` [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite pan2.li
@ 2024-05-16 12:19 ` juzhe.zhong
2024-05-16 13:44 ` Li, Pan2
0 siblings, 1 reply; 10+ messages in thread
From: juzhe.zhong @ 2024-05-16 12:19 UTC (permalink / raw)
To: pan2.li, gcc-patches
Cc: kito.cheng, tamar.christina, Richard Biener, richard.sandiford, pan2.li
[-- Attachment #1: Type: text/plain, Size: 3080 bytes --]
RISC-V part LGTM.
juzhe.zhong@rivai.ai
From: pan2.li
Date: 2024-05-16 12:05
To: gcc-patches
CC: juzhe.zhong; kito.cheng; tamar.christina; richard.guenther; Richard.Sandiford; Pan Li
Subject: [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite
From: Pan Li <pan2.li@intel.com>
Now that vectorizable early exit is supported on RISC-V, we would like to
enable the gcc vect tests for vectorizable early exit.
The vect-early-break_124-pr114403.c test fails to vectorize for now,
because the __builtin_memcpy with 8 bytes fails to be folded into an
int64 assignment during ccp1. We will improve that first and mark
this as xfail for RISC-V in the meantime.
The following tests pass with this patch:
1. The riscv full regression tests.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector as otherwise
LOOP VECTORIZED appears 2 times on RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 2 ++
gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
gcc/testsuite/lib/target-supports.exp | 2 ++
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98..2f80bf89e5e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
if (__builtin_memcmp (x, res, sizeof (x)) != 0)
abort ();
+
+#pragma GCC novector
for (int i = 0; i < 32; ++i)
if (flag[i] != 0 && flag[i] != 1)
abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb..101ae1e0eaa 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-require-effective-target vect_long_long } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } } */
#include "tree-vect.h"
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6f5d477b128..ec9baa4f32a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+ || [check_effective_target_riscv_v]
}}]
}
@@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+ || [check_effective_target_riscv_v_ok]
}}]
}
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
2024-05-16 12:13 ` Richard Biener
@ 2024-05-16 12:27 ` Li, Pan2
0 siblings, 0 replies; 10+ messages in thread
From: Li, Pan2 @ 2024-05-16 12:27 UTC (permalink / raw)
To: Richard Biener, Tamar Christina
Cc: gcc-patches, juzhe.zhong, kito.cheng, Richard Sandiford
Committed, thanks Richard.
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Thursday, May 16, 2024 8:13 PM
To: Tamar Christina <Tamar.Christina@arm.com>
Cc: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Richard Sandiford <Richard.Sandiford@arm.com>
Subject: Re: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
On Thu, May 16, 2024 at 8:50 AM Tamar Christina <Tamar.Christina@arm.com> wrote:
>
> > -----Original Message-----
> > From: pan2.li@intel.com <pan2.li@intel.com>
> > Sent: Thursday, May 16, 2024 5:06 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> > <Tamar.Christina@arm.com>; richard.guenther@gmail.com; Richard Sandiford
> > <Richard.Sandiford@arm.com>; Pan Li <pan2.li@intel.com>
> > Subject: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch adds early break auto-vectorization support for targets which
> > use length for partial vectorization. Consider the following example:
> >
> > unsigned vect_a[802];
> > unsigned vect_b[802];
> >
> > void test (unsigned x, int n)
> > {
> > for (int i = 0; i < n; i++)
> > {
> > vect_b[i] = x + i;
> >
> > if (vect_a[i] > x)
> > break;
> >
> > vect_a[i] = x;
> > }
> > }
> >
> > We use VCOND_MASK_LEN to simulate the generation of (mask && i < len + bias).
> > And then the IR of RVV looks like below:
> >
> > ...
> > _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
> > _55 = (int) _87;
> > ...
> > mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
> > vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
> > {0, ... }, _87, 0);
> > if (vec_len_mask_72 != { 0, ... })
> > goto <bb 6>; [5.50%]
> > else
> > goto <bb 7>; [94.50%]
> >
> > The below tests are passed for this patch:
> > 1. The riscv fully regression tests.
> > 2. The x86 bootstrap tests.
> > 3. The x86 fully regression tests.
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
> > handling for one or multiple stmt.
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
> > the loop len mask.
> > * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
> > vect_gen_loop_len_mask for 1 or more stmt(s).
> > * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
> > for vect_gen_loop_len_mask.
> >
>
> Thanks, this version looks good to me!
>
> You'll need Richi's review still.
OK.
Thanks,
Richard.
> Cheers,
> Tamar
>
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> > gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++
> > gcc/tree-vect-stmts.cc | 17 +++++++++++++++--
> > gcc/tree-vectorizer.h | 4 ++++
> > 3 files changed, 46 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 361aec06488..83c0544b6aa 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo,
> > gimple_stmt_iterator *gsi,
> > return loop_len;
> > }
> >
> > +/* Generate the tree for the loop len mask and return it. Given the lens,
> > + nvectors, vectype, index and factor to gen the len mask as below.
> > +
> > + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
> > +*/
> > +tree
> > +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
> > + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
> > + unsigned int nvectors, tree vectype, tree stmt,
> > + unsigned int index, unsigned int factor)
> > +{
> > + tree all_one_mask = build_all_ones_cst (vectype);
> > + tree all_zero_mask = build_zero_cst (vectype);
> > + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
> > + factor);
> > + tree bias = build_int_cst (intQI_type_node,
> > + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > (loop_vinfo));
> > + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL,
> > "vec_len_mask");
> > + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
> > + all_one_mask, all_zero_mask, len,
> > + bias);
> > + gimple_call_set_lhs (call, len_mask);
> > + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
> > +
> > + return len_mask;
> > +}
> > +
> > /* Scale profiling counters by estimation for LOOP which is vectorized
> > by factor VF.
> > If FLAT is true, the loop we started with had unrealistically flat
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index b8a71605f1b..672959501bb 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > ncopies = vect_get_num_copies (loop_vinfo, vectype);
> >
> > vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> > + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
> >
> > /* Now build the new conditional. Pattern gimple_conds get dropped during
> > codegen so we must replace the original insn. */
> > @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > {
> > if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
> > OPTIMIZE_FOR_SPEED))
> > - return false;
> > + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
> > else
> > vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
> > }
> >
> > -
> > return true;
> > }
> >
> > @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > stmts[i], &cond_gsi);
> > workset.quick_push (stmt_mask);
> > }
> > + else if (len_loop_p)
> > + for (unsigned i = 0; i < stmts.length (); i++)
> > + {
> > + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
> > + lens, ncopies, vectype,
> > + stmts[i], i, 1);
> > +
> > + workset.quick_push (len_mask);
> > + }
> > else
> > workset.splice (stmts);
> >
> > @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo,
> > stmt_vec_info stmt_info,
> > new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> > new_temp, &cond_gsi);
> > }
> > + else if (len_loop_p)
> > + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
> > + ncopies, vectype, new_temp, 0, 1);
> > }
> >
> > gcc_assert (new_temp);
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index db44d730b70..93bc30ef660 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info,
> > vec_loop_lens *, unsigned int,
> > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
> > vec_loop_lens *, unsigned int, tree,
> > unsigned int, unsigned int);
> > +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
> > + gimple_stmt_iterator *, vec_loop_lens *,
> > + unsigned int, tree, tree, unsigned int,
> > + unsigned int);
> > extern gimple_seq vect_gen_len (tree, tree, tree, tree);
> > extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
> > extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
> > --
> > 2.34.1
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len
2024-05-16 12:19 ` juzhe.zhong
@ 2024-05-16 13:44 ` Li, Pan2
0 siblings, 0 replies; 10+ messages in thread
From: Li, Pan2 @ 2024-05-16 13:44 UTC (permalink / raw)
To: juzhe.zhong, gcc-patches
Cc: kito.cheng, tamar.christina, Richard Biener, richard.sandiford
[-- Attachment #1: Type: text/plain, Size: 11514 bytes --]
Committed, thanks Juzhe.
Pan
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Thursday, May 16, 2024 8:19 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; tamar.christina <tamar.christina@arm.com>; Richard Biener <richard.guenther@gmail.com>; richard.sandiford <Richard.Sandiford@arm.com>; Li, Pan2 <pan2.li@intel.com>
Subject: Re: [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len
RISC-V part LGTM.
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: pan2.li<mailto:pan2.li@intel.com>
Date: 2024-05-16 12:05
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; kito.cheng<mailto:kito.cheng@gmail.com>; tamar.christina<mailto:tamar.christina@arm.com>; richard.guenther<mailto:richard.guenther@gmail.com>; Richard.Sandiford<mailto:Richard.Sandiford@arm.com>; Pan Li<mailto:pan2.li@intel.com>
Subject: [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
Now that the vectorizer supports loop lens for vectorizable early exit, we
would like to implement the feature for the RISC-V target. Given the example below:
unsigned vect_a[1923];
unsigned vect_b[1923];
unsigned test (unsigned limit, int n)
{
unsigned ret = 0;
for (int i = 0; i < n; i++)
{
vect_b[i] = limit + i;
if (vect_a[i] > limit)
{
ret = vect_b[i];
return ret;
}
vect_a[i] = limit;
}
return ret;
}
Before this patch:
...
.L8:
sw a3,0(a5)
addiw a0,a0,1
addi a4,a4,4
addi a5,a5,4
beq a1,a0,.L2
.L4:
sw a0,0(a4)
lw a2,0(a5)
bleu a2,a3,.L8
ret
After this patch:
...
.L5:
vsetvli a5,a3,e8,mf4,ta,ma
vmv1r.v v4,v2
vsetvli t4,zero,e32,m1,ta,ma
vmv.v.x v1,a5
vadd.vv v2,v2,v1
vsetvli zero,a5,e32,m1,ta,ma
vadd.vv v5,v4,v3
slli a6,a5,2
vle32.v v1,0(t1)
vmsltu.vv v1,v3,v1
vcpop.m t4,v1
beq t4,zero,.L4
vmv.x.s a4,v4
.L3:
...
The following tests pass with this patch:
1. The riscv full regression tests.
gcc/ChangeLog:
* config/riscv/autovec-opt.md
(*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_<mode>): New pattern
of vcond_mask_len for vector bool mode.
(cbranch<mode>4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec
UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount<VB:mode><P:mode>): Add
VLS mode to popcount pattern.
(@pred_popcount<VB_VLS:mode><P:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec-opt.md | 33 ++++++++++
gcc/config/riscv/autovec.md | 61 +++++++++++++++++++
gcc/config/riscv/vector-iterators.md | 1 +
gcc/config/riscv/vector.md | 18 +++---
.../riscv/rvv/autovec/early-break-1.c | 34 +++++++++++
.../riscv/rvv/autovec/early-break-2.c | 37 +++++++++++
6 files changed, 175 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d868..04f85d8e455 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@ (define_insn_and_split "*n<optab><mode>"
DONE;
}
[(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand")
+ (popcount:P
+ (unspec:VB_VLS [
+ (unspec:VB_VLS [
+ (match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "const_1_operand")
+ (match_operand:VB_VLS 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+ (match_operand 6 "autovec_length_operand")
+ (const_int 1)
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<VB_VLS:MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_nonvlmax_insn (
+ code_for_pred_popcount (<VB_VLS:MODE>mode, Pmode),
+ riscv_vector::CPOP_OP,
+ operands, operands[4]);
+ DONE;
+ }
+ [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075..1ee3c8052fb 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@ (define_expand "rawmemchr<ANYI:mode>"
DONE;
}
)
+
+;; =========================================================================
+;; == Early break auto-vectorization patterns
+;; =========================================================================
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_<mode>"
+ [(set (match_operand:VB 0 "register_operand")
+ (unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+ GET_MODE_NUNITS (<MODE>mode)).require ();
+ rtx reg = gen_reg_rtx (mode);
+ riscv_vector::expand_vec_series (reg, const0_rtx, const1_rtx);
+ rtx dup_rtx = gen_rtx_VEC_DUPLICATE (mode, operands[4]);
+ insn_code icode = code_for_pred_cmp_scalar (mode);
+ rtx cmp = gen_rtx_fmt_ee (LTU, <MODE>mode, reg, dup_rtx);
+ rtx ops[] = {operands[0], operands[1], operands[1], cmp, reg, operands[4]};
+ emit_vlmax_insn (icode, riscv_vector::COMPARE_OP_MU, ops);
+ DONE;
+ }
+ [(set_attr "type" "vector")])
+
+;; cbranch
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "reg_or_0_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_VECTOR"
+ {
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ pred = expand_binop (<MODE>mode, xor_optab, operands[1],
+ operands[2], NULL_RTX, 0,
+ OPTAB_DIRECT);
+ rtx reg = gen_reg_rtx (Pmode);
+ rtx cpop_ops[] = {reg, pred};
+ emit_vlmax_insn (code_for_pred_popcount (<MODE>mode, Pmode),
+ riscv_vector::CPOP_OP, cpop_ops);
+ operands[1] = reg;
+ operands[2] = const0_rtx;
+ }
+)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index a24e1bf078f..76c27035a73 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -102,6 +102,7 @@ (define_c_enum "unspec" [
UNSPEC_WREDUC_SUMU
UNSPEC_WREDUC_SUM_ORDERED
UNSPEC_WREDUC_SUM_UNORDERED
+ UNSPEC_SELECT_MASK
])
(define_c_enum "unspecv" [
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a766..95451dc762b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -6121,21 +6121,21 @@ (define_insn "@pred_not<mode>"
(set_attr "vl_op_idx" "4")
(set (attr "avl_type_idx") (const_int 5))])
-(define_insn "@pred_popcount<VB:mode><P:mode>"
- [(set (match_operand:P 0 "register_operand" "=r")
+(define_insn "@pred_popcount<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
(popcount:P
- (unspec:VB
- [(and:VB
- (match_operand:VB 1 "vector_mask_operand" "vmWc1")
- (match_operand:VB 2 "register_operand" " vr"))
- (match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ (unspec:VB_VLS
+ [(and:VB_VLS
+ (match_operand:VB_VLS 1 "vector_mask_operand" "vmWc1")
+ (match_operand:VB_VLS 2 "register_operand" " vr"))
+ (match_operand 3 "vector_length_operand" " rK")
+ (match_operand 4 "const_int_operand" " i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
"TARGET_VECTOR"
"vcpop.m\t%0,%2%p1"
[(set_attr "type" "vmpop")
- (set_attr "mode" "<VB:MODE>")])
+ (set_attr "mode" "<VB_VLS:MODE>")])
(define_insn "@pred_ffs<VB:mode><P:mode>"
[(set (match_operand:P 0 "register_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
new file mode 100644
index 00000000000..f70979e81f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 803
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned x, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = x + i;
+
+ if (vect_a[i] > x)
+ break;
+
+ vect_a[i] = x;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
new file mode 100644
index 00000000000..d405783d2c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1728
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned limit, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = limit + i;
+
+ if (vect_a[i] > limit)
+ {
+ ret = vect_b[i];
+ return ret;
+ }
+
+ vect_a[i] = limit;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite
2024-05-16 12:19 ` juzhe.zhong
@ 2024-05-16 13:44 ` Li, Pan2
0 siblings, 0 replies; 10+ messages in thread
From: Li, Pan2 @ 2024-05-16 13:44 UTC (permalink / raw)
To: juzhe.zhong, gcc-patches
Cc: kito.cheng, tamar.christina, Richard Biener, richard.sandiford
[-- Attachment #1: Type: text/plain, Size: 3904 bytes --]
Committed, thanks Juzhe.
Pan
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Thursday, May 16, 2024 8:19 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; tamar.christina <tamar.christina@arm.com>; Richard Biener <richard.guenther@gmail.com>; richard.sandiford <Richard.Sandiford@arm.com>; Li, Pan2 <pan2.li@intel.com>
Subject: Re: [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite
RISC-V part LGTM.
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: pan2.li<mailto:pan2.li@intel.com>
Date: 2024-05-16 12:05
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; kito.cheng<mailto:kito.cheng@gmail.com>; tamar.christina<mailto:tamar.christina@arm.com>; richard.guenther<mailto:richard.guenther@gmail.com>; Richard.Sandiford<mailto:Richard.Sandiford@arm.com>; Pan Li<mailto:pan2.li@intel.com>
Subject: [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
Now that vectorizable early exit is supported on RISC-V, we would like to
enable the gcc vect tests for vectorizable early exit.
The vect-early-break_124-pr114403.c test fails to vectorize for now,
because the __builtin_memcpy of 8 bytes fails to be folded into an
int64 assignment during ccp1. That will be improved first; in the
meantime this test is marked as xfail for RISC-V.
The following tests pass with this patch:
1. The RISC-V full regression tests.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector, as otherwise
LOOP VECTORIZED appears twice on RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.
Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 2 ++
gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
gcc/testsuite/lib/target-supports.exp | 2 ++
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98..2f80bf89e5e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
if (__builtin_memcmp (x, res, sizeof (x)) != 0)
abort ();
+
+#pragma GCC novector
for (int i = 0; i < 32; ++i)
if (flag[i] != 0 && flag[i] != 1)
abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb..101ae1e0eaa 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-require-effective-target vect_long_long } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } } */
#include "tree-vect.h"
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6f5d477b128..ec9baa4f32a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+ || [check_effective_target_riscv_v]
}}]
}
@@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+ || [check_effective_target_riscv_v_ok]
}}]
}
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2024-05-16 13:44 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-16 4:05 [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit pan2.li
2024-05-16 4:05 ` [PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len pan2.li
2024-05-16 12:19 ` juzhe.zhong
2024-05-16 13:44 ` Li, Pan2
2024-05-16 4:05 ` [PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite pan2.li
2024-05-16 12:19 ` juzhe.zhong
2024-05-16 13:44 ` Li, Pan2
2024-05-16 6:49 ` [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit Tamar Christina
2024-05-16 12:13 ` Richard Biener
2024-05-16 12:27 ` Li, Pan2
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).