Sorry for the wrong description in the commit log. After this patch, the IR is: _36 = .SELECT_VL (ivtmp_34, POLY_INT_CST [4, 4]); _22 = (int) _36; vect_cst__21 = [vec_duplicate_expr] _22; juzhe.zhong@rivai.ai From: Juzhe-Zhong Date: 2023-11-08 18:53 To: gcc-patches CC: richard.sandiford; rguenther; kito.cheng; kito.cheng; Juzhe-Zhong Subject: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438 The SELECT_VL result is not necessarily always VF in non-final iterations. The current GIMPLE IR is wrong: # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)> ... _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }; After this patch, the IR is correct for SELECT_VL: # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)> ... _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]); _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... }; Kito, could you give more explanation? PR middle/112438 gcc/ChangeLog: * tree-vect-loop.cc (vectorizable_induction): Fix bug. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr112438.c: New test. 
--- .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++ gcc/tree-vect-loop.cc | 39 +++++++++++++++---- 2 files changed, 67 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c new file mode 100644 index 00000000000..b326d56a52c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */ + +void +foo (int n, int *__restrict in, int *__restrict out) +{ + for (int i = 0; i < n; i += 1) + { + out[i] = in[i] + i; + } +} + +void +foo2 (int n, float * __restrict in, +float * __restrict out) +{ + for (int i = 0; i < n; i += 1) + { + out[i] = in[i] + i; + } +} + +void +foo3 (int n, float * __restrict in, +float * __restrict out, float x) +{ + for (int i = 0; i < n; i += 1) + { + out[i] = in[i] + i* i; + } +} + +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */ +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */ + diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index a544bc9b059..3e103946168 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo, new_name = step_expr; else { + gimple_seq seq = NULL; + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) + { + /* When we're using loop_len produced by SELEC_VL, the non-final + iterations are not always processing VF elements. So vectorize + induction variable instead of + + _21 = vect_vec_iv_.6_22 + { VF, ... 
}; + + We should generate: + + _35 = .SELECT_VL (ivtmp_33, VF); + vect_cst__22 = [vec_duplicate_expr] _35; + _21 = vect_vec_iv_.6_22 + vect_cst__22; */ + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); + tree len + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0); + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr), + unshare_expr (len)), + &seq, true, NULL_TREE); + } /* iv_loop is the loop to be vectorized. Generate: vec_step = [VF*S, VF*S, VF*S, VF*S] */ - gimple_seq seq = NULL; - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) { expr = build_int_cst (integer_type_node, vf); expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo, expr, step_expr); if (seq) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT); + else + { + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); + gcc_assert (!new_bb); + } } } @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo, gcc_assert (CONSTANT_CLASS_P (new_name) || TREE_CODE (new_name) == SSA_NAME); new_vec = build_vector_from_val (step_vectype, t); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, NULL); - + vec_step + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL); /* Create the following def-use cycle: loop prolog: -- 2.36.3