The input vectype of the reduction PHI statement must be determined before vect cost computation for the reduction. Since a lane-reducing operation has a different input vectype from a normal one, we need to traverse all reduction statements to find the input vectype with the least lanes, and set that on the PHI statement. Thanks, Feng --- gcc/ * tree-vect-loop.cc (vectorizable_reduction): Determine input vectype during traversal of reduction statements. --- gcc/tree-vect-loop.cc | 72 +++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0f7b125e72d..39aa5cb1197 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7643,7 +7643,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); stmt_vec_info vdef = vect_stmt_to_vectorize (def); - if (STMT_VINFO_REDUC_IDX (vdef) == -1) + int reduc_idx = STMT_VINFO_REDUC_IDX (vdef); + + if (reduc_idx == -1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7686,10 +7688,50 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; } } - else if (!stmt_info) - /* First non-conversion stmt. */ - stmt_info = vdef; - reduc_def = op.ops[STMT_VINFO_REDUC_IDX (vdef)]; + else + { + /* First non-conversion stmt. */ + if (!stmt_info) + stmt_info = vdef; + + if (lane_reducing_op_p (op.code)) + { + unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 0; + tree op_type = TREE_TYPE (op.ops[0]); + tree new_vectype_in = get_vectype_for_scalar_type (loop_vinfo, + op_type, + group_size); + + /* The last operand of lane-reducing operation is for + reduction. 
*/ + gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1); + + /* For lane-reducing operation vectorizable analysis needs the + reduction PHI information */ + STMT_VINFO_REDUC_DEF (def) = phi_info; + + if (!new_vectype_in) + return false; + + /* Each lane-reducing operation has its own input vectype, while + reduction PHI will record the input vectype with the least + lanes. */ + STMT_VINFO_REDUC_VECTYPE_IN (vdef) = new_vectype_in; + + /* To accommodate lane-reducing operations of mixed input + vectypes, choose input vectype with the least lanes for the + reduction PHI statement, which would result in the most + ncopies for vectorized reduction results. */ + if (!vectype_in + || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) + < GET_MODE_SIZE (SCALAR_TYPE_MODE (op_type)))) + vectype_in = new_vectype_in; + } + else + vectype_in = STMT_VINFO_VECTYPE (phi_info); + } + + reduc_def = op.ops[reduc_idx]; reduc_chain_length++; if (!stmt_info && slp_node) slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; @@ -7747,6 +7789,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; + STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; + gimple_match_op op; if (!gimple_extract_op (stmt_info->stmt, &op)) gcc_unreachable (); @@ -7831,16 +7875,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op.ops[i]), slp_op[i]); - /* To properly compute ncopies we are interested in the widest - non-reduction input type in case we're looking at a widening - accumulation that we later handle in vect_transform_reduction. 
*/ - if (lane_reducing - && vectype_op[i] - && (!vectype_in - || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) - < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_op[i])))))) - vectype_in = vectype_op[i]; - /* Record how the non-reduction-def value of COND_EXPR is defined. ??? For a chain of multiple CONDs we'd have to match them up all. */ if (op.code == COND_EXPR && reduc_chain_length == 1) @@ -7859,14 +7893,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } } } - if (!vectype_in) - vectype_in = STMT_VINFO_VECTYPE (phi_info); - STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; - - /* Each lane-reducing operation has its own input vectype, while reduction - PHI records the input vectype with least lanes. */ - if (lane_reducing) - STMT_VINFO_REDUC_VECTYPE_IN (stmt_info) = vectype_in; enum vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (phi_info); STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type; -- 2.17.1