From 74c4d39f823ae94767f135babea45af16b401811 Mon Sep 17 00:00:00 2001 From: Feng Xue Date: Sun, 16 Jun 2024 13:00:32 +0800 Subject: [PATCH 1/3] vect: Determine input vectype for multiple lane-reducing operations The input vectype of the reduction PHI statement must be determined before vect cost computation for the reduction. Since a lane-reducing operation has a different input vectype from a normal one, we need to traverse all reduction statements to find the input vectype with the least lanes, and set that on the PHI statement. 2024-06-16 Feng Xue gcc/ * tree-vect-loop.cc (vectorizable_reduction): Determine input vectype during traversal of reduction statements. --- gcc/tree-vect-loop.cc | 79 ++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 347dac97e49..419f4b08d2b 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7643,7 +7643,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); stmt_vec_info vdef = vect_stmt_to_vectorize (def); - if (STMT_VINFO_REDUC_IDX (vdef) == -1) + int reduc_idx = STMT_VINFO_REDUC_IDX (vdef); + + if (reduc_idx == -1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7686,10 +7688,57 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; } } - else if (!stmt_info) - /* First non-conversion stmt. */ - stmt_info = vdef; - reduc_def = op.ops[STMT_VINFO_REDUC_IDX (vdef)]; + else + { + /* First non-conversion stmt. */ + if (!stmt_info) + stmt_info = vdef; + + if (lane_reducing_op_p (op.code)) + { + enum vect_def_type dt; + tree vectype_op; + + /* The last operand of lane-reducing operation is for + reduction. 
*/ + gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1); + + if (!vect_is_simple_use (op.ops[0], loop_vinfo, &dt, &vectype_op)) + return false; + + tree type_op = TREE_TYPE (op.ops[0]); + + if (!vectype_op) + { + vectype_op = get_vectype_for_scalar_type (loop_vinfo, + type_op); + if (!vectype_op) + return false; + } + + /* For lane-reducing operation vectorizable analysis needs the + reduction PHI information */ + STMT_VINFO_REDUC_DEF (def) = phi_info; + + /* Each lane-reducing operation has its own input vectype, while + reduction PHI will record the input vectype with the least + lanes. */ + STMT_VINFO_REDUC_VECTYPE_IN (vdef) = vectype_op; + + /* To accommodate lane-reducing operations of mixed input + vectypes, choose input vectype with the least lanes for the + reduction PHI statement, which would result in the most + ncopies for vectorized reduction results. */ + if (!vectype_in + || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) + < GET_MODE_SIZE (SCALAR_TYPE_MODE (type_op)))) + vectype_in = vectype_op; + } + else + vectype_in = STMT_VINFO_VECTYPE (phi_info); + } + + reduc_def = op.ops[reduc_idx]; reduc_chain_length++; if (!stmt_info && slp_node) slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; @@ -7747,6 +7796,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; + STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; + gimple_match_op op; if (!gimple_extract_op (stmt_info->stmt, &op)) gcc_unreachable (); @@ -7831,16 +7882,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op.ops[i]), slp_op[i]); - /* To properly compute ncopies we are interested in the widest - non-reduction input type in case we're looking at a widening - accumulation that we later handle in vect_transform_reduction. 
*/ - if (lane_reducing - && vectype_op[i] - && (!vectype_in - || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) - < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_op[i])))))) - vectype_in = vectype_op[i]; - /* Record how the non-reduction-def value of COND_EXPR is defined. ??? For a chain of multiple CONDs we'd have to match them up all. */ if (op.code == COND_EXPR && reduc_chain_length == 1) @@ -7859,14 +7900,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } } } - if (!vectype_in) - vectype_in = STMT_VINFO_VECTYPE (phi_info); - STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; - - /* Each lane-reducing operation has its own input vectype, while reduction - PHI records the input vectype with least lanes. */ - if (lane_reducing) - STMT_VINFO_REDUC_VECTYPE_IN (stmt_info) = vectype_in; enum vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (phi_info); STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type; -- 2.17.1