> > > > > > Hmm, but we're visiting them then? I wonder how you get along > > > without doing adjustments on the uses if you consider > > > > > > _1 = a < b; > > > _2 = c != d; > > > _3 = _1 | _2; > > > if (_3 != 0) > > > exit loop; > > > > > > thus a combined condition like > > > > > > if (a < b || c != d) > > > > > > that we if-converted. We need to recognize that _1, _2 and _3 have > > > mask uses and thus possibly adjust them. > > > > > > What bad happens if you drop 'analyze_only'? We're not really > > > rewriting anything there. > > > > You mean drop it only in the above? We then fail to update the type for > > the gcond. So in certain circumstances like with > > > > int a, c, d; > > short b; > > > > int > > main () > > { > > int e[1]; > > for (; b < 2; b++) > > { > > a = 0; > > if (b == 28378) > > a = e[b]; > > if (!(d || b)) > > for (; c;) > > ; > > } > > return 0; > > } > > > > Unless we walk the statements regardless of whether they come from inside the > loop or not. > > What do you mean by "fail to update the type for the gcond"? If > I understood correctly the 'analyze_only' short-cuts some > checks, it doesn't add some? > > But it's hard to follow what's actually done for a gcond ... > Yes, so I realized I had misunderstood what this pattern was doing, and once I had made the first wrong change it snowballed. This is an updated patch where the only modification made is to check_bool_pattern to also return the type of the overall expression even if we are going to handle the conditional through an optab expansion. I'm piggybacking on the fact that this function has seen enough of the operands to be able to tell the precision needed when vectorizing. This is needed because in the cases where the condition to the gcond was already a bool, the precision would be 1 bit; to find the actual mask type we have to dig through the operands, which this function already does. Bootstrapped and regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no issues. 
Ok for master? Thanks, Tamar gcc/ChangeLog: * tree-vect-patterns.cc (vect_init_pattern_stmt): Support gconds. (check_bool_pattern, vect_recog_bool_pattern): Support gconds type analysis. * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts without lhs. (vectorizable_early_exit): New. (vect_analyze_stmt, vect_transform_stmt): Use it. (vect_is_simple_use, vect_get_vector_types_for_stmt): Support gcond. --- inline copy of patch --- diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 7debe7f0731673cd1bf25cd39d55e23990a73d0e..6bf1c0aba8ce94f70ce4e952efd1c5695b189690 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -132,6 +132,7 @@ vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt, if (!STMT_VINFO_VECTYPE (pattern_stmt_info)) { gcc_assert (!vectype + || is_a (pattern_stmt) || (VECTOR_BOOLEAN_TYPE_P (vectype) == vect_use_mask_type_p (orig_stmt_info))); STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype; @@ -5210,10 +5211,12 @@ vect_recog_mixed_size_cond_pattern (vec_info *vinfo, true if bool VAR can and should be optimized that way. Assume it shouldn't in case it's a result of a comparison which can be directly vectorized into a vector comparison. Fills in STMTS with all stmts visited during the - walk. */ + walk. if VECTYPE then this value will contain the common type of the + operations making up the comparisons. */ static bool -check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts) +check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts, + tree *vectype) { tree rhs1; enum tree_code rhs_code; @@ -5234,27 +5237,28 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts) switch (rhs_code) { case SSA_NAME: - if (! check_bool_pattern (rhs1, vinfo, stmts)) + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype)) return false; break; CASE_CONVERT: if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) return false; - if (! check_bool_pattern (rhs1, vinfo, stmts)) + if (! 
check_bool_pattern (rhs1, vinfo, stmts, vectype)) return false; break; case BIT_NOT_EXPR: - if (! check_bool_pattern (rhs1, vinfo, stmts)) + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype)) return false; break; case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR: - if (! check_bool_pattern (rhs1, vinfo, stmts) - || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts)) + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype) + || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts, + vectype)) return false; break; @@ -5272,6 +5276,8 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts) if (comp_vectype == NULL_TREE) return false; + if (vectype) + *vectype = comp_vectype; tree mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1)); if (mask_type @@ -5608,13 +5614,28 @@ vect_recog_bool_pattern (vec_info *vinfo, enum tree_code rhs_code; tree var, lhs, rhs, vectype; gimple *pattern_stmt; - - if (!is_gimple_assign (last_stmt)) + gcond* cond = NULL; + if (!is_gimple_assign (last_stmt) + && !(cond = dyn_cast (last_stmt))) return NULL; - var = gimple_assign_rhs1 (last_stmt); - lhs = gimple_assign_lhs (last_stmt); - rhs_code = gimple_assign_rhs_code (last_stmt); + loop_vec_info loop_vinfo = dyn_cast (vinfo); + if (is_gimple_assign (last_stmt)) + { + var = gimple_assign_rhs1 (last_stmt); + lhs = gimple_assign_lhs (last_stmt); + rhs_code = gimple_assign_rhs_code (last_stmt); + } + else if (loop_vinfo && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + { + /* If not multiple exits, and loop vectorization don't bother analyzing + the gcond as we don't support SLP today. 
*/ + lhs = gimple_cond_lhs (last_stmt); + var = gimple_cond_lhs (last_stmt); + rhs_code = gimple_cond_code (last_stmt); + } + else + return NULL; if (rhs_code == VIEW_CONVERT_EXPR) var = TREE_OPERAND (var, 0); @@ -5632,7 +5653,7 @@ vect_recog_bool_pattern (vec_info *vinfo, return NULL; vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); - if (check_bool_pattern (var, vinfo, bool_stmts)) + if (check_bool_pattern (var, vinfo, bool_stmts, NULL)) { rhs = adjust_bool_stmts (vinfo, bool_stmts, TREE_TYPE (lhs), stmt_vinfo); @@ -5680,7 +5701,7 @@ vect_recog_bool_pattern (vec_info *vinfo, return pattern_stmt; } - else if (rhs_code == COND_EXPR + else if ((rhs_code == COND_EXPR || cond) && TREE_CODE (var) == SSA_NAME) { vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); @@ -5700,18 +5721,33 @@ vect_recog_bool_pattern (vec_info *vinfo, if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE) return NULL; - if (check_bool_pattern (var, vinfo, bool_stmts)) + tree comp_type = NULL_TREE; + if (check_bool_pattern (var, vinfo, bool_stmts, &comp_type)) var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo); - else if (integer_type_for_mask (var, vinfo)) + else if (!cond && integer_type_for_mask (var, vinfo)) + return NULL; + else if (cond && !comp_type) return NULL; - lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); - pattern_stmt - = gimple_build_assign (lhs, COND_EXPR, - build2 (NE_EXPR, boolean_type_node, - var, build_int_cst (TREE_TYPE (var), 0)), - gimple_assign_rhs2 (last_stmt), - gimple_assign_rhs3 (last_stmt)); + if (!cond) + { + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + pattern_stmt + = gimple_build_assign (lhs, COND_EXPR, + build2 (NE_EXPR, boolean_type_node, var, + build_int_cst (TREE_TYPE (var), 0)), + gimple_assign_rhs2 (last_stmt), + gimple_assign_rhs3 (last_stmt)); + } + else + { + pattern_stmt + = gimple_build_cond (NE_EXPR, + var, build_int_cst (TREE_TYPE (var), 0), + gimple_cond_true_label (cond), + 
gimple_cond_false_label (cond)); + vectype = truth_type_for (comp_type); + } *type_out = vectype; vect_pattern_detected ("vect_recog_bool_pattern", last_stmt); @@ -5725,7 +5761,7 @@ vect_recog_bool_pattern (vec_info *vinfo, if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype))) return NULL; - if (check_bool_pattern (var, vinfo, bool_stmts)) + if (check_bool_pattern (var, vinfo, bool_stmts, NULL)) rhs = adjust_bool_stmts (vinfo, bool_stmts, TREE_TYPE (vectype), stmt_vinfo); else diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 582c5e678fad802d6e76300fe3c939b9f2978f17..d0878250f6fb9de4d6e6a39d16956ca147be4b80 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -12489,7 +12489,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, vec vec_oprnds0 = vNULL; vec vec_oprnds1 = vNULL; tree mask_type; - tree mask; + tree mask = NULL_TREE; if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) return false; @@ -12629,8 +12629,9 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, /* Transform. */ /* Handle def. */ - lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info)); - mask = vect_create_destination_var (lhs, mask_type); + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); + if (lhs) + mask = vect_create_destination_var (lhs, mask_type); vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, rhs1, &vec_oprnds0, vectype, @@ -12644,7 +12645,10 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, gimple *new_stmt; vec_rhs2 = vec_oprnds1[i]; - new_temp = make_ssa_name (mask); + if (lhs) + new_temp = make_ssa_name (mask); + else + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp"); if (bitop1 == NOP_EXPR) { new_stmt = gimple_build_assign (new_temp, code, @@ -12723,6 +12727,198 @@ vectorizable_comparison (vec_info *vinfo, return true; } +/* Check to see if the current early break given in STMT_INFO is valid for + vectorization. 
*/ + +static bool +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, + gimple_stmt_iterator *gsi, gimple **vec_stmt, + slp_tree slp_node, stmt_vector_for_cost *cost_vec) +{ + loop_vec_info loop_vinfo = dyn_cast (vinfo); + if (!loop_vinfo + || !is_a (STMT_VINFO_STMT (stmt_info))) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def) + return false; + + if (!STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info)); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + gcc_assert (vectype); + + tree vectype_op0 = NULL_TREE; + slp_tree slp_op0; + tree op0; + enum vect_def_type dt0; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0, + &vectype_op0)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "use not simple.\n"); + return false; + } + + machine_mode mode = TYPE_MODE (vectype); + int ncopies; + + if (slp_node) + ncopies = 1; + else + ncopies = vect_get_num_copies (loop_vinfo, vectype); + + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + + /* Analyze only. 
*/ + if (!vec_stmt) + { + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target doesn't support flag setting vector " + "comparisons.\n"); + return false; + } + + if (ncopies > 1 + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target does not support boolean vector OR for " + "type %T.\n", vectype); + return false; + } + + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + return false; + + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) + { + if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, + OPTIMIZE_FOR_SPEED)) + return false; + else + vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); + } + + + return true; + } + + /* Tranform. */ + + tree new_temp = NULL_TREE; + gimple *new_stmt = NULL; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n"); + + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + gcc_unreachable (); + + gimple *stmt = STMT_VINFO_STMT (stmt_info); + basic_block cond_bb = gimple_bb (stmt); + gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb); + + auto_vec stmts; + + tree mask = NULL_TREE; + if (masked_loop_p) + mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0); + + if (slp_node) + stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node)); + else + { + auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info); + stmts.reserve_exact (vec_stmts.length ()); + for (auto stmt : vec_stmts) + stmts.quick_push (gimple_assign_lhs (stmt)); + } + + /* Determine if we need to reduce the final value. 
*/ + if (stmts.length () > 1) + { + /* We build the reductions in a way to maintain as much parallelism as + possible. */ + auto_vec workset (stmts.length ()); + + /* Mask the statements as we queue them up. */ + if (masked_loop_p) + for (auto stmt : stmts) + workset.quick_push (prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), + mask, stmt, &cond_gsi)); + else + workset.splice (stmts); + + while (workset.length () > 1) + { + new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc"); + tree arg0 = workset.pop (); + tree arg1 = workset.pop (); + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1); + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, + &cond_gsi); + workset.quick_insert (0, new_temp); + } + } + else + new_temp = stmts[0]; + + gcc_assert (new_temp); + + tree cond = new_temp; + /* If we have multiple statements after reduction we should check all the + lanes and treat it as a full vector. */ + if (masked_loop_p) + cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond, + &cond_gsi); + + /* Now build the new conditional. Pattern gimple_conds get dropped during + codegen so we must replace the original insn. */ + stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info)); + gcond *cond_stmt = as_a (stmt); + /* When vectorizing we assume that if the branch edge is taken that we're + exiting the loop. This is not however always the case as the compiler will + rewrite conditions to always be a comparison against 0. To do this it + sometimes flips the edges. This is fine for scalar, but for vector we + then have to flip the test, as we're still assuming that if you take the + branch edge that we found the exit condition. 
*/ + auto new_code = NE_EXPR; + tree cst = build_zero_cst (vectype); + if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), + BRANCH_EDGE (gimple_bb (cond_stmt))->dest)) + { + new_code = EQ_EXPR; + cst = build_minus_one_cst (vectype); + } + + gimple_cond_set_condition (cond_stmt, new_code, cond, cst); + update_stmt (stmt); + + if (slp_node) + SLP_TREE_VEC_DEFS (slp_node).truncate (0); + else + STMT_VINFO_VEC_STMTS (stmt_info).truncate (0); + + + if (!slp_node) + *vec_stmt = stmt; + + return true; +} + /* If SLP_NODE is nonnull, return true if vectorizable_live_operation can handle all live statements in the node. Otherwise return true if STMT_INFO is not live or if vectorizable_live_operation can handle it. @@ -12949,7 +13145,9 @@ vect_analyze_stmt (vec_info *vinfo, || vectorizable_lc_phi (as_a (vinfo), stmt_info, NULL, node) || vectorizable_recurr (as_a (vinfo), - stmt_info, NULL, node, cost_vec)); + stmt_info, NULL, node, cost_vec) + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, + cost_vec)); else { if (bb_vinfo) @@ -12972,7 +13170,10 @@ vect_analyze_stmt (vec_info *vinfo, NULL, NULL, node, cost_vec) || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node, cost_vec) - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)); + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, + cost_vec)); + } if (node) @@ -13131,6 +13332,12 @@ vect_transform_stmt (vec_info *vinfo, gcc_assert (done); break; + case loop_exit_ctrl_vec_info_type: + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt, + slp_node, NULL); + gcc_assert (done); + break; + default: if (!STMT_VINFO_LIVE_P (stmt_info)) { @@ -14321,10 +14528,19 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, } else { + gcond *cond = NULL; if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) scalar_type = TREE_TYPE (DR_REF (dr)); else if (gimple_call_internal_p (stmt, 
IFN_MASK_STORE)) scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); + else if ((cond = dyn_cast (stmt))) + { + /* We can't convert the scalar type to boolean yet, since booleans have a + single bit precision and we need the vector boolean to be a + representation of the integer mask. So set the correct integer type and + convert to boolean vector once we have a vectype. */ + scalar_type = TREE_TYPE (gimple_cond_lhs (cond)); + } else scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); @@ -14339,12 +14555,18 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, "get vectype for scalar type: %T\n", scalar_type); } vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); + if (!vectype) return opt_result::failure_at (stmt, "not vectorized:" " unsupported data-type %T\n", scalar_type); + /* If we were a gcond, convert the resulting type to a vector boolean type now + that we have the correct integer mask type. */ + if (cond) + vectype = truth_type_for (vectype); + if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); }