diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index c1d6e02194b251f7c940784c291d58c754f07454..ebb71948abe4ca27d495a2707254beb27e385a0d 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1928,6 +1928,15 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
   return iters_name;
 }
 
+static bool
+maybe_not_zero (tree t)
+{
+  if (!t)
+    return false;
+  if (TREE_CODE (t) != INTEGER_CST)
+    return true;
+  return !tree_int_cst_equal (t, build_zero_cst (TREE_TYPE (t)));
+}
 
 
 /* Function vect_update_init_of_dr
@@ -1954,6 +1963,76 @@ vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
   dr_info->offset = offset;
 }
 
+static void
+vect_update_base_of_dr (struct data_reference *dr,
+                        loop_vec_info epilogue_vinfo, iv_update *iv_update)
+{
+  tree new_base_addr = iv_update->new_base_addr;
+  edge skip_e = iv_update->skip_edge;
+  if (skip_e)
+    {
+      /* If we have SKIP_E we need to use the phi-node that joins the IV coming
+         from the main loop and the initial IV.  */
+      gimple_seq stmts;
+      tree base_addr = DR_BASE_ADDRESS (dr);
+      tree type = TREE_TYPE (base_addr);
+      gphi *new_phi;
+
+      edge e = EDGE_PRED (skip_e->dest, 0);
+      e = e != skip_e ? e : EDGE_PRED (skip_e->dest, 1);
+
+      base_addr = force_gimple_operand (base_addr, &stmts, true,
+                                        NULL_TREE);
+      gimple_stmt_iterator gsi = gsi_last_bb (skip_e->src);
+      if (is_gimple_assign (gsi_stmt (gsi))
+          || is_gimple_call (gsi_stmt (gsi)))
+        gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+      else
+        gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
+
+      /* Make sure NEW_BASE_ADDR and the initial base address use the same
+         type.  Not sure why I chose to use DR_BASE_ADDR's type here, probably
+         makes more sense to use the NEW_BASE_ADDR's type.  */
+      stmts = NULL;
+      new_base_addr = fold_convert (type, new_base_addr);
+      new_base_addr = force_gimple_operand (new_base_addr, &stmts, true, NULL_TREE);
+      gsi = gsi_last_bb (e->src);
+      if (is_gimple_assign (gsi_stmt (gsi))
+          || is_gimple_call (gsi_stmt (gsi)))
+        gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+      else
+        gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
+
+      new_phi = create_phi_node (make_ssa_name (type), skip_e->dest);
+      add_phi_arg (new_phi, new_base_addr, e, UNKNOWN_LOCATION);
+      add_phi_arg (new_phi, base_addr, skip_e, UNKNOWN_LOCATION);
+      new_base_addr = gimple_phi_result (new_phi);
+    }
+  else
+    {
+      gimple_seq stmts;
+      class loop *loop = LOOP_VINFO_LOOP (epilogue_vinfo);
+      tree type = TREE_TYPE (DR_BASE_ADDRESS (dr));
+      new_base_addr = fold_convert (type, new_base_addr);
+      new_base_addr = force_gimple_operand (new_base_addr, &stmts, true,
+                                            NULL_TREE);
+      gimple_stmt_iterator gsi
+        = gsi_last_bb (loop_preheader_edge (loop)->src);
+      if (!gsi_stmt (gsi)
+          || is_gimple_assign (gsi_stmt (gsi))
+          || is_gimple_call (gsi_stmt (gsi)))
+        gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+      else
+        gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
+    }
+  DR_BASE_ADDRESS (dr) = new_base_addr;
+  /* TODO: This will need a different approach for vector_skip path with
+     non-zero init or offset.  These are currently disabled.  */
+  DR_INIT (dr) = build_zero_cst (sizetype);
+  DR_OFFSET (dr) = build_zero_cst (sizetype);
+}
+
+
 /* Function vect_update_inits_of_drs
@@ -1966,6 +2045,7 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
 {
   unsigned int i;
   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+  loop_vec_info orig_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
   struct data_reference *dr;
 
   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
@@ -1981,7 +2061,23 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
     {
       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
       if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
-	vect_update_init_of_dr (dr_info, niters, code);
+	{
+	  iv_update *iv_update = NULL;
+	  if (orig_vinfo)
+	    iv_update
+	      = LOOP_VINFO_IV_UPDATES (orig_vinfo)->get (dr);
+	  /* Don't use iv_update if we are using skip_vector, but DR_INIT is not
+	     zero.  */
+	  if (iv_update && iv_update->new_base_addr
+	      && !(iv_update->skip_edge
+		   && (maybe_not_zero (DR_INIT (dr))
+		       || maybe_not_zero (DR_OFFSET (dr)))))
+	    vect_update_base_of_dr (dr, loop_vinfo, iv_update);
+	  else
+	    /* Advance data_reference's with the number of iterations of the previous
+	       loop and its prologue.  */
+	    vect_update_init_of_dr (dr_info, niters, code);
+	}
     }
 }
@@ -2857,6 +2953,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
     {
       epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
       loop_vinfo->epilogue_vinfos.ordered_remove (0);
+      if (!LOOP_VINFO_IV_UPDATES (loop_vinfo))
+	LOOP_VINFO_IV_UPDATES (loop_vinfo)
+	  = new hash_map<data_reference *, iv_update> ();
     }
 
   tree niters_vector_mult_vf = NULL_TREE;
@@ -3091,6 +3190,18 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
 
+	  if (epilogue_vinfo) {
+	    struct data_reference *dr;
+	    int i;
+	    vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (epilogue_vinfo);
+	    FOR_EACH_VEC_ELT (datarefs, i, dr)
+	      {
+		iv_update &iv_update
+		  = LOOP_VINFO_IV_UPDATES (loop_vinfo)->get_or_insert (dr);
+		iv_update.skip_edge = guard_e;
+	      }
+	  }
+
	  /* Simply propagate profile info from guard_bb to guard_to which is
	     a merge point of control flow.  */
	  guard_to->count = guard_bb->count;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 3e973e774af8f9205be893e01ad9263281116885..500b0efbf34afa0e9202f6b0fa61e5e291482321 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -846,7 +846,8 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     has_mask_store (false),
     scalar_loop_scaling (profile_probability::uninitialized ()),
     scalar_loop (NULL),
-    orig_loop_info (NULL)
+    orig_loop_info (NULL),
+    iv_updates_map (NULL)
 {
   /* CHECKME: We want to visit all BBs before their successors (except for
      latch blocks, for which this assertion wouldn't hold).  In the simple
@@ -926,6 +927,7 @@ _loop_vec_info::~_loop_vec_info ()
   delete ivexpr_map;
   delete scan_map;
   epilogue_vinfos.release ();
+  delete iv_updates_map;
 
   /* When we release an epiloge vinfo that we do not intend to use
      avoid clearing AUX of the main loop which should continue to
@@ -9264,8 +9266,8 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
   free (LOOP_VINFO_BBS (epilogue_vinfo));
   LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;
 
-  /* Advance data_reference's with the number of iterations of the previous
-     loop and its prologue.  */
+  /* Either advance data_reference's with the number of iterations of the previous
+     loop and its prologue or reuse the previous loop's data_reference update.  */
   vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 85d3161fe3b2fbe289396457361c7af7519bd2b3..65650d71149abd22bd320135d01411edd38d9cf9 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -7123,7 +7123,39 @@ vectorizable_scan_store (vec_info *vinfo,
   return true;
 }
 
+static void
+vect_capture_iv_update (struct data_reference *dr, tree after_incr,
+                        tree before_incr, loop_vec_info loop_vinfo)
+{
+  if (!loop_vinfo || !LOOP_VINFO_IV_UPDATES (loop_vinfo))
+    return;
+
+  /* We aren't updating the data reference, so nothing to update the base addr
+     with.  */
+  if (after_incr == NULL_TREE)
+    LOOP_VINFO_IV_UPDATES (loop_vinfo)->remove (dr);
+
+  /* Use PTR_INCR for positive steps as that will contain the next dataref to
+     be accessed.  For negative steps we access the data at step * datasize, so
+     use BEFORE_INCR as otherwise the epilogue's data access will be off by the
+     main loop's datasize * step.  */
+  bool neg_step
+    = TREE_CODE (DR_STEP (dr)) == INTEGER_CST
+      && tree_int_cst_sgn (DR_STEP (dr)) == -1;
+
+  tree new_base_addr;
+  if (neg_step)
+    new_base_addr
+      = fold_build2 (POINTER_PLUS_EXPR,
+                     TREE_TYPE (before_incr),
+                     before_incr, build_int_cst (sizetype, -1));
+  else
+    new_base_addr = after_incr;
+  iv_update &iv_update
+    = LOOP_VINFO_IV_UPDATES (loop_vinfo)->get_or_insert (dr);
+  iv_update.new_base_addr = new_base_addr;
+}
 
 /* Function vectorizable_store.
 
    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
@@ -7151,6 +7183,7 @@ vectorizable_store (vec_info *vinfo,
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
   gimple *ptr_incr = NULL;
+  tree after_incr = NULL_TREE;
   int ncopies;
   int j;
   stmt_vec_info first_stmt_info;
@@ -8279,6 +8312,11 @@ vectorizable_store (vec_info *vinfo,
   result_chain.release ();
   vec_oprnds.release ();
 
+  if (ptr_incr)
+    after_incr = gimple_get_lhs (ptr_incr);
+
+  vect_capture_iv_update (first_dr_info->dr, after_incr, dataref_ptr, loop_vinfo);
+
   return true;
 }
@@ -8420,6 +8458,7 @@ vectorizable_load (vec_info *vinfo,
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
   gimple *ptr_incr = NULL;
+  tree after_incr = NULL_TREE;
   int ncopies;
   int i, j;
   unsigned int group_size;
@@ -9792,6 +9831,11 @@ vectorizable_load (vec_info *vinfo,
	}
       dr_chain.release ();
     }
+
+  if (ptr_incr)
+    after_incr = gimple_get_lhs (ptr_incr);
+  vect_capture_iv_update (first_dr_info->dr, after_incr, dataref_ptr, loop_vinfo);
+
   if (!slp)
     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b861c97ab3aef179ba9b2900701cf09e75a847a5..95cb806f715d3f5b1389b2e56a026e07709da990 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -545,6 +545,12 @@ typedef auto_vec<rgroup_controls> vec_loop_lens;
 
 typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
 
+typedef struct
+{
+  edge skip_edge;
+  tree new_base_addr;
+} iv_update;
+
 /*-----------------------------------------------------------------*/
 /* Info on vectorized loops.                                        */
 /*-----------------------------------------------------------------*/
@@ -752,6 +758,8 @@ public:
      analysis.  */
   vec<_loop_vec_info *> epilogue_vinfos;
 
+  hash_map<data_reference *, iv_update> *iv_updates_map;
+
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -807,6 +815,7 @@ public:
 #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
 #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
 #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
+#define LOOP_VINFO_IV_UPDATES(L) (L)->iv_updates_map
 
 #define LOOP_VINFO_FULLY_MASKED_P(L) \
   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \