Index: tree-vectorizer.c =================================================================== --- tree-vectorizer.c (revision 127394) +++ tree-vectorizer.c (working copy) @@ -146,20 +146,6 @@ along with GCC; see the file COPYING3. #include "tree-vectorizer.h" #include "tree-pass.h" -/************************************************************************* - Simple Loop Peeling Utilities - *************************************************************************/ -static void slpeel_update_phis_for_duplicate_loop - (struct loop *, struct loop *, bool after); -static void slpeel_update_phi_nodes_for_guard1 - (edge, struct loop *, bool, basic_block *, bitmap *); -static void slpeel_update_phi_nodes_for_guard2 - (edge, struct loop *, bool, basic_block *); -static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block); - -static void rename_use_op (use_operand_p); -static void rename_variables_in_bb (basic_block); -static void rename_variables_in_loop (struct loop *); /************************************************************************* General Vectorization Utilities Index: tree-vectorizer.h =================================================================== --- tree-vectorizer.h (revision 127394) +++ tree-vectorizer.h (working copy) @@ -304,6 +304,7 @@ typedef struct _stmt_vec_info { #define STMT_VINFO_DR_GROUP_GAP(S) (S)->gap #define STMT_VINFO_DR_GROUP_SAME_DR_STMT(S)(S)->same_dr_stmt #define STMT_VINFO_DR_GROUP_READ_WRITE_DEPENDENCE(S) (S)->read_write_dep +#define STMT_VINFO_STRIDED_ACCESS(S) ((S)->first_dr != NULL) #define DR_GROUP_FIRST_DR(S) (S)->first_dr #define DR_GROUP_NEXT_DR(S) (S)->next_dr Index: tree-vect-analyze.c =================================================================== --- tree-vect-analyze.c (revision 127394) +++ tree-vect-analyze.c (working copy) @@ -40,29 +40,6 @@ along with GCC; see the file COPYING3. #include "tree-vectorizer.h" #include "toplev.h" -/* Main analysis functions. */ -static loop_vec_info vect_analyze_loop_form (struct loop *); -static bool vect_analyze_data_refs (loop_vec_info); -static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); -static void vect_analyze_scalar_cycles (loop_vec_info); -static bool vect_analyze_data_ref_accesses (loop_vec_info); -static bool vect_analyze_data_ref_dependences (loop_vec_info); -static bool vect_analyze_data_refs_alignment (loop_vec_info); -static bool vect_compute_data_refs_alignment (loop_vec_info); -static bool vect_enhance_data_refs_alignment (loop_vec_info); -static bool vect_analyze_operations (loop_vec_info); -static bool vect_determine_vectorization_factor (loop_vec_info); - -/* Utility functions for the analyses. 
*/ -static bool exist_non_indexing_operands_for_use_p (tree, tree); -static tree vect_get_loop_niters (struct loop *, tree *); -static bool vect_analyze_data_ref_dependence - (struct data_dependence_relation *, loop_vec_info); -static bool vect_compute_data_ref_alignment (struct data_reference *); -static bool vect_analyze_data_ref_access (struct data_reference *); -static bool vect_can_advance_ivs_p (loop_vec_info); -static void vect_update_misalignment_for_peel - (struct data_reference *, struct data_reference *, int npeel); /* Function vect_determine_vectorization_factor @@ -283,6 +260,95 @@ vect_determine_vectorization_factor (loo } +/* Function vect_can_advance_ivs_p + + In case the number of iterations that LOOP iterates is unknown at compile + time, an epilog loop will be generated, and the loop induction variables + (IVs) will be "advanced" to the value they are supposed to take just before + the epilog loop. Here we check that the access function of the loop IVs + and the expression that represents the loop bound are simple enough. + These restrictions will be relaxed in the future. */ + +static bool +vect_can_advance_ivs_p (loop_vec_info loop_vinfo) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block bb = loop->header; + tree phi; + + /* Analyze phi functions of the loop header. */ + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "vect_can_advance_ivs_p:"); + + for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) + { + tree access_fn = NULL; + tree evolution_part; + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "Analyze phi: "); + print_generic_expr (vect_dump, phi, TDF_SLIM); + } + + /* Skip virtual phi's. The data dependences that are associated with + virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ + + if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "virtual phi. skip."); + continue; + } + + /* Skip reduction phis. */ + + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduc phi. skip."); + continue; + } + + /* Analyze the evolution function. */ + + access_fn = instantiate_parameters + (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); + + if (!access_fn) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "No Access function."); + return false; + } + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "Access function of PHI: "); + print_generic_expr (vect_dump, access_fn, TDF_SLIM); + } + + evolution_part = evolution_part_in_loop_num (access_fn, loop->num); + + if (evolution_part == NULL_TREE) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "No evolution."); + return false; + } + + /* FORNOW: We do not transform initial conditions of IVs + which evolution functions are a polynomial of degree >= 2. */ + + if (tree_is_chrec (evolution_part)) + return false; + } + + return true; +} + + /* Function vect_analyze_operations. Scan the loop stmts and make sure they are all vectorizable. */ @@ -1299,9 +1365,9 @@ vect_update_misalignment_for_peel (struc /* For interleaved data accesses the step in the loop must be multiplied by the size of the interleaving group. 
*/ - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info))); - if (DR_GROUP_FIRST_DR (peel_stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info)) dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info); /* It can be assumed that the data refs with the same alignment as dr_peel @@ -1353,7 +1419,7 @@ vect_verify_datarefs_alignment (loop_vec stmt_vec_info stmt_info = vinfo_for_stmt (stmt); /* For interleaving, only the alignment of the first access matters. */ - if (DR_GROUP_FIRST_DR (stmt_info) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; @@ -1391,7 +1457,7 @@ vector_alignment_reachable_p (struct dat stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* For interleaved access we peel only if number of iterations in the prolog loop ({VF - misalignment}), is a multiple of the @@ -1604,7 +1670,7 @@ vect_enhance_data_refs_alignment (loop_v /* For interleaving, only the alignment of the first access matters. */ - if (DR_GROUP_FIRST_DR (stmt_info) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; @@ -1648,7 +1714,7 @@ vect_enhance_data_refs_alignment (loop_v members of the group, therefore we divide the number of iterations by the group size. */ stmt_info = vinfo_for_stmt (DR_STMT (dr0)); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) npeel /= DR_GROUP_SIZE (stmt_info); if (vect_print_dump_info (REPORT_DETAILS)) @@ -1667,7 +1733,7 @@ vect_enhance_data_refs_alignment (loop_v stmt_info = vinfo_for_stmt (stmt); /* For interleaving, only the alignment of the first access matters. */ - if (DR_GROUP_FIRST_DR (stmt_info) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; @@ -1734,7 +1800,7 @@ vect_enhance_data_refs_alignment (loop_v /* For interleaving, only the alignment of the first access matters. */ if (aligned_access_p (dr) - || (DR_GROUP_FIRST_DR (stmt_info) + || (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt)) continue; @@ -1846,65 +1912,51 @@ vect_analyze_data_refs_alignment (loop_v } -/* Function vect_analyze_data_ref_access. - - Analyze the access pattern of the data-reference DR. For now, a data access - has to be consecutive to be considered vectorizable. */ +/* Analyze groups of strided accesses: check that DR belongs to a group of + strided accesses of legal size, step, etc. Detect gaps, single element + interleaving, and other special cases. Set strided access info. */ static bool -vect_analyze_data_ref_access (struct data_reference *dr) +vect_analyze_group_access (struct data_reference *dr) { tree step = DR_STEP (dr); - HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); tree scalar_type = TREE_TYPE (DR_REF (dr)); HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); tree stmt = DR_STMT (dr); - /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the - interleaving group (including gaps). */ - HOST_WIDE_INT stride = dr_step / type_size; - - if (!step) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "bad data-ref access"); - return false; - } + HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); + HOST_WIDE_INT stride; - /* Consecutive? 
*/ - if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) - { - /* Mark that it is not interleaving. */ - DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE; - return true; - } + /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the + interleaving group (including gaps). */ + stride = dr_step / type_size; /* Not consecutive access is possible only if it is a part of interleaving. */ - if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))) + if (!STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))) { /* Check if it this DR is a part of interleaving, and is a single - element of the group that is accessed in the loop. */ - + element of the group that is accessed in the loop. */ + /* Gaps are supported only for loads. STEP must be a multiple of the type - size. The size of the group must be a power of 2. */ + size. The size of the group must be a power of 2. */ if (DR_IS_READ (dr) - && (dr_step % type_size) == 0 - && stride > 0 - && exact_log2 (stride) != -1) - { - DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt; - DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; - if (vect_print_dump_info (REPORT_DR_DETAILS)) - { - fprintf (vect_dump, "Detected single element interleaving %d ", - DR_GROUP_SIZE (vinfo_for_stmt (stmt))); - print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); - fprintf (vect_dump, " step "); - print_generic_expr (vect_dump, step, TDF_SLIM); - } - return true; - } + && (dr_step % type_size) == 0 + && stride > 0 + && exact_log2 (stride) != -1) + { + DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt; + DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; + if (vect_print_dump_info (REPORT_DR_DETAILS)) + { + fprintf (vect_dump, "Detected single element interleaving %d ", + DR_GROUP_SIZE (vinfo_for_stmt (stmt))); + print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); + fprintf (vect_dump, " step "); + print_generic_expr (vect_dump, step, TDF_SLIM); + } + return true; + } if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "not consecutive access"); + fprintf (vect_dump, "not consecutive access"); return false; } @@ -1920,99 +1972,99 @@ vect_analyze_data_ref_access (struct dat HOST_WIDE_INT diff, count_in_bytes; while (next) - { - /* Skip same data-refs. In case that two or more stmts share data-ref - (supported only for loads), we vectorize only the first stmt, and - the rest get their vectorized loads from the first one. */ - if (!tree_int_cst_compare (DR_INIT (data_ref), - DR_INIT (STMT_VINFO_DATA_REF ( - vinfo_for_stmt (next))))) - { + { + /* Skip same data-refs. In case that two or more stmts share data-ref + (supported only for loads), we vectorize only the first stmt, and + the rest get their vectorized loads from the first one. */ + if (!tree_int_cst_compare (DR_INIT (data_ref), + DR_INIT (STMT_VINFO_DATA_REF ( + vinfo_for_stmt (next))))) + { if (!DR_IS_READ (data_ref)) - { + { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Two store stmts share the same dr."); - return false; + return false; } - /* Check that there is no load-store dependencies for this loads + /* Check that there is no load-store dependencies for this loads to prevent a case of load-store-load to the same location. */ if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next)) || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev))) { if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, + fprintf (vect_dump, "READ_WRITE dependence in interleaving."); return false; } - /* For load use the same data-ref load. 
*/ - DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; + /* For load use the same data-ref load. */ + DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; - prev = next; - next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); - continue; - } - prev = next; + prev = next; + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + continue; + } + prev = next; - /* Check that all the accesses have the same STEP. */ - next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); - if (tree_int_cst_compare (step, next_step)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "not consecutive access in interleaving"); - return false; - } + /* Check that all the accesses have the same STEP. */ + next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); + if (tree_int_cst_compare (step, next_step)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "not consecutive access in interleaving"); + return false; + } - data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next)); - /* Check that the distance between two accesses is equal to the type - size. Otherwise, we have gaps. */ - diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) - - TREE_INT_CST_LOW (prev_init)) / type_size; - if (!DR_IS_READ (data_ref) && diff != 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaved store with gaps"); - return false; - } - /* Store the gap from the previous member of the group. If there is no + data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next)); + /* Check that the distance between two accesses is equal to the type + size. Otherwise, we have gaps. */ + diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) + - TREE_INT_CST_LOW (prev_init)) / type_size; + if (!DR_IS_READ (data_ref) && diff != 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "interleaved store with gaps"); + return false; + } + /* Store the gap from the previous member of the group. If there is no gap in the access, DR_GROUP_GAP is always 1. */ - DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; + DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; - prev_init = DR_INIT (data_ref); - next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); - /* Count the number of data-refs in the chain. */ - count++; - } + prev_init = DR_INIT (data_ref); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + /* Count the number of data-refs in the chain. */ + count++; + } - /* COUNT is the number of accesses found, we multiply it by the size of - the type to get COUNT_IN_BYTES. */ + /* COUNT is the number of accesses found, we multiply it by the size of + the type to get COUNT_IN_BYTES. */ count_in_bytes = type_size * count; /* Check that the size of the interleaving is not greater than STEP. */ - if (dr_step < count_in_bytes) - { - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "interleaving size is greater than step for "); - print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); - } - return false; - } + if (dr_step < count_in_bytes) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "interleaving size is greater than step for "); + print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); + } + return false; + } - /* Check that the size of the interleaving is equal to STEP for stores, - i.e., that there are no gaps. 
*/ - if (!DR_IS_READ (dr) && dr_step != count_in_bytes) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaved store with gaps"); - return false; - } + /* Check that the size of the interleaving is equal to STEP for stores, + i.e., that there are no gaps. */ + if (!DR_IS_READ (dr) && dr_step != count_in_bytes) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "interleaved store with gaps"); + return false; + } /* Check that STEP is a multiple of type size. */ if ((dr_step % type_size) != 0) - { - if (vect_print_dump_info (REPORT_DETAILS)) + { + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "step is not a multiple of type size: step "); print_generic_expr (vect_dump, step, TDF_SLIM); @@ -2020,22 +2072,53 @@ vect_analyze_data_ref_access (struct dat print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type), TDF_SLIM); } - return false; - } + return false; + } /* FORNOW: we handle only interleaving that is a power of 2. */ if (exact_log2 (stride) == -1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaving is not a power of 2"); - return false; - } + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "interleaving is not a power of 2"); + return false; + } DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; } return true; } +/* Analyze the access pattern of the data-reference DR. + In case of non-consecutive accesses call vect_analyze_group_access() to + analyze groups of strided accesses. */ + +static bool +vect_analyze_data_ref_access (struct data_reference *dr) +{ + tree step = DR_STEP (dr); + tree scalar_type = TREE_TYPE (DR_REF (dr)); + tree stmt = DR_STMT (dr); + + if (!step) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "bad data-ref access"); + return false; + } + + /* Consecutive? */ + if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) + { + /* Mark that it is not interleaving. */ + DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE; + return true; + } + + /* Not consecutive access - check if it's a part of interleaving group. */ + return vect_analyze_group_access (dr); +} + + /* Function vect_analyze_data_ref_accesses. Analyze the access pattern of all the data references in the loop. @@ -2519,95 +2602,6 @@ vect_mark_stmts_to_be_vectorized (loop_v } -/* Function vect_can_advance_ivs_p - - In case the number of iterations that LOOP iterates is unknown at compile - time, an epilog loop will be generated, and the loop induction variables - (IVs) will be "advanced" to the value they are supposed to take just before - the epilog loop. Here we check that the access function of the loop IVs - and the expression that represents the loop bound are simple enough. - These restrictions will be relaxed in the future. */ - -static bool -vect_can_advance_ivs_p (loop_vec_info loop_vinfo) -{ - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block bb = loop->header; - tree phi; - - /* Analyze phi functions of the loop header. */ - - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "vect_can_advance_ivs_p:"); - - for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) - { - tree access_fn = NULL; - tree evolution_part; - - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "Analyze phi: "); - print_generic_expr (vect_dump, phi, TDF_SLIM); - } - - /* Skip virtual phi's. The data dependences that are associated with - virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. 
*/ - - if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "virtual phi. skip."); - continue; - } - - /* Skip reduction phis. */ - - if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "reduc phi. skip."); - continue; - } - - /* Analyze the evolution function. */ - - access_fn = instantiate_parameters - (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); - - if (!access_fn) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "No Access function."); - return false; - } - - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "Access function of PHI: "); - print_generic_expr (vect_dump, access_fn, TDF_SLIM); - } - - evolution_part = evolution_part_in_loop_num (access_fn, loop->num); - - if (evolution_part == NULL_TREE) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "No evolution."); - return false; - } - - /* FORNOW: We do not transform initial conditions of IVs - which evolution functions are a polynomial of degree >= 2. */ - - if (tree_is_chrec (evolution_part)) - return false; - } - - return true; -} - - /* Function vect_get_loop_niters. Determine how many iterations the loop is executed. Index: tree-vect-transform.c =================================================================== --- tree-vect-transform.c (revision 127394) +++ tree-vect-transform.c (working copy) @@ -45,58 +45,33 @@ along with GCC; see the file COPYING3. #include "toplev.h" #include "real.h" -/* Utility functions for the code transformation. */ -static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *); -static tree vect_create_destination_var (tree, tree); -static tree vect_create_data_ref_ptr - (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree); -static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); -static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *); -static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); -static tree vect_get_vec_def_for_operand (tree, tree, tree *); -static tree vect_init_vector (tree, tree, tree); -static void vect_finish_stmt_generation - (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); -static bool vect_is_simple_cond (tree, loop_vec_info); -static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree); static tree get_initial_def_for_reduction (tree, tree, tree *); -/* Utility function dealing with loop peeling (not peeling itself). 
*/ -static void vect_generate_tmps_on_preheader - (loop_vec_info, tree *, tree *, tree *); -static tree vect_build_loop_niters (loop_vec_info); -static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); -static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); -static void vect_update_init_of_dr (struct data_reference *, tree niters); -static void vect_update_inits_of_drs (loop_vec_info, tree); -static int vect_min_worthwhile_factor (enum tree_code); - - static int cost_for_stmt (tree stmt) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); switch (STMT_VINFO_TYPE (stmt_info)) - { - case load_vec_info_type: - return TARG_SCALAR_LOAD_COST; - case store_vec_info_type: - return TARG_SCALAR_STORE_COST; - case op_vec_info_type: - case condition_vec_info_type: - case assignment_vec_info_type: - case reduc_vec_info_type: - case induc_vec_info_type: - case type_promotion_vec_info_type: - case type_demotion_vec_info_type: - case type_conversion_vec_info_type: - case call_vec_info_type: - return TARG_SCALAR_STMT_COST; - case undef_vec_info_type: - default: - gcc_unreachable (); - } + { + case load_vec_info_type: + return TARG_SCALAR_LOAD_COST; + case store_vec_info_type: + return TARG_SCALAR_STORE_COST; + case op_vec_info_type: + case condition_vec_info_type: + case assignment_vec_info_type: + case reduc_vec_info_type: + case induc_vec_info_type: + case type_promotion_vec_info_type: + case type_demotion_vec_info_type: + case type_conversion_vec_info_type: + case call_vec_info_type: + return TARG_SCALAR_STMT_COST; + case undef_vec_info_type: + default: + gcc_unreachable (); + } } @@ -442,14 +417,15 @@ vect_model_induction_cost (stmt_vec_info be generated for the single vector op. We will handle that shortly. */ static void -vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt) +vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, + enum vect_def_type *dt) { int i; STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; /* FORNOW: Assuming maximum 2 args per stmts. */ - for (i=0; i<2; i++) + for (i = 0; i < 2; i++) { if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def) STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST; @@ -486,7 +462,8 @@ vect_cost_strided_group_size (stmt_vec_i has the overhead of the strided access attributed to it. */ static void -vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt) +vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, + enum vect_def_type dt) { int cost = 0; int group_size; @@ -495,7 +472,7 @@ vect_model_store_cost (stmt_vec_info stm STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST; /* Strided access? */ - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) group_size = vect_cost_strided_group_size (stmt_info); /* Not a strided access. */ else @@ -612,7 +589,7 @@ vect_model_load_cost (stmt_vec_info stmt access in the group. Inside the loop, there is a load op and a realignment op. 
*/ - if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1) + if ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1) { outer_cost = 2*TARG_VEC_STMT_COST; if (targetm.vectorize.builtin_mask_for_load) @@ -636,7 +613,6 @@ vect_model_load_cost (stmt_vec_info stmt fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, " "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)); - } @@ -953,6 +929,38 @@ vect_create_data_ref_ptr (tree stmt, } +/* Function vect_finish_stmt_generation. + + Insert a new stmt. */ + +static void +vect_finish_stmt_generation (tree stmt, tree vec_stmt, + block_stmt_iterator *bsi) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + set_stmt_info (get_stmt_ann (vec_stmt), + new_stmt_vec_info (vec_stmt, loop_vinfo)); + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "add new stmt: "); + print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); + } + + /* Make sure bsi points to the stmt that is being vectorized. */ + gcc_assert (stmt == bsi_stmt (*bsi)); + +#ifdef USE_MAPPED_LOCATION + SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); +#else + SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt)); +#endif +} + + /* Function bump_vector_ptr Increment a pointer (to a vector type) by vector-size. Connect the new @@ -1415,115 +1423,6 @@ vect_get_vec_def_for_operand (tree op, t } -/* Function vect_get_vec_def_for_stmt_copy - - Return a vector-def for an operand. This function is used when the - vectorized stmt to be created (by the caller to this function) is a "copy" - created in case the vectorized result cannot fit in one vector, and several - copies of the vector-stmt are required. In this case the vector-def is - retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field - of the stmt that defines VEC_OPRND. - DT is the type of the vector def VEC_OPRND. - - Context: - In case the vectorization factor (VF) is bigger than the number - of elements that can fit in a vectype (nunits), we have to generate - more than one vector stmt to vectorize the scalar stmt. This situation - arises when there are multiple data-types operated upon in the loop; the - smallest data-type determines the VF, and as a result, when vectorizing - stmts operating on wider types we need to create 'VF/nunits' "copies" of the - vector stmt (each computing a vector of 'nunits' results, and together - computing 'VF' results in each iteration). This function is called when - vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in - which VF=16 and nunits=4, so the number of copies required is 4): - - scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT - - S1: x = load VS1.0: vx.0 = memref0 VS1.1 - VS1.1: vx.1 = memref1 VS1.2 - VS1.2: vx.2 = memref2 VS1.3 - VS1.3: vx.3 = memref3 - - S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1 - VSnew.1: vz1 = vx.1 + ... VSnew.2 - VSnew.2: vz2 = vx.2 + ... VSnew.3 - VSnew.3: vz3 = vx.3 + ... - - The vectorization of S1 is explained in vectorizable_load. - The vectorization of S2: - To create the first vector-stmt out of the 4 copies - VSnew.0 - - the function 'vect_get_vec_def_for_operand' is called to - get the relevant vector-def for each operand of S2. For operand x it - returns the vector-def 'vx.0'. 
- - To create the remaining copies of the vector-stmt (VSnew.j), this - function is called to get the relevant vector-def for each operand. It is - obtained from the respective VS1.j stmt, which is recorded in the - STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND. - - For example, to obtain the vector-def 'vx.1' in order to create the - vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'. - Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the - STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1', - and return its def ('vx.1'). - Overall, to create the above sequence this function will be called 3 times: - vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0); - vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1); - vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */ - -static tree -vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) -{ - tree vec_stmt_for_operand; - stmt_vec_info def_stmt_info; - - /* Do nothing; can reuse same def. */ - if (dt == vect_invariant_def || dt == vect_constant_def ) - return vec_oprnd; - - vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd); - def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand); - gcc_assert (def_stmt_info); - vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); - gcc_assert (vec_stmt_for_operand); - vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0); - - return vec_oprnd; -} - - -/* Function vect_finish_stmt_generation. - - Insert a new stmt. */ - -static void -vect_finish_stmt_generation (tree stmt, tree vec_stmt, - block_stmt_iterator *bsi) -{ - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - - bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); - set_stmt_info (get_stmt_ann (vec_stmt), - new_stmt_vec_info (vec_stmt, loop_vinfo)); - - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "add new stmt: "); - print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); - } - - /* Make sure bsi points to the stmt that is being vectorized. */ - gcc_assert (stmt == bsi_stmt (*bsi)); - -#ifdef USE_MAPPED_LOCATION - SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); -#else - SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt)); -#endif -} - - /* Function get_initial_def_for_reduction Input: @@ -1616,6 +1515,111 @@ get_initial_def_for_reduction (tree stmt } +/* Function vect_get_vec_def_for_stmt_copy + + Return a vector-def for an operand. This function is used when the + vectorized stmt to be created (by the caller to this function) is a "copy" + created in case the vectorized result cannot fit in one vector, and several + copies of the vector-stmt are required. In this case the vector-def is + retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field + of the stmt that defines VEC_OPRND. + DT is the type of the vector def VEC_OPRND. + + Context: + In case the vectorization factor (VF) is bigger than the number + of elements that can fit in a vectype (nunits), we have to generate + more than one vector stmt to vectorize the scalar stmt. This situation + arises when there are multiple data-types operated upon in the loop; the + smallest data-type determines the VF, and as a result, when vectorizing + stmts operating on wider types we need to create 'VF/nunits' "copies" of the + vector stmt (each computing a vector of 'nunits' results, and together + computing 'VF' results in each iteration). This function is called when + vectorizing such a stmt (e.g. 
vectorizing S2 in the illustration below, in + which VF=16 and nunits=4, so the number of copies required is 4): + + scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT + + S1: x = load VS1.0: vx.0 = memref0 VS1.1 + VS1.1: vx.1 = memref1 VS1.2 + VS1.2: vx.2 = memref2 VS1.3 + VS1.3: vx.3 = memref3 + + S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1 + VSnew.1: vz1 = vx.1 + ... VSnew.2 + VSnew.2: vz2 = vx.2 + ... VSnew.3 + VSnew.3: vz3 = vx.3 + ... + + The vectorization of S1 is explained in vectorizable_load. + The vectorization of S2: + To create the first vector-stmt out of the 4 copies - VSnew.0 - + the function 'vect_get_vec_def_for_operand' is called to + get the relevant vector-def for each operand of S2. For operand x it + returns the vector-def 'vx.0'. + + To create the remaining copies of the vector-stmt (VSnew.j), this + function is called to get the relevant vector-def for each operand. It is + obtained from the respective VS1.j stmt, which is recorded in the + STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND. + + For example, to obtain the vector-def 'vx.1' in order to create the + vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'. + Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the + STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1', + and return its def ('vx.1'). + Overall, to create the above sequence this function will be called 3 times: + vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0); + vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1); + vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */ + +static tree +vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) +{ + tree vec_stmt_for_operand; + stmt_vec_info def_stmt_info; + + /* Do nothing; can reuse same def. */ + if (dt == vect_invariant_def || dt == vect_constant_def ) + return vec_oprnd; + + vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd); + def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand); + gcc_assert (def_stmt_info); + vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); + gcc_assert (vec_stmt_for_operand); + vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0); + + return vec_oprnd; +} + + +/* Function vect_min_worthwhile_factor. + + For a loop where we could vectorize the operation indicated by CODE, + return the minimum vectorization factor that makes it worthwhile + to use generic vectors. */ + +static int +vect_min_worthwhile_factor (enum tree_code code) +{ + switch (code) + { + case PLUS_EXPR: + case MINUS_EXPR: + case NEGATE_EXPR: + return 4; + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_NOT_EXPR: + return 2; + + default: + return INT_MAX; + } +} + + /* Function vect_create_epilog_for_reduction Create code at the loop-epilog to finalize the result of a reduction @@ -2653,16 +2657,14 @@ vect_gen_widened_results_half (enum tree } -/* Function vectorizable_conversion. - -Check if STMT performs a conversion operation, that can be vectorized. -If VEC_STMT is also passed, vectorize the STMT: create a vectorized -stmt to replace it, put it in VEC_STMT, and insert it at BSI. -Return FALSE if not a vectorizable STMT, TRUE otherwise. */ +/* Check if STMT performs a conversion operation, that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ bool -vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, - tree * vec_stmt) +vectorizable_conversion (tree stmt, block_stmt_iterator *bsi, + tree *vec_stmt) { tree vec_dest; tree scalar_dest; @@ -2714,7 +2716,7 @@ vectorizable_conversion (tree stmt, bloc if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) return false; - /* Check types of lhs and rhs */ + /* Check types of lhs and rhs. */ op0 = TREE_OPERAND (operation, 0); rhs_type = TREE_TYPE (op0); vectype_in = get_vectype_for_scalar_type (rhs_type); @@ -2738,7 +2740,7 @@ vectorizable_conversion (tree stmt, bloc if (modifier == NONE) gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out); - /* Bail out if the types are both integral or non-integral */ + /* Bail out if the types are both integral or non-integral. */ if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type)) || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type))) return false; @@ -2901,6 +2903,7 @@ vectorizable_conversion (tree stmt, bloc *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); } + return true; } @@ -2992,33 +2995,6 @@ vectorizable_assignment (tree stmt, bloc } -/* Function vect_min_worthwhile_factor. - - For a loop where we could vectorize the operation indicated by CODE, - return the minimum vectorization factor that makes it worthwhile - to use generic vectors. */ -static int -vect_min_worthwhile_factor (enum tree_code code) -{ - switch (code) - { - case PLUS_EXPR: - case MINUS_EXPR: - case NEGATE_EXPR: - return 4; - - case BIT_AND_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - case BIT_NOT_EXPR: - return 2; - - default: - return INT_MAX; - } -} - - /* Function vectorizable_induction Check if PHI performs an induction computation that can be vectorized. @@ -3331,7 +3307,7 @@ vectorizable_operation (tree stmt, block vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); } - /* Arguments are ready. create the new vector stmt. */ + /* Arguments are ready. Create the new vector stmt. */ if (op_type == binary_op) new_stmt = build_gimple_modify_stmt (vec_dest, @@ -3881,6 +3857,7 @@ vectorizable_store (tree stmt, block_stm bool strided_store = false; unsigned int group_size, i; VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; + gcc_assert (ncopies >= 1); if (!STMT_VINFO_RELEVANT_P (stmt_info)) @@ -3904,7 +3881,7 @@ vectorizable_store (tree stmt, block_stm scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); if (TREE_CODE (scalar_dest) != ARRAY_REF && TREE_CODE (scalar_dest) != INDIRECT_REF - && !DR_GROUP_FIRST_DR (stmt_info)) + && !STMT_VINFO_STRIDED_ACCESS (stmt_info)) return false; op = GIMPLE_STMT_OPERAND (stmt, 1); @@ -3924,7 +3901,7 @@ vectorizable_store (tree stmt, block_stm if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { strided_store = true; if (!vect_strided_store_supported (vectype)) @@ -4517,6 +4494,8 @@ vectorizable_load (tree stmt, block_stmt bool strided_load = false; tree first_stmt; + gcc_assert (ncopies >= 1); + if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -4542,7 +4521,7 @@ vectorizable_load (tree stmt, block_stmt op = GIMPLE_STMT_OPERAND (stmt, 1); if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF - && !DR_GROUP_FIRST_DR (stmt_info)) + && !STMT_VINFO_STRIDED_ACCESS (stmt_info)) return false; if (!STMT_VINFO_DATA_REF (stmt_info)) @@ -4560,7 +4539,7 @@ vectorizable_load (tree stmt, block_stmt } /* Check if the load is a part of an interleaving chain. 
*/ - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { strided_load = true; @@ -5028,11 +5007,12 @@ vectorizable_condition (tree stmt, block return true; } + /* Function vect_transform_stmt. Create a vectorized stmt to replace STMT, and insert it at BSI. */ -bool +static bool vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store) { bool is_store = false; @@ -5081,7 +5061,7 @@ vect_transform_stmt (tree stmt, block_st case store_vec_info_type: done = vectorizable_store (stmt, bsi, &vec_stmt); gcc_assert (done); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* In case of interleaving, the whole chain is vectorized when the last store in the chain is reached. Store stmts before the last @@ -5517,7 +5497,7 @@ vect_gen_niters_for_prolog_loop (loop_ve int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); int nelements = TYPE_VECTOR_SUBPARTS (vectype); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* For interleaved access element size must be multiplied by the size of the interleaved group. */ @@ -5989,7 +5969,7 @@ vect_transform_loop (loop_vec_info loop_ if (is_store) { stmt_ann_t ann; - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* Interleaving. If IS_STORE is TRUE, the vectorization of the interleaving chain was completed - free all the stores in