diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 2a58e54fe51471df5f55ce4a524d0022744054b0..89e226ca3a25a6c77b86d46ba234ce54bd3cb83b 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2228,6 +2228,114 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
   return exact_div (common_multiple (nunits, group_size), group_size);
 }
 
+/* Helper that checks whether a node is a load node.  This is done based on
+   two criteria:
+     1) The node is internal.
+     2) The node has no children.  */
+
+static inline bool
+vect_is_slp_load_node (slp_tree root)
+{
+  return (SLP_TREE_DEF_TYPE (root) == vect_internal_def
+          && !SLP_TREE_CHILDREN (root).exists ());
+}
+
+
+/* Helper function of optimize_load_redistribution that performs the operation
+   recursively.  */
+
+static slp_tree
+optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
+                                vec_info *vinfo, unsigned int group_size,
+                                hash_set<slp_tree> *visited, slp_tree root)
+{
+  if (visited->add (root))
+    return NULL;
+
+  slp_tree node;
+  unsigned i;
+
+  /* For now, we don't know anything about externals so do not do anything.  */
+  if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
+    return NULL;
+  else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
+    {
+      /* First convert this node into a load node and add it to the leaves
+         list and flatten the permute from a lane to a load one.  If it's
+         unneeded it will be elided later.  */
+      vec<stmt_vec_info> stmts;
+      stmts.create (SLP_TREE_LANES (root));
+      lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
+      for (unsigned j = 0; j < lane_perm.length (); j++)
+        {
+          std::pair<unsigned, unsigned> perm = lane_perm[j];
+          node = SLP_TREE_CHILDREN (root)[perm.first];
+
+          if (!vect_is_slp_load_node (node))
+            return NULL;
+
+          stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
+        }
+
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "converting stmts on permute node %p\n", root);
+
+      bool *matches = XALLOCAVEC (bool, group_size);
+      poly_uint64 max_nunits = 1;
+      unsigned tree_size = 0, limit = 1;
+      node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits,
+                                  matches, &limit, &tree_size, bst_map);
+      if (!node)
+        stmts.release ();
+
+      return node;
+    }
+
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
+    {
+      slp_tree value
+        = optimize_load_redistribution_1 (bst_map, vinfo, group_size, visited,
+                                          node);
+      if (value)
+        {
+          SLP_TREE_CHILDREN (root)[i] = value;
+          vect_free_slp_tree (node);
+        }
+    }
+
+  return NULL;
+}
+
+/* Temporary workaround for loads not being CSEd during SLP build.  This
+   function will traverse the SLP tree rooted in ROOT and find VEC_PERM
+   nodes that blend vectors from multiple nodes that all read from the
+   same DR such that the final operation is equal to a permuted load.
+   Such nodes are then directly converted into loads themselves.  The
+   nodes are CSEd using BST_MAP.  */
+
+static void
+optimize_load_redistribution (scalar_stmts_to_slp_tree_map_t *bst_map,
+                              vec_info *vinfo, unsigned int group_size,
+                              slp_tree root)
+{
+  slp_tree node;
+  unsigned i;
+  hash_set<slp_tree> visited;
+
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
+    {
+      slp_tree value
+        = optimize_load_redistribution_1 (bst_map, vinfo, group_size, &visited,
+                                          node);
+      if (value)
+        {
+          SLP_TREE_CHILDREN (root)[i] = value;
+          vect_free_slp_tree (node);
+        }
+    }
+}
+
 /* Helper function of vect_match_slp_patterns.
 
    Attempts to match patterns against the slp tree rooted in REF_NODE using
@@ -2276,7 +2384,7 @@ static bool
 vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
                          hash_set<slp_tree> *visited,
                          slp_tree_to_load_perm_map_t *perm_cache,
-                         scalar_stmts_to_slp_tree_map_t * /* bst_map */)
+                         scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   DUMP_VECT_SCOPE ("vect_match_slp_patterns");
   slp_tree *ref_node = &SLP_INSTANCE_TREE (instance);
@@ -2291,6 +2399,9 @@ vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
 
   if (found_p)
     {
+      optimize_load_redistribution (bst_map, vinfo, SLP_TREE_LANES (*ref_node),
+                                    *ref_node);
+
       if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
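
For illustration, here is a hypothetical reduced testcase (the function name
and loop bounds are invented; it is not taken from the patch or its testsuite)
showing the kind of input this targets.  After the pattern matcher in
vect_match_slp_patterns fires on a complex-style multiplication like the one
below, the SLP tree can contain VEC_PERM_EXPR nodes whose lanes are all picked
from load nodes reading the same data reference, which
optimize_load_redistribution rebuilds as a single permuted load:

/* Multiplication of interleaved (real, imag) pairs of doubles; compiled
   with -O3 this is expected to be loop-vectorized with SLP.  */
void
complex_mul (double *restrict a, double *restrict b, double *restrict c)
{
  for (int i = 0; i < 1024; i += 2)
    {
      c[i + 0] = a[i + 0] * b[i + 0] - a[i + 1] * b[i + 1];
      c[i + 1] = a[i + 0] * b[i + 1] + a[i + 1] * b[i + 0];
    }
}

When vect_build_slp_tree is re-run on the gathered scalar loads, BST_MAP
either returns an already-built load node (the CSE effect the workaround
comment asks for) or builds a fresh load node with a load permutation; in
both cases the now-redundant permute subtree is released via
vect_free_slp_tree.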