--- gcc/config/rs6000/rs6000.c | 88 +++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ffdf10098a9..5ae40d6f4ce 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5245,12 +5245,16 @@ rs6000_density_test (rs6000_cost_data *data) const int DENSITY_PCT_THRESHOLD = 85; const int DENSITY_SIZE_THRESHOLD = 70; const int DENSITY_PENALTY = 10; + const int DENSITY_LOAD_PCT_THRESHOLD = 80; + const int DENSITY_LOAD_FOR_CTOR_PCT_THRESHOLD = 65; + const int DENSITY_LOAD_SIZE_THRESHOLD = 20; struct loop *loop = data->loop_info; basic_block *bbs = get_loop_body (loop); int nbbs = loop->num_nodes; loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info); int vec_cost = data->cost[vect_body], not_vec_cost = 0; int i, density_pct; + unsigned int nload_total = 0, nctor_for_strided = 0, nload_for_ctor = 0; /* Only care about cost of vector version, so exclude scalar version here. */ if (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) != (void *) data) @@ -5272,21 +5276,83 @@ rs6000_density_test (rs6000_cost_data *data) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_IN_PATTERN_P (stmt_info)) not_vec_cost++; + else + { + stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info); + if (STMT_VINFO_DATA_REF (vstmt_info) + && DR_IS_READ (STMT_VINFO_DATA_REF (vstmt_info))) + { + if (STMT_VINFO_STRIDED_P (vstmt_info)) + { + unsigned int ncopies = 1; + unsigned int nunits = 1; + /* TODO: For VMAT_STRIDED_SLP, the total CTOR can be + fewer due to group access. Simply handle it here + for now. */ + if (!STMT_SLP_TYPE (vstmt_info)) + { + tree vectype = STMT_VINFO_VECTYPE (vstmt_info); + ncopies = vect_get_num_copies (loop_vinfo, vectype); + nunits = vect_nunits_for_cost (vectype); + } + unsigned int nloads = ncopies * nunits; + nload_for_ctor += nloads; + nload_total += nloads; + nctor_for_strided += ncopies; + } + else + nload_total++; + } + } } } - free (bbs); - density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); - if (density_pct > DENSITY_PCT_THRESHOLD - && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD) - { - data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100; - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "density %d%%, cost %d exceeds threshold, penalizing " - "loop body cost by %d%%", density_pct, - vec_cost + not_vec_cost, DENSITY_PENALTY); + if (vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD) + { + density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); + if (density_pct > DENSITY_PCT_THRESHOLD) + { + data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "density %d%%, cost %d exceeds threshold, " + "penalizing loop body cost by %d%%.\n", + density_pct, vec_cost + not_vec_cost, + DENSITY_PENALTY); + } + /* For one loop which has a large proportion scalar loads of all + loads fed into vector construction, if the density is high, + the loads will have more stalls than usual, further affect + the vector construction. One typical case is the innermost + loop of the hotspot of spec2017 503.bwaves_r without loop + interchange. Here we price more on the related vector + construction and penalize the body cost. */ + else if (density_pct > DENSITY_LOAD_PCT_THRESHOLD + && nload_total > DENSITY_LOAD_SIZE_THRESHOLD) + { + int load_for_ctor_pct = (nload_for_ctor * 100) / nload_total; + /* Large proportion of scalar loads fed to vector CTOR. */ + if (load_for_ctor_pct > DENSITY_LOAD_FOR_CTOR_PCT_THRESHOLD) + { + vec_cost += nctor_for_strided; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Found high density loop with a large " + "proportion %d%% of scalar loads fed to " + "vector ctor, add cost %d.\n", + load_for_ctor_pct, nctor_for_strided); + + data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "density %d%%, cost %d exceeds threshold, " + "penalizing loop body cost by %d%% for " + "load.\n", + density_pct, vec_cost + not_vec_cost, + DENSITY_PENALTY); + } + } } } -- 2.17.1