diff --git a/gcc/testsuite/gcc.target/aarch64/masked_epilogue.c b/gcc/testsuite/gcc.target/aarch64/masked_epilogue.c new file mode 100644 index 0000000000000000000000000000000000000000..286a7be236f337fee4c4650f42da72000855c5e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/masked_epilogue.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +void f(unsigned char y[restrict], + unsigned char x[restrict], int n) { + for (int i = 0; i < n; ++i) + y[i] = (y[i] + x[i] + 1) >> 1; +} + +/* { dg-final { scan-tree-dump {LOOP EPILOGUE VECTORIZED \(MODE=VNx} "vect" } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a28bb6321d76b8222bc8cfdade151ca9b4dca406..17b090170d4a5dad22097a727bc25a63e230e278 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2824,11 +2824,13 @@ vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, { unsigned HOST_WIDE_INT main_vf_max = estimated_poly_value (main_poly_vf, POLY_VALUE_MAX); + unsigned HOST_WIDE_INT old_vf_max + = estimated_poly_value (old_vf, POLY_VALUE_MAX); + unsigned HOST_WIDE_INT new_vf_max + = estimated_poly_value (new_vf, POLY_VALUE_MAX); - old_factor = main_vf_max / estimated_poly_value (old_vf, - POLY_VALUE_MAX); - new_factor = main_vf_max / estimated_poly_value (new_vf, - POLY_VALUE_MAX); + old_factor = CEIL (main_vf_max, old_vf_max); + new_factor = CEIL (main_vf_max, new_vf_max); /* If the loop is not using partial vectors then it will iterate one time less than one that does. It is safe to subtract one here, @@ -3069,8 +3071,6 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) machine_mode autodetected_vector_mode = VOIDmode; opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); unsigned int mode_i = 0; - unsigned int first_loop_i = 0; - unsigned int first_loop_next_i = 0; unsigned HOST_WIDE_INT simdlen = loop->simdlen; /* First determine the main loop vectorization mode, either the first @@ -3079,7 +3079,6 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) lowest cost if pick_lowest_cost_p. */ while (1) { - unsigned int loop_vinfo_i = mode_i; bool fatal; opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, @@ -3108,11 +3107,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) first_loop_vinfo = opt_loop_vec_info::success (NULL); } if (first_loop_vinfo == NULL) - { - first_loop_vinfo = loop_vinfo; - first_loop_i = loop_vinfo_i; - first_loop_next_i = mode_i; - } + first_loop_vinfo = loop_vinfo; else { delete loop_vinfo; @@ -3158,26 +3153,18 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) /* Now analyze first_loop_vinfo for epilogue vectorization. */ poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); - /* Handle the case that the original loop can use partial - vectorization, but want to only adopt it for the epilogue. - The retry should be in the same mode as original. */ - if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo)) - { - gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (first_loop_vinfo) - && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (first_loop_vinfo)); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "***** Re-trying analysis with same vector mode" - " %s for epilogue with partial vectors.\n", - GET_MODE_NAME (first_loop_vinfo->vector_mode)); - mode_i = first_loop_i; - } - else - { - mode_i = first_loop_next_i; - if (mode_i == vector_modes.length ()) - return first_loop_vinfo; - } + /* For epilogues start the analysis from the first mode. The motivation + behind starting from the beginning comes from cases where the VECTOR_MODES + array may contain length agnostic and length fixed modes. Their ordering + is not guaranteed, so we could end up picking a mode for the main loop + that is after the epilogue's optimal mode. */ + mode_i = 0; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "***** Re-trying analysis with first vector mode" + " %s for epilogue.\n", + GET_MODE_NAME (vector_modes[mode_i])); /* ??? If first_loop_vinfo was using VOIDmode then we probably want to instead search for the corresponding mode in vector_modes[]. */