diff --git a/gcc/testsuite/gcc.target/aarch64/pr105219-2.c b/gcc/testsuite/gcc.target/aarch64/pr105219-2.c new file mode 100644 index 0000000000000000000000000000000000000000..c97d1dc100181b77af0766e08407e1e352f604fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr105219-2.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=armv8.2-a -mtune=thunderx -fno-vect-cost-model" } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-march=*" } { "-march=armv8.2-a" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-mtune=*" } { "-mtune=thunderx" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-mcpu=*" } } */ +/* PR 105219. */ +int data[128]; + +void __attribute((noipa)) +foo (int *data, int n) +{ + for (int i = 0; i < n; ++i) + data[i] = i; +} + +int main() +{ + for (int start = 0; start < 16; ++start) + for (int n = 1; n < 3*16; ++n) + { + __builtin_memset (data, 0, sizeof (data)); + foo (&data[start], n); + for (int j = 0; j < n; ++j) + if (data[start + j] != j) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/pr105219-3.c b/gcc/testsuite/gcc.target/aarch64/pr105219-3.c new file mode 100644 index 0000000000000000000000000000000000000000..444352fc051b787369f6f1be6236d1ff0fc2d392 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr105219-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-march=*" } { "-march=armv8.2-a" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-mtune=*" } { "-mtune=thunderx" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-mcpu=*" } } */ +/* { dg-options "-O3 -march=armv8.2-a -mtune=thunderx -fno-vect-cost-model -fdump-tree-vect-all" } */ +/* PR 105219. */ +int data[128]; + +void foo (void) +{ + for (int i = 0; i < 9; ++i) + data[i + 1] = i; +} + +/* { dg-final { scan-tree-dump "EPILOGUE VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr105219.c b/gcc/testsuite/gcc.target/aarch64/pr105219.c new file mode 100644 index 0000000000000000000000000000000000000000..bbdefb549f6a4e803852f69d20ce1ef9152a526c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr105219.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve128_hw } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-march=*" } { "-march=armv8.2-a+sve" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-mtune=*" } { "-mtune=thunderx" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-mcpu=*" } } */ +/* { dg-skip-if "incompatible options" { *-*-* } { "-msve-vector-bits=*" } { "-msve-vector-bits=128" } } */ +/* { dg-options "-O3 -march=armv8.2-a+sve -msve-vector-bits=128 -mtune=thunderx" } */ +/* PR 105219. */ +int a; +char b[60]; +short c[18]; +short d[4][19]; +long long f; +void e(int g, int h, short k[][19]) { + for (signed i = 0; i < 3; i += 2) + for (signed j = 1; j < h + 14; j++) { + b[i * 14 + j] = 1; + c[i + j] = k[2][j]; + a = g ? k[i][j] : 0; + } +} +int main() { + e(9, 1, d); + for (long l = 0; l < 6; ++l) + for (long m = 0; m < 4; ++m) + f ^= b[l + m * 4]; + if (f) + __builtin_abort (); +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index d7bc34636bd52b2f67cdecd3dc16fcff684dba07..a23e6181dec8126bcb691ea9474095bf65483863 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2942,6 +2942,38 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, return opt_loop_vec_info::success (loop_vinfo); } +/* Function vect_epilogue_when_peeling_for_alignment + + PR 105219: If we are peeling for alignment in the prologue then we do not + vectorize the epilogue unless we are certain we will enter the main + vectorized loop. This is to prevent entering the vectorized epilogue in + case there aren't enough iterations to enter the main loop. +*/ + +static bool +vect_epilogue_when_peeling_for_alignment (loop_vec_info loop_vinfo) +{ + if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) + return true; + + int prologue_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); + if (prologue_peeling > 0 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + poly_uint64 niters_for_main + = upper_bound (LOOP_VINFO_VECT_FACTOR (loop_vinfo), + LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)); + niters_for_main + = upper_bound (niters_for_main, + LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)); + niters_for_main += prologue_peeling; + if (maybe_le (LOOP_VINFO_INT_NITERS (loop_vinfo), niters_for_main)) + return false; + } + else if (prologue_peeling < 0) + return false; + return true; +} + /* Function vect_analyze_loop. Apply a set of analyses on LOOP, and create a loop_vec_info struct @@ -3151,7 +3183,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) } } /* For now only allow one epilogue loop. */ - if (first_loop_vinfo->epilogue_vinfos.is_empty ()) + if (first_loop_vinfo->epilogue_vinfos.is_empty () + && vect_epilogue_when_peeling_for_alignment (first_loop_vinfo)) { first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);