public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "jakub at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug target/102789] [12 regression] libgomp.c++/simd-3.C fails after r12-4340 for 32 bits Date: Mon, 18 Oct 2021 10:05:49 +0000 [thread overview] Message-ID: <bug-102789-4-MAhHDNFZUa@http.gcc.gnu.org/bugzilla/> (raw) In-Reply-To: <bug-102789-4@http.gcc.gnu.org/bugzilla/> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102789 Jakub Jelinek <jakub at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- Ever confirmed|0 |1 Component|libgomp |target Status|UNCONFIRMED |NEW Last reconfirmed| |2021-10-18 CC| |dje at gcc dot gnu.org, | |segher at gcc dot gnu.org --- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> --- Ok, I can reproduce, but only with -mcpu=power7. The cost model patch just uncovered a latent power7 vectorization bug (backend or vectorizer) I'd say. I've instrumented the testcase a little bit: // { dg-do run } // { dg-additional-options "-msse2" { target sse2_runtime } } // { dg-additional-options "-mavx" { target avx_runtime } } extern "C" void abort (); int a[1024] __attribute__((aligned (32))) = { 1 }; int b[1024] __attribute__((aligned (32))) = { 1 }; unsigned char c[1024] __attribute__((aligned (32))) = { 1 }; int k, m; __UINTPTR_TYPE__ u, u2, u3; __attribute__((noinline, noclone)) int foo (int *p) { int i, s = 0, s2 = 0, t, t2; #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ lastprivate (t2) for (i = 0; i < 512; i++) { a[i] *= p[i]; t2 = k + p[i]; k += m + 1; s += p[i] + k; c[i]++; } #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ lastprivate (t, u, u2, u3) for (i = 512; i < 1024; i++) { a[i] *= p[i]; k += m + 1; t = k + p[i]; u = (__UINTPTR_TYPE__) &k; u2 = (__UINTPTR_TYPE__) &s2; u3 = (__UINTPTR_TYPE__) &t; s2 += t; c[i]++; } __builtin_printf ("foo %d %d %d %d\n", s, s2, t, t2); return s + s2 + t + t2; } __attribute__((noinline, 
noclone)) long int bar (int *p, long int n, long int o) { long int i, s = 0, s2 = 0, t, t2; #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ lastprivate (t2) for (i = 0; i < n; i++) { a[i] *= p[i]; t2 = k + p[i]; k += m + 1; s += p[i] + k; c[i]++; } #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ lastprivate (t, u, u2, u3) for (i = n; i < o; i++) { a[i] *= p[i]; k += m + 1; t = k + p[i]; u = (__UINTPTR_TYPE__) &k; u2 = (__UINTPTR_TYPE__) &s2; u3 = (__UINTPTR_TYPE__) &t; s2 += t; c[i]++; } __builtin_printf ("bar %d %d %d %d\n", s, s2, t, t2); return s + s2 + t + t2; } int main () { #if __SIZEOF_INT__ >= 4 int i; k = 4; m = 2; for (i = 0; i < 1024; i++) { a[i] = i - 512; b[i] = (i - 51) % 39; c[i] = (unsigned char) i; } int s = foo (b); for (i = 0; i < 1024; i++) { if (b[i] != (i - 51) % 39 || a[i] != (i - 512) * b[i] || c[i] != (unsigned char) (i + 1)) { __builtin_printf ("#1 %d %d %d %d\n", i, b[i], a[i], c[i]); abort (); } a[i] = i - 512; } if (k != 4 + 3 * 1024 || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) { __builtin_printf ("#2 %d %d\n", k, s); abort (); } k = 4; s = bar (b, 512, 1024); for (i = 0; i < 1024; i++) { if (b[i] != (i - 51) % 39 || a[i] != (i - 512) * b[i] || c[i] != (unsigned char) (i + 2)) { __builtin_printf ("#3 %d %d %d %d\n", i, b[i], a[i], c[i]); abort (); } a[i] = i - 512; } if (k != 4 + 3 * 1024 || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) { __builtin_printf ("#4 %d %d\n", k, s); abort (); } k = 4; s = bar (b, 511, 1021); for (i = 0; i < 1021; i++) { if (b[i] != (i - 51) % 39 || a[i] != (i - 512) * b[i] || c[i] != (unsigned char) (i + 3)) { __builtin_printf ("#5 %d %d %d %d\n", i, b[i], a[i], c[i]); abort (); } a[i] = i - 512; } for (i = 1021; i < 1024; i++) if (b[i] != (i - 51) % 39 || a[i] != i - 512 || c[i] != (unsigned char) (i + 2)) { __builtin_printf ("#6 %d %d %d %d\n", i, b[i], a[i], c[i]); abort (); } if (k != 4 + 3 * 1021 || s != 
1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020])) { __builtin_printf ("#7 %d %d %d %d\n", k, s, b[510], b[1020]); abort (); } #endif return 0; } When compiled with -O2 -m32 -fopenmp -mcpu=power6, this prints: foo 403860 1192267 3112 1568 bar 403860 1192267 3112 1568 bar 402289 1184514 3100 1564 while with -O2 -m32 -fopenmp -mcpu=power7 it prints: foo 403860 1192267 3112 1568 bar 403860 1192267 3112 1568 bar 402289 919217 3100 1564 #7 3067 1326170 30 33 Aborted which seems to suggest it is the: #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ lastprivate (t, u, u2, u3) for (i = n; i < o; i++) { a[i] *= p[i]; k += m + 1; t = k + p[i]; u = (__UINTPTR_TYPE__) &k; u2 = (__UINTPTR_TYPE__) &s2; u3 = (__UINTPTR_TYPE__) &t; s2 += t; c[i]++; } loop that is miscompiled and miscomputes s2. Now, the loop is invoked twice, once with n = 512 and o = 1024 and in that case it works fine, and then with n = 511 and o = 1021 and in that case it misbehaves, so I bet it must be related to the prologue or epilogue loops. 
Now, if I compile with -O2 -m32 -fopenmp -mcpu=power7 -fvect-cost-model=unlimited -fsimd-cost-model=unlimited, it is miscompiled the same way already in r9-1520-g42c5d1212ff6544be1061d488aa7ebee9463c375 (haven't bisected fully), but certainly r5-370-ged15c5984e10f6556dffdf397accff804bf60a7c through r9-1052-gfa725532c41ae543fd0078263ea348aa5af3997d have been ICEing on it instead: simd-3.C: In function ‘long int bar(int*, long int, long int)’: simd-3.C:44:1: internal compiler error: in vect_get_store_cost, at tree-vect-stmts.c:1123 bar (int *p, long int n, long int o) ^~~ 0x1510f87 vect_get_store_cost(_stmt_vec_info*, int, unsigned int*, vec<stmt_info_for_cost, va_heap, vl_ptr>*) ../../gcc/tree-vect-stmts.c:1123 0x1510da0 vect_model_store_cost ../../gcc/tree-vect-stmts.c:1057 0x152200b vectorizable_store ../../gcc/tree-vect-stmts.c:6396 0x152cbf2 vect_analyze_stmt(gimple*, bool*, _slp_tree*, _slp_instance*, vec<stmt_info_for_cost, va_heap, vl_ptr>*) ../../gcc/tree-vect-stmts.c:9550 0x153a01f vect_analyze_loop_operations ../../gcc/tree-vect-loop.c:1655 0x153ad28 vect_analyze_loop_2 ../../gcc/tree-vect-loop.c:2050 0x153bd78 vect_analyze_loop(loop*, _loop_vec_info*) ../../gcc/tree-vect-loop.c:2343 0x157062a vectorize_loops() ../../gcc/tree-vectorizer.c:758 0x14196b5 execute ../../gcc/tree-ssa-loop.c:414 Note, r5-370 would ICE with it even with just -O3 -fopenmp -m32 -mcpu=power7 or -O2 -fopenmp -m32 -mcpu=power7 -fvect-cost-model=unlimited.
next prev parent reply other threads:[~2021-10-18 10:05 UTC|newest] Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-10-15 20:09 [Bug libgomp/102789] New: [12 regression] libgomp.c++/simd-3.C fails after r12-xxxx " seurer at gcc dot gnu.org 2021-10-15 20:10 ` [Bug libgomp/102789] " seurer at gcc dot gnu.org 2021-10-18 6:29 ` [Bug libgomp/102789] [12 regression] libgomp.c++/simd-3.C fails after r12-4340 " rguenth at gcc dot gnu.org 2021-10-18 10:05 ` jakub at gcc dot gnu.org [this message] 2021-10-18 10:20 ` [Bug target/102789] " jakub at gcc dot gnu.org 2021-10-19 6:51 ` linkw at gcc dot gnu.org 2021-10-20 6:24 ` linkw at gcc dot gnu.org 2021-10-25 3:08 ` linkw at gcc dot gnu.org 2021-10-26 3:18 ` cvs-commit at gcc dot gnu.org 2021-10-26 3:21 ` [Bug tree-optimization/102789] " linkw at gcc dot gnu.org 2021-10-26 3:28 ` linkw at gcc dot gnu.org 2021-11-05 13:57 ` [Bug tree-optimization/102789] " rguenth at gcc dot gnu.org 2021-11-05 14:00 ` jakub at gcc dot gnu.org 2021-11-08 5:31 ` cvs-commit at gcc dot gnu.org 2021-11-08 5:33 ` cvs-commit at gcc dot gnu.org 2021-11-08 5:34 ` cvs-commit at gcc dot gnu.org 2021-11-08 5:36 ` linkw at gcc dot gnu.org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-102789-4-MAhHDNFZUa@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).