public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "hubicka at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug target/109812] GraphicsMagick resize is a lot slower in GCC 13.1 vs Clang 16 on Intel Raptor Lake Date: Sun, 28 May 2023 18:11:02 +0000 [thread overview] Message-ID: <bug-109812-4-AAH1RonwxN@http.gcc.gnu.org/bugzilla/> (raw) In-Reply-To: <bug-109812-4@http.gcc.gnu.org/bugzilla/> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109812 --- Comment #9 from Jan Hubicka <hubicka at gcc dot gnu.org> --- Oddly enough simplified version of the loop SLP vectorizes for me: struct rgb {unsigned char r,g,b;} *rgbs; int *addr; double *weights; struct drgb {double r,g,b;}; struct drgb sum() { struct drgb r; for (int i = 0; i < 100000; i++) { int j = addr[i]; double w = weights[i]; r.r += rgbs[j].r * w; r.g += rgbs[j].g * w; r.b += rgbs[j].b * w; } return r; } I get: L2: movslq (%r9,%rdx,4), %rax vmovsd (%r8,%rdx,8), %xmm1 incq %rdx leaq (%rax,%rax,2), %rax addq %rsi, %rax movzbl (%rax), %ecx vmovddup %xmm1, %xmm4 vmovd %ecx, %xmm0 movzbl 1(%rax), %ecx movzbl 2(%rax), %eax vpinsrd $1, %ecx, %xmm0, %xmm0 vcvtdq2pd %xmm0, %xmm0 vfmadd231pd %xmm4, %xmm0, %xmm2 vcvtsi2sdl %eax, %xmm5, %xmm0 vfmadd231sd %xmm1, %xmm0, %xmm3 cmpq $100000, %rdx jne .L2 I think the actual loop is: <bb 53> [local count: 44202554]: _106 = _262->pixel; _109 = *source_231(D).columns; <bb 54> [local count: 401841405]: # pixel$green_332 = PHI <_124(89), pixel$green_265(53)> # i_357 = PHI <i_298(89), 0(53)> # pixel$red_371 = PHI <_119(89), pixel$red_263(53)> # pixel$blue_377 = PHI <_129(89), pixel$blue_267(53)> i.51_102 = (long unsigned int) i_357; _103 = i.51_102 * 16; _104 = _262 + _103; _105 = _104->pixel; _107 = _105 - _106; _108 = (long unsigned int) _107; _110 = _108 * _109; _112 = _110 + _621; weight_297 = _104->weight; _113 = _112 * 4; _114 = _276 + _113; _115 = _114->red; _116 = (int) _115; _117 = (double) _116; _118 = _117 * weight_297; _119 = _118 + pixel$red_371; _120 = _114->green; _121 = (int) _120; 
_122 = (double) _121; _123 = _122 * weight_297; _124 = _123 + pixel$green_332; _125 = _114->blue; _126 = (int) _125; _127 = (double) _126; _128 = _127 * weight_297; _129 = _128 + pixel$blue_377; i_298 = i_357 + 1; if (n_195 > i_298) goto <bb 89>; [89.00%] else goto <bb 118>; [11.00%] <bb 118> [local count: 44202554]: # _607 = PHI <_124(54)> # _606 = PHI <_119(54)> # _605 = PHI <_129(54)> goto <bb 55>; [100.00%] <bb 89> [local count: 357638851]: goto <bb 54>; [100.00%] and the SLP vectorizer seems to claim: ../magick/resize.c:1284:52: note: _125 = _114->blue; ../magick/resize.c:1284:52: note: _120 = _114->green; ../magick/resize.c:1284:52: note: _115 = _114->red; ../magick/resize.c:1284:52: missed: not consecutive access weight_297 = _104->weight; ../magick/resize.c:1284:52: missed: not consecutive access _105 = _104->pixel; ../magick/resize.c:1284:52: missed: not consecutive access _134->red = iftmp.57_207; ../magick/resize.c:1284:52: missed: not consecutive access _134->green = iftmp.60_208; ../magick/resize.c:1284:52: missed: not consecutive access _134->blue = iftmp.63_209; ../magick/resize.c:1284:52: missed: not consecutive access _134->opacity = 0; ../magick/resize.c:1284:52: missed: not consecutive access _63 = *source_231(D).columns; ../magick/resize.c:1284:52: missed: not consecutive access _60 = _262->pixel; Not sure if that is related to the real testcase. However, the following variant: struct rgb {unsigned char r,g,b;} *rgbs; int *addr; double *weights; struct drgb {double r,g,b,o;}; struct drgb sum() { struct drgb r; for (int i = 0; i < 100000; i++) { int j = addr[i]; double w = weights[i]; r.r += rgbs[j].r * w; r.g += rgbs[j].g * w; r.b += rgbs[j].b * w; } return r; } makes us miss the vectorization even though there is nothing using drgb->o: sum: .LFB0: .cfi_startproc movq %rdi, %r8 movq weights(%rip), %rsi movq addr(%rip), %rdi vxorps %xmm2, %xmm2, %xmm2 movq rgbs(%rip), %rcx xorl %edx, %edx .p2align 4 .p2align 3 .L2: movslq (%rdi,%rdx,4), %rax vmovsd (%rsi,%rdx,8), %xmm0 incq %rdx 
leaq (%rax,%rax,2), %rax addq %rcx, %rax movzbl (%rax), %r9d vcvtsi2sdl %r9d, %xmm2, %xmm1 movzbl 1(%rax), %r9d movzbl 2(%rax), %eax vfmadd231sd %xmm0, %xmm1, %xmm3 vcvtsi2sdl %r9d, %xmm2, %xmm1 vfmadd231sd %xmm0, %xmm1, %xmm5 vcvtsi2sdl %eax, %xmm2, %xmm1 vfmadd231sd %xmm0, %xmm1, %xmm4 cmpq $100000, %rdx jne .L2 vmovq %xmm4, %xmm4 vunpcklpd %xmm5, %xmm3, %xmm0 movq %r8, %rax vinsertf128 $0x1, %xmm4, %ymm0, %ymm0 vmovupd %ymm0, (%r8) vzeroupper ret
next prev parent reply other threads:[~2023-05-28 18:11 UTC|newest] Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-05-11 14:25 [Bug tree-optimization/109812] New: GraphicsMagick resize is a lot slower in GCC 13.1 vs Clang 16 aros at gmx dot com 2023-05-11 14:26 ` [Bug tree-optimization/109812] " aros at gmx dot com 2023-05-11 15:20 ` [Bug target/109812] " pinskia at gcc dot gnu.org 2023-05-11 15:50 ` aros at gmx dot com 2023-05-12 8:47 ` aros at gmx dot com 2023-05-16 22:43 ` juzhe.zhong at rivai dot ai 2023-05-17 0:08 ` sjames at gcc dot gnu.org 2023-05-28 16:46 ` hubicka at gcc dot gnu.org 2023-05-28 17:29 ` [Bug target/109812] GraphicsMagick resize is a lot slower in GCC 13.1 vs Clang 16 on Intel Raptor Lake hubicka at gcc dot gnu.org 2023-05-28 17:39 ` hubicka at gcc dot gnu.org 2023-05-28 18:11 ` hubicka at gcc dot gnu.org [this message] 2023-05-28 18:50 ` hubicka at gcc dot gnu.org 2023-05-30 0:05 ` zhangjungcc at gmail dot com 2023-05-31 12:42 ` hubicka at ucw dot cz 2023-05-31 16:11 ` hubicka at gcc dot gnu.org 2023-05-31 16:52 ` jamborm at gcc dot gnu.org 2023-06-01 9:38 ` jamborm at gcc dot gnu.org 2023-06-01 11:19 ` jakub at gcc dot gnu.org 2023-06-01 12:28 ` hubicka at gcc dot gnu.org 2023-06-21 9:46 ` ubizjak at gmail dot com 2023-10-12 4:48 ` cvs-commit at gcc dot gnu.org 2023-11-24 23:38 ` hubicka at gcc dot gnu.org 2023-11-25 10:21 ` liuhongt at gcc dot gnu.org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-109812-4-AAH1RonwxN@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).