public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "amker at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug tree-optimization/66003] New: missed cse opportunity in addr expressions because of tree pre/lim Date: Mon, 04 May 2015 08:32:00 -0000 [thread overview] Message-ID: <bug-66003-4@http.gcc.gnu.org/bugzilla/> (raw) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66003 Bug ID: 66003 Summary: missed cse opportunity in addr expressions because of tree pre/lim Product: gcc Version: 6.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: amker at gcc dot gnu.org Target Milestone: --- Below simple case is reduced from spec, typedef struct { int x; int y; } coord; extern unsigned short **org; extern coord *c; void bar (unsigned short *ptr); void foo (int s, int n) { unsigned short arr[256], *ptr = arr; int x, y; for (y = c->y; y < c->y + 16; y++) for (x = c->x; x < c->x + 16; x++) *ptr++ = org [y][x]; bar (ptr); } When compiling with below two command lines A: $gcc -Ofast -S test.c -o x.S B: $gcc -Ofast -S test.c -o y.S -fno-tree-pre -fno-tree-loop-im The assembly difference is as below: $ diff x.S y.S 12,14c12,34 < subq $520, %rsp < .cfi_def_cfa_offset 528 < movq c(%rip), %rdx --- > pushq %r15 > .cfi_def_cfa_offset 16 > .cfi_offset 15, -16 > pushq %r14 > .cfi_def_cfa_offset 24 > .cfi_offset 14, -24 > pushq %r13 > .cfi_def_cfa_offset 32 > .cfi_offset 13, -32 > pushq %r12 > .cfi_def_cfa_offset 40 > .cfi_offset 12, -40 > pushq %rbp > .cfi_def_cfa_offset 48 > .cfi_offset 6, -48 > pushq %rbx > .cfi_def_cfa_offset 56 > .cfi_offset 3, -56 > subq $568, %rsp > .cfi_def_cfa_offset 624 > movq c(%rip), %rax > movslq (%rax), %rsi > movslq 4(%rax), %rdx 16,20c36,58 < movslq 4(%rdx), %rcx < leaq (%rax,%rcx,8), %rsi < movslq (%rdx), %rcx < movq %rsp, %rax < addq %rcx, %rcx --- > addq %rsi, %rsi > leaq 24(%rsi), %rcx > leaq 22(%rsi), %rdi > leaq 2(%rsi), %r15 > leaq 4(%rsi), %r14 > leaq 6(%rsi), %r13 > leaq 8(%rsi), %r12 
> movq %rcx, 8(%rsp) > leaq 26(%rsi), %rcx > leaq 10(%rsi), %rbp > leaq 12(%rsi), %rbx > leaq 14(%rsi), %r11 > leaq 16(%rsi), %r10 > movq %rcx, 16(%rsp) > leaq 28(%rsi), %rcx > leaq 18(%rsi), %r9 > leaq 20(%rsi), %r8 > movq %rdi, 40(%rsp) > movq %rcx, 24(%rsp) > leaq 30(%rsi), %rcx > movq %rcx, 32(%rsp) > leaq (%rax,%rdx,8), %rcx > leaq 48(%rsp), %rax 24c62 < movq (%rsi), %rdx --- > movq (%rcx), %rdx 26,27c64,65 < addq $8, %rsi < movzwl (%rdx,%rcx), %edi --- > addq $8, %rcx > movzwl (%rdx,%rsi), %edi 29c67 < movzwl 2(%rdx,%rcx), %edi --- > movzwl (%rdx,%r15), %edi 31c69 < movzwl 4(%rdx,%rcx), %edi --- > movzwl (%rdx,%r14), %edi 33c71 < movzwl 6(%rdx,%rcx), %edi --- > movzwl (%rdx,%r13), %edi 35c73 < movzwl 8(%rdx,%rcx), %edi --- > movzwl (%rdx,%r12), %edi 37c75 < movzwl 10(%rdx,%rcx), %edi --- > movzwl (%rdx,%rbp), %edi 39c77 < movzwl 12(%rdx,%rcx), %edi --- > movzwl (%rdx,%rbx), %edi 41c79 < movzwl 14(%rdx,%rcx), %edi --- > movzwl (%rdx,%r11), %edi 43c81 < movzwl 16(%rdx,%rcx), %edi --- > movzwl (%rdx,%r10), %edi 45c83 < movzwl 18(%rdx,%rcx), %edi --- > movzwl (%rdx,%r9), %edi 47c85 < movzwl 20(%rdx,%rcx), %edi --- > movzwl (%rdx,%r8), %edi 49c87,88 < movzwl 22(%rdx,%rcx), %edi --- > movq 40(%rsp), %rdi > movzwl (%rdx,%rdi), %edi 51c90,91 < movzwl 24(%rdx,%rcx), %edi --- > movq 8(%rsp), %rdi > movzwl (%rdx,%rdi), %edi 53c93,94 < movzwl 26(%rdx,%rcx), %edi --- > movq 16(%rsp), %rdi > movzwl (%rdx,%rdi), %edi 55c96,97 < movzwl 28(%rdx,%rcx), %edi --- > movq 24(%rsp), %rdi > movzwl (%rdx,%rdi), %edi 57c99,100 < movzwl 30(%rdx,%rcx), %edx --- > movq 32(%rsp), %rdi > movzwl (%rdx,%rdi), %edx 59c102 < leaq 512(%rsp), %rdx --- > leaq 560(%rsp), %rdx 64c107,119 < addq $520, %rsp --- > addq $568, %rsp > .cfi_def_cfa_offset 56 > popq %rbx > .cfi_def_cfa_offset 48 > popq %rbp > .cfi_def_cfa_offset 40 > popq %r12 > .cfi_def_cfa_offset 32 > popq %r13 > .cfi_def_cfa_offset 24 > popq %r14 > .cfi_def_cfa_offset 16 > popq %r15 The tree-pre dump is as below: <bb 2>: c.0_8 = c; y_9 
= c.0_8->y; _47 = y_9 + 15; pretmp_112 = c.0_8->x; pretmp_128 = org; pretmp_144 = (long unsigned int) pretmp_112; pretmp_159 = pretmp_144 * 2; pretmp_160 = pretmp_112 + 1; pretmp_175 = (long unsigned int) pretmp_160; pretmp_176 = pretmp_175 * 2; pretmp_191 = pretmp_112 + 2; pretmp_192 = (long unsigned int) pretmp_191; pretmp_207 = pretmp_192 * 2; pretmp_208 = pretmp_112 + 3; pretmp_223 = (long unsigned int) pretmp_208; pretmp_224 = pretmp_223 * 2; pretmp_239 = pretmp_112 + 4; pretmp_240 = (long unsigned int) pretmp_239; pretmp_255 = pretmp_240 * 2; pretmp_256 = pretmp_112 + 5; pretmp_271 = (long unsigned int) pretmp_256; pretmp_283 = pretmp_271 * 2; pretmp_12 = pretmp_112 + 6; pretmp_50 = (long unsigned int) pretmp_12; pretmp_51 = pretmp_50 * 2; pretmp_52 = pretmp_112 + 7; pretmp_53 = (long unsigned int) pretmp_52; pretmp_65 = pretmp_53 * 2; pretmp_66 = pretmp_112 + 8; pretmp_67 = (long unsigned int) pretmp_66; pretmp_68 = pretmp_67 * 2; pretmp_69 = pretmp_112 + 9; pretmp_81 = (long unsigned int) pretmp_69; pretmp_82 = pretmp_81 * 2; pretmp_83 = pretmp_112 + 10; pretmp_84 = (long unsigned int) pretmp_83; pretmp_85 = pretmp_84 * 2; pretmp_97 = pretmp_112 + 11; pretmp_98 = (long unsigned int) pretmp_97; pretmp_99 = pretmp_98 * 2; pretmp_100 = pretmp_112 + 12; pretmp_101 = (long unsigned int) pretmp_100; pretmp_113 = pretmp_101 * 2; pretmp_114 = pretmp_112 + 13; pretmp_115 = (long unsigned int) pretmp_114; pretmp_116 = pretmp_115 * 2; pretmp_117 = pretmp_112 + 14; pretmp_129 = (long unsigned int) pretmp_117; pretmp_130 = pretmp_129 * 2; pretmp_131 = pretmp_112 + 15; pretmp_132 = (long unsigned int) pretmp_131; pretmp_133 = pretmp_132 * 2; <bb 3>: # ptr_48 = PHI <&arr(2), ptr_272(3)> # y_64 = PHI <y_9(2), y_25(3)> _34 = (long unsigned int) y_64; _35 = _34 * 8; _36 = pretmp_128 + _35; _37 = *_36; _40 = _37 + pretmp_159; _41 = *_40; *ptr_48 = _41; _56 = _37 + pretmp_176; _57 = *_56; MEM[(short unsigned int *)ptr_48 + 2B] = _57; _72 = _37 + pretmp_207; _73 = *_72; 
MEM[(short unsigned int *)ptr_48 + 4B] = _73; _88 = _37 + pretmp_224; _89 = *_88; MEM[(short unsigned int *)ptr_48 + 6B] = _89; _104 = _37 + pretmp_255; _105 = *_104; MEM[(short unsigned int *)ptr_48 + 8B] = _105; _120 = _37 + pretmp_283; _121 = *_120; MEM[(short unsigned int *)ptr_48 + 10B] = _121; _136 = _37 + pretmp_51; _137 = *_136; MEM[(short unsigned int *)ptr_48 + 12B] = _137; _152 = _37 + pretmp_65; _153 = *_152; MEM[(short unsigned int *)ptr_48 + 14B] = _153; _168 = _37 + pretmp_68; _169 = *_168; MEM[(short unsigned int *)ptr_48 + 16B] = _169; _184 = _37 + pretmp_82; _185 = *_184; MEM[(short unsigned int *)ptr_48 + 18B] = _185; _200 = _37 + pretmp_85; _201 = *_200; MEM[(short unsigned int *)ptr_48 + 20B] = _201; _216 = _37 + pretmp_99; _217 = *_216; MEM[(short unsigned int *)ptr_48 + 22B] = _217; _232 = _37 + pretmp_113; _233 = *_232; MEM[(short unsigned int *)ptr_48 + 24B] = _233; _248 = _37 + pretmp_116; _249 = *_248; MEM[(short unsigned int *)ptr_48 + 26B] = _249; _264 = _37 + pretmp_130; _265 = *_264; MEM[(short unsigned int *)ptr_48 + 28B] = _265; ptr_272 = &MEM[(void *)ptr_48 + 32B]; _280 = _37 + pretmp_133; _281 = *_280; MEM[(short unsigned int *)ptr_48 + 30B] = _281; y_25 = y_64 + 1; if (y_25 > _47) goto <bb 4>; else goto <bb 3>; Pre hoist the index part of addr expression "base + (reg + i) *2" out of first loop. This introduces higher register pressure, prevents gcc from using powerful addressing expression on x86. On other targets like arm, only register pressure issue may hold. Both pre and lim will do same transformation. 
>From gcc-bugs-return-485330-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org Mon May 04 08:37:45 2015 Return-Path: <gcc-bugs-return-485330-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org> Delivered-To: listarch-gcc-bugs@gcc.gnu.org Received: (qmail 55774 invoked by alias); 4 May 2015 08:37:45 -0000 Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: <gcc-bugs.gcc.gnu.org> List-Archive: <http://gcc.gnu.org/ml/gcc-bugs/> List-Post: <mailto:gcc-bugs@gcc.gnu.org> List-Help: <mailto:gcc-bugs-help@gcc.gnu.org> Sender: gcc-bugs-owner@gcc.gnu.org Delivered-To: mailing list gcc-bugs@gcc.gnu.org Received: (qmail 55724 invoked by uid 48); 4 May 2015 08:37:41 -0000 From: "tschwinge at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug libgomp/65993] [6 Regression] Numerous libgomp.oacc failures seen in r222712 Date: Mon, 04 May 2015 08:37:00 -0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: libgomp X-Bugzilla-Version: 5.0 X-Bugzilla-Keywords: openacc X-Bugzilla-Severity: normal X-Bugzilla-Who: tschwinge at gcc dot gnu.org X-Bugzilla-Status: ASSIGNED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: tschwinge at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_status cf_reconfirmed_on assigned_to everconfirmed Message-ID: <bug-65993-4-hry9aranJf@http.gcc.gnu.org/bugzilla/> In-Reply-To: <bug-65993-4@http.gcc.gnu.org/bugzilla/> References: <bug-65993-4@http.gcc.gnu.org/bugzilla/> Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 7bit X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-SW-Source: 2015-05/txt/msg00170.txt.bz2 Content-length: 682 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65993 Thomas Schwinge <tschwinge at gcc dot gnu.org> changed: What |Removed |Added 
---------------------------------------------------------------------------- Status|UNCONFIRMED |ASSIGNED Last reconfirmed| |2015-05-04 Assignee|unassigned at gcc dot gnu.org |tschwinge at gcc dot gnu.org Ever confirmed|0 |1 --- Comment #2 from Thomas Schwinge <tschwinge at gcc dot gnu.org> --- Patch posted: <http://news.gmane.org/find-root.php?message_id=%3C87pp6gvj3v.fsf%40kepler.schwinge.homeip.net%3E>.
next reply other threads:[~2015-05-04 8:32 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2015-05-04 8:32 amker at gcc dot gnu.org [this message] 2015-05-04 11:41 ` [Bug tree-optimization/66003] " rguenth at gcc dot gnu.org 2015-05-05 1:31 ` amker at gcc dot gnu.org 2015-05-05 1:48 ` pinskia at gcc dot gnu.org 2024-03-18 6:46 ` pinskia at gcc dot gnu.org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-66003-4@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).