public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/66003] New: missed cse opportunity in addr expressions because of tree pre/lim
@ 2015-05-04 8:32 amker at gcc dot gnu.org
2015-05-04 11:41 ` [Bug tree-optimization/66003] " rguenth at gcc dot gnu.org
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: amker at gcc dot gnu.org @ 2015-05-04 8:32 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66003
Bug ID: 66003
Summary: missed cse opportunity in addr expressions because of
tree pre/lim
Product: gcc
Version: 6.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: amker at gcc dot gnu.org
Target Milestone: ---
The simple test case below is reduced from SPEC:
/* Reduced test case.  The code is kept exactly as reported; only comments
   have been added.  */
typedef struct
{
int x;
int y;
} coord;
extern unsigned short **org;
extern coord *c;
void bar (unsigned short *ptr);
/* As written, iterates y from c->y while y < c->y + 16 and, for each y,
   x from c->x while x < c->x + 16, storing org[y][x] through ptr into the
   local array arr, then passes the advanced ptr to bar.
   The parameters s and n are not used in the body.  */
void foo (int s, int n)
{
unsigned short arr[256], *ptr = arr;
int x, y;
/* The load org[y][x] in the inner loop is the address expression whose
   index computation this report is about.  */
for (y = c->y; y < c->y + 16; y++)
for (x = c->x; x < c->x + 16; x++)
*ptr++ = org [y][x];
bar (ptr);
}
When compiling with the two command lines below:
A: $gcc -Ofast -S test.c -o x.S
B: $gcc -Ofast -S test.c -o y.S -fno-tree-pre -fno-tree-loop-im
The assembly difference is as below:
$ diff x.S y.S
12,14c12,34
< subq $520, %rsp
< .cfi_def_cfa_offset 528
< movq c(%rip), %rdx
---
> pushq %r15
> .cfi_def_cfa_offset 16
> .cfi_offset 15, -16
> pushq %r14
> .cfi_def_cfa_offset 24
> .cfi_offset 14, -24
> pushq %r13
> .cfi_def_cfa_offset 32
> .cfi_offset 13, -32
> pushq %r12
> .cfi_def_cfa_offset 40
> .cfi_offset 12, -40
> pushq %rbp
> .cfi_def_cfa_offset 48
> .cfi_offset 6, -48
> pushq %rbx
> .cfi_def_cfa_offset 56
> .cfi_offset 3, -56
> subq $568, %rsp
> .cfi_def_cfa_offset 624
> movq c(%rip), %rax
> movslq (%rax), %rsi
> movslq 4(%rax), %rdx
16,20c36,58
< movslq 4(%rdx), %rcx
< leaq (%rax,%rcx,8), %rsi
< movslq (%rdx), %rcx
< movq %rsp, %rax
< addq %rcx, %rcx
---
> addq %rsi, %rsi
> leaq 24(%rsi), %rcx
> leaq 22(%rsi), %rdi
> leaq 2(%rsi), %r15
> leaq 4(%rsi), %r14
> leaq 6(%rsi), %r13
> leaq 8(%rsi), %r12
> movq %rcx, 8(%rsp)
> leaq 26(%rsi), %rcx
> leaq 10(%rsi), %rbp
> leaq 12(%rsi), %rbx
> leaq 14(%rsi), %r11
> leaq 16(%rsi), %r10
> movq %rcx, 16(%rsp)
> leaq 28(%rsi), %rcx
> leaq 18(%rsi), %r9
> leaq 20(%rsi), %r8
> movq %rdi, 40(%rsp)
> movq %rcx, 24(%rsp)
> leaq 30(%rsi), %rcx
> movq %rcx, 32(%rsp)
> leaq (%rax,%rdx,8), %rcx
> leaq 48(%rsp), %rax
24c62
< movq (%rsi), %rdx
---
> movq (%rcx), %rdx
26,27c64,65
< addq $8, %rsi
< movzwl (%rdx,%rcx), %edi
---
> addq $8, %rcx
> movzwl (%rdx,%rsi), %edi
29c67
< movzwl 2(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r15), %edi
31c69
< movzwl 4(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r14), %edi
33c71
< movzwl 6(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r13), %edi
35c73
< movzwl 8(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r12), %edi
37c75
< movzwl 10(%rdx,%rcx), %edi
---
> movzwl (%rdx,%rbp), %edi
39c77
< movzwl 12(%rdx,%rcx), %edi
---
> movzwl (%rdx,%rbx), %edi
41c79
< movzwl 14(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r11), %edi
43c81
< movzwl 16(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r10), %edi
45c83
< movzwl 18(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r9), %edi
47c85
< movzwl 20(%rdx,%rcx), %edi
---
> movzwl (%rdx,%r8), %edi
49c87,88
< movzwl 22(%rdx,%rcx), %edi
---
> movq 40(%rsp), %rdi
> movzwl (%rdx,%rdi), %edi
51c90,91
< movzwl 24(%rdx,%rcx), %edi
---
> movq 8(%rsp), %rdi
> movzwl (%rdx,%rdi), %edi
53c93,94
< movzwl 26(%rdx,%rcx), %edi
---
> movq 16(%rsp), %rdi
> movzwl (%rdx,%rdi), %edi
55c96,97
< movzwl 28(%rdx,%rcx), %edi
---
> movq 24(%rsp), %rdi
> movzwl (%rdx,%rdi), %edi
57c99,100
< movzwl 30(%rdx,%rcx), %edx
---
> movq 32(%rsp), %rdi
> movzwl (%rdx,%rdi), %edx
59c102
< leaq 512(%rsp), %rdx
---
> leaq 560(%rsp), %rdx
64c107,119
< addq $520, %rsp
---
> addq $568, %rsp
> .cfi_def_cfa_offset 56
> popq %rbx
> .cfi_def_cfa_offset 48
> popq %rbp
> .cfi_def_cfa_offset 40
> popq %r12
> .cfi_def_cfa_offset 32
> popq %r13
> .cfi_def_cfa_offset 24
> popq %r14
> .cfi_def_cfa_offset 16
> popq %r15
The tree-pre dump is as below:
<bb 2>:
c.0_8 = c;
y_9 = c.0_8->y;
_47 = y_9 + 15;
pretmp_112 = c.0_8->x;
pretmp_128 = org;
pretmp_144 = (long unsigned int) pretmp_112;
pretmp_159 = pretmp_144 * 2;
pretmp_160 = pretmp_112 + 1;
pretmp_175 = (long unsigned int) pretmp_160;
pretmp_176 = pretmp_175 * 2;
pretmp_191 = pretmp_112 + 2;
pretmp_192 = (long unsigned int) pretmp_191;
pretmp_207 = pretmp_192 * 2;
pretmp_208 = pretmp_112 + 3;
pretmp_223 = (long unsigned int) pretmp_208;
pretmp_224 = pretmp_223 * 2;
pretmp_239 = pretmp_112 + 4;
pretmp_240 = (long unsigned int) pretmp_239;
pretmp_255 = pretmp_240 * 2;
pretmp_256 = pretmp_112 + 5;
pretmp_271 = (long unsigned int) pretmp_256;
pretmp_283 = pretmp_271 * 2;
pretmp_12 = pretmp_112 + 6;
pretmp_50 = (long unsigned int) pretmp_12;
pretmp_51 = pretmp_50 * 2;
pretmp_52 = pretmp_112 + 7;
pretmp_53 = (long unsigned int) pretmp_52;
pretmp_65 = pretmp_53 * 2;
pretmp_66 = pretmp_112 + 8;
pretmp_67 = (long unsigned int) pretmp_66;
pretmp_68 = pretmp_67 * 2;
pretmp_69 = pretmp_112 + 9;
pretmp_81 = (long unsigned int) pretmp_69;
pretmp_82 = pretmp_81 * 2;
pretmp_83 = pretmp_112 + 10;
pretmp_84 = (long unsigned int) pretmp_83;
pretmp_85 = pretmp_84 * 2;
pretmp_97 = pretmp_112 + 11;
pretmp_98 = (long unsigned int) pretmp_97;
pretmp_99 = pretmp_98 * 2;
pretmp_100 = pretmp_112 + 12;
pretmp_101 = (long unsigned int) pretmp_100;
pretmp_113 = pretmp_101 * 2;
pretmp_114 = pretmp_112 + 13;
pretmp_115 = (long unsigned int) pretmp_114;
pretmp_116 = pretmp_115 * 2;
pretmp_117 = pretmp_112 + 14;
pretmp_129 = (long unsigned int) pretmp_117;
pretmp_130 = pretmp_129 * 2;
pretmp_131 = pretmp_112 + 15;
pretmp_132 = (long unsigned int) pretmp_131;
pretmp_133 = pretmp_132 * 2;
<bb 3>:
# ptr_48 = PHI <&arr(2), ptr_272(3)>
# y_64 = PHI <y_9(2), y_25(3)>
_34 = (long unsigned int) y_64;
_35 = _34 * 8;
_36 = pretmp_128 + _35;
_37 = *_36;
_40 = _37 + pretmp_159;
_41 = *_40;
*ptr_48 = _41;
_56 = _37 + pretmp_176;
_57 = *_56;
MEM[(short unsigned int *)ptr_48 + 2B] = _57;
_72 = _37 + pretmp_207;
_73 = *_72;
MEM[(short unsigned int *)ptr_48 + 4B] = _73;
_88 = _37 + pretmp_224;
_89 = *_88;
MEM[(short unsigned int *)ptr_48 + 6B] = _89;
_104 = _37 + pretmp_255;
_105 = *_104;
MEM[(short unsigned int *)ptr_48 + 8B] = _105;
_120 = _37 + pretmp_283;
_121 = *_120;
MEM[(short unsigned int *)ptr_48 + 10B] = _121;
_136 = _37 + pretmp_51;
_137 = *_136;
MEM[(short unsigned int *)ptr_48 + 12B] = _137;
_152 = _37 + pretmp_65;
_153 = *_152;
MEM[(short unsigned int *)ptr_48 + 14B] = _153;
_168 = _37 + pretmp_68;
_169 = *_168;
MEM[(short unsigned int *)ptr_48 + 16B] = _169;
_184 = _37 + pretmp_82;
_185 = *_184;
MEM[(short unsigned int *)ptr_48 + 18B] = _185;
_200 = _37 + pretmp_85;
_201 = *_200;
MEM[(short unsigned int *)ptr_48 + 20B] = _201;
_216 = _37 + pretmp_99;
_217 = *_216;
MEM[(short unsigned int *)ptr_48 + 22B] = _217;
_232 = _37 + pretmp_113;
_233 = *_232;
MEM[(short unsigned int *)ptr_48 + 24B] = _233;
_248 = _37 + pretmp_116;
_249 = *_248;
MEM[(short unsigned int *)ptr_48 + 26B] = _249;
_264 = _37 + pretmp_130;
_265 = *_264;
MEM[(short unsigned int *)ptr_48 + 28B] = _265;
ptr_272 = &MEM[(void *)ptr_48 + 32B];
_280 = _37 + pretmp_133;
_281 = *_280;
MEM[(short unsigned int *)ptr_48 + 30B] = _281;
y_25 = y_64 + 1;
if (y_25 > _47)
goto <bb 4>;
else
goto <bb 3>;
PRE hoists the index part of the address expression "base + (reg + i) * 2" out
of the first loop. This increases register pressure and prevents GCC from using
the powerful addressing modes available on x86.
On other targets such as ARM, only the register pressure issue may apply.
Both PRE and LIM perform the same transformation.
>From gcc-bugs-return-485330-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org Mon May 04 08:37:45 2015
Return-Path: <gcc-bugs-return-485330-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org>
Delivered-To: listarch-gcc-bugs@gcc.gnu.org
Received: (qmail 55774 invoked by alias); 4 May 2015 08:37:45 -0000
Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-bugs.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-help@gcc.gnu.org>
Sender: gcc-bugs-owner@gcc.gnu.org
Delivered-To: mailing list gcc-bugs@gcc.gnu.org
Received: (qmail 55724 invoked by uid 48); 4 May 2015 08:37:41 -0000
From: "tschwinge at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug libgomp/65993] [6 Regression] Numerous libgomp.oacc failures seen in r222712
Date: Mon, 04 May 2015 08:37:00 -0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: libgomp
X-Bugzilla-Version: 5.0
X-Bugzilla-Keywords: openacc
X-Bugzilla-Severity: normal
X-Bugzilla-Who: tschwinge at gcc dot gnu.org
X-Bugzilla-Status: ASSIGNED
X-Bugzilla-Resolution:
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: tschwinge at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags:
X-Bugzilla-Changed-Fields: bug_status cf_reconfirmed_on assigned_to everconfirmed
Message-ID: <bug-65993-4-hry9aranJf@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-65993-4@http.gcc.gnu.org/bugzilla/>
References: <bug-65993-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 7bit
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
X-SW-Source: 2015-05/txt/msg00170.txt.bz2
Content-length: 682
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65993
Thomas Schwinge <tschwinge at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |ASSIGNED
Last reconfirmed| |2015-05-04
Assignee|unassigned at gcc dot gnu.org |tschwinge at gcc dot gnu.org
Ever confirmed|0 |1
--- Comment #2 from Thomas Schwinge <tschwinge at gcc dot gnu.org> ---
Patch posted:
<http://news.gmane.org/find-root.php?message_id=%3C87pp6gvj3v.fsf%40kepler.schwinge.homeip.net%3E>.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-03-18 6:47 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-04 8:32 [Bug tree-optimization/66003] New: missed cse opportunity in addr expressions because of tree pre/lim amker at gcc dot gnu.org
2015-05-04 11:41 ` [Bug tree-optimization/66003] " rguenth at gcc dot gnu.org
2015-05-05 1:31 ` amker at gcc dot gnu.org
2015-05-05 1:48 ` pinskia at gcc dot gnu.org
2024-03-18 6:46 ` pinskia at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).