public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop
@ 2011-05-27 22:19 wouter.vermaelen at scarlet dot be
2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: wouter.vermaelen at scarlet dot be @ 2011-05-27 22:19 UTC (permalink / raw)
To: gcc-bugs
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203
Summary: missed-optimization: useless expressions not moved out
of loop
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Severity: enhancement
Priority: P3
Component: tree-optimization
AssignedTo: unassigned@gcc.gnu.org
ReportedBy: wouter.vermaelen@scarlet.be
Hi all,
Below is (a simplified version of) some real code I recently
encountered. The stores to the 'output' array are written in the inner
loop, but the intention was probably to have them in the outer loop.
Gcc is able to 'correct' this programming mistake, but only partly:
the stores themselves are moved to the outer loop, but the instructions that
calculate those values remain in the inner loop.
For this particular example, the best solution is of course to fix the
C code. But maybe this missed-optimization can also occur in other,
more valid, contexts.
Below I've included the generated x86_64 code for this example by
recent versions of both gcc and llvm.
///////////////////////////////////////////////////////////////////
unsigned char input[100];
unsigned char output[100];
// Reproducer: for each group start i, accumulates the low 2 bits of
// input[i] .. input[i + 15] into tmp and stores the bytes of tmp into
// output[i] .. output[i + 3], highest byte first.
void f() {
// Outer loop: i takes the values 0, 4, ..., 28.
for (int i = 0; i < 32; i += 4) {
unsigned tmp = 0;
for (int j = 0; j < 16; ++j) {
// Shift the accumulator left by 2 and insert the low 2 bits of input[i + j].
tmp = (tmp << 2) | (input[i + j] & 0x03);
// These four stores sit inside the inner loop, so they run on every j
// iteration and overwrite the same four bytes each time; only the values
// written on the final iteration (j == 15) remain afterwards.
output[i + 0] = (tmp >> 24) & 0xFF;
output[i + 1] = (tmp >> 16) & 0xFF;
output[i + 2] = (tmp >> 8) & 0xFF;
output[i + 3] = (tmp >> 0) & 0xFF;
}
}
}
///////////////////////////////////////////////////////////////////
g++ (GCC) 4.7.0 20110527 (experimental)
g++ -O2 -S
movl $output, %r10d
movq %r10, %r9
.p2align 4,,10
.p2align 3
.L2:
movl %r9d, %esi
xorl %edx, %edx
xorl %eax, %eax
subl %r10d, %esi
.p2align 4,,10
.p2align 3
.L3:
leal 0(,%rax,4), %ecx
leal (%rdx,%rsi), %eax
addl $1, %edx
cltq
movzbl input(%rax), %eax
andl $3, %eax
orl %ecx, %eax
movl %eax, %r8d
movl %eax, %edi
movl %eax, %ecx
shrl $24, %r8d
shrl $16, %edi
shrl $8, %ecx
cmpl $16, %edx
jne .L3
movb %r8b, (%r9)
movb %dil, 1(%r9)
movb %cl, 2(%r9)
movb %al, 3(%r9)
addq $4, %r9
cmpq $output+32, %r9
jne .L2
rep
ret
///////////////////////////////////////////////////////////////////
clang version 3.0 (http://llvm.org/git/clang.git
855f41963e545172a935d07b4713d079e258a207)
clang++ -O2 -S
# BB#0: # %entry
xorl %eax, %eax
.align 16, 0x90
.LBB0_1: # %for.cond4.preheader
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
xorl %esi, %esi
movq $-16, %rdx
.align 16, 0x90
.LBB0_2: # %for.body7
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %esi, %ecx
movzbl input+16(%rdx,%rax), %edi
andl $3, %edi
leal (,%rcx,4), %esi
orl %edi, %esi
incq %rdx
jne .LBB0_2
# BB#3: # %for.inc44
# in Loop: Header=BB0_1 Depth=1
movb %sil, output+3(%rax)
movl %ecx, %edx
shrl $6, %edx
movb %dl, output+2(%rax)
movl %ecx, %edx
shrl $14, %edx
movb %dl, output+1(%rax)
shrl $22, %ecx
movb %cl, output(%rax)
addq $4, %rax
cmpq $32, %rax
jne .LBB0_1
# BB#4: # %for.end47
ret
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Bug tree-optimization/49203] missed-optimization: useless expressions not moved out of loop
2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
@ 2011-05-27 23:53 ` pinskia at gcc dot gnu.org
2011-05-28 12:59 ` rguenth at gcc dot gnu.org
2021-06-08 21:29 ` pinskia at gcc dot gnu.org
2 siblings, 0 replies; 4+ messages in thread
From: pinskia at gcc dot gnu.org @ 2011-05-27 23:53 UTC (permalink / raw)
To: gcc-bugs
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203
Andrew Pinski <pinskia at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Keywords| |missed-optimization
Status|UNCONFIRMED |NEW
Last reconfirmed| |2011.05.27 22:31:13
Ever Confirmed|0 |1
--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> 2011-05-27 22:31:13 UTC ---
Looks like LIM is only able to hoist invariants rather than push down
invariants. It can push down invariant stores though.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Bug tree-optimization/49203] missed-optimization: useless expressions not moved out of loop
2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
@ 2011-05-28 12:59 ` rguenth at gcc dot gnu.org
2021-06-08 21:29 ` pinskia at gcc dot gnu.org
2 siblings, 0 replies; 4+ messages in thread
From: rguenth at gcc dot gnu.org @ 2011-05-28 12:59 UTC (permalink / raw)
To: gcc-bugs
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203
--- Comment #2 from Richard Guenther <rguenth at gcc dot gnu.org> 2011-05-28 12:57:19 UTC ---
(In reply to comment #1)
> Looks like LIM is only able to hoist invariants rather than push down
> invariants. It can push down invariant stores though.
Indeed. code-sinking (in tree-ssa-sink.c) would be able to fix this
but it is run before loop store motion.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Bug tree-optimization/49203] missed-optimization: useless expressions not moved out of loop
2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
2011-05-28 12:59 ` rguenth at gcc dot gnu.org
@ 2021-06-08 21:29 ` pinskia at gcc dot gnu.org
2 siblings, 0 replies; 4+ messages in thread
From: pinskia at gcc dot gnu.org @ 2021-06-08 21:29 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203
Andrew Pinski <pinskia at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Resolution|--- |FIXED
Status|NEW |RESOLVED
Known to work| |7.0
Target Milestone|--- |7.0
Known to fail| |4.8.5
--- Comment #3 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Fixed in at least GCC 7.0:
.L2:
leaq 16(%r8), %rsi
movq %r8, %rdx
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
movzbl (%rdx), %ecx
sall $2, %eax
addq $1, %rdx
andl $3, %ecx
orl %ecx, %eax
cmpq %rsi, %rdx
jne .L3
movl %eax, %edx
addq $4, %r8
movb %ah, 2(%rdi)
shrl $24, %edx
movb %al, 3(%rdi)
addq $4, %rdi
movb %dl, -4(%rdi)
movl %eax, %edx
shrl $16, %edx
movb %dl, -3(%rdi)
cmpq %r8, %r9
jne .L2
<bb 3> [94.12%]:
# tmp_37 = PHI <tmp_23(3), 0(5)>
# ivtmp.17_17 = PHI <ivtmp.17_15(3), ivtmp.28_79(5)>
_1 = tmp_37 << 2;
_87 = (void *) ivtmp.17_17;
_3 = MEM[base: _87, offset: 0B];
_20 = _3 & 3;
_4 = (unsigned int) _20;
tmp_23 = _1 | _4;
ivtmp.17_15 = ivtmp.17_17 + 1;
if (ivtmp.17_15 != _83)
goto <bb 3>; [93.75%]
else
goto <bb 4>; [6.25%]
<bb 4> [5.88%]:
_5 = tmp_23 >> 24;
_6 = (unsigned char) _5;
_76 = (void *) ivtmp.27_82;
MEM[base: _76, offset: 0B] = _6;
_7 = tmp_23 >> 16;
_9 = (unsigned char) _7;
MEM[base: _76, offset: 1B] = _9;
_10 = tmp_23 >> 8;
_12 = (unsigned char) _10;
MEM[base: _76, offset: 2B] = _12;
_14 = (unsigned char) tmp_23;
MEM[base: _76, offset: 3B] = _14;
ivtmp.27_81 = ivtmp.27_82 + 4;
ivtmp.28_78 = ivtmp.28_79 + 4;
if (_71 != ivtmp.28_78)
goto <bb 5>; [87.51%]
else
goto <bb 6>; [12.49%]
<bb 5> [5.88%]:
# ivtmp.27_82 = PHI <ivtmp.27_81(4), ivtmp.27_80(2)>
# ivtmp.28_79 = PHI <ivtmp.28_78(4), ivtmp.28_77(2)>
_83 = ivtmp.28_79 + 16;
goto <bb 3>; [100.00%]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-06-08 21:29 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
2011-05-28 12:59 ` rguenth at gcc dot gnu.org
2021-06-08 21:29 ` pinskia at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).