public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/23970] loop-invariant-motion is not doing it's work
2005-09-19 18:52 [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work rguenth at gcc dot gnu dot org
@ 2005-09-19 18:52 ` rguenth at gcc dot gnu dot org
2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
1 sibling, 0 replies; 5+ messages in thread
From: rguenth at gcc dot gnu dot org @ 2005-09-19 18:52 UTC (permalink / raw)
To: gcc-bugs
--
What |Removed |Added
----------------------------------------------------------------------------
CC| |rakdver at atrey dot karlin
| |dot mff dot cuni dot cz
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work
@ 2005-09-19 18:52 rguenth at gcc dot gnu dot org
2005-09-19 18:52 ` [Bug tree-optimization/23970] " rguenth at gcc dot gnu dot org
2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
0 siblings, 2 replies; 5+ messages in thread
From: rguenth at gcc dot gnu dot org @ 2005-09-19 18:52 UTC (permalink / raw)
To: gcc-bugs
void Ekin(double *e, int *stridee,
double *vx, int *stridevx,
double *vy, int *stridevy,
double *vz, int *stridevz,
int *sz)
{
int i1 = sz[0];
int j1 = sz[1];
int k1 = sz[2];
int i, j, k;
for (k=0; k<k1; ++k)
for (j=0; j<j1; ++j)
for (i=0; i<i1; ++i)
{
e[i + j * stridee[1] + k * stridee[2]]
= 0.128 * (
((vx[i + j * stridevx[1] + k * stridevx[2]]
+ vx[i+1 + j * stridevx[1] + k * stridevx[2]])
* (vx[i + j * stridevx[1] + k * stridevx[2]]
+ vx[i+1 + j * stridevx[1] + k * stridevx[2]]))
+ ((vy[i + j * stridevy[1] + k * stridevy[2]]
+ vy[i + (j+1) * stridevy[1] + k * stridevy[2]])
* (vy[i + j * stridevy[1] + k * stridevy[2]]
+ vy[i + (j+1) * stridevy[1] + k * stridevy[2]]))
+ ((vz[i + j * stridevz[1] + k * stridevz[2]]
+ vz[i + j * stridevz[1] + (k+1) * stridevz[2]])
* (vz[i + j * stridevz[1] + k * stridevz[2]]
+ vz[i + j * stridevz[1] + (k+1) * stridevz[2]])));
}
}
lim moves all the j*stridev?[1] and k*stridev?[2] to the j-loop but
does not move the k*stridev?[2] to the k-loop. This results in the
following asm.
Ekin:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $44, %esp
movl 40(%ebp), %eax
movl $0, -24(%ebp)
movl (%eax), %edx
movl 4(%eax), %ecx
movl 8(%eax), %eax
movl %edx, -40(%ebp)
movl %ecx, -36(%ebp)
testl %eax, %eax
movl %eax, -32(%ebp)
jle .L13
.L4:
movl -24(%ebp), %edx
movl -36(%ebp), %eax
incl %edx
testl %eax, %eax
movl %edx, -48(%ebp)
jle .L7
movl -40(%ebp), %ecx
movl $0, -16(%ebp)
testl %ecx, %ecx
jle .L17
.L10:
movl 28(%ebp), %eax
movl -24(%ebp), %ebx
movl -16(%ebp), %edi
movl $0, -56(%ebp)
fldl .LC0
movl 8(%eax), %edx
movl 4(%eax), %ecx
incl %edi
movl %edi, -28(%ebp)
movl -16(%ebp), %edi
imull %edx, %ebx
movl 36(%ebp), %edx
movl 4(%edx), %eax
movl 8(%edx), %esi
movl 12(%ebp), %edx
imull %eax, %edi
movl -16(%ebp), %eax
movl %edi, -44(%ebp)
movl 4(%edx), %edi
movl -24(%ebp), %edx
imull %edi, %eax
movl 12(%ebp), %edi
imull 8(%edi), %edx
addl %edx, %eax
movl -16(%ebp), %edx
sall $3, %eax
movl %eax, -20(%ebp)
movl 20(%ebp), %eax
movl 4(%eax), %edi
movl -24(%ebp), %eax
imull %edi, %edx
movl 20(%ebp), %edi
imull 8(%edi), %eax
addl %eax, %edx
movl -16(%ebp), %eax
imull %ecx, %eax
addl %ebx, %eax
leal 0(,%eax,8), %edi
movl -28(%ebp), %eax
imull %eax, %ecx
movl -24(%ebp), %eax
addl %ecx, %ebx
movl -44(%ebp), %ecx
imull %esi, %eax
sall $3, %ebx
addl %ecx, %eax
movl -48(%ebp), %ecx
sall $3, %eax
movl %eax, -52(%ebp)
imull %ecx, %esi
movl -44(%ebp), %ecx
addl %esi, %ecx
leal 0(,%ecx,8), %esi
.p2align 4,,15
.L5:
movl 16(%ebp), %eax
movl 24(%ebp), %ecx
incl -56(%ebp)
fldl (%eax,%edx,8)
faddl 8(%eax,%edx,8)
incl %edx
movl -52(%ebp), %eax
addl $8, -52(%ebp)
fldl (%edi,%ecx)
addl $8, %edi
faddl (%ecx,%ebx)
addl $8, %ebx
movl 32(%ebp), %ecx
fldl (%eax,%ecx)
faddl (%ecx,%esi)
fxch %st(2)
addl $8, %esi
movl -20(%ebp), %eax
movl 8(%ebp), %ecx
fmul %st(0), %st
fxch %st(1)
fmul %st(0), %st
faddp %st, %st(1)
fxch %st(1)
fmul %st(0), %st
faddp %st, %st(1)
fmul %st(1), %st
fstpl (%eax,%ecx)
addl $8, %eax
movl %eax, -20(%ebp)
movl -56(%ebp), %eax
cmpl %eax, -40(%ebp)
jne .L5
fstp %st(0)
movl -28(%ebp), %edx
cmpl %edx, -36(%ebp)
jle .L7
.L18:
movl -40(%ebp), %ecx
movl %edx, -16(%ebp)
testl %ecx, %ecx
jg .L10
.L17:
movl -16(%ebp), %ecx
incl %ecx
movl %ecx, -28(%ebp)
movl -28(%ebp), %edx
cmpl %edx, -36(%ebp)
jg .L18
.L7:
movl -48(%ebp), %eax
cmpl %eax, -32(%ebp)
movl %eax, -24(%ebp)
jne .L4
.L13:
addl $44, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
--
Summary: loop-invariant-motion is not doing it's work
Product: gcc
Version: 4.1.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P2
Component: tree-optimization
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: rguenth at gcc dot gnu dot org
CC: gcc-bugs at gcc dot gnu dot org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Bug tree-optimization/23970] loop-invariant-motion is not doing it's work
2005-09-19 18:52 [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work rguenth at gcc dot gnu dot org
2005-09-19 18:52 ` [Bug tree-optimization/23970] " rguenth at gcc dot gnu dot org
@ 2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
1 sibling, 0 replies; 5+ messages in thread
From: rakdver at gcc dot gnu dot org @ 2005-09-20 13:34 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From rakdver at gcc dot gnu dot org 2005-09-20 13:33 -------
k*stridev?[2] cannot be moved more at the moment. The loops looks like
for (k = ...)
for (j = ...)
{
if (i >= i1)
continue;
do
{
tmp = k * stridevx[2];
...
} while (i < i1)
}
Note that if i >= i1, the load of stridevx[2] is never executed. And it is
possible that it will trap. Thus, we can only move it to point where we are
sure that we do not make it executed unconditionally. I.e. it is legal to move
it out of the innermost loop, but not out of the outer ones.
There are two optimization improvements that could help (and as this case is
fairly common, I will leave this PR open as an enhancement request).
The first optimization is predicated invariant motion. I.e. we cannot move
stridevx[2] out of the second loop, but we can move the expression (i < i1 ?
stridevx[2] : undefined) out of it, and it has the same value in all interesting
cases.
The second optimization is invariant exit motion. I.e. we can replace
do
{
code1;
if (invariant)
break;
code2;
} while (something);
with
if (invariant)
{
code1;
}
else
{
do
{
code1;
code2;
} while (something);
}
This could be achieved through loop unswitching (for non-innermost loops), or as
a separate optimization. Note however that for this optimization to be useful
for invariant motion, it would either have to be iterated with it, or, at the
expense of significant complications, to be implemented within invariant motion.
--
What |Removed |Added
----------------------------------------------------------------------------
Severity|normal |enhancement
Status|UNCONFIRMED |NEW
Ever Confirmed| |1
Last reconfirmed|0000-00-00 00:00:00 |2005-09-20 13:33:54
date| |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Bug tree-optimization/23970] loop-invariant-motion is not doing it's work
[not found] <bug-23970-4@http.gcc.gnu.org/bugzilla/>
2021-12-26 22:07 ` pinskia at gcc dot gnu.org
@ 2023-09-01 9:17 ` rguenth at gcc dot gnu.org
1 sibling, 0 replies; 5+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-09-01 9:17 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970
--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
Unswitching does this as a separate transform now, the "hoist guards"
transform.
It's even done completely separate now:
unsigned int
tree_ssa_unswitch_loops (function *fun)
{
bool changed_unswitch = false;
bool changed_hoist = false;
auto_edge_flag ignored_edge_flag (fun);
ranger = enable_ranger (fun);
/* Go through all loops starting from innermost, hoisting guards. */
for (auto loop : loops_list (fun, LI_FROM_INNERMOST))
{
if (loop->inner)
changed_hoist |= tree_unswitch_outer_loop (loop);
}
/* Go through innermost loops, unswitching on invariant predicates
within those. */
...
the question is how we'd call the "cheap" unswitching transform. I guess
simply testing optimize > 2 || opt_enabled or so isn't quite desirable.
Note it's possible to separate this into an entirely separate pass as well.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Bug tree-optimization/23970] loop-invariant-motion is not doing it's work
[not found] <bug-23970-4@http.gcc.gnu.org/bugzilla/>
@ 2021-12-26 22:07 ` pinskia at gcc dot gnu.org
2023-09-01 9:17 ` rguenth at gcc dot gnu.org
1 sibling, 0 replies; 5+ messages in thread
From: pinskia at gcc dot gnu.org @ 2021-12-26 22:07 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970
--- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
We are able to do the LIM at -O3 which enables loop unswitching.
I wonder if there is a way to enable a limited form of loop unswitching for -O2
where one branch of the loop to unswitch is empty.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-09-01 9:17 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-19 18:52 [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work rguenth at gcc dot gnu dot org
2005-09-19 18:52 ` [Bug tree-optimization/23970] " rguenth at gcc dot gnu dot org
2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
[not found] <bug-23970-4@http.gcc.gnu.org/bugzilla/>
2021-12-26 22:07 ` pinskia at gcc dot gnu.org
2023-09-01 9:17 ` rguenth at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).