public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c++/17481] New: Dead temporaries saved across function calls
@ 2004-09-14 17:14 benoitsevigny at hotmail dot com
2004-09-14 18:15 ` [Bug c++/17481] " giovannibajo at libero dot it
0 siblings, 1 reply; 2+ messages in thread
From: benoitsevigny at hotmail dot com @ 2004-09-14 17:14 UTC (permalink / raw)
To: gcc-bugs
When optimizing, GCC needlessly saves dead temporary aggregates on the stack
across function calls.
The following code illustrates the case:
// Scalar type wrapped by the aggregate below.
typedef float pod;
// Minimal wrapper around a single pod value, with user-written constructors
// and copy assignment.
struct aggregate {
pod p;
// Default constructor: gives no initializer for p, so p is left uninitialized.
inline aggregate()
{
}
// Construct from a raw pod value.
inline aggregate(pod q) : p(q)
{
}
// Copy constructor: copies the wrapped value.
inline aggregate(const aggregate &a) : p(a.p)
{
}
// Copy assignment: copies the wrapped value and returns *this.
inline aggregate &operator=(const aggregate &a)
{
p = a.p;
return *this;
}
};
// Sum of the wrapped values, returned by value as a new aggregate.
inline aggregate operator+(const aggregate &a, const aggregate &b)
{
return aggregate(a.p + b.p);
}
// Product of the wrapped values, returned by value as a new aggregate.
inline aggregate operator*(const aggregate &a, const aggregate &b)
{
return aggregate(a.p * b.p);
}
// fcall returns a copy of its argument. Both variants have the same body;
// INLINE_FCALL only selects whether the function is declared inline or is
// marked noinline so that test() has to emit a real call.
#ifdef INLINE_FCALL
inline aggregate fcall(const aggregate &a)
{
return a;
}
#else
__attribute__((noinline)) aggregate fcall(const aggregate &a)
{
return a;
}
#endif
// Computes a*(b + c) + b*(c + a) + c*(a + b) into d, then returns fcall(d).
// Every operator call in the expression returns an aggregate by value; those
// temporaries are confined to the inner block and are not referenced by the
// fcall(d) call that follows it.
aggregate test(const aggregate &a, const aggregate &b, const aggregate &c)
{
aggregate d;
{
// expression generating temporaries
d = a*(b + c) + b*(c + a) + c*(a + b);
}
// dead temporaries are saved on the stack when fcall is not inlined
return fcall(d);
}
// Driver: calls test() once with the values 1, 2 and 3. The result d is not
// used afterwards, and argc/argv are not read.
int main(int argc, const char *argv[])
{
aggregate a(1), b(2), c(3);
aggregate d = test(a, b, c);
return 0;
}
In the test() function, several temporaries are created in order to evaluate
the expression. When compiled with: gcc -O3 -DINLINE_FCALL -S, the fcall
function is integrated in its caller (resulting in no function call) and the
optimizer does a very good job of assigning temporaries to machine registers:
__Z4testRK9aggregateS1_S1_:
pushl %ebp
movl %esp, %ebp
subl $152, %esp # unused stack space
movl %ebx, -4(%ebp)
movl 20(%ebp), %ecx
movl 12(%ebp), %ebx
movl 16(%ebp), %edx
movl 8(%ebp), %eax
flds (%ebx)
flds (%ecx)
fadds (%edx)
fxch %st(1)
fadds (%ecx)
fxch %st(1)
fmuls (%ebx)
fxch %st(1)
fmuls (%edx)
faddp %st, %st(1)
flds (%edx)
fadds (%ebx)
fmuls (%ecx)
faddp %st, %st(1)
fstps (%eax)
movl -4(%ebp), %ebx
movl %ebp, %esp
popl %ebp
ret $4
However, when fcall is not inlined (gcc -O3 -S), all temporaries are saved on
the stack even though they are no longer used (their scope is not even visible
to the call site), resulting in lots of dead stores:
__Z4testRK9aggregateS1_S1_:
pushl %ebp
movl %esp, %ebp
pushl %esi
leal -24(%ebp), %esi
pushl %ebx
subl $160, %esp
movl 20(%ebp), %ecx
movl 16(%ebp), %eax
movl 12(%ebp), %edx
movl 8(%ebp), %ebx
flds (%ecx)
fadds (%eax)
-> fsts -88(%ebp)
flds (%edx)
fmulp %st, %st(1)
-> fsts -72(%ebp)
flds (%edx)
fadds (%ecx)
-> fsts -120(%ebp)
fmuls (%eax)
fadd %st, %st(1)
-> fstps -104(%ebp)
-> fsts -56(%ebp)
flds (%eax)
fadds (%edx)
-> fsts -152(%ebp)
fmuls (%ecx)
movl %esi, 4(%esp)
movl %ebx, (%esp)
fadd %st, %st(1)
-> fstps -136(%ebp)
-> fsts -40(%ebp)
fstps -24(%ebp)
call __Z5fcallRK9aggregate
subl $4, %esp
movl %ebx, %eax
leal -8(%ebp), %esp
popl %ebx
popl %esi
popl %ebp
ret $4
I have seen the same problem with other basic types (including simd builtin
types) on several other targets (notably Apple's ppc port and Sony's mips5900
port). So when encapsulating a basic type into a structure, the optimizer
misses lots of optimization opportunities anytime a function call happens in a
function using temporaries, even when they are no longer used. Not only does
this result in lots of dead stores, but it also consumes the equivalent amount
of stack space (each temporary is assigned a distinct stack slot).
Interesting note: even when there are no function calls, stack space is still
allocated for temporaries even though they never transit to memory (as we can
see in the first assembly output).
--
Summary: Dead temporaries saved across function calls
Product: gcc
Version: 3.3.1
Status: UNCONFIRMED
Severity: enhancement
Priority: P2
Component: c++
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: benoitsevigny at hotmail dot com
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i686-pc-cygwin
GCC host triplet: i686-pc-cygwin
GCC target triplet: i686-pc-cygwin
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17481
^ permalink raw reply [flat|nested] 2+ messages in thread
* [Bug c++/17481] Dead temporaries saved across function calls
2004-09-14 17:14 [Bug c++/17481] New: Dead temporaries saved across function calls benoitsevigny at hotmail dot com
@ 2004-09-14 18:15 ` giovannibajo at libero dot it
0 siblings, 0 replies; 2+ messages in thread
From: giovannibajo at libero dot it @ 2004-09-14 18:15 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From giovannibajo at libero dot it 2004-09-14 18:15 -------
With 4.0.0, with -O3 -DINLINE_FCALL, we get:
_Z4testRK9aggregateS1_S1_:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %eax
movl 12(%ebp), %edx
flds (%edx)
movl 16(%ebp), %edx
flds (%edx)
movl 20(%ebp), %edx
flds (%edx)
fld %st(2)
fadd %st(2), %st
fmul %st(1), %st
fld %st(3)
fadd %st(2), %st
fmul %st(3), %st
fxch %st(3)
faddp %st, %st(2)
fxch %st(3)
fmulp %st, %st(1)
faddp %st, %st(1)
faddp %st, %st(1)
fstps (%eax)
leave
ret $4
Notice that the code is more optimized, and the unused stack space is gone.
With simply -O3, we get:
_Z4testRK9aggregateS1_S1_:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $16, %esp
movl 8(%ebp), %ebx
movl 12(%ebp), %eax
flds (%eax)
movl 16(%ebp), %eax
flds (%eax)
movl 20(%ebp), %eax
flds (%eax)
fld %st(2)
fadd %st(2), %st
fmul %st(1), %st
fld %st(3)
fadd %st(2), %st
fmul %st(3), %st
fxch %st(3)
faddp %st, %st(2)
fxch %st(3)
fmulp %st, %st(1)
faddp %st, %st(1)
faddp %st, %st(1)
fstps -8(%ebp)
leal -8(%ebp), %eax
pushl %eax
pushl %ebx
call _Z5fcallRK9aggregate
movl %ebx, %eax
movl -4(%ebp), %ebx
leave
ret $4
where all the useless code is gone.
This is another good example of how 4.0 does miracles with C++ code, thanks to
the brand new tree optimizers. I am closing this bug as fixed in 4.0, since it
obviously will not be fixed in any earlier version.
Thanks for the report.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |RESOLVED
Known to fail| |3.3.1
Known to work| |4.0.0
Resolution| |FIXED
Target Milestone|--- |4.0.0
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17481
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2004-09-14 18:15 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-09-14 17:14 [Bug c++/17481] New: Dead temporaries saved across function calls benoitsevigny at hotmail dot com
2004-09-14 18:15 ` [Bug c++/17481] " giovannibajo at libero dot it
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).