public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c++/17481] New: Dead temporaries saved across function calls
@ 2004-09-14 17:14 benoitsevigny at hotmail dot com
  2004-09-14 18:15 ` [Bug c++/17481] " giovannibajo at libero dot it
  0 siblings, 1 reply; 2+ messages in thread
From: benoitsevigny at hotmail dot com @ 2004-09-14 17:14 UTC (permalink / raw)
  To: gcc-bugs

When optimizing, GCC needlessly saves dead temporary aggregates on the stack 
across function calls.

The following code illustrates the case:

// Scalar type wrapped by `aggregate`; it is float in this test case.
typedef float		pod;

// Wrapper around a single `pod` member with user-provided constructors and
// copy assignment, all declared inline.
struct aggregate {

	// The wrapped value.
	pod		p;

	// Default constructor: the body is empty, so `p` is not initialized here.
	inline aggregate()
	{
	}
	// Constructs from a raw value: initializes p with q.
	inline aggregate(pod q) : p(q)
	{
	}
	// Copy constructor: initializes p with a.p.
	inline aggregate(const aggregate &a) : p(a.p)
	{
	}
	// Copy assignment: stores a.p into p and returns *this.
	inline aggregate &operator=(const aggregate &a)
	{
		p = a.p;
		return *this;
	}
};
// Returns a new aggregate constructed from the sum a.p + b.p.
inline aggregate operator+(const aggregate &a, const aggregate &b)
{
	return aggregate(a.p + b.p);
}
// Returns a new aggregate constructed from the product a.p * b.p.
inline aggregate operator*(const aggregate &a, const aggregate &b)
{
	return aggregate(a.p * b.p);
}

// fcall returns a copy of its argument.  The two variants have identical
// bodies; defining INLINE_FCALL selects the one declared `inline`, otherwise
// the one marked __attribute__((noinline)) is used.
#ifdef INLINE_FCALL
	inline aggregate fcall(const aggregate &a)
	{
		return a;
	}
#else
	__attribute__((noinline)) aggregate fcall(const aggregate &a)
	{
		return a;
	}
#endif

// Function whose generated code is under discussion: assigns
// a*(b + c) + b*(c + a) + c*(a + b) to a local `d` inside a nested block,
// then returns the result of fcall(d).
aggregate test(const aggregate &a, const aggregate &b, const aggregate &c)
{
	aggregate d;
	{
                // expression generating temporaries
		d = a*(b + c) + b*(c + a) + c*(a + b);
	}
        // dead temporaries are saved on the stack when fcall is not inlined
	return fcall(d);
}

// Driver: constructs three aggregates from 1, 2 and 3 and calls test() once.
// The result `d` is not used afterwards and main always returns 0.
int main(int argc, const char *argv[])
{
	aggregate a(1), b(2), c(3);

	aggregate d = test(a, b, c);

	return 0;
}

In the test() function, several temporaries are created in order to evaluate 
the expression.  When compiled with: gcc -O3 -DINLINE_FCALL -S, the fcall 
function is integrated in its caller (resulting in no function call) and the 
optimizer does a very good job at assigning temporaries to machine registers:

__Z4testRK9aggregateS1_S1_:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$152, %esp                # unused stack space
	movl	%ebx, -4(%ebp)
	movl	20(%ebp), %ecx
	movl	12(%ebp), %ebx
	movl	16(%ebp), %edx
	movl	8(%ebp), %eax
	flds	(%ebx)
	flds	(%ecx)
	fadds	(%edx)
	fxch	%st(1)
	fadds	(%ecx)
	fxch	%st(1)
	fmuls	(%ebx)
	fxch	%st(1)
	fmuls	(%edx)
	faddp	%st, %st(1)
	flds	(%edx)
	fadds	(%ebx)
	fmuls	(%ecx)
	faddp	%st, %st(1)
	fstps	(%eax)
	movl	-4(%ebp), %ebx
	movl	%ebp, %esp
	popl	%ebp
	ret	$4


However, when fcall is not inlined (gcc -O3 -S), all temporaries are saved on 
the stack even though they are no longer used (their scope is not even visible 
to the call site), resulting in lots of dead stores:

__Z4testRK9aggregateS1_S1_:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	leal	-24(%ebp), %esi
	pushl	%ebx
	subl	$160, %esp
	movl	20(%ebp), %ecx
	movl	16(%ebp), %eax
	movl	12(%ebp), %edx
	movl	8(%ebp), %ebx
	flds	(%ecx)
	fadds	(%eax)
->	fsts	-88(%ebp)
	flds	(%edx)
	fmulp	%st, %st(1)
->	fsts	-72(%ebp)
	flds	(%edx)
	fadds	(%ecx)
->	fsts	-120(%ebp)
	fmuls	(%eax)
	fadd	%st, %st(1)
->	fstps	-104(%ebp)
->	fsts	-56(%ebp)
	flds	(%eax)
	fadds	(%edx)
->	fsts	-152(%ebp)
	fmuls	(%ecx)
	movl	%esi, 4(%esp)
	movl	%ebx, (%esp)
	fadd	%st, %st(1)
->	fstps	-136(%ebp)
->	fsts	-40(%ebp)
	fstps	-24(%ebp)
	call	__Z5fcallRK9aggregate
	subl	$4, %esp
	movl	%ebx, %eax
	leal	-8(%ebp), %esp
	popl	%ebx
	popl	%esi
	popl	%ebp
	ret	$4

I have seen the same problem with other basic types (including simd builtin 
types) on several other targets (notably Apple's ppc port and Sony's mips5900 
port).  So when encapsulating a basic type into a structure, the optimizer 
misses lots of optimization opportunities anytime a function call happens in a 
function using temporaries, even when they are no longer used.  Not only does 
this result in lots of dead stores, but it also consumes the equivalent amount of 
stack space (each temporary is assigned a distinct stack slot).

Interesting note: even when there are no function calls, stack space is still 
allocated for temporaries even though they never transit to memory (as we can 
see in the first assembly output).

-- 
           Summary: Dead temporaries saved across function calls
           Product: gcc
           Version: 3.3.1
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P2
         Component: c++
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: benoitsevigny at hotmail dot com
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i686-pc-cygwin
  GCC host triplet: i686-pc-cygwin
GCC target triplet: i686-pc-cygwin


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17481


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [Bug c++/17481] Dead temporaries saved across function calls
  2004-09-14 17:14 [Bug c++/17481] New: Dead temporaries saved across function calls benoitsevigny at hotmail dot com
@ 2004-09-14 18:15 ` giovannibajo at libero dot it
  0 siblings, 0 replies; 2+ messages in thread
From: giovannibajo at libero dot it @ 2004-09-14 18:15 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From giovannibajo at libero dot it  2004-09-14 18:15 -------
With 4.0.0, with -O3 -DINLINE_FCALL, we get:

_Z4testRK9aggregateS1_S1_:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %eax
        movl    12(%ebp), %edx
        flds    (%edx)
        movl    16(%ebp), %edx
        flds    (%edx)
        movl    20(%ebp), %edx
        flds    (%edx)
        fld     %st(2)
        fadd    %st(2), %st
        fmul    %st(1), %st
        fld     %st(3)
        fadd    %st(2), %st
        fmul    %st(3), %st
        fxch    %st(3)
        faddp   %st, %st(2)
        fxch    %st(3)
        fmulp   %st, %st(1)
        faddp   %st, %st(1)
        faddp   %st, %st(1)
        fstps   (%eax)
        leave
        ret     $4


Notice that the code is more optimized, and the unused stack space is gone.
With simply -O3, we get:


_Z4testRK9aggregateS1_S1_:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        subl    $16, %esp
        movl    8(%ebp), %ebx
        movl    12(%ebp), %eax
        flds    (%eax)
        movl    16(%ebp), %eax
        flds    (%eax)
        movl    20(%ebp), %eax
        flds    (%eax)
        fld     %st(2)
        fadd    %st(2), %st
        fmul    %st(1), %st
        fld     %st(3)
        fadd    %st(2), %st
        fmul    %st(3), %st
        fxch    %st(3)
        faddp   %st, %st(2)
        fxch    %st(3)
        fmulp   %st, %st(1)
        faddp   %st, %st(1)
        faddp   %st, %st(1)
        fstps   -8(%ebp)
        leal    -8(%ebp), %eax
        pushl   %eax
        pushl   %ebx
        call    _Z5fcallRK9aggregate
        movl    %ebx, %eax
        movl    -4(%ebp), %ebx
        leave
        ret     $4


where all the useless code is gone.

This is another good example of how 4.0 does miracles with C++ code, thanks to 
the brand new tree optimizers. I close this bug as fixed in 4.0, as obviously 
it will not be fixed in any previous version.

Thanks for the report.


-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
      Known to fail|                            |3.3.1
      Known to work|                            |4.0.0
         Resolution|                            |FIXED
   Target Milestone|---                         |4.0.0


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17481


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2004-09-14 18:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-09-14 17:14 [Bug c++/17481] New: Dead temporaries saved across function calls benoitsevigny at hotmail dot com
2004-09-14 18:15 ` [Bug c++/17481] " giovannibajo at libero dot it

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).