* ix86 backend: opportunities for optimizations ?
@ 1999-09-09 11:37 Marcel Cox
1999-09-30 18:02 ` Marcel Cox
0 siblings, 1 reply; 2+ messages in thread
From: Marcel Cox @ 1999-09-09 11:37 UTC (permalink / raw)
To: gcc
I have written the following test program and I have noticed that the
resulting code produced by gcc is rather inefficient in a number of
points:
Program:
int func1(int);
int func2(double);
int func3(int a)
{
return func1(a)+func2(a);
}
Code produced by GCC 2.95 with the options -O3 -S -mpentiumpro:
.file "test2.c"
gcc2_compiled.:
___gnu_compiled_c:
.text
.align 4
.globl _func3
.def _func3; .scl 2; .type 32; .endef
_func3:
pushl %ebp
movl %esp,%ebp
subl $32,%esp
pushl %esi
pushl %ebx
movl 8(%ebp),%ebx
addl $-12,%esp
pushl %ebx
call _func1
movl %ebx,-4(%ebp)
fildl -4(%ebp)
movl %eax,%esi
addl $-8,%esp
subl $8,%esp
fstpl (%esp)
call _func2
addl %esi,%eax
leal -40(%ebp),%esp
popl %ebx
popl %esi
leave
ret
.def _func2; .scl 2; .type 32; .endef
.def _func1; .scl 2; .type 32; .endef
Code produced by the 19990907 snapshot with the same options:
.file "test2.c"
.version "01.01"
gcc2_compiled.:
.text
.align 16
.globl func3
.type func3,@function
func3:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
pushl %esi
pushl %ebx
subl $12, %esp
movl 8(%ebp), %ebx
pushl %ebx
call func1
subl $8, %esp
movl %eax, %esi
pushl %ebx
fildl (%esp)
popl %ebx
leal -8(%esp), %esp
fstpl (%esp)
call func2
leal -24(%ebp), %esp
leal (%eax,%esi), %eax
popl %ebx
popl %esi
movl %ebp, %esp
popl %ebp
ret
.Lfe1:
.size func3,.Lfe1-func3
.ident "GCC: (GNU) 2.96 19990907 (experimental)"
The inefficiencies I have noticed are :
1) When converting an integer to a floating point number, GCC prefers to
push an integer register onto the stack and then load the value into the
floating point unit, rather than directly loading the value from memory
where the parameter a is stored. Loading it directly would avoid the costly
memory accesses generated by pushl %ebx and popl %ebx.
2) GCC does not combine instructions incrementing or decrementing the
stack pointer. For the GCC 2.95 code for example, the instructions "addl
$-8,%esp" and "subl $8,%esp" could be combined into a single instruction
decrementing the stack pointer by 16.
The same holds (though less obviously) for the GCC 2.96 code, where
"subl $8, %esp" and "leal -8(%esp), %esp" could be combined.
BTW which of the instructions "subl $8, %esp" or "leal -8(%esp), %esp" is
more efficient ? Could the compiler use the same one in all cases ?
Marcel Cox
^ permalink raw reply [flat|nested] 2+ messages in thread
* ix86 backend: opportunities for optimizations ?
1999-09-09 11:37 ix86 backend: opportunities for optimizations ? Marcel Cox
@ 1999-09-30 18:02 ` Marcel Cox
0 siblings, 0 replies; 2+ messages in thread
From: Marcel Cox @ 1999-09-30 18:02 UTC (permalink / raw)
To: gcc
I have written the following test program and I have noticed that the
resulting code produced by gcc is rather inefficient in a number of
points:
Program:
int func1(int);
int func2(double);
int func3(int a)
{
return func1(a)+func2(a);
}
Code produced by GCC 2.95 with the options -O3 -S -mpentiumpro:
.file "test2.c"
gcc2_compiled.:
___gnu_compiled_c:
.text
.align 4
.globl _func3
.def _func3; .scl 2; .type 32; .endef
_func3:
pushl %ebp
movl %esp,%ebp
subl $32,%esp
pushl %esi
pushl %ebx
movl 8(%ebp),%ebx
addl $-12,%esp
pushl %ebx
call _func1
movl %ebx,-4(%ebp)
fildl -4(%ebp)
movl %eax,%esi
addl $-8,%esp
subl $8,%esp
fstpl (%esp)
call _func2
addl %esi,%eax
leal -40(%ebp),%esp
popl %ebx
popl %esi
leave
ret
.def _func2; .scl 2; .type 32; .endef
.def _func1; .scl 2; .type 32; .endef
Code produced by the 19990907 snapshot with the same options:
.file "test2.c"
.version "01.01"
gcc2_compiled.:
.text
.align 16
.globl func3
.type func3,@function
func3:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
pushl %esi
pushl %ebx
subl $12, %esp
movl 8(%ebp), %ebx
pushl %ebx
call func1
subl $8, %esp
movl %eax, %esi
pushl %ebx
fildl (%esp)
popl %ebx
leal -8(%esp), %esp
fstpl (%esp)
call func2
leal -24(%ebp), %esp
leal (%eax,%esi), %eax
popl %ebx
popl %esi
movl %ebp, %esp
popl %ebp
ret
.Lfe1:
.size func3,.Lfe1-func3
.ident "GCC: (GNU) 2.96 19990907 (experimental)"
The inefficiencies I have noticed are :
1) When converting an integer to a floating point number, GCC prefers to
push an integer register onto the stack and then load the value into the
floating point unit, rather than directly loading the value from memory
where the parameter a is stored. Loading it directly would avoid the costly
memory accesses generated by pushl %ebx and popl %ebx.
2) GCC does not combine instructions incrementing or decrementing the
stack pointer. For the GCC 2.95 code for example, the instructions "addl
$-8,%esp" and "subl $8,%esp" could be combined into a single instruction
decrementing the stack pointer by 16.
The same holds (though less obviously) for the GCC 2.96 code, where
"subl $8, %esp" and "leal -8(%esp), %esp" could be combined.
BTW which of the instructions "subl $8, %esp" or "leal -8(%esp), %esp" is
more efficient ? Could the compiler use the same one in all cases ?
Marcel Cox
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~1999-09-30 18:02 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1999-09-09 11:37 ix86 backend: opportunities for optimizations ? Marcel Cox
1999-09-30 18:02 ` Marcel Cox
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).