From: Marcel Cox <marcel.cox@ibm.net>
To: gcc <gcc@gcc.gnu.org>
Subject: ix86 backend: opportunities for optimizations ?
Date: Thu, 09 Sep 1999 11:37:00 -0000 [thread overview]
Message-ID: <VA.000000bc.009ceec2@cimetmc> (raw)
I have written the following test program and I have noticed that the
resulting code produced by gcc is rather inefficient in a number of
points:
Program:
int func1(int);
int func2(double);
int func3(int a)
{
return func1(a)+func2(a);
}
Code produced by GCC 2.95 with the options -O3 -S -mpentiumpro:
.file "test2.c"
gcc2_compiled.:
___gnu_compiled_c:
.text
.align 4
.globl _func3
.def _func3; .scl 2; .type 32; .endef
_func3:
pushl %ebp
movl %esp,%ebp
subl $32,%esp
pushl %esi
pushl %ebx
movl 8(%ebp),%ebx
addl $-12,%esp
pushl %ebx
call _func1
movl %ebx,-4(%ebp)
fildl -4(%ebp)
movl %eax,%esi
addl $-8,%esp
subl $8,%esp
fstpl (%esp)
call _func2
addl %esi,%eax
leal -40(%ebp),%esp
popl %ebx
popl %esi
leave
ret
.def _func2; .scl 2; .type 32; .endef
.def _func1; .scl 2; .type 32; .endef
Code produced by the 19990907 snapshot with the same options:
.file "test2.c"
.version "01.01"
gcc2_compiled.:
.text
.align 16
.globl func3
.type func3,@function
func3:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
pushl %esi
pushl %ebx
subl $12, %esp
movl 8(%ebp), %ebx
pushl %ebx
call func1
subl $8, %esp
movl %eax, %esi
pushl %ebx
fildl (%esp)
popl %ebx
leal -8(%esp), %esp
fstpl (%esp)
call func2
leal -24(%ebp), %esp
leal (%eax,%esi), %eax
popl %ebx
popl %esi
movl %ebp, %esp
popl %ebp
ret
.Lfe1:
.size func3,.Lfe1-func3
.ident "GCC: (GNU) 2.96 19990907 (experimental)"
The inefficiencies I have noticed are :
1) When converting an integer to a floating point number, GCC prefers to
push an integer register to the stack and then load the value into a
floating point unit, rather than directly loading the value from memory
where the parameter a is stored. That would avoid the costly memory
accesses generated by pushl %ebx and popl %ebx.
2) GCC does not combine instructions incrementing or decrementing the
stack pointer. For the GCC 2.95 code for example, the instructions "addl
$-8,%esp" and "subl $8,%esp" could be combined to a single instruction
decrementing the stack pointer by 16.
Same (though less obvious) for the GCC 2.96 code where "subl $8, %esp" and
"leal -8(%esp), %esp" could be combined.
BTW which of the instructions "subl $8, %esp" or "leal -8(%esp), %esp" is
more efficient ? Could the compiler use the same one in all cases ?
Marcel Cox
WARNING: multiple messages have this Message-ID
From: Marcel Cox <marcel.cox@ibm.net>
To: gcc <gcc@gcc.gnu.org>
Subject: ix86 backend: opportunities for optimizations ?
Date: Thu, 30 Sep 1999 18:02:00 -0000 [thread overview]
Message-ID: <VA.000000bc.009ceec2@cimetmc> (raw)
Message-ID: <19990930180200.Pootb4gHhfmmyzAjvyHeJkfKfB9uqbBsCLJJvlg8lrw@z> (raw)
I have written the following test program and I have noticed that the
resulting code produced by gcc is rather inefficient in a number of
points:
Program:
int func1(int);
int func2(double);
int func3(int a)
{
return func1(a)+func2(a);
}
Code produced by GCC 2.95 with the options -O3 -S -mpentiumpro:
.file "test2.c"
gcc2_compiled.:
___gnu_compiled_c:
.text
.align 4
.globl _func3
.def _func3; .scl 2; .type 32; .endef
_func3:
pushl %ebp
movl %esp,%ebp
subl $32,%esp
pushl %esi
pushl %ebx
movl 8(%ebp),%ebx
addl $-12,%esp
pushl %ebx
call _func1
movl %ebx,-4(%ebp)
fildl -4(%ebp)
movl %eax,%esi
addl $-8,%esp
subl $8,%esp
fstpl (%esp)
call _func2
addl %esi,%eax
leal -40(%ebp),%esp
popl %ebx
popl %esi
leave
ret
.def _func2; .scl 2; .type 32; .endef
.def _func1; .scl 2; .type 32; .endef
Code produced by the 19990907 snapshot with the same options:
.file "test2.c"
.version "01.01"
gcc2_compiled.:
.text
.align 16
.globl func3
.type func3,@function
func3:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
pushl %esi
pushl %ebx
subl $12, %esp
movl 8(%ebp), %ebx
pushl %ebx
call func1
subl $8, %esp
movl %eax, %esi
pushl %ebx
fildl (%esp)
popl %ebx
leal -8(%esp), %esp
fstpl (%esp)
call func2
leal -24(%ebp), %esp
leal (%eax,%esi), %eax
popl %ebx
popl %esi
movl %ebp, %esp
popl %ebp
ret
.Lfe1:
.size func3,.Lfe1-func3
.ident "GCC: (GNU) 2.96 19990907 (experimental)"
The inefficiencies I have noticed are :
1) When converting an integer to a floating point number, GCC prefers to
push an integer register to the stack and then load the value into a
floating point unit, rather than directly loading the value from memory
where the parameter a is stored. That would avoid the costly memory
accesses generated by pushl %ebx and popl %ebx.
2) GCC does not combine instructions incrementing or decrementing the
stack pointer. For the GCC 2.95 code for example, the instructions "addl
$-8,%esp" and "subl $8,%esp" could be combined to a single instruction
decrementing the stack pointer by 16.
Same (though less obvious) for the GCC 2.96 code where "subl $8, %esp" and
"leal -8(%esp), %esp" could be combined.
BTW which of the instructions "subl $8, %esp" or "leal -8(%esp), %esp" is
more efficient ? Could the compiler use the same one in all cases ?
Marcel Cox
next reply other threads:[~1999-09-09 11:37 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
1999-09-09 11:37 Marcel Cox [this message]
1999-09-30 18:02 ` Marcel Cox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=VA.000000bc.009ceec2@cimetmc \
--to=marcel.cox@ibm.net \
--cc=gcc@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).