From mboxrd@z Thu Jan 1 00:00:00 1970 From: Marcel Cox To: gcc Subject: ix86 backend: opportunities for optimizations ? Date: Thu, 30 Sep 1999 18:02:00 -0000 Message-ID: X-SW-Source: 1999-09n/msg00367.html Message-ID: <19990930180200.Pootb4gHhfmmyzAjvyHeJkfKfB9uqbBsCLJJvlg8lrw@z> I have written the following test program and I have noticed that the resulting code produced by gcc is rather inefficient in a number of points: Program: int func1(int); int func2(double); int func3(int a) { return func1(a)+func2(a); } Code produced by GCC 2.95 with the options -O3 -S -mpentiumpro: .file "test2.c" gcc2_compiled.: ___gnu_compiled_c: .text .align 4 .globl _func3 .def _func3; .scl 2; .type 32; .endef _func3: pushl %ebp movl %esp,%ebp subl $32,%esp pushl %esi pushl %ebx movl 8(%ebp),%ebx addl $-12,%esp pushl %ebx call _func1 movl %ebx,-4(%ebp) fildl -4(%ebp) movl %eax,%esi addl $-8,%esp subl $8,%esp fstpl (%esp) call _func2 addl %esi,%eax leal -40(%ebp),%esp popl %ebx popl %esi leave ret .def _func2; .scl 2; .type 32; .endef .def _func1; .scl 2; .type 32; .endef Code produced by the 19990907 snapshot with the same options: .file "test2.c" .version "01.01" gcc2_compiled.: .text .align 16 .globl func3 .type func3,@function func3: pushl %ebp movl %esp, %ebp subl $16, %esp pushl %esi pushl %ebx subl $12, %esp movl 8(%ebp), %ebx pushl %ebx call func1 subl $8, %esp movl %eax, %esi pushl %ebx fildl (%esp) popl %ebx leal -8(%esp), %esp fstpl (%esp) call func2 leal -24(%ebp), %esp leal (%eax,%esi), %eax popl %ebx popl %esi movl %ebp, %esp popl %ebp ret .Lfe1: .size func3,.Lfe1-func3 .ident "GCC: (GNU) 2.96 19990907 (experimental)" The inefficiencies I have noticed are: 1) When converting an integer to a floating point number, GCC prefers to push an integer register to the stack and then load the value into a floating point unit, rather than directly loading the value from memory where the parameter a is stored. 
That would avoid the costly memory accesses generated by pushl %ebx and popl %ebx. 2) GCC does not combine instructions incrementing or decrementing the stack pointer. For the GCC 2.95 code for example, the instructions "addl $-8,%esp" and "subl $8,%esp" could be combined into a single instruction decrementing the stack pointer by 16. Same (though less obvious) for the GCC 2.96 code where "subl $8, %esp" and "leal -8(%esp), %esp" could be combined. BTW, which of the instructions "subl $8, %esp" or "leal -8(%esp), %esp" is more efficient? Could the compiler use the same one in all cases? Marcel Cox