From mboxrd@z Thu Jan 1 00:00:00 1970 From: Thomas Koenig To: egcs@cygnus.com Subject: Register allocation Date: Tue, 14 Oct 1997 05:51:00 -0000 Message-id: <199710141250.NAA03152@mvmap66.ciw.uni-karlsruhe.de> X-SW-Source: 1997-10/msg00533.html egcs 971008 with haifa enabled generates two unnecessary register moves for the function for Linux i386-glibc1: typedef struct pt { int x; int y; struct pt *n; } pt; #define WALL(a) ((a)->n == 0) #define SQR(a) ((double) (a)* (double) (a)) double e_point_point(pt *a, pt *b) { double res; res = SQR(a->x - b->x) + SQR(a->y - b->y); if (WALL(a) || WALL(b)) { res *= 4; } return res; } Here's the assembly output: .file "point.c" .version "01.01" / GNU C version egcs-2.90.12 971008 (gcc2-970802 experimental) (i586-pc-linux-gnulibc1) compiled by GNU C version egcs-2.90.12 971008 (gcc2-970802 experimental). / options passed: -O6 -fomit-frame-pointer -fno-exceptions / options enabled: -fdefer-pop -fomit-frame-pointer -fcse-follow-jumps / -fcse-skip-blocks -fexpensive-optimizations -fthread-jumps / -fstrength-reduce -fpeephole -fforce-mem -ffunction-cse / -finline-functions -finline -fkeep-static-consts -fcaller-saves / -fpcc-struct-return -frerun-cse-after-loop -fschedule-insns2 / -fsched-interblock -fsched-spec -fcommon -fverbose-asm -fgnu-linker / -fregmove -falias-check -fargument-alias -m80387 -mhard-float / -mno-soft-float -mieee-fp -mfp-ret-in-387 -mschedule-prologue / -mcpu=pentium -march=pentium gcc2_compiled.: .section .rodata .align 4 .LC1: .long 0x0,0x40100000 .text .align 4 .globl e_point_point .type e_point_point,@function e_point_point: pushl %ebx movl 8(%esp),%edx movl 12(%esp),%ecx movl (%edx),%ebx movl (%ecx),%eax fmul %st(0),%st subl %eax,%ebx movl %ebx,%eax ^^^^^^^^^^^^^^ pushl %eax fildl (%esp) addl $4,%esp fmul %st(0),%st faddp %st,%st(1) cmpl $0,8(%edx) je .L3 cmpl $0,8(%ecx) jne .L2 .L3: fldl .LC1 fmulp %st,%st(1) .L2: popl %ebx ret .Lfe1: .size e_point_point,.Lfe1-e_point_point .ident "GCC: (GNU) egcs-2.90.12 
971008 (gcc2-970802 experimental)" Both of these register moves are unnecessary. When I replace the first one with the more obvious sequence "subl %eax,%ebx; pushl %ebx; movl 4(%edx),%ebx; fildl (%esp)", the resulting code is indeed faster. -- Thomas Koenig, Thomas.Koenig@ciw.uni-karlsruhe.de, ig25@dkauni2.bitnet. The joy of engineering is to find a straight line on a double logarithmic diagram.