From mboxrd@z Thu Jan  1 00:00:00 1970
From: Thomas Koenig <ig25@mvmap66.ciw.uni-karlsruhe.de>
To: egcs@cygnus.com
Subject: Register allocation
Date: Tue, 14 Oct 1997 05:51:00 -0000
Message-id: <199710141250.NAA03152@mvmap66.ciw.uni-karlsruhe.de>
X-SW-Source: 1997-10/msg00533.html

egcs 971008 with haifa enabled generates two unnecessary register
moves for the following function on Linux i386-glibc1:

/* A pair of int coordinates plus a pointer to another pt. */
typedef struct pt
{
    int x;
    int y;
    struct pt *n;   /* compared against 0 by the WALL() macro */
} pt;

/* True when the n pointer of *a is null. */
#define WALL(a) ((a)->n == 0)
/* Square of a, computed in double. The argument is evaluated twice. */
#define SQR(a) ((double) (a)* (double) (a))

/*
 * Returns (a->x - b->x)^2 + (a->y - b->y)^2 as a double.
 * The result is multiplied by 4 when WALL() holds for a or for b.
 * Both pointers are dereferenced unconditionally.
 */
double e_point_point(pt *a, pt *b)
{
    double res;
    /* Each difference is formed in int, then squared in double by SQR. */
    res = SQR(a->x - b->x) + SQR(a->y - b->y);
    if (WALL(a) || WALL(b)) {
	res *= 4;
    }
    return res;
}

Here's the assembly output:

	.file	"point.c"
	.version	"01.01"
/ GNU C version egcs-2.90.12 971008 (gcc2-970802 experimental) (i586-pc-linux-gnulibc1) compiled by GNU C version egcs-2.90.12 971008 (gcc2-970802 experimental).
/ options passed:  -O6 -fomit-frame-pointer -fno-exceptions
/ options enabled:  -fdefer-pop -fomit-frame-pointer -fcse-follow-jumps
/ -fcse-skip-blocks -fexpensive-optimizations -fthread-jumps
/ -fstrength-reduce -fpeephole -fforce-mem -ffunction-cse
/ -finline-functions -finline -fkeep-static-consts -fcaller-saves
/ -fpcc-struct-return -frerun-cse-after-loop -fschedule-insns2
/ -fsched-interblock -fsched-spec -fcommon -fverbose-asm -fgnu-linker
/ -fregmove -falias-check -fargument-alias -m80387 -mhard-float
/ -mno-soft-float -mieee-fp -mfp-ret-in-387 -mschedule-prologue
/ -mcpu=pentium -march=pentium

gcc2_compiled.:
.section	.rodata
	.align 4
/ .LC1 holds the two 32-bit halves (low word first) of the IEEE-754
/ double 4.0; it is loaded with fldl at .L3.
.LC1:
	.long 0x0,0x40100000
.text
	.align 4
.globl e_point_point
	.type	 e_point_point,@function
/ double e_point_point(pt *a, pt *b)
/ After the pushl %ebx below, a is at 8(%esp) and b is at 12(%esp).
/ %edx = a, %ecx = b; the result is left in %st(0).
/ NOTE(review): the x-term sequence (first sub/mov/push through the load
/ of b->y) was missing from the listing and has been restored from the C
/ source and the surrounding text; confirm the exact instruction order
/ against the original compiler output.
e_point_point:
	pushl %ebx
	movl 8(%esp),%edx
	movl 12(%esp),%ecx
	movl (%edx),%ebx
	movl (%ecx),%eax
	subl %eax,%ebx
	movl %ebx,%eax
        ^^^^^^^^^^^^^^
	pushl %eax
	movl 4(%edx),%ebx
	fildl (%esp)
	addl $4,%esp
	movl 4(%ecx),%eax
	fmul %st(0),%st
	subl %eax,%ebx
	movl %ebx,%eax
        ^^^^^^^^^^^^^^
	pushl %eax
	fildl (%esp)
	addl $4,%esp
	fmul %st(0),%st
	faddp %st,%st(1)
/ Scale by 4.0 (.LC1) when a->n or b->n (offset 8) is zero.
	cmpl $0,8(%edx)
	je .L3
	cmpl $0,8(%ecx)
	jne .L2
.L3:
	fldl .LC1
	fmulp %st,%st(1)
.L2:
	popl %ebx
	ret
.Lfe1:
	.size	 e_point_point,.Lfe1-e_point_point
	.ident	"GCC: (GNU) egcs-2.90.12 971008 (gcc2-970802 experimental)"

Both of these register moves are unnecessary, and when I replace the
first one with the more obvious

	subl %eax,%ebx
	pushl %ebx
	movl 4(%edx),%ebx
	fildl (%esp)

the resulting code is indeed faster.
-- 
Thomas Koenig, Thomas.Koenig@ciw.uni-karlsruhe.de, ig25@dkauni2.bitnet.
The joy of engineering is to find a straight line on a double
logarithmic diagram.