public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* egcs 1.0.2 success and problem
@ 1998-04-05 21:29 Sumner Reuben
  1998-10-31  5:02 ` Jeffrey A Law
  0 siblings, 1 reply; 2+ messages in thread
From: Sumner Reuben @ 1998-04-05 21:29 UTC (permalink / raw)
  To: egcs

I successfully compiled egcs 1.0.2 on RedHat 5.0 (no surprise I'm sure)
I haven't included config.guess if you want it just ask.

I noticed an optimization problem with gcc 2.7.2 and it is still present
in egcs 1.0.2 (I specifically wanted to test this problem).  I have pared
down the sources a fair bit, if I need to do more I will try.

Here is the C source

typedef unsigned uword;
typedef unsigned long long udword;

/*
 * mulmod(a, b, c): (a * b) mod c.  The operands are widened to udword for the
 * multiplication and the remainder, and the result is narrowed back to uword.
 *
 * Every parameter is parenthesised so that an expression argument such as
 * `p - x` is cast and multiplied as a unit; without the parentheses the cast
 * and the `*` would bind only to the first operand of the argument.
 * Each argument is still evaluated exactly once.
 */
#define mulmod(a,b,c) ((uword)(((udword)(a)*(udword)(b))%(udword)(c)))

/* State operated on by transact_server(). */
typedef struct {
   uword  p;      /* modulus handed to mulmod() */
   uword  count;  /* number of x[] entries scanned; also the den[] index written */
   uword *x;      /* entries compared against client_no */
   uword *den;    /* den[count] is set to 1 and then updated in place */
} server_struct;

/*
 * Reduced test case for the code-generation issue reported in this message.
 *
 * Returns 2 as soon as client_no is found among the first server->count
 * entries of server->x.  Otherwise it sets server->den[server->count] to 1,
 * multiplies that entry by client_no modulo server->p once per entry of x,
 * and returns 0.
 */
int
transact_server(server_struct *server, uword client_no, uword cP)
{
   int i;

   /* Look for client_no in x[0..count-1].  i is int and server->count is
      unsigned, so the loop condition is evaluated as an unsigned compare. */
   for (i = 0; i < server->count; i += 1)
      if (server->x[i] == client_no) return 2;

   /* client_no was not found: start the accumulator at 1. */
   server->den[server->count] = 1;
   for (i = 0; i < server->count; i += 1) {
      /* cP is a by-value parameter and is never read after this loop, so
         this assignment has no effect visible to the caller. */
      cP = mulmod(cP, server->p - server->x[i], server->p);
      /* The same den[] slot is updated on every iteration; the index is
         server->count, not i. */
      server->den[server->count] =
                  mulmod(server->den[server->count],client_no,server->p);
   }
   return 0;
}

Here is the generated assembly for a pentium.

	.file	"foo.c"
	.version	"01.01"
/ GNU C version egcs-2.90.27 980315 (egcs-1.0.2 release) (i586-pc-linux-gnu) compiled by GNU C version egcs-2.90.27 980315 (egcs-1.0.2 release).
/ options passed:  -O9 -Wall
/ options enabled:  -fdefer-pop -fcse-follow-jumps -fcse-skip-blocks
/ -fexpensive-optimizations -fthread-jumps -fstrength-reduce -fpeephole
/ -fforce-mem -ffunction-cse -finline-functions -finline
/ -fkeep-static-consts -fcaller-saves -fpcc-struct-return
/ -frerun-cse-after-loop -frerun-loop-opt -fschedule-insns2 -fcommon
/ -fverbose-asm -fgnu-linker -fregmove -falias-check -fargument-alias
/ -m80387 -mhard-float -mno-soft-float -mieee-fp -mfp-ret-in-387
/ -mschedule-prologue -mcpu=pentium -march=pentium

gcc2_compiled.:
.globl __umoddi3
.text
	.align 4
.globl transact_server
	.type	 transact_server,@function
# transact_server(server, client_no, cP) as emitted by egcs 1.0.2 with
# -O9 -mcpu=pentium.  AT&T syntax: the source operand comes first.
# Stack arguments used below: 8(%ebp) = server, 12(%ebp) = client_no.
# Offsets into *server used below: 0 = p, 4 = count, 8 = x, 12 = den.
# %edi is the loop counter i in both loops; the result is returned in %eax.
transact_server:
	# Prologue: set up the frame, reserve 28 bytes of locals and save
	# %edi, %esi and %ebx.
	pushl %ebp
	movl %esp,%ebp
	subl $28,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	# First loop setup: i = 0; skip the search entirely when count == 0
	# (unsigned compare).
	movl 8(%ebp),%eax
	xorl %edi,%edi
	movl 4(%eax),%ecx
	cmpl %ecx,%edi
	jae .L3
	# %ebx = count (loop bound), %ecx = server->x, advanced by 4 per pass.
	movl %ecx,%ebx
	movl 8(%eax),%ecx
	.align 4
.L5:
	# client_no is reloaded from the stack on every iteration although
	# nothing in this loop changes it -- this is the load the report
	# questions.
	movl 12(%ebp),%edx        <-- why is this in the loop!!!
	cmpl %edx,(%ecx)
	jne .L4
	# x[i] == client_no: return 2 through the shared epilogue.
	movl $2,%eax
	jmp .L13
	.align 4
.L4:
	# Next element: bump the pointer and i, continue while i < count
	# (unsigned).
	addl $4,%ecx
	incl %edi
	cmpl %ebx,%edi
	jb .L5
.L3:
	# server->den[server->count] = 1.  The server pointer is loaded from
	# the stack twice here, once for count and once for den.
	movl 8(%ebp),%eax
	movl 8(%ebp),%edx
	movl 4(%eax),%eax
	movl 12(%edx),%edx
	movl $1,(%edx,%eax,4)
	# Second loop setup: i = 0; skip the loop when count == 0.
	movl 8(%ebp),%eax
	xorl %edi,%edi
	cmpl %edi,4(%eax)
	jbe .L9
	.align 4
.L11:
	# Zero-extend server->p to 64 bits and spill it to -16/-12(%ebp).
	# server, p, count and den are all reloaded on every iteration.
	movl 8(%ebp),%edx
	movl (%edx),%edx
	movl %edx,%eax
	xorl %edx,%edx
	movl %eax,-16(%ebp)
	movl %edx,-12(%ebp)
	# %esi = count, %ebx = den, %eax = client_no, %ecx = den[count].
	movl 8(%ebp),%edx
	movl 4(%edx),%esi
	movl 12(%edx),%ebx
	movl 12(%ebp),%eax
	movl (%ebx,%esi,4),%ecx
	# %edx:%eax = client_no * den[count] (unsigned 32x32 -> 64), spilled
	# to -8/-4(%ebp).
	mull %ecx
	movl %eax,-8(%ebp)
	movl %edx,-4(%ebp)
	# Push the 64-bit modulus, then the 64-bit product (high word first
	# each time), and call the 64-bit unsigned remainder helper.
	pushl -12(%ebp)
	pushl -16(%ebp)
	pushl -4(%ebp)
	pushl -8(%ebp)
	call __umoddi3
	# The 64-bit result is spilled and only its low word is reloaded.
	movl %eax,-16(%ebp)
	movl %edx,-12(%ebp)
	addl $16,%esp
	movl -16(%ebp),%edx
	movl 8(%ebp),%eax
	incl %edi
	# den[count] = low word of the remainder.  This uses the %ebx/%esi
	# values loaded before the call, so it relies on the helper
	# preserving them.
	movl %edx,(%ebx,%esi,4)
	# Loop while count > i (unsigned); count is re-read from memory.
	# No instruction in this loop reads 16(%ebp) or server->x, i.e. no
	# code was generated for the cP update in the C source.
	cmpl %edi,4(%eax)
	ja .L11
.L9:
	# return 0
	xorl %eax,%eax
.L13:
	# Epilogue: point %esp at the three saved registers (28 bytes of
	# locals + 12 = 40 below %ebp), restore them, then drop the frame.
	leal -40(%ebp),%esp
	popl %ebx
	popl %esi
	popl %edi
	movl %ebp,%esp
	popl %ebp
	ret
.Lfe1:
	.size	 transact_server,.Lfe1-transact_server
	.ident	"GCC: (GNU) egcs-2.90.27 980315 (egcs-1.0.2 release)"

If you take out some more of the junk in there, then it does the right thing.
It also seems to me that the jumps could be optimized quite a bit (my
recollection is that the CPU is optimised for the branch-not-taken case, so
the jne .L4 should be reversed).

I also noticed that in the i386 machine description divide is

(define_insn "divqi3"
  [(set (match_operand:QI 0 "register_operand" "=a")
        (div:QI (match_operand:HI 1 "register_operand" "0")
                (match_operand:QI 2 "nonimmediate_operand" "qm")))]
  ""
  "idiv%B0 %2")
 
The "qm" constraint doesn't allow esi or edi, which are valid.  Is this intentional?
(This is the same in the old gcc code, of course.)

Thanks,

Reuben

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: egcs 1.0.2 success and problem
  1998-04-05 21:29 egcs 1.0.2 success and problem Sumner Reuben
@ 1998-10-31  5:02 ` Jeffrey A Law
  0 siblings, 0 replies; 2+ messages in thread
From: Jeffrey A Law @ 1998-10-31  5:02 UTC (permalink / raw)
  To: Sumner Reuben; +Cc: egcs

  In message <199804052121.AAA00146@hadar.wisdom.weizmann.ac.il>you write:
  > I successfully compiled egcs 1.0.2 on RedHat 5.0 (no surprise I'm sure)
  > I haven't included config.guess if you want it just ask.
  > 
  > I noticed an optimization problem with gcc 2.7.2 and it is still present
  > in egcs 1.0.2 (I specifically wanted to test this problem).  I have pared
  > down the sources a fair bit, if I need to do more I will try.
I believe if you try the current sources in the egcs CVS tree, you'll find that
the second (and more important) loop in your testcase is optimized noticeably
better now.


Here's what you were getting:

  > .L11:
  > 	movl 8(%ebp),%edx
  > 	movl (%edx),%edx
  > 	movl %edx,%eax
  > 	xorl %edx,%edx
  > 	movl %eax,-16(%ebp)
  > 	movl %edx,-12(%ebp)
  > 	movl 8(%ebp),%edx
  > 	movl 4(%edx),%esi
  > 	movl 12(%edx),%ebx
  > 	movl 12(%ebp),%eax
  > 	movl (%ebx,%esi,4),%ecx
  > 	mull %ecx
  > 	movl %eax,-8(%ebp)
  > 	movl %edx,-4(%ebp)
  > 	pushl -12(%ebp)
  > 	pushl -16(%ebp)
  > 	pushl -4(%ebp)
  > 	pushl -8(%ebp)
  > 	call __umoddi3
  > 	movl %eax,-16(%ebp)
  > 	movl %edx,-12(%ebp)
  > 	addl $16,%esp
  > 	movl -16(%ebp),%edx
  > 	movl 8(%ebp),%eax
  > 	incl %edi
  > 	movl %edx,(%ebx,%esi,4)
  > 	cmpl %edi,4(%eax)
  > 	ja .L11

Here's what I get with the current sources:

.L11:
        movl -4(%ebp),%ebx
        movl 12(%ebp),%eax
        movl (%ebx,%edi,4),%ecx
        mull %ecx
        movl 8(%ebp),%ecx
        movl %eax,-12(%ebp)
        movl %edx,-8(%ebp)
        movl (%ecx),%eax
        xorl %edx,%edx
        pushl %edx
        pushl %eax
        movl -12(%ebp),%eax
        movl -8(%ebp),%edx
        pushl %edx
        pushl %eax
        call __umoddi3
        movl 8(%ebp),%edx
        addl $16,%esp
        movl %eax,(%ebx,%edi,4)
        incl %esi
        movl 4(%edx),%eax
        movl %eax,%edi
        cmpl %edi,%esi
        jb .L11


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~1998-10-31  5:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1998-04-05 21:29 egcs 1.0.2 success and problem Sumner Reuben
1998-10-31  5:02 ` Jeffrey A Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).