* m68k - GCC 4.4.0 generates not so good code from asm inline @ 2009-07-29 11:41 ami_stuff 2009-07-29 16:57 ` ami_stuff 0 siblings, 1 reply; 3+ messages in thread From: ami_stuff @ 2009-07-29 11:41 UTC (permalink / raw) To: gcc Hi, Here is a C source code which I compiled with GCC 3.4.0 and GCC 4.4.0. GCC 3.4.0 output looks a lot better. #include <stdio.h> #include <stdint.h> #define umul_ppmm(xh, xl, a, b) \ __asm__ ("| Inlined umul_ppmm\n" \ " move.l %0,%/d5\n" \ " move.l %1,%/d4\n" \ " moveq #16,%/d3\n" \ " move.l %0,%/d2\n" \ " mulu %1,%0\n" \ " lsr.l %/d3,%/d4\n" \ " lsr.l %/d3,%/d5\n" \ " mulu %/d4,%/d2\n" \ " mulu %/d5,%1\n" \ " mulu %/d5,%/d4\n" \ " move.l %/d2,%/d5\n" \ " lsr.l %/d3,%/d2\n" \ " add.w %1,%/d5\n" \ " addx.l %/d2,%/d4\n" \ " lsl.l %/d3,%/d5\n" \ " lsr.l %/d3,%1\n" \ " add.l %/d5,%0\n" \ " addx.l %/d4,%1" \ : "=d" ((uint32_t) (xl)), "=d" ((uint32_t) (xh)) \ : "0" ((uint32_t) (a)), "1" ((uint32_t) (b)) \ : "d2", "d3", "d4", "d5") inline int64_t MUL64(int a, int b) { uint32_t au = a; uint32_t bu = b; uint32_t resh, resl; uint64_t res; umul_ppmm(resh, resl, au, bu); if (a < 0) resh -= bu; if (b < 0) resh -= au; res = ((uint64_t)resh << 32) | resl; return res; } GCC 4.4.0 asm output: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16128,-(sp) move.l 28(sp),d0 move.l 32(sp),a0 move.l d0,d6 move.l a0,d1 #APP ;# 36 "mul642.c" 1 | Inlined umul_ppmm move.l d6,d5 move.l d1,d4 moveq #16,d3 move.l d6,d2 mulu d1,d6 lsr.l d3,d4 lsr.l d3,d5 mulu d4,d2 mulu d5,d1 mulu d5,d4 move.l d2,d5 lsr.l d3,d2 add.w d1,d5 addx.l d2,d4 lsl.l d3,d5 lsr.l d3,d1 add.l d5,d6 addx.l d4,d1 #NO_APP tst.l d0 jlt L6 tst.l a0 jlt L7 L3: move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L7: sub.l d0,d1 move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L6: sub.l a0,d1 tst.l a0 jge L3 jra L7 GCC 3.4.0 asm output: #NO_APP .text .even .globl 
_MUL64 _MUL64: moveml #0x3f00,sp@- movel sp@(28),d1 movel sp@(32),d0 movel d1,d7 movel d0,d6 #APP | Inlined umul_ppmm move.l d7,d5 move.l d6,d4 moveq #16,d3 move.l d7,d2 mulu d6,d7 lsr.l d3,d4 lsr.l d3,d5 mulu d4,d2 mulu d5,d6 mulu d5,d4 move.l d2,d5 lsr.l d3,d2 add.w d6,d5 addx.l d2,d4 lsl.l d3,d5 lsr.l d3,d6 add.l d5,d7 addx.l d4,d6 #NO_APP tstl d1 jlt L5 tstl d0 jge L3 jra L6 .even L5: subl d0,d6 tstl d0 jge L3 .even L6: subl d1,d6 .even L3: movel d6,d0 clrl d1 orl d7,d1 moveml sp@+,#0xfc rts Is it a regression? Regards ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: m68k - GCC 4.4.0 generates not so good code from asm inline 2009-07-29 11:41 m68k - GCC 4.4.0 generates not so good code from asm inline ami_stuff @ 2009-07-29 16:57 ` ami_stuff 2009-07-29 17:12 ` Bernd Roesch 0 siblings, 1 reply; 3+ messages in thread From: ami_stuff @ 2009-07-29 16:57 UTC (permalink / raw) To: ami_stuff; +Cc: gcc When I use -O1 with GCC 4.4.0 (-m68060 -fomit-frame-pointer), I get better code. #include <stdio.h> #include <stdint.h> inline int64_t MUL64(int a, int b) { uint32_t resh, resl; uint32_t au = a; uint32_t bu = b; __asm__ ("move.l %0, d5\n\t" "move.l %1, d4\n\t" "moveq #16, d3\n\t" "move.l %0, d2\n\t" "mulu %1, %0\n\t" "lsr.l d3, d4\n\t" "lsr.l d3, d5\n\t" "mulu d4, d2\n\t" "mulu d5, %1\n\t" "mulu d5, d4\n\t" "move.l d2, d5\n\t" "lsr.l d3, d2\n\t" "add.w %1, d5\n\t" "addx.l d2, d4\n\t" "lsl.l d3, d5\n\t" "lsr.l d3, %1\n\t" "add.l d5, %0\n\t" "addx.l d4, %1\n\t" : "=d"(resl), "=d"(resh) : "0"(au), "1"(bu) : "d2", "d3", "d4", "d5"); if (a < 0) resh -= bu; if (b < 0) resh -= au; return ((uint64_t)resh << 32) | resl; } GCC 4.4.0 -O3: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16128,-(sp) move.l 28(sp),d0 move.l 32(sp),a0 move.l d0,d6 move.l a0,d1 #APP ;# 11 "mul645.c" 1 move.l d6, d5 move.l d1, d4 moveq #16, d3 move.l d6, d2 mulu d1, d6 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d1 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d1, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d1 add.l d5, d6 addx.l d4, d1 #NO_APP tst.l d0 jlt L6 tst.l a0 jlt L7 L3: move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L7: sub.l d0,d1 move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L6: sub.l a0,d1 tst.l a0 jge L3 jra L7 GCC 4.4.0 -O2: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16128,-(sp) move.l 28(sp),d0 move.l 32(sp),a0 move.l d0,d6 move.l a0,d1 #APP ;# 11 "mul645.c" 1 move.l d6, d5 move.l d1, d4 moveq 
#16, d3 move.l d6, d2 mulu d1, d6 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d1 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d1, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d1 add.l d5, d6 addx.l d4, d1 #NO_APP tst.l d0 jlt L6 tst.l a0 jlt L7 L3: move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L7: sub.l d0,d1 move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L6: sub.l a0,d1 tst.l a0 jge L3 jra L7 GCC 4.4.0 -O1: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16176,-(sp) move.l 40(sp),d0 move.l 36(sp),a2 move.l a2,d7 move.l d0,d6 #APP ;# 11 "mul645.c" 1 move.l d7, d5 move.l d6, d4 moveq #16, d3 move.l d7, d2 mulu d6, d7 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d6 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d6, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d6 add.l d5, d7 addx.l d4, d6 #NO_APP tst.l a2 jge L2 sub.l d0,d6 L2: tst.l d0 jge L3 sub.l a2,d6 L3: move.l d6,d1 clr.l d2 or.l d7,d2 move.l d1,d0 move.l d2,d1 movem.l (sp)+,#3324 rts GCC 4.4.0 -O0: #NO_APP .text .even .globl _MUL64 _MUL64: lea (-16,sp),sp movem.l #16128,-(sp) move.l 44(sp),32(sp) move.l 48(sp),36(sp) move.l 32(sp),d1 move.l 36(sp),d0 #APP ;# 11 "mul645.c" 1 move.l d1, d5 move.l d0, d4 moveq #16, d3 move.l d1, d2 mulu d0, d1 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d0 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d0, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d0 add.l d5, d1 addx.l d4, d0 #NO_APP move.l d1,28(sp) move.l d0,24(sp) tst.l 44(sp) jge L2 move.l 36(sp),d0 sub.l d0,24(sp) L2: tst.l 48(sp) jge L3 move.l 32(sp),d2 sub.l d2,24(sp) L3: move.l 24(sp),d7 clr.l d6 move.l d7,d0 clr.l d1 move.l 28(sp),a1 lea 0.w,a0 move.l a0,d2 move.l a1,d3 or.l d2,d0 or.l d3,d1 movem.l (sp)+,#252 lea (16,sp),sp rts Regards ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: m68k - GCC 4.4.0 generates not so good code from asm inline 2009-07-29 16:57 ` ami_stuff @ 2009-07-29 17:12 ` Bernd Roesch 0 siblings, 0 replies; 3+ messages in thread From: Bernd Roesch @ 2009-07-29 17:12 UTC (permalink / raw) To: ami_stuff; +Cc: gcc Hello On 29.07.09, you wrote: If you have an account, you can report that as a bug. GCC 4 has the advantage that it is possible to switch the optimizer on or off in the source, but how that works, I don't know. > When I use -O1 with GCC 4.4.0 (-m68060 -fomit-frame-pointer), I get better > code. > > #include <stdio.h> > #include <stdint.h> > > inline int64_t MUL64(int a, int b) > { > > uint32_t resh, resl; > uint32_t au = a; > uint32_t bu = b; > > __asm__ ("move.l %0, d5\n\t" > "move.l %1, d4\n\t" > "moveq #16, d3\n\t" > "move.l %0, d2\n\t" > "mulu %1, %0\n\t" > "lsr.l d3, d4\n\t" > "lsr.l d3, d5\n\t" > "mulu d4, d2\n\t" > "mulu d5, %1\n\t" > "mulu d5, d4\n\t" > "move.l d2, d5\n\t" > "lsr.l d3, d2\n\t" > "add.w %1, d5\n\t" > "addx.l d2, d4\n\t" > "lsl.l d3, d5\n\t" > "lsr.l d3, %1\n\t" > "add.l d5, %0\n\t" > "addx.l d4, %1\n\t" > : "=d"(resl), "=d"(resh) > : "0"(au), "1"(bu) > : "d2", "d3", "d4", "d5"); > > if (a < 0) > resh -= bu; > if (b < 0) > resh -= au; > > return ((uint64_t)resh << 32) | resl; > } > > GCC 4.4.0 -O3: > > #NO_APP > .text > .even > .globl _MUL64 > _MUL64: > movem.l #16128,-(sp) > move.l 28(sp),d0 > move.l 32(sp),a0 > move.l d0,d6 > move.l a0,d1 > #APP > ;# 11 "mul645.c" 1 > move.l d6, d5 > move.l d1, d4 > moveq #16, d3 > move.l d6, d2 > mulu d1, d6 > lsr.l d3, d4 > lsr.l d3, d5 > mulu d4, d2 > mulu d5, d1 > mulu d5, d4 > move.l d2, d5 > lsr.l d3, d2 > add.w d1, d5 > addx.l d2, d4 > lsl.l d3, d5 > lsr.l d3, d1 > add.l d5, d6 > addx.l d4, d1 > > #NO_APP > tst.l d0 > jlt L6 > tst.l a0 > jlt L7 > L3: > move.l d1,d2 > clr.l d3 > move.l d2,d0 > move.l d3,d1 > 
or.l d6,d1 > move.l d0,d6 > move.l d1,d7 > move.l d7,d1 > movem.l (sp)+,#252 > rts > L6: > sub.l a0,d1 > tst.l a0 > jge L3 > jra L7 > > GCC 4.4.0 -O2: > > #NO_APP > .text > .even > .globl _MUL64 > _MUL64: > movem.l #16128,-(sp) > move.l 28(sp),d0 > move.l 32(sp),a0 > move.l d0,d6 > move.l a0,d1 > #APP > ;# 11 "mul645.c" 1 > move.l d6, d5 > move.l d1, d4 > moveq #16, d3 > move.l d6, d2 > mulu d1, d6 > lsr.l d3, d4 > lsr.l d3, d5 > mulu d4, d2 > mulu d5, d1 > mulu d5, d4 > move.l d2, d5 > lsr.l d3, d2 > add.w d1, d5 > addx.l d2, d4 > lsl.l d3, d5 > lsr.l d3, d1 > add.l d5, d6 > addx.l d4, d1 > > #NO_APP > tst.l d0 > jlt L6 > tst.l a0 > jlt L7 > L3: > move.l d1,d2 > clr.l d3 > move.l d2,d0 > move.l d3,d1 > or.l d6,d1 > move.l d0,d6 > move.l d1,d7 > move.l d7,d1 > movem.l (sp)+,#252 > rts > L7: > sub.l d0,d1 > move.l d1,d2 > clr.l d3 > move.l d2,d0 > move.l d3,d1 > or.l d6,d1 > move.l d0,d6 > move.l d1,d7 > move.l d7,d1 > movem.l (sp)+,#252 > rts > L6: > sub.l a0,d1 > tst.l a0 > jge L3 > jra L7 > > GCC 4.4.0 -O1: > > #NO_APP > .text > .even > .globl _MUL64 > _MUL64: > movem.l #16176,-(sp) > move.l 40(sp),d0 > move.l 36(sp),a2 > move.l a2,d7 > move.l d0,d6 > #APP > ;# 11 "mul645.c" 1 > move.l d7, d5 > move.l d6, d4 > moveq #16, d3 > move.l d7, d2 > mulu d6, d7 > lsr.l d3, d4 > lsr.l d3, d5 > mulu d4, d2 > mulu d5, d6 > mulu d5, d4 > move.l d2, d5 > lsr.l d3, d2 > add.w d6, d5 > addx.l d2, d4 > lsl.l d3, d5 > lsr.l d3, d6 > add.l d5, d7 > addx.l d4, d6 > > #NO_APP > tst.l a2 > jge L2 > sub.l d0,d6 > L2: > tst.l d0 > jge L3 > sub.l a2,d6 > L3: > move.l d6,d1 > clr.l d2 > or.l d7,d2 > move.l d1,d0 > move.l d2,d1 > movem.l (sp)+,#3324 > rts > > GCC 4.4.0 -O0: > > #NO_APP > .text > .even > .globl _MUL64 > _MUL64: > lea (-16,sp),sp > movem.l #16128,-(sp) > move.l 44(sp),32(sp) > move.l 48(sp),36(sp) > move.l 32(sp),d1 > move.l 36(sp),d0 > #APP > ;# 11 "mul645.c" 1 > move.l d1, d5 > move.l d0, d4 > moveq #16, d3 > move.l d1, d2 > mulu d0, d1 > lsr.l d3, d4 > lsr.l d3, d5 > 
mulu d4, d2 > mulu d5, d0 > mulu d5, d4 > move.l d2, d5 > lsr.l d3, d2 > add.w d0, d5 > addx.l d2, d4 > lsl.l d3, d5 > lsr.l d3, d0 > add.l d5, d1 > addx.l d4, d0 > > #NO_APP > move.l d1,28(sp) > move.l d0,24(sp) > tst.l 44(sp) > jge L2 > move.l 36(sp),d0 > sub.l d0,24(sp) > L2: > tst.l 48(sp) > jge L3 > move.l 32(sp),d2 > sub.l d2,24(sp) > L3: > move.l 24(sp),d7 > clr.l d6 > move.l d7,d0 > clr.l d1 > move.l 28(sp),a1 > lea 0.w,a0 > move.l a0,d2 > move.l a1,d3 > or.l d2,d0 > or.l d3,d1 > movem.l (sp)+,#252 > lea (16,sp),sp > rts > > Regards > Regards ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-07-29 17:12 UTC | newest] Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2009-07-29 11:41 m68k - GCC 4.4.0 generates not so good code from asm inline ami_stuff 2009-07-29 16:57 ` ami_stuff 2009-07-29 17:12 ` Bernd Roesch
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).