public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/50223] New: AVRGCC - dont clear r26 and r27.....its a (small) waste of CPU cycles.
@ 2011-08-29  3:01 NickParker at Eaton dot com
  2011-08-31 20:48 ` [Bug target/50223] " gjl at gcc dot gnu.org
  2011-09-17 19:48 ` gjl at gcc dot gnu.org
  0 siblings, 2 replies; 3+ messages in thread
From: NickParker at Eaton dot com @ 2011-08-29  3:01 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50223

             Bug #: 50223
           Summary: AVRGCC - dont clear r26 and r27.....its a (small)
                    waste of CPU cycles.
    Classification: Unclassified
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: c
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: NickParker@Eaton.com
              Host: PC Windows XP
            Target: AVR Mega 128
             Build: avr-gcc (WinAVR 20100110) 4.3.3


Don't clear r26 and r27 -- it's a (small) waste of CPU cycles.
Regards, Nick.


This function normalises a 32-bit unsigned integer and returns
the number of shifts.


/*
 * Normalise the 32-bit value at *x by shifting it left, and return the
 * total number of bit positions it was shifted.
 *
 * x: in/out pointer; read once on entry and overwritten with the
 *    shifted value before returning.
 *
 * Returns zCount + shft, truncated to uint8_t.
 *
 * leadingZeros and pgm_read_byte are not defined in this snippet;
 * leadingZeros is presumably a 256-entry table giving the number of
 * leading zero bits of its index -- TODO confirm.
 *
 * NOTE(review): if *x is 0 the while loop never exits, because quad is
 * still 0 after every shift. Presumably callers guarantee a non-zero
 * input -- confirm.
 */
uint8_t ldgZeroCntNormU32(uint32_t * x)
{
  uint8_t zCount=0;
  uint8_t shft;
  uint32_t quad;
  quad=*x;

  /* Coarse step: while the top byte is zero, shift left one whole byte
     and account for 8 bits. */
  while (!(uint8_t)(quad >> 24))
  {
    zCount += 8;
    quad <<=8;
  }
  /* Fine step: the top byte is now non-zero; use it as an index into
     the table to get the remaining shift amount. */
  shft = pgm_read_byte(&leadingZeros[(uint8_t)(quad >> 24)]);
  *x = quad << shft;
  return (zCount + shft);
}


       .file   "divu16u16.c"
   2                __SREG__ = 0x3f
   3                __SP_H__ = 0x3e
   4                __SP_L__ = 0x3d
   5                __CCP__  = 0x34
   6                __tmp_reg__ = 0
   7                __zero_reg__ = 1
  15                .Ltext0:
  16                .global ldgZeroCntNormU32
  18                ldgZeroCntNormU32:
  19                .LFB12:
  20                .LM1:
  21                .LVL0:
  22 0000 CF93              push r28
  23 0002 DF93              push r29
  24                /* prologue: function */
  25                /* frame size = 0 */
  26 0004 EC01              movw r28,r24
  27                .LM2:
  28 0006 2881              ld r18,Y
  29 0008 3981              ldd r19,Y+1
  30 000a 4A81              ldd r20,Y+2
  31 000c 5B81              ldd r21,Y+3
  32                .LVL1:
  33                .LM3:
  34 000e 852F              mov r24,r21
  35 0010 9927              clr r25
  36 0012 AA27              clr r26
  37 0014 BB27              clr r27
  38                .LVL2:
  39 0016 E82F              mov r30,r24
  40 0018 8823              tst r24
  41 001a 01F4              brne .L8
  42 001c 60E0              ldi r22,lo8(0)
  43                .LVL3:
  44                .L4:
  45                .LM4:
  46 001e 685F              subi r22,lo8(-(8))
  47                .LM5:
  48 0020 542F              mov r21,r20
  49 0022 432F              mov r20,r19
  50 0024 322F              mov r19,r18
  51 0026 2227              clr r18
  52                .LM6:
  53 0028 852F              mov r24,r21
  54 002a 9927              clr r25
  55 002c AA27              clr r26
  56 002e BB27              clr r27
  57 0030 E82F              mov r30,r24
  58 0032 8823              tst r24
  59 0034 01F0              breq .L4
  60                .L3:
  61                .LBB2:
  62                .LM7:
  63 0036 F0E0              ldi r31,lo8(0)
  64 0038 E050              subi r30,lo8(-(leadingZeros))
  65 003a F040              sbci r31,hi8(-(leadingZeros))
  66                /* #APP */
  67                 ;  111 "divu16u16.c" 1
  68 003c E491              lpm r30, Z
  69                    
  70                 ;  0 "" 2
  71                .LVL4:
  72                /* #NOAPP */
  73                .LBE2:
  74                .LM8:
  75 003e 0E2E              mov r0,r30
  76 0040 00C0              rjmp 2f
  77 0042 220F          1:  lsl r18
  78 0044 331F              rol r19
  79 0046 441F              rol r20
  80 0048 551F              rol r21
  81 004a 0A94          2:  dec r0
  82 004c 02F4              brpl 1b
  83 004e 2883              st Y,r18
  84 0050 3983              std Y+1,r19
  85 0052 4A83              std Y+2,r20
  86 0054 5B83              std Y+3,r21
  87                .LM9:
  88 0056 8E2F              mov r24,r30
  89 0058 860F              add r24,r22
  90                /* epilogue start */
  91 005a DF91              pop r29
  92 005c CF91              pop r28
  93                .LVL5:
  94 005e 0895              ret


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug target/50223] AVRGCC - dont clear r26 and r27.....its a (small) waste of CPU cycles.
  2011-08-29  3:01 [Bug c/50223] New: AVRGCC - dont clear r26 and r27.....its a (small) waste of CPU cycles NickParker at Eaton dot com
@ 2011-08-31 20:48 ` gjl at gcc dot gnu.org
  2011-09-17 19:48 ` gjl at gcc dot gnu.org
  1 sibling, 0 replies; 3+ messages in thread
From: gjl at gcc dot gnu.org @ 2011-08-31 20:48 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50223

Georg-Johann Lay <gjl at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |missed-optimization
                 CC|                            |gjl at gcc dot gnu.org

--- Comment #1 from Georg-Johann Lay <gjl at gcc dot gnu.org> 2011-08-31 20:23:25 UTC ---
This is just a missed optimization. Thus, you won't see a fix before gcc 4.7.

For gcc 4.7, note that there are optimized versions of builtins that perform
arithmetic like yours, for example

__builtin_clz/clzl/clzll (count leading zeros)
__builtin_ctz/ctzl/ctzll (count trailing zeros)
__builtin_ffs/ffsl/ffsll (find first (lowest) set bit)
...


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug target/50223] AVRGCC - dont clear r26 and r27.....its a (small) waste of CPU cycles.
  2011-08-29  3:01 [Bug c/50223] New: AVRGCC - dont clear r26 and r27.....its a (small) waste of CPU cycles NickParker at Eaton dot com
  2011-08-31 20:48 ` [Bug target/50223] " gjl at gcc dot gnu.org
@ 2011-09-17 19:48 ` gjl at gcc dot gnu.org
  1 sibling, 0 replies; 3+ messages in thread
From: gjl at gcc dot gnu.org @ 2011-09-17 19:48 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50223

Georg-Johann Lay <gjl at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
      Known to work|                            |4.6.1
         Resolution|                            |WORKSFORME

--- Comment #2 from Georg-Johann Lay <gjl at gcc dot gnu.org> 2011-09-17 18:35:52 UTC ---
The following test program

/* Lookup table indexed by a byte value in the function below; it is
   only declared here, its contents are defined elsewhere. */
extern uint8_t leadingZeros[];

/* Emit a single `lpm` instruction that loads one byte into `a` from the
   address `b`. `b` is cast to uint16_t and bound with the "z"
   constraint; `a` is a write-only register output ("=r"). */
#define pgm_read(a,b) asm ("lpm %0,%a1" : "=r" (a) : "z" ((uint16_t)(b)))

/*
 * Normalise the 32-bit value at *x by shifting it left, and return the
 * total number of bit positions it was shifted.
 *
 * x: in/out pointer; read once on entry and overwritten with the
 *    shifted value before returning.
 *
 * Returns zCount + shft, truncated to uint8_t.
 *
 * NOTE(review): if *x is 0 the while loop never exits, because quad is
 * still 0 after every shift. Presumably callers guarantee a non-zero
 * input -- confirm.
 */
uint8_t ldgZeroCntNormU32 (uint32_t* x)
{
  uint8_t zCount = 0;
  uint8_t shft;
  uint32_t quad = *x;

  /* Coarse step: while the top byte is zero, shift left one whole byte
     and account for 8 bits. */
  while (!(uint8_t)(quad >> 24))
  {
    zCount += 8;
    quad <<=8;
  }
  /* Fine step: the top byte is now non-zero; use it as an index into
     leadingZeros to fetch the remaining shift amount into shft. */
  pgm_read (shft, &leadingZeros[(uint8_t)(quad >> 24)]);
  *x = quad << shft;
  return zCount + shft;
}

yields the following result with avr-gcc-4.6.1 -Os -mmcu=atmega8 -S:

ldgZeroCntNormU32:
    movw r26,r24
    ld r20,X+
    ld r21,X+
    ld r22,X+
    ld r23,X
    sbiw r26,3
    ldi r24,lo8(0)
    rjmp .L2
.L3:
    subi r24,lo8(-(8))
    mov r23,r22
    mov r22,r21
    mov r21,r20
    clr r20
.L2:
    mov r30,r23
    tst r23
    breq .L3
    ldi r31,lo8(0)
    subi r30,lo8(-(leadingZeros))
    sbci r31,hi8(-(leadingZeros))
/* #APP */
 ;  20 "foo.c" 1
    lpm r30,Z
 ;  0 "" 2
/* #NOAPP */
    mov r0,r30
    rjmp 2f
1:    lsl r20
    rol r21
    rol r22
    rol r23
2:    dec r0
    brpl 1b
    st X+,r20
    st X+,r21
    st X+,r22
    st X,r23
    sbiw r26,3
    add r24,r30
    ret

So no CLR instructions are generated for the loop condition while (!(uint8_t)(quad >> 24)).

Closed as works for me.


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-09-17 18:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-29  3:01 [Bug c/50223] New: AVRGCC - dont clear r26 and r27.....its a (small) waste of CPU cycles NickParker at Eaton dot com
2011-08-31 20:48 ` [Bug target/50223] " gjl at gcc dot gnu.org
2011-09-17 19:48 ` gjl at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).