From: Georg-Johann Lay <avr@gjlay.de>
To: Denis Chertykov <chertykov@gmail.com>
Cc: gcc-patches@gcc.gnu.org,
Eric Weddington <eric.weddington@atmel.com>,
Anatoly Sokolov <aesok@post.ru>
Subject: Re: [Path,AVR]: Improve loading of 32-bit constants
Date: Wed, 06 Jul 2011 16:08:00 -0000 [thread overview]
Message-ID: <4E14859A.9010407@gjlay.de> (raw)
In-Reply-To: <CADOs=za9td8ge_3K1m23oRP_AHEXGgGGy1exK5RyGP5ZQizoaQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1253 bytes --]
Denis Chertykov wrote:
> 2011/7/6 Georg-Johann Lay <avr@gjlay.de>:
>> For loading a 32-bit constant in a register, there is room for
>> improvement:
>>
>> * SF can be handled the same way as SI and therefore the patch
>> adds a peep2 to produce a *reload_insf analogon to *reload_insi.
>>
>> * If the destination register overlaps NO_LD_REGS, values already
>> loaded into some other byte can be reused by a simple MOV.
>> This is helpful when moving values like, e.g., -2, -100 etc. because
>> all high bytes are 0xff.
>>
>> * 0.0f can be directly moved to memory.
>>
>> * The mov insns contain "!d" constraint. I see no reason to make "d"
>> expensive and discourage use of d-regs. A "*d" to hide is better
>> because it neither puts additional pressure on "d" nor
>> discourages "d".
>>
>
> I would like to have a real code examples.
>
> Denis.
Hi Denis.
Attached you find a small C file and the asm that is generated by new
and old versions (-Os -mmcu=atmega88 -S -dp).
I took away some regs as potential clobbers (or -fno-peephole2) to
show the effect of high register pressure. But even if a clobber was
available, you can see that the new version is smarter in reusing
values, e.g. note the loading of -1L to r22-r25.
Johann
[-- Attachment #2: oint.c --]
[-- Type: text/x-csrc, Size: 333 bytes --]
/* Global register variables pinned to registers 26, 28 and 30.  Per the
   accompanying message they exist only to take registers away from the
   compiler, so that no scratch register is available when the 32-bit
   constants below are loaded (high register pressure).  */
register int _x asm ("26");
register int _y asm ("28");
register int _z asm ("30");
/* External callees, declared but not defined in this file.  They only
   serve as sinks that force the constants into argument registers.  */
void ibar (long, long, long, long);
void fbar (long, long, float, float);
/* Test case 1: 32-bit integer constants made mostly of 0xff and 0x00
   bytes (-1 = ff ff ff ff, -2 = fe ff ff ff, 0xff008000), passed to ibar
   together with the incoming argument x.  */
void foo1 (long x)
{
ibar (-1, x, -2, 0xff008000);
}
/* Test case 2: x is passed twice, followed by 65537 (0x00010001, whose
   low and high 16-bit halves are equal) and 0xffff0408 (two 0xff bytes
   plus two single-bit bytes).  */
void foo2 (long x)
{
ibar (x, x, 65537L, 0xffff0408);
}
/* Test case 3: same shape as foo2 but with float constants, to show
   loading of 32-bit floating-point immediates (-3.0f and 2.0f).  */
void foo3 (long x)
{
fbar (x, x, -3.0f, 2.0f);
}
[-- Attachment #3: oint-old.s --]
[-- Type: text/plain, Size: 4279 bytes --]
; Listing produced by the OLD compiler for oint.c (AVR, GNU as syntax).
.file "oint.c"
; Symbolic constants emitted by the compiler.  The first three are
; presumably the I/O addresses of the status register and of the stack
; pointer high/low bytes (inferred from the names; not used below).
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
; Register numbers: __tmp_reg__ is r0 (used below to park r31),
; __zero_reg__ is r1 (not referenced in this listing).
__tmp_reg__ = 0
__zero_reg__ = 1
; References to two startup routines; NOTE(review): presumably these pull
; in data-copy / bss-clear code at link time -- nothing here calls them.
.global __do_copy_data
.global __do_clear_bss
.text
; foo1, OLD code generation: ibar (-1, x, -2, 0xff008000).
; Arguments are placed in r22..r25, r18..r21, r14..r17 and r10..r13
; (first to fourth), matching the avr-gcc calling convention.
.global foo1
.type foo1, @function
foo1:
; Save r10..r17: they are overwritten with arguments 3 and 4 below and
; restored in reverse order in the epilogue.
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Argument 2 = x: copy the incoming 32-bit value from r22..r25 to
; r18..r21 before r22..r25 are reused for argument 1.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Argument 1 = -1: four separate LDIs, one per byte (all 0xff).
ldi r22,lo8(-1) ; 7 *movsi/5 [length = 4]
ldi r23,hi8(-1)
ldi r24,hlo8(-1)
ldi r25,hhi8(-1)
; Argument 3 = -2 into r14..r17.  r31 is borrowed as scratch: its value
; is parked in __tmp_reg__, every byte is loaded into r31 with LDI and
; then copied to its destination, and finally r31 is restored.
; This costs 10 instructions.
mov __tmp_reg__,r31 ; 9 *movsi/6 [length = 10]
ldi r31,lo8(-2)
mov r14,r31
ldi r31,hi8(-2)
mov r15,r31
ldi r31,hlo8(-2)
mov r16,r31
ldi r31,hhi8(-2)
mov r17,r31
mov r31,__tmp_reg__
; Argument 4 = 0xff008000 (printed as signed -16744448) into r10..r13,
; using the same park / LDI / MOV / restore pattern (10 instructions).
mov __tmp_reg__,r31 ; 10 *movsi/6 [length = 10]
ldi r31,lo8(-16744448)
mov r10,r31
ldi r31,hi8(-16744448)
mov r11,r31
ldi r31,hlo8(-16744448)
mov r12,r31
ldi r31,hhi8(-16744448)
mov r13,r31
mov r31,__tmp_reg__
rcall ibar ; 11 call_insn/3 [length = 1]
/* epilogue start */
; Restore r17..r10 in the reverse order of the pushes above.
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo1, .-foo1
; foo2, OLD code generation: ibar (x, x, 65537L, 0xffff0408).
; x stays in r22..r25 as argument 1 and is duplicated into r18..r21 as
; argument 2; the constants go to r14..r17 and r10..r13.
.global foo2
.type foo2, @function
foo2:
; Save r10..r17, which receive arguments 3 and 4 below.
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Argument 2 = x: copy r22..r25 to r18..r21 (r22..r25 is left untouched
; and serves as argument 1).
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Argument 3 = 65537 (0x00010001) into r14..r17: r31 is parked in
; __tmp_reg__, used as LDI staging register for each byte, then restored.
mov __tmp_reg__,r31 ; 9 *movsi/6 [length = 10]
ldi r31,lo8(65537)
mov r14,r31
ldi r31,hi8(65537)
mov r15,r31
ldi r31,hlo8(65537)
mov r16,r31
ldi r31,hhi8(65537)
mov r17,r31
mov r31,__tmp_reg__
; Argument 4 = 0xffff0408 (printed as signed -64504) into r10..r13, same
; park / LDI / MOV / restore pattern.
mov __tmp_reg__,r31 ; 10 *movsi/6 [length = 10]
ldi r31,lo8(-64504)
mov r10,r31
ldi r31,hi8(-64504)
mov r11,r31
ldi r31,hlo8(-64504)
mov r12,r31
ldi r31,hhi8(-64504)
mov r13,r31
mov r31,__tmp_reg__
rcall ibar ; 11 call_insn/3 [length = 1]
/* epilogue start */
; Restore r17..r10 in the reverse order of the pushes above.
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo2, .-foo2
; foo3, OLD code generation: fbar (x, x, -3.0f, 2.0f).
; The float constants appear as their 32-bit bit patterns:
; 0xc0400000 is -3.0f and 0x40000000 is 2.0f.
.global foo3
.type foo3, @function
foo3:
; Save r10..r17, which receive arguments 3 and 4 below.
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Argument 2 = x: copy r22..r25 to r18..r21 (r22..r25 remains argument 1).
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Argument 3 = -3.0f into r14..r17 via the movsf pattern: r31 is parked
; in __tmp_reg__, used as LDI staging register for each byte, then
; restored (10 instructions).
mov __tmp_reg__,r31 ; 9 *movsf/6 [length = 10]
ldi r31,lo8(0xc0400000)
mov r14,r31
ldi r31,hi8(0xc0400000)
mov r15,r31
ldi r31,hlo8(0xc0400000)
mov r16,r31
ldi r31,hhi8(0xc0400000)
mov r17,r31
mov r31,__tmp_reg__
; Argument 4 = 2.0f into r10..r13, same pattern (10 instructions).
mov __tmp_reg__,r31 ; 10 *movsf/6 [length = 10]
ldi r31,lo8(0x40000000)
mov r10,r31
ldi r31,hi8(0x40000000)
mov r11,r31
ldi r31,hlo8(0x40000000)
mov r12,r31
ldi r31,hhi8(0x40000000)
mov r13,r31
mov r31,__tmp_reg__
rcall fbar ; 11 call_insn/3 [length = 1]
/* epilogue start */
; Restore r17..r10 in the reverse order of the pushes above.
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo3, .-foo3
[-- Attachment #4: oint-new.s --]
[-- Type: text/plain, Size: 3529 bytes --]
; Listing produced by the NEW (patched) compiler for the same oint.c.
.file "oint.c"
; Symbolic constants emitted by the compiler; the first three are
; presumably the I/O addresses of the status register and stack pointer
; bytes (inferred from the names; none is referenced in this listing).
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
; Register numbers r0 and r1; the new code below no longer needs
; __tmp_reg__ to park a scratch register.
__tmp_reg__ = 0
__zero_reg__ = 1
.text
; foo1, NEW code generation: ibar (-1, x, -2, 0xff008000).
; Same register assignment as the old listing, but each constant is built
; byte by byte with the cheapest instruction and already-loaded bytes are
; reused, so no scratch register has to be parked.
.global foo1
.type foo1, @function
foo1:
; Save r10..r17, which receive arguments 3 and 4 below.
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Argument 2 = x: copy r22..r25 to r18..r21 before r22..r25 is reused.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Argument 1 = -1: load 0xff into r22 and r23, then one MOVW duplicates
; the pair into r24:r25 (3 instructions instead of 4).
ldi r22,lo8(-1) ; 7 *movsi/5 [length = 3]
ldi r23,lo8(-1)
movw r24,r22
; Argument 3 = -2 (bytes fe ff ff ff) into r14..r17.  r17 is part of the
; destination and accepts LDI, so it stages 0xfe for r14 first and gets
; its own final value last.  r15 becomes 0xff via CLR + DEC.
ldi r17,lo8(-2) ; 9 *movsi/6 [length = 6]
mov r14,r17
clr r15
dec r15
ldi r16,lo8(-1)
ldi r17,lo8(-1)
; Argument 4 = 0xff008000 (bytes 00 80 00 ff) into r10..r13 without any
; LDI: CLR gives 0x00, SET + BLD copies the T flag into bit 7 of r11 to
; give 0x80, and CLR + DEC gives 0xff in r13.
clr r10 ; 10 *movsi/6 [length = 7]
set
clr r11
bld r11,7
clr r12
clr r13
dec r13
rcall ibar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
; Restore r17..r10 in the reverse order of the pushes above.
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo1, .-foo1
; foo2, NEW code generation: ibar (x, x, 65537L, 0xffff0408).
.global foo2
.type foo2, @function
foo2:
; Save r10..r17, which receive arguments 3 and 4 below.
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Argument 2 = x: copy r22..r25 to r18..r21 (r22..r25 remains argument 1).
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Argument 3 = 65537 (bytes 01 00 01 00) into r14..r17: build the low
; word 0x0001 in r15:r14 (r17 stages the 1), then MOVW copies that word
; into r17:r16 because both halves are identical.
ldi r17,lo8(1) ; 9 *movsi/6 [length = 4]
mov r14,r17
clr r15
movw r16,r14
; Argument 4 = 0xffff0408 (bytes 08 04 ff ff) into r10..r13: with the
; T flag set, CLR + BLD produce the single-bit bytes 0x08 (bit 3) and
; 0x04 (bit 2); CLR + DEC gives 0xff in r12, which is reused for r13.
set ; 10 *movsi/6 [length = 8]
clr r10
bld r10,3
clr r11
bld r11,2
clr r12
dec r12
mov r13,r12
rcall ibar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
; Restore r17..r10 in the reverse order of the pushes above.
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo2, .-foo2
; foo3, NEW code generation: fbar (x, x, -3.0f, 2.0f).
; The float constants are handled like 32-bit integers:
; -3.0f = 0xc0400000 (bytes 00 00 40 c0), 2.0f = 0x40000000 (00 00 00 40).
.global foo3
.type foo3, @function
foo3:
; Save r10..r17, which receive arguments 3 and 4 below.
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Argument 2 = x: copy r22..r25 to r18..r21 (r22..r25 remains argument 1).
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Argument 3 = -3.0f into r14..r17: the two zero bytes are cleared, the
; two non-zero bytes (0x40 and 0xc0) are loaded directly with LDI into
; r16 and r17.
clr r14 ; 9 *movsf/6 [length = 4]
clr r15
ldi r16,lo8(64)
ldi r17,lo8(-64)
; Argument 4 = 2.0f into r10..r13: three cleared bytes, then SET + BLD
; copies the T flag into bit 6 of the cleared r13 to give 0x40.
clr r10 ; 10 *movsf/6 [length = 6]
clr r11
clr r12
set
clr r13
bld r13,6
rcall fbar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
; Restore r17..r10 in the reverse order of the pushes above.
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo3, .-foo3
; Compiler identification string recorded in the output file.
.ident "GCC: (GNU) 4.7.0 20110704 (experimental)"
next prev parent reply other threads:[~2011-07-06 16:00 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-06 11:54 Georg-Johann Lay
2011-07-06 13:57 ` Denis Chertykov
2011-07-06 16:08 ` Georg-Johann Lay [this message]
2011-07-06 17:27 ` Denis Chertykov
2011-07-06 18:06 ` Georg-Johann Lay
2011-07-06 18:40 ` Georg-Johann Lay
2011-07-07 7:38 ` Denis Chertykov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4E14859A.9010407@gjlay.de \
--to=avr@gjlay.de \
--cc=aesok@post.ru \
--cc=chertykov@gmail.com \
--cc=eric.weddington@atmel.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).