public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Georg-Johann Lay <avr@gjlay.de>
To: Denis Chertykov <chertykov@gmail.com>
Cc: gcc-patches@gcc.gnu.org,
	Eric Weddington <eric.weddington@atmel.com>,
	 Anatoly Sokolov <aesok@post.ru>
Subject: Re: [Path,AVR]: Improve loading of 32-bit constants
Date: Wed, 06 Jul 2011 16:08:00 -0000	[thread overview]
Message-ID: <4E14859A.9010407@gjlay.de> (raw)
In-Reply-To: <CADOs=za9td8ge_3K1m23oRP_AHEXGgGGy1exK5RyGP5ZQizoaQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1253 bytes --]

Denis Chertykov wrote:
> 2011/7/6 Georg-Johann Lay <avr@gjlay.de>:
>> For loading a 32-bit constant in a register, there is room for
>> improvement:
>>
>> * SF can be handled the same way as SI and therefore the patch
>>  adds a peep2 to produce a *reload_insf analogon to *reload_insi.
>>
>> * If the destination register overlaps NO_LD_REGS, values already
>>  loaded into some other byte can be reused by a simple MOV.
>>  This is helpful when moving values like, e.g. -2, -100 etc. because
>>  all high bytes are 0xff.
>>
>> * 0.0f can be directly moved to memory.
>>
>> * The mov insns contain "!d" constraint. I see no reason to make "d"
>>  expensive and discourage use of d-regs.  A "*d" to hide is better
>>  because it neither puts additional pressure on "d" nor
>>  discourages "d".
>>
> 
> I would like to have a real code examples.
> 
> Denis.

Hi Denis.

Attached you find a small C file and the asm that is generated by new
and old versions (-Os -mmcu=atmega88 -S -dp).

I took away some regs as potential clobbers (or -fno-peephole2) to
show the effect of high register pressure.  But even if a clobber was
available you can see that the new version is smarter in reusing
values, e.g. note the loading of -1L to r22-r25.

Johann

[-- Attachment #2: oint.c --]
[-- Type: text/x-csrc, Size: 333 bytes --]

/* Global register variables: bind _x, _y and _z to registers 26, 28 and 30.
   The accompanying message says registers were taken away on purpose to
   show code generation under high register pressure. */
register int _x asm ("26");
register int _y asm ("28");
register int _z asm ("30");

/* Callees are only declared here, not defined; each takes four arguments. */
void ibar (long, long, long, long);
void fbar (long, long, float, float);

/* Call ibar with x as the second argument and the integer constants
   -1, -2 and 0xff008000 in the other three positions. */
void foo1 (long x)
{
    ibar (-1, x, -2, 0xff008000);
}

/* Call ibar with x as the first two arguments, followed by the integer
   constants 65537 (0x00010001) and 0xffff0408. */
void foo2 (long x)
{
    ibar (x, x, 65537L, 0xffff0408);
}

/* Call fbar with x as the first two arguments, followed by the float
   constants -3.0f and 2.0f. */
void foo3 (long x)
{
    fbar (x, x, -3.0f, 2.0f);
}


[-- Attachment #3: oint-old.s --]
[-- Type: text/plain, Size: 4279 bytes --]

	; Preamble of the old listing: source file name, symbol definitions,
	; external references and the switch to the code section.
	; __tmp_reg__ names register 0; the functions below use it to park r31.
	; NOTE(review): 0x3f/0x3e/0x3d are presumably the I/O addresses of the
	; status register and the stack pointer bytes, and __zero_reg__ a
	; register kept at zero; none of them is referenced in this listing.
	.file	"oint.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
	; NOTE(review): presumably these references pull in the startup
	; routines that initialise .data and .bss; confirm against avr-libc.
	.global __do_copy_data
	.global __do_clear_bss
	.text
; foo1, old code generation: ibar (-1, x, -2, 0xff008000).
; Constant loading costs 4 + 10 + 10 = 24 according to the length notes
; on insns 7, 9 and 10.
.global	foo1
	.type	foo1, @function
foo1:
	; Save r10..r17; they receive constants below and are restored
	; in reverse order in the epilogue.
	push r10	 ;  16	*pushqi/1	[length = 1]
	push r11	 ;  17	*pushqi/1	[length = 1]
	push r12	 ;  18	*pushqi/1	[length = 1]
	push r13	 ;  19	*pushqi/1	[length = 1]
	push r14	 ;  20	*pushqi/1	[length = 1]
	push r15	 ;  21	*pushqi/1	[length = 1]
	push r16	 ;  22	*pushqi/1	[length = 1]
	push r17	 ;  23	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy the 32-bit value in r22..r25 to r18..r21 as two register pairs.
	; NOTE(review): presumably x moving from the first-argument to the
	; second-argument registers; confirm against the avr-gcc ABI.
	movw r18,r22	 ;  2	*movsi/1	[length = 2]
	movw r20,r24
	; -1 into r22..r25: one ldi per byte, although all four bytes are equal.
	ldi r22,lo8(-1)	 ;  7	*movsi/5	[length = 4]
	ldi r23,hi8(-1)
	ldi r24,hlo8(-1)
	ldi r25,hhi8(-1)
	; -2 into r14..r17: r31 is parked in __tmp_reg__, used as the ldi
	; target for every byte (ldi only accepts r16..r31), then restored.
	; Even r16 and r17 are filled through r31 here.
	mov __tmp_reg__,r31	 ;  9	*movsi/6	[length = 10]
	ldi r31,lo8(-2)
	mov r14,r31
	ldi r31,hi8(-2)
	mov r15,r31
	ldi r31,hlo8(-2)
	mov r16,r31
	ldi r31,hhi8(-2)
	mov r17,r31
	mov r31,__tmp_reg__
	; -16744448 (= 0xff008000) into r10..r13 with the same
	; save / ldi+mov per byte / restore pattern.
	mov __tmp_reg__,r31	 ;  10	*movsi/6	[length = 10]
	ldi r31,lo8(-16744448)
	mov r10,r31
	ldi r31,hi8(-16744448)
	mov r11,r31
	ldi r31,hlo8(-16744448)
	mov r12,r31
	ldi r31,hhi8(-16744448)
	mov r13,r31
	mov r31,__tmp_reg__
	rcall ibar	 ;  11	call_insn/3	[length = 1]
/* epilogue start */
	; Restore r17..r10, mirroring the pushes above.
	pop r17	 ;  26	popqi	[length = 1]
	pop r16	 ;  27	popqi	[length = 1]
	pop r15	 ;  28	popqi	[length = 1]
	pop r14	 ;  29	popqi	[length = 1]
	pop r13	 ;  30	popqi	[length = 1]
	pop r12	 ;  31	popqi	[length = 1]
	pop r11	 ;  32	popqi	[length = 1]
	pop r10	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]
	.size	foo1, .-foo1
; foo2, old code generation: ibar (x, x, 65537L, 0xffff0408).
; Constant loading costs 10 + 10 = 20 according to the length notes
; on insns 9 and 10.
.global	foo2
	.type	foo2, @function
foo2:
	; Save r10..r17; they receive constants below and are restored
	; in reverse order in the epilogue.
	push r10	 ;  16	*pushqi/1	[length = 1]
	push r11	 ;  17	*pushqi/1	[length = 1]
	push r12	 ;  18	*pushqi/1	[length = 1]
	push r13	 ;  19	*pushqi/1	[length = 1]
	push r14	 ;  20	*pushqi/1	[length = 1]
	push r15	 ;  21	*pushqi/1	[length = 1]
	push r16	 ;  22	*pushqi/1	[length = 1]
	push r17	 ;  23	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Duplicate the 32-bit value in r22..r25 into r18..r21; r22..r25 is
	; left untouched, matching x being passed twice.
	movw r18,r22	 ;  2	*movsi/1	[length = 2]
	movw r20,r24
	; 65537 (0x00010001) into r14..r17: r31 is parked in __tmp_reg__,
	; used as the ldi target for every byte, then restored.
	mov __tmp_reg__,r31	 ;  9	*movsi/6	[length = 10]
	ldi r31,lo8(65537)
	mov r14,r31
	ldi r31,hi8(65537)
	mov r15,r31
	ldi r31,hlo8(65537)
	mov r16,r31
	ldi r31,hhi8(65537)
	mov r17,r31
	mov r31,__tmp_reg__
	; -64504 (= 0xffff0408) into r10..r13 with the same pattern.
	mov __tmp_reg__,r31	 ;  10	*movsi/6	[length = 10]
	ldi r31,lo8(-64504)
	mov r10,r31
	ldi r31,hi8(-64504)
	mov r11,r31
	ldi r31,hlo8(-64504)
	mov r12,r31
	ldi r31,hhi8(-64504)
	mov r13,r31
	mov r31,__tmp_reg__
	rcall ibar	 ;  11	call_insn/3	[length = 1]
/* epilogue start */
	; Restore r17..r10, mirroring the pushes above.
	pop r17	 ;  26	popqi	[length = 1]
	pop r16	 ;  27	popqi	[length = 1]
	pop r15	 ;  28	popqi	[length = 1]
	pop r14	 ;  29	popqi	[length = 1]
	pop r13	 ;  30	popqi	[length = 1]
	pop r12	 ;  31	popqi	[length = 1]
	pop r11	 ;  32	popqi	[length = 1]
	pop r10	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]
	.size	foo2, .-foo2
; foo3, old code generation: fbar (x, x, -3.0f, 2.0f).
; The float constants appear as their 32-bit bit patterns; loading them
; costs 10 + 10 = 20 according to the length notes on insns 9 and 10.
.global	foo3
	.type	foo3, @function
foo3:
	; Save r10..r17; they receive constants below and are restored
	; in reverse order in the epilogue.
	push r10	 ;  16	*pushqi/1	[length = 1]
	push r11	 ;  17	*pushqi/1	[length = 1]
	push r12	 ;  18	*pushqi/1	[length = 1]
	push r13	 ;  19	*pushqi/1	[length = 1]
	push r14	 ;  20	*pushqi/1	[length = 1]
	push r15	 ;  21	*pushqi/1	[length = 1]
	push r16	 ;  22	*pushqi/1	[length = 1]
	push r17	 ;  23	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Duplicate the 32-bit value in r22..r25 into r18..r21; r22..r25 is
	; left untouched, matching x being passed twice.
	movw r18,r22	 ;  2	*movsi/1	[length = 2]
	movw r20,r24
	; 0xc0400000 (bit pattern of -3.0f) into r14..r17: r31 is parked in
	; __tmp_reg__, used as the ldi target for every byte, then restored.
	mov __tmp_reg__,r31	 ;  9	*movsf/6	[length = 10]
	ldi r31,lo8(0xc0400000)
	mov r14,r31
	ldi r31,hi8(0xc0400000)
	mov r15,r31
	ldi r31,hlo8(0xc0400000)
	mov r16,r31
	ldi r31,hhi8(0xc0400000)
	mov r17,r31
	mov r31,__tmp_reg__
	; 0x40000000 (bit pattern of 2.0f) into r10..r13 with the same pattern.
	mov __tmp_reg__,r31	 ;  10	*movsf/6	[length = 10]
	ldi r31,lo8(0x40000000)
	mov r10,r31
	ldi r31,hi8(0x40000000)
	mov r11,r31
	ldi r31,hlo8(0x40000000)
	mov r12,r31
	ldi r31,hhi8(0x40000000)
	mov r13,r31
	mov r31,__tmp_reg__
	rcall fbar	 ;  11	call_insn/3	[length = 1]
/* epilogue start */
	; Restore r17..r10, mirroring the pushes above.
	pop r17	 ;  26	popqi	[length = 1]
	pop r16	 ;  27	popqi	[length = 1]
	pop r15	 ;  28	popqi	[length = 1]
	pop r14	 ;  29	popqi	[length = 1]
	pop r13	 ;  30	popqi	[length = 1]
	pop r12	 ;  31	popqi	[length = 1]
	pop r11	 ;  32	popqi	[length = 1]
	pop r10	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]
	.size	foo3, .-foo3

[-- Attachment #4: oint-new.s --]
[-- Type: text/plain, Size: 3529 bytes --]

	; Preamble of the new listing: source file name, symbol definitions
	; and the switch to the code section.
	; NOTE(review): 0x3f/0x3e/0x3d are presumably the I/O addresses of the
	; status register and the stack pointer bytes, and registers 0 and 1
	; the compiler's fixed scratch and zero registers; none of these
	; symbols is referenced by the functions in this listing.
	.file	"oint.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
	.text
; foo1, new code generation: ibar (-1, x, -2, 0xff008000).
; Constant loading costs 3 + 6 + 7 = 16 according to the length notes on
; insns 7, 9 and 10 (the old listing shows 4 + 10 + 10), and r31 is no
; longer saved and restored around each constant.
.global	foo1
	.type	foo1, @function
foo1:
	; Save r10..r17; they receive constants below and are restored
	; in reverse order in the epilogue.
	push r10	 ;  16	*pushqi/1	[length = 1]
	push r11	 ;  17	*pushqi/1	[length = 1]
	push r12	 ;  18	*pushqi/1	[length = 1]
	push r13	 ;  19	*pushqi/1	[length = 1]
	push r14	 ;  20	*pushqi/1	[length = 1]
	push r15	 ;  21	*pushqi/1	[length = 1]
	push r16	 ;  22	*pushqi/1	[length = 1]
	push r17	 ;  23	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy the 32-bit value in r22..r25 to r18..r21 as two register pairs.
	; NOTE(review): presumably x moving from the first-argument to the
	; second-argument registers; confirm against the avr-gcc ABI.
	movw r18,r22	 ;  2	*movsi/1	[length = 2]
	movw r20,r24
	; -1 into r22..r25: fill r22 and r23 with 0xff, then copy that pair
	; to r24:r25 with a single movw.
	ldi r22,lo8(-1)	 ;  7	*movsi/5	[length = 3]
	ldi r23,lo8(-1)
	movw r24,r22
	; -2 (bytes fe ff ff ff) into r14..r17: r17 briefly carries 0xfe so it
	; can be moved into r14 (ldi only accepts r16..r31); r15 becomes 0xff
	; via clr+dec; r16 and r17 then take 0xff directly.
	ldi r17,lo8(-2)	 ;  9	*movsi/6	[length = 6]
	mov r14,r17
	clr r15
	dec r15
	ldi r16,lo8(-1)
	ldi r17,lo8(-1)
	; 0xff008000 (bytes 00 80 00 ff) into r10..r13 without any ldi:
	; clr yields 0x00, set raises the T flag and bld copies it into bit 7
	; of r11 to form 0x80, clr+dec yields 0xff in r13.
	clr r10	 ;  10	*movsi/6	[length = 7]
	set
	clr r11
	bld r11,7
	clr r12
	clr r13
	dec r13
	rcall ibar	 ;  11	*call_insn/2	[length = 1]
/* epilogue start */
	; Restore r17..r10, mirroring the pushes above.
	pop r17	 ;  26	popqi	[length = 1]
	pop r16	 ;  27	popqi	[length = 1]
	pop r15	 ;  28	popqi	[length = 1]
	pop r14	 ;  29	popqi	[length = 1]
	pop r13	 ;  30	popqi	[length = 1]
	pop r12	 ;  31	popqi	[length = 1]
	pop r11	 ;  32	popqi	[length = 1]
	pop r10	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]
	.size	foo1, .-foo1
; foo2, new code generation: ibar (x, x, 65537L, 0xffff0408).
; Constant loading costs 4 + 8 = 12 according to the length notes on
; insns 9 and 10 (the old listing shows 10 + 10).
.global	foo2
	.type	foo2, @function
foo2:
	; Save r10..r17; they receive constants below and are restored
	; in reverse order in the epilogue.
	push r10	 ;  16	*pushqi/1	[length = 1]
	push r11	 ;  17	*pushqi/1	[length = 1]
	push r12	 ;  18	*pushqi/1	[length = 1]
	push r13	 ;  19	*pushqi/1	[length = 1]
	push r14	 ;  20	*pushqi/1	[length = 1]
	push r15	 ;  21	*pushqi/1	[length = 1]
	push r16	 ;  22	*pushqi/1	[length = 1]
	push r17	 ;  23	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Duplicate the 32-bit value in r22..r25 into r18..r21; r22..r25 is
	; left untouched, matching x being passed twice.
	movw r18,r22	 ;  2	*movsi/1	[length = 2]
	movw r20,r24
	; 65537 (bytes 01 00 01 00) into r14..r17: build 01 00 in r14:r15 via
	; r17, then movw copies that pair into r16:r17, which also overwrites
	; the temporary value in r17 with its final 0x00.
	ldi r17,lo8(1)	 ;  9	*movsi/6	[length = 4]
	mov r14,r17
	clr r15
	movw r16,r14
	; 0xffff0408 (bytes 08 04 ff ff) into r10..r13 without any ldi:
	; set raises the T flag once; clr+bld plants bit 3 in r10 (0x08) and
	; bit 2 in r11 (0x04); clr+dec makes 0xff in r12, which is then
	; copied to r13.
	set	 ;  10	*movsi/6	[length = 8]
	clr r10
	bld r10,3
	clr r11
	bld r11,2
	clr r12
	dec r12
	mov r13,r12
	rcall ibar	 ;  11	*call_insn/2	[length = 1]
/* epilogue start */
	; Restore r17..r10, mirroring the pushes above.
	pop r17	 ;  26	popqi	[length = 1]
	pop r16	 ;  27	popqi	[length = 1]
	pop r15	 ;  28	popqi	[length = 1]
	pop r14	 ;  29	popqi	[length = 1]
	pop r13	 ;  30	popqi	[length = 1]
	pop r12	 ;  31	popqi	[length = 1]
	pop r11	 ;  32	popqi	[length = 1]
	pop r10	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]
	.size	foo2, .-foo2
; foo3, new code generation: fbar (x, x, -3.0f, 2.0f).
; Constant loading costs 4 + 6 = 10 according to the length notes on
; insns 9 and 10 (the old listing shows 10 + 10).
.global	foo3
	.type	foo3, @function
foo3:
	; Save r10..r17; they receive constants below and are restored
	; in reverse order in the epilogue.
	push r10	 ;  16	*pushqi/1	[length = 1]
	push r11	 ;  17	*pushqi/1	[length = 1]
	push r12	 ;  18	*pushqi/1	[length = 1]
	push r13	 ;  19	*pushqi/1	[length = 1]
	push r14	 ;  20	*pushqi/1	[length = 1]
	push r15	 ;  21	*pushqi/1	[length = 1]
	push r16	 ;  22	*pushqi/1	[length = 1]
	push r17	 ;  23	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Duplicate the 32-bit value in r22..r25 into r18..r21; r22..r25 is
	; left untouched, matching x being passed twice.
	movw r18,r22	 ;  2	*movsi/1	[length = 2]
	movw r20,r24
	; Bytes 00 00 40 c0 into r14..r17, i.e. 0xc0400000, the bit pattern
	; the old listing loads for -3.0f: two clr for the zero bytes, two
	; direct ldi for r16 (0x40) and r17 (0xc0).
	clr r14	 ;  9	*movsf/6	[length = 4]
	clr r15
	ldi r16,lo8(64)
	ldi r17,lo8(-64)
	; Bytes 00 00 00 40 into r10..r13, i.e. 0x40000000, the bit pattern
	; the old listing loads for 2.0f: clr for the zero bytes, then set+bld
	; plants bit 6 in the cleared r13 to form 0x40.
	clr r10	 ;  10	*movsf/6	[length = 6]
	clr r11
	clr r12
	set
	clr r13
	bld r13,6
	rcall fbar	 ;  11	*call_insn/2	[length = 1]
/* epilogue start */
	; Restore r17..r10, mirroring the pushes above.
	pop r17	 ;  26	popqi	[length = 1]
	pop r16	 ;  27	popqi	[length = 1]
	pop r15	 ;  28	popqi	[length = 1]
	pop r14	 ;  29	popqi	[length = 1]
	pop r13	 ;  30	popqi	[length = 1]
	pop r12	 ;  31	popqi	[length = 1]
	pop r11	 ;  32	popqi	[length = 1]
	pop r10	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]
	.size	foo3, .-foo3
	; Identification string of the compiler that produced this listing.
	.ident	"GCC: (GNU) 4.7.0 20110704 (experimental)"

  reply	other threads:[~2011-07-06 16:00 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-06 11:54 Georg-Johann Lay
2011-07-06 13:57 ` Denis Chertykov
2011-07-06 16:08   ` Georg-Johann Lay [this message]
2011-07-06 17:27     ` Denis Chertykov
2011-07-06 18:06       ` Georg-Johann Lay
2011-07-06 18:40       ` Georg-Johann Lay
2011-07-07  7:38         ` Denis Chertykov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E14859A.9010407@gjlay.de \
    --to=avr@gjlay.de \
    --cc=aesok@post.ru \
    --cc=chertykov@gmail.com \
    --cc=eric.weddington@atmel.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).