* [PATCH] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction
@ 2022-07-18 12:43 Takayuki 'January June' Suwa
2022-07-19 3:19 ` Max Filippov
0 siblings, 1 reply; 2+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-07-18 12:43 UTC (permalink / raw)
To: GCC Patches
This patch corrects the overestimation of the relative cost of
'(set (reg) (const_int N))' where N fits into the instruction itself.
In fact, such overestimation confuses the RTL loop invariant motion pass.
As a result, there is almost no negative impact on speed, but code size
suffers from additional reg-reg move instructions and increased register
allocation pressure.
/* example, optimized for size */
extern int foo(void);
extern int array[16];
void test_0(void) {
unsigned int i;
for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
array[i] = 1024;
}
void test_1(void) {
unsigned int i;
for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
array[i] = array[i] ? 1024 : 0;
}
void test_2(void) {
unsigned int i;
for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
array[i] = foo() ? 0 : 1024;
}
;; before
.literal_position
.literal .LC0, array
test_0:
l32r a3, .LC0
movi.n a2, 0
movi a4, 0x400 // OK
.L2:
s32i.n a4, a3, 0
addi.n a2, a2, 1
addi.n a3, a3, 4
bnei a2, 16, .L2
ret.n
.literal_position
.literal .LC1, array
test_1:
l32r a2, .LC1
movi.n a3, 0
movi a5, 0x400 // NG
.L6:
l32i.n a4, a2, 0
beqz.n a4, .L5
mov.n a4, a5 // should be "movi a4, 0x400"
.L5:
s32i.n a4, a2, 0
addi.n a3, a3, 1
addi.n a2, a2, 4
bnei a3, 16, .L6
ret.n
.literal_position
.literal .LC2, array
test_2:
addi sp, sp, -32
s32i.n a12, sp, 24
l32r a12, .LC2
s32i.n a13, sp, 20
s32i.n a14, sp, 16
s32i.n a15, sp, 12
s32i.n a0, sp, 28
addi a13, a12, 64
movi.n a15, 0 // NG
movi a14, 0x400 // and wastes callee-saved registers (only 4)
.L11:
call0 foo
mov.n a3, a14 // should be "movi a3, 0x400"
movnez a3, a15, a2
s32i.n a3, a12, 0
addi.n a12, a12, 4
bne a12, a13, .L11
l32i.n a0, sp, 28
l32i.n a12, sp, 24
l32i.n a13, sp, 20
l32i.n a14, sp, 16
l32i.n a15, sp, 12
addi sp, sp, 32
ret.n
;; after
.literal_position
.literal .LC0, array
test_0:
l32r a3, .LC0
movi.n a2, 0
movi a4, 0x400 // OK
.L2:
s32i.n a4, a3, 0
addi.n a2, a2, 1
addi.n a3, a3, 4
bnei a2, 16, .L2
ret.n
.literal_position
.literal .LC1, array
test_1:
l32r a2, .LC1
movi.n a3, 0
.L6:
l32i.n a4, a2, 0
beqz.n a4, .L5
movi a4, 0x400 // OK
.L5:
s32i.n a4, a2, 0
addi.n a3, a3, 1
addi.n a2, a2, 4
bnei a3, 16, .L6
ret.n
.literal_position
.literal .LC2, array
test_2:
addi sp, sp, -16
s32i.n a12, sp, 8
l32r a12, .LC2
s32i.n a13, sp, 4
s32i.n a0, sp, 12
addi a13, a12, 64
.L11:
call0 foo
movi.n a3, 0 // OK
movi a4, 0x400 // and less register allocation pressure
moveqz a3, a4, a2
s32i.n a3, a12, 0
addi.n a12, a12, 4
bne a12, a13, .L11
l32i.n a0, sp, 12
l32i.n a12, sp, 8
l32i.n a13, sp, 4
addi sp, sp, 16
ret.n
gcc/ChangeLog:
* config/xtensa/xtensa.cc (xtensa_rtx_costs):
Change the relative cost of '(set (reg) (const_int N))' where
N fits into a signed 12-bit immediate from 4 to 0 when optimizing
for size.  Also use the COSTS_N_INSNS macro instead of the bare number 4.
---
gcc/config/xtensa/xtensa.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 94337452ba8..a851a7ae6b3 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4073,7 +4073,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
case SET:
if (xtensa_simm12b (INTVAL (x)))
{
- *total = 4;
+ *total = speed ? COSTS_N_INSNS (1) : 0;
return true;
}
break;
--
2.20.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction
2022-07-18 12:43 [PATCH] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction Takayuki 'January June' Suwa
@ 2022-07-19 3:19 ` Max Filippov
0 siblings, 0 replies; 2+ messages in thread
From: Max Filippov @ 2022-07-19 3:19 UTC (permalink / raw)
To: Takayuki 'January June' Suwa; +Cc: GCC Patches
On Mon, Jul 18, 2022 at 5:47 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch corrects the overestimation of the relative cost of
> '(set (reg) (const_int N))' where N fits into the instruction itself.
>
> In fact, such overestimation confuses the RTL loop invariant motion pass.
> As a result, it brings almost no negative impact from the speed point of
> view, but additional reg-reg move instructions and register allocation
> pressure about the size.
>
...
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_rtx_costs):
> Change the relative cost of '(set (reg) (const_int N))' where
> N fits into signed 12-bit from 4 to 0 if optimizing for size.
> And use the appropriate macro instead of the bare number 4.
> ---
> gcc/config/xtensa/xtensa.cc | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-07-19 3:19 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-18 12:43 [PATCH] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction Takayuki 'January June' Suwa
2022-07-19 3:19 ` Max Filippov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).