public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-1740] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction
@ 2022-07-19  3:17 Max Filippov
  0 siblings, 0 replies; only message in thread
From: Max Filippov @ 2022-07-19  3:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2180cdd8a0e65c2790a7732c82de87f83478487b

commit r13-1740-g2180cdd8a0e65c2790a7732c82de87f83478487b
Author: Takayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp>
Date:   Mon Jul 18 21:43:45 2022 +0900

    xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction
    
    This patch corrects the overestimation of the relative cost of
    '(set (reg) (const_int N))' where N fits into the instruction itself.
    
    In fact, such overestimation confuses the RTL loop invariant motion pass.
    As a result, it has almost no negative impact on speed, but it introduces
    additional reg-reg move instructions and increases register allocation
    pressure, which hurts code size.
    
        /* example, optimized for size */
        extern int foo(void);
        extern int array[16];
        void test_0(void) {
          unsigned int i;
          for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
            array[i] = 1024;
        }
        void test_1(void) {
          unsigned int i;
          for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
            array[i] = array[i] ? 1024 : 0;
        }
        void test_2(void) {
          unsigned int i;
          for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
            array[i] = foo() ? 0 : 1024;
        }
    
        ;; before
            .literal_position
            .literal .LC0, array
        test_0:
            l32r    a3, .LC0
            movi.n  a2, 0
            movi    a4, 0x400       // OK
        .L2:
            s32i.n  a4, a3, 0
            addi.n  a2, a2, 1
            addi.n  a3, a3, 4
            bnei    a2, 16, .L2
            ret.n
            .literal_position
            .literal .LC1, array
        test_1:
            l32r    a2, .LC1
            movi.n  a3, 0
            movi    a5, 0x400       // NG
        .L6:
            l32i.n  a4, a2, 0
            beqz.n  a4, .L5
            mov.n   a4, a5          // should be "movi a4, 0x400"
        .L5:
            s32i.n  a4, a2, 0
            addi.n  a3, a3, 1
            addi.n  a2, a2, 4
            bnei    a3, 16, .L6
            ret.n
            .literal_position
            .literal .LC2, array
        test_2:
            addi    sp, sp, -32
            s32i.n  a12, sp, 24
            l32r    a12, .LC2
            s32i.n  a13, sp, 20
            s32i.n  a14, sp, 16
            s32i.n  a15, sp, 12
            s32i.n  a0, sp, 28
            addi    a13, a12, 64
            movi.n  a15, 0          // NG
            movi    a14, 0x400      // and wastes callee-saved registers (only 4)
        .L11:
            call0   foo
            mov.n   a3, a14         // should be "movi a3, 0x400"
            movnez  a3, a15, a2
            s32i.n  a3, a12, 0
            addi.n  a12, a12, 4
            bne     a12, a13, .L11
            l32i.n  a0, sp, 28
            l32i.n  a12, sp, 24
            l32i.n  a13, sp, 20
            l32i.n  a14, sp, 16
            l32i.n  a15, sp, 12
            addi    sp, sp, 32
            ret.n
    
        ;; after
            .literal_position
            .literal .LC0, array
        test_0:
            l32r    a3, .LC0
            movi.n  a2, 0
            movi    a4, 0x400       // OK
        .L2:
            s32i.n  a4, a3, 0
            addi.n  a2, a2, 1
            addi.n  a3, a3, 4
            bnei    a2, 16, .L2
            ret.n
            .literal_position
            .literal .LC1, array
        test_1:
            l32r    a2, .LC1
            movi.n  a3, 0
        .L6:
            l32i.n  a4, a2, 0
            beqz.n  a4, .L5
            movi    a4, 0x400       // OK
        .L5:
            s32i.n  a4, a2, 0
            addi.n  a3, a3, 1
            addi.n  a2, a2, 4
            bnei    a3, 16, .L6
            ret.n
            .literal_position
            .literal .LC2, array
        test_2:
            addi    sp, sp, -16
            s32i.n  a12, sp, 8
            l32r    a12, .LC2
            s32i.n  a13, sp, 4
            s32i.n  a0, sp, 12
            addi    a13, a12, 64
        .L11:
            call0   foo
            movi.n  a3, 0           // OK
            movi    a4, 0x400       // and less register allocation pressure
            moveqz  a3, a4, a2
            s32i.n  a3, a12, 0
            addi.n  a12, a12, 4
            bne     a12, a13, .L11
            l32i.n  a0, sp, 12
            l32i.n  a12, sp, 8
            l32i.n  a13, sp, 4
            addi    sp, sp, 16
            ret.n
    
    gcc/ChangeLog:
    
            * config/xtensa/xtensa.cc (xtensa_rtx_costs):
            Change the relative cost of '(set (reg) (const_int N))' where
            N fits into signed 12-bit from 4 to 0 if optimizing for size.
            And use the appropriate macro instead of the bare number 4.

Diff:
---
 gcc/config/xtensa/xtensa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 94337452ba8..a851a7ae6b3 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4073,7 +4073,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	case SET:
 	  if (xtensa_simm12b (INTVAL (x)))
 	    {
-	      *total = 4;
+	      *total = speed ? COSTS_N_INSNS (1) : 0;
 	      return true;
 	    }
 	  break;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-07-19  3:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-19  3:17 [gcc r13-1740] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).