[Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat
@ 2023-06-02 13:22 gjl at gcc dot gnu.org
  2023-06-05  6:38 ` [Bug rtl-optimization/110093] " rguenth at gcc dot gnu.org
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: gjl at gcc dot gnu.org @ 2023-06-02 13:22 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

            Bug ID: 110093
           Summary: [12/13/14 Regression][avr] Move frenzy leading to code
                    bloat
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gjl at gcc dot gnu.org
  Target Milestone: ---

So here is a C test case:

long add (long aa, long bb, long cc)
{
    if (cc < 0)
        return aa - cc;
    return aa + bb;
}

$ avr-gcc-8 -S -Os -dp
compiles this to following assembly:

add:
        push r14                 ;  30  [c=4 l=1]  pushqi1/0
        push r15                 ;  31  [c=4 l=1]  pushqi1/0
        push r16                 ;  32  [c=4 l=1]  pushqi1/0
        push r17                 ;  33  [c=4 l=1]  pushqi1/0
/* prologue: function */
/* frame size = 0 */
/* stack size = 4 */
        sbrs r17,7       ;  42  [c=28 l=2]  *sbrx_and_branchsi
        rjmp .L2        
        sub r22,r14      ;  11  [c=16 l=4]  subsi3/0
        sbc r23,r15
        sbc r24,r16
        sbc r25,r17
.L1:
/* epilogue start */
        pop r17          ;  36  [c=4 l=1]  popqi
        pop r16          ;  37  [c=4 l=1]  popqi
        pop r15          ;  38  [c=4 l=1]  popqi
        pop r14          ;  39  [c=4 l=1]  popqi
        ret              ;  40  [c=0 l=1]  return_from_epilogue
.L2:
        add r22,r18      ;  16  [c=16 l=4]  addsi3/0
        adc r23,r19
        adc r24,r20
        adc r25,r21
        rjmp .L1                 ;  43  [c=4 l=1]  jump

Notice that the operations on aa in SI:22 can be done in place, no moves
needed.  The superfluous PUSHes and POPs is PR109910, which is yet another
issue...

The code from above then deteriorates with v12, v13, v14 20230501 to a move
bonanza that starts moving stuff for no reason, leading to high register
pressure,  required stack increases from 4 bytes to 14 bytes, code size
increase from 20 instructions to 56:

add:
        push r4          ;  85  [c=4 l=1]  pushqi1/0
        push r5          ;  86  [c=4 l=1]  pushqi1/0
        push r6          ;  87  [c=4 l=1]  pushqi1/0
        push r7          ;  88  [c=4 l=1]  pushqi1/0
        push r8          ;  89  [c=4 l=1]  pushqi1/0
        push r9          ;  90  [c=4 l=1]  pushqi1/0
        push r10                 ;  91  [c=4 l=1]  pushqi1/0
        push r11                 ;  92  [c=4 l=1]  pushqi1/0
        push r12                 ;  93  [c=4 l=1]  pushqi1/0
        push r13                 ;  94  [c=4 l=1]  pushqi1/0
        push r14                 ;  95  [c=4 l=1]  pushqi1/0
        push r15                 ;  96  [c=4 l=1]  pushqi1/0
        push r16                 ;  97  [c=4 l=1]  pushqi1/0
        push r17                 ;  98  [c=4 l=1]  pushqi1/0
/* prologue: function */
/* frame size = 0 */
/* stack size = 14 */
.L__stack_usage = 14
        mov r4,r22       ;  68  [c=4 l=1]  movqi_insn/0
        mov r5,r23       ;  69  [c=4 l=1]  movqi_insn/0
        mov r6,r24       ;  70  [c=4 l=1]  movqi_insn/0
        mov r7,r25       ;  71  [c=4 l=1]  movqi_insn/0
        mov r8,r18       ;  72  [c=4 l=1]  movqi_insn/0
        mov r9,r19       ;  73  [c=4 l=1]  movqi_insn/0
        mov r10,r20      ;  74  [c=4 l=1]  movqi_insn/0
        mov r11,r21      ;  75  [c=4 l=1]  movqi_insn/0
        mov r12,r14      ;  78  [c=4 l=1]  movqi_insn/0
        mov r13,r15      ;  79  [c=4 l=1]  movqi_insn/0
        mov r14,r16      ;  80  [c=4 l=1]  movqi_insn/0
        mov r15,r17      ;  81  [c=4 l=1]  movqi_insn/0
        mov r25,r7       ;  66  [c=4 l=4]  *movsi/0
        mov r24,r6
        mov r23,r5
        mov r22,r4
        sbrs r15,7       ;  117 [c=28 l=2]  *sbrx_and_branchsi
        rjmp .L2        
        sub r22,r12      ;  67  [c=16 l=4]  *subsi3/0
        sbc r23,r13
        sbc r24,r14
        sbc r25,r15
.L1:
/* epilogue start */
        pop r17          ;  101 [c=4 l=1]  popqi
        pop r16          ;  102 [c=4 l=1]  popqi
        pop r15          ;  103 [c=4 l=1]  popqi
        pop r14          ;  104 [c=4 l=1]  popqi
        pop r13          ;  105 [c=4 l=1]  popqi
        pop r12          ;  106 [c=4 l=1]  popqi
        pop r11          ;  107 [c=4 l=1]  popqi
        pop r10          ;  108 [c=4 l=1]  popqi
        pop r9           ;  109 [c=4 l=1]  popqi
        pop r8           ;  110 [c=4 l=1]  popqi
        pop r7           ;  111 [c=4 l=1]  popqi
        pop r6           ;  112 [c=4 l=1]  popqi
        pop r5           ;  113 [c=4 l=1]  popqi
        pop r4           ;  114 [c=4 l=1]  popqi
        ret              ;  115 [c=0 l=1]  return_from_epilogue
.L2:
        add r22,r8       ;  65  [c=16 l=4]  *addsi3/0
        adc r23,r9
        adc r24,r10
        adc r25,r11
        rjmp .L1                 ;  118 [c=4 l=1]  jump

Then finally, with v14 20230602, crazyness increases even more to even requires
a stack frame and a frame pointer.  Register allocator starts to move stuff to
a stack slot and back again.  Code size increases again from 56 instructions to
68, more stack usage:

add:
        push r4          ;  84  [c=4 l=1]  pushqi1/0
        push r5          ;  85  [c=4 l=1]  pushqi1/0
        push r6          ;  86  [c=4 l=1]  pushqi1/0
        push r7          ;  87  [c=4 l=1]  pushqi1/0
        push r8          ;  88  [c=4 l=1]  pushqi1/0
        push r9          ;  89  [c=4 l=1]  pushqi1/0
        push r10                 ;  90  [c=4 l=1]  pushqi1/0
        push r11                 ;  91  [c=4 l=1]  pushqi1/0
        push r14                 ;  92  [c=4 l=1]  pushqi1/0
        push r15                 ;  93  [c=4 l=1]  pushqi1/0
        push r16                 ;  94  [c=4 l=1]  pushqi1/0
        push r17                 ;  95  [c=4 l=1]  pushqi1/0
        push r28                 ;  96  [c=4 l=1]  pushqi1/0
        push r29                 ;  97  [c=4 l=1]  pushqi1/0
         ; SP -= 4       ;  101 [c=4 l=2]  *addhi3_sp
        rcall . 
        rcall . 
        in r28,__SP_L__  ;  127 [c=4 l=2]  *movhi/7
        in r29,__SP_H__
/* prologue: function */
/* frame size = 4 */
/* stack size = 18 */
.L__stack_usage = 18
        mov r8,r22       ;  69  [c=4 l=1]  movqi_insn/0
        mov r9,r23       ;  70  [c=4 l=1]  movqi_insn/0
        mov r10,r24      ;  71  [c=4 l=1]  movqi_insn/0
        mov r11,r25      ;  72  [c=4 l=1]  movqi_insn/0
        std Y+1,r18      ;  73  [c=4 l=1]  movqi_insn/2
        std Y+2,r19      ;  74  [c=4 l=1]  movqi_insn/2
        std Y+3,r20      ;  75  [c=4 l=1]  movqi_insn/2
        std Y+4,r21      ;  76  [c=4 l=1]  movqi_insn/2
        mov r4,r14       ;  77  [c=4 l=1]  movqi_insn/0
        mov r5,r15       ;  78  [c=4 l=1]  movqi_insn/0
        mov r6,r16       ;  79  [c=4 l=1]  movqi_insn/0
        mov r7,r17       ;  80  [c=4 l=1]  movqi_insn/0
        sbrs r7,7        ;  124 [c=28 l=2]  *sbrx_and_branchsi
        rjmp .L2        
        mov r25,r11      ;  67  [c=4 l=4]  *movsi/0
        mov r24,r10
        mov r23,r9
        mov r22,r8
        sub r22,r4       ;  68  [c=16 l=4]  *subsi3/0
        sbc r23,r5
        sbc r24,r6
        sbc r25,r7
.L1:
/* epilogue start */
         ; SP += 4       ;  107 [c=4 l=4]  *addhi3_sp
        pop __tmp_reg__
        pop __tmp_reg__
        pop __tmp_reg__
        pop __tmp_reg__
        pop r29          ;  108 [c=4 l=1]  popqi
        pop r28          ;  109 [c=4 l=1]  popqi
        pop r17          ;  110 [c=4 l=1]  popqi
        pop r16          ;  111 [c=4 l=1]  popqi
        pop r15          ;  112 [c=4 l=1]  popqi
        pop r14          ;  113 [c=4 l=1]  popqi
        pop r11          ;  114 [c=4 l=1]  popqi
        pop r10          ;  115 [c=4 l=1]  popqi
        pop r9           ;  116 [c=4 l=1]  popqi
        pop r8           ;  117 [c=4 l=1]  popqi
        pop r7           ;  118 [c=4 l=1]  popqi
        pop r6           ;  119 [c=4 l=1]  popqi
        pop r5           ;  120 [c=4 l=1]  popqi
        pop r4           ;  121 [c=4 l=1]  popqi
        ret              ;  122 [c=0 l=1]  return_from_epilogue
.L2:
        ldd r22,Y+1      ;  65  [c=16 l=4]  *movsi/2
        ldd r23,Y+2
        ldd r24,Y+3
        ldd r25,Y+4
        add r22,r8       ;  66  [c=16 l=4]  *addsi3/0
        adc r23,r9
        adc r24,r10
        adc r25,r11
        rjmp .L1                 ;  125 [c=4 l=1]  jump

So we have the following results:

Optimal code:                 Size 12 Instr, no Stack
avr-gcc v8:                   Size 20 Instr,  4 Stack
avr-gcc v12, v13, v14 (May)   Size 56 Instr, 14 Stack
avr-gcc v14 (June)            Size 68 Instr, 18 Stack + Frame Pointer


Target: avr
Configured with: --target=avr --disable-nls --with-gnu-as --with-gnu-ld
--disable-shared --enable-languages=c,c++

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
@ 2023-06-05  6:38 ` rguenth at gcc dot gnu.org
  2023-06-13 10:18 ` gjl at gcc dot gnu.org
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-06-05  6:38 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |needs-bisection
   Target Milestone|---                         |12.4

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
  2023-06-05  6:38 ` [Bug rtl-optimization/110093] " rguenth at gcc dot gnu.org
@ 2023-06-13 10:18 ` gjl at gcc dot gnu.org
  2023-08-22 14:25 ` gjl at gcc dot gnu.org
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: gjl at gcc dot gnu.org @ 2023-06-13 10:18 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

Georg-Johann Lay <gjl at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |vmakarov at gcc dot gnu.org

--- Comment #1 from Georg-Johann Lay <gjl at gcc dot gnu.org> ---
CCing Vladimir.  Maybe he has a clue.  It reminds of PR90706 which should be
fixed by now.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
  2023-06-05  6:38 ` [Bug rtl-optimization/110093] " rguenth at gcc dot gnu.org
  2023-06-13 10:18 ` gjl at gcc dot gnu.org
@ 2023-08-22 14:25 ` gjl at gcc dot gnu.org
  2023-08-22 14:25 ` gjl at gcc dot gnu.org
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: gjl at gcc dot gnu.org @ 2023-08-22 14:25 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

--- Comment #2 from Georg-Johann Lay <gjl at gcc dot gnu.org> ---
Meanwhile (2023-08-22) the generated code from above got worse once again and
even pops a frame:

long add (long aa, long bb, long cc)
{
    if (cc < 0)
        return aa - cc;
    return aa + bb;
}

> avr-gcc -Os -S -dp

add:
        push r4          ;  83  [c=4 l=1]  pushqi1/0
        push r5          ;  84  [c=4 l=1]  pushqi1/0
        push r6          ;  85  [c=4 l=1]  pushqi1/0
        push r7          ;  86  [c=4 l=1]  pushqi1/0
        push r8          ;  87  [c=4 l=1]  pushqi1/0
        push r9          ;  88  [c=4 l=1]  pushqi1/0
        push r10                 ;  89  [c=4 l=1]  pushqi1/0
        push r11                 ;  90  [c=4 l=1]  pushqi1/0
        push r14                 ;  91  [c=4 l=1]  pushqi1/0
        push r15                 ;  92  [c=4 l=1]  pushqi1/0
        push r16                 ;  93  [c=4 l=1]  pushqi1/0
        push r17                 ;  94  [c=4 l=1]  pushqi1/0
        push r28                 ;  95  [c=4 l=1]  pushqi1/0
        push r29                 ;  96  [c=4 l=1]  pushqi1/0
         ; SP -= 4       ;  100 [c=4 l=2]  *addhi3_sp
        rcall . 
        rcall . 
        in r28,__SP_L__  ;  126 [c=4 l=2]  *movhi/7
        in r29,__SP_H__
/* prologue: function */
/* frame size = 4 */
/* stack size = 18 */
.L__stack_usage = 18
        mov r8,r22       ;  69  [c=4 l=1]  movqi_insn/0
        mov r9,r23       ;  70  [c=4 l=1]  movqi_insn/0
        mov r10,r24      ;  71  [c=4 l=1]  movqi_insn/0
        mov r11,r25      ;  72  [c=4 l=1]  movqi_insn/0
        std Y+1,r18      ;  73  [c=4 l=1]  movqi_insn/2
        std Y+2,r19      ;  74  [c=4 l=1]  movqi_insn/2
        std Y+3,r20      ;  75  [c=4 l=1]  movqi_insn/2
        std Y+4,r21      ;  76  [c=4 l=1]  movqi_insn/2
        mov r4,r14       ;  77  [c=4 l=1]  movqi_insn/0
        mov r5,r15       ;  78  [c=4 l=1]  movqi_insn/0
        mov r6,r16       ;  79  [c=4 l=1]  movqi_insn/0
        mov r7,r17       ;  80  [c=4 l=1]  movqi_insn/0
        sbrs r7,7        ;  123 [c=4 l=2]  *sbrx_branchhi
        rjmp .L2        
        mov r25,r11      ;  67  [c=4 l=4]  *movsi/0
        mov r24,r10
        mov r23,r9
        mov r22,r8
        sub r22,r4       ;  68  [c=16 l=4]  *subsi3/0
        sbc r23,r5
        sbc r24,r6
        sbc r25,r7
.L1:
/* epilogue start */
         ; SP += 4       ;  106 [c=4 l=4]  *addhi3_sp
        pop __tmp_reg__
        pop __tmp_reg__
        pop __tmp_reg__
        pop __tmp_reg__
        pop r29          ;  107 [c=4 l=1]  popqi
        pop r28          ;  108 [c=4 l=1]  popqi
        pop r17          ;  109 [c=4 l=1]  popqi
        pop r16          ;  110 [c=4 l=1]  popqi
        pop r15          ;  111 [c=4 l=1]  popqi
        pop r14          ;  112 [c=4 l=1]  popqi
        pop r11          ;  113 [c=4 l=1]  popqi
        pop r10          ;  114 [c=4 l=1]  popqi
        pop r9           ;  115 [c=4 l=1]  popqi
        pop r8           ;  116 [c=4 l=1]  popqi
        pop r7           ;  117 [c=4 l=1]  popqi
        pop r6           ;  118 [c=4 l=1]  popqi
        pop r5           ;  119 [c=4 l=1]  popqi
        pop r4           ;  120 [c=4 l=1]  popqi
        ret              ;  121 [c=0 l=1]  return_from_epilogue
.L2:
        ldd r22,Y+1      ;  65  [c=16 l=4]  *movsi/2
        ldd r23,Y+2
        ldd r24,Y+3
        ldd r25,Y+4
        add r22,r8       ;  66  [c=16 l=4]  *addsi3/0
        adc r23,r9
        adc r24,r10
        adc r25,r11
        rjmp .L1                 ;  124 [c=4 l=1]  jump

        .ident  "GCC: (GNU) 14.0.0 20230822 (experimental)"

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
                   ` (2 preceding siblings ...)
  2023-08-22 14:25 ` gjl at gcc dot gnu.org
@ 2023-08-22 14:25 ` gjl at gcc dot gnu.org
  2023-08-29 17:15 ` vmakarov at gcc dot gnu.org
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: gjl at gcc dot gnu.org @ 2023-08-22 14:25 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

Georg-Johann Lay <gjl at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
     Ever confirmed|0                           |1
   Last reconfirmed|                            |2023-08-22
             Status|UNCONFIRMED                 |NEW

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
                   ` (3 preceding siblings ...)
  2023-08-22 14:25 ` gjl at gcc dot gnu.org
@ 2023-08-29 17:15 ` vmakarov at gcc dot gnu.org
  2023-08-30  8:16 ` gjl at gcc dot gnu.org
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: vmakarov at gcc dot gnu.org @ 2023-08-29 17:15 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

--- Comment #3 from Vladimir Makarov <vmakarov at gcc dot gnu.org> ---
I worked on avr issues quite some time.  And here is my findings.
Before IRA we have start of BB2:

;; lr  in        14 [r14] 15 [r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20]
21 [r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 28 [r28] 32 [__SP_L__] 34 [argL]
44 45 46

   33: r51:QI=r22:QI
       REG_DEAD r22:QI
   34: r52:QI=r23:QI
      REG_DEAD r23:QI
   35: r53:QI=r24:QI
      REG_DEAD r24:QI
   36: r54:QI=r25:QI
      REG_DEAD r25:QI
   37: r44:SI#0=r51:QI
      REG_DEAD r51:QI
   38: r44:SI#1=r52:QI
      REG_DEAD r52:QI
   39: r44:SI#2=r53:QI
      REG_DEAD r53:QI
   40: r44:SI#3=r54:QI
      REG_DEAD r54:QI

According GCC pseudo r44 conflicts with r51, r52 ...  In reality it is
not.  I could modify BB live analysis in IRA although it is a lot of
work.

But there is a bigger problem. A lot of passes including IRA uses
data-flow analysis framework for global life analysis and it does not
work on subreg level.  You can see that r44 still lives (lr in) at the
beginning of BB2.  DFA is not my responsibility but I can say
modifying DFA this way is a huge project as it will affect a lot of
targets.

Instead, as AVR regs are very small, I propose to avoid the above RTL
code by switching off subreg3 pass (or -fsplit-wide-types) for AVR by
default as it was for gcc-8.

There is still one minor problem: an additional reg-reg move generation for the
test case in comparison with gcc-8.  I'll try to fix it.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
                   ` (4 preceding siblings ...)
  2023-08-29 17:15 ` vmakarov at gcc dot gnu.org
@ 2023-08-30  8:16 ` gjl at gcc dot gnu.org
  2023-08-30 14:04 ` vmakarov at gcc dot gnu.org
  2024-03-08 15:35 ` law at gcc dot gnu.org
  7 siblings, 0 replies; 9+ messages in thread
From: gjl at gcc dot gnu.org @ 2023-08-30  8:16 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

--- Comment #4 from Georg-Johann Lay <gjl at gcc dot gnu.org> ---
(In reply to Vladimir Makarov from comment #3)
> I propose to avoid the above RTL code by switching off subreg3
> pass (or -fsplit-wide-types) for AVR by default as it was for gcc-8.

Thanks for looking into this.

With v8, I don't see a difference with -f[no-]split-wide-types, everything
works fine.

Since v10 r280033 the default is -fsplit-wide-types-early, but that option has
no effect on testcase + master, only -fno-split-wide-types seems to "fix" the
problem, regardless of -f[no-]split-wide-types-early.

From my experience, -fno-split-wide-types has no clear edge over
-fsplit-wide-types, which very much depends on the code.  This is the reason
why -fsplit-wide-types is still the default.

So are you saying that the bug is actually in lower-subreg.cc ?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
                   ` (5 preceding siblings ...)
  2023-08-30  8:16 ` gjl at gcc dot gnu.org
@ 2023-08-30 14:04 ` vmakarov at gcc dot gnu.org
  2024-03-08 15:35 ` law at gcc dot gnu.org
  7 siblings, 0 replies; 9+ messages in thread
From: vmakarov at gcc dot gnu.org @ 2023-08-30 14:04 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

--- Comment #5 from Vladimir Makarov <vmakarov at gcc dot gnu.org> ---
(In reply to Georg-Johann Lay from comment #4)
>
> 
> So are you saying that the bug is actually in lower-subreg.cc ?

No. lower subreg is fine.

Sorry to be unclear.  To generate a better code for the current test case (or
analogous cases) we need live analysis on sub-register level.  Currently it is
done only on whole pseudo-register level.

  First of all DFA (data flow analysis framework) should be modified.  As I
showed DFA wrongly calculate that pseudo r44 lives at the start of BB2,
although it is not (r44 value is not used before insn #37).  It is a big job. 
The problem is also that the active development of DFA stopped long time ago
and their developers do not work on gcc anymore.

  Secondly, after DFA modification RA (and may be other optimizations) should
be modified to work with this information on BB-level.  It is a medium size
project for me and probably it would take 2-3 months of my work time.

So looking at this situation, I would suggest to make -fno-split-wide-types a
default for AVR target to solve this and and analogous PRs.  May be it is not
necessary for good performance of real avr applications.  I am not user AVR and
can not say how severe this problem for the real applications.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/110093] [12/13/14 Regression][avr] Move frenzy leading to code bloat
  2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
                   ` (6 preceding siblings ...)
  2023-08-30 14:04 ` vmakarov at gcc dot gnu.org
@ 2024-03-08 15:35 ` law at gcc dot gnu.org
  7 siblings, 0 replies; 9+ messages in thread
From: law at gcc dot gnu.org @ 2024-03-08 15:35 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110093

Jeffrey A. Law <law at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |law at gcc dot gnu.org
           Priority|P3                          |P2

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-03-08 15:35 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-02 13:22 [Bug rtl-optimization/110093] New: [12/13/14 Regression][avr] Move frenzy leading to code bloat gjl at gcc dot gnu.org
2023-06-05  6:38 ` [Bug rtl-optimization/110093] " rguenth at gcc dot gnu.org
2023-06-13 10:18 ` gjl at gcc dot gnu.org
2023-08-22 14:25 ` gjl at gcc dot gnu.org
2023-08-22 14:25 ` gjl at gcc dot gnu.org
2023-08-29 17:15 ` vmakarov at gcc dot gnu.org
2023-08-30  8:16 ` gjl at gcc dot gnu.org
2023-08-30 14:04 ` vmakarov at gcc dot gnu.org
2024-03-08 15:35 ` law at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).