[Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
@ 2023-11-13 11:11 theodort at inf dot ethz.ch
  2023-11-13 11:15 ` [Bug tree-optimization/112508] " rguenth at gcc dot gnu.org
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: theodort at inf dot ethz.ch @ 2023-11-13 11:11 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

            Bug ID: 112508
           Summary: [14 Regression] Size regression when using -Os
                    starting with r14-4089-gd45ddc2c04e
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: theodort at inf dot ethz.ch
  Target Milestone: ---

https://godbolt.org/z/qE79f7qvW

Given the following code:

static int b, c = 8, d, e, f, g = 9;
static char h = 3;
static void(a)(int, unsigned i) {
    if (!(((i) >= 1) && ((i) <= 3421036188))) {
        __builtin_unreachable();
    }
}
int main1() {
    for (; h; --h) {
        for (; f <= 9; f++) {
            if (d) g = 0;
            if (e) continue;
            a(b, g && c);
        }
        e = d = 0;
        a(0, 3421036188);
    }
}

gcc-trunk -Os generates larger and more complex code:

main1:
        movl    d(%rip), %edi
        movl    g(%rip), %esi
        xorl    %edx, %edx
        xorl    %ecx, %ecx
        movb    h(%rip), %r8b
        movl    e(%rip), %r9d
        xorl    %r10d, %r10d
        movl    f(%rip), %eax
        testb   %r8b, %r8b
        jne     .L49
        testb   %r10b, %r10b
        je      .L37
        movb    $0, h(%rip)
        testb   %dl, %dl
        je      .L41
        movl    %eax, f(%rip)
        jmp     .L41
.L37:
        testb   %dl, %dl
        je      .L44
        movl    %eax, f(%rip)
        jmp     .L44
.L41:
        movl    %r9d, e(%rip)
        testb   %cl, %cl
        je      .L45
        movl    %esi, g(%rip)
.L45:
        movl    %edi, d(%rip)
        jmp     .L47
.L44:
        testb   %cl, %cl
        je      .L47
        movl    %esi, g(%rip)
.L47:
        xorl    %eax, %eax
        ret
.L49:
        pushq   %rbx
.L35:
        movl    %eax, %r11d
.L4:
        movl    %r11d, %eax
        cmpl    $9, %r11d
        jg      .L51
        testl   %edi, %edi
        je      .L3
        movb    $1, %cl
        xorl    %esi, %esi
.L3:
        movl    %esi, %ebx
        incl    %r11d
        orl     %r9d, %ebx
        jne     .L22
        testb   %r10b, %r10b
        je      .L5
        movb    %r8b, h(%rip)
        testb   %dl, %dl
        je      .L7
        movl    %eax, f(%rip)
        jmp     .L7
.L5:
        testb   %dl, %dl
        je      .L8
        movl    %eax, f(%rip)
        jmp     .L8
.L7:
        xorl    %esi, %esi
        movl    %esi, e(%rip)
        testb   %cl, %cl
        je      .L9
        xorl    %edx, %edx
        movl    %edx, g(%rip)
.L9:
        movl    %edi, d(%rip)
.L19:
.L22:
        movb    $1, %dl
        jmp     .L4
.L51:
        decl    %r8d
        xorl    %r9d, %r9d
        movb    $1, %r10b
        xorl    %edi, %edi
        testb   %r8b, %r8b
        jne     .L35
        testb   %r10b, %r10b
        je      .L12
        movb    $0, h(%rip)
        testb   %dl, %dl
        je      .L14
        movl    %r11d, f(%rip)
        jmp     .L14
.L12:
        testb   %dl, %dl
        je      .L16
        movl    %r11d, f(%rip)
        jmp     .L16
.L14:
        movl    %r9d, e(%rip)
        testb   %cl, %cl
        je      .L17
        movl    %esi, g(%rip)
.L17:
        movl    %edi, d(%rip)
        jmp     .L18
.L8:
        xorl    %eax, %eax
        movl    %eax, g(%rip)
        jmp     .L19
.L16:
        testb   %cl, %cl
        je      .L18
        movl    %esi, g(%rip)
.L18:
        xorl    %eax, %eax
        popq    %rbx
        ret


gcc-13 -Os -o 13.o test.c -c
gcc-trunk -Os -o 14.o test.c -c
size 13.o 14.o
   text    data     bss     dec     hex filename
    235       8      12     255      ff 13.o
    422       8      12     442     1ba 14.o



gcc-13.2.0 -Os:

main1:
        movb    h(%rip), %dil
        xorl    %r8d, %r8d
        xorl    %esi, %esi
        xorl    %ecx, %ecx
        movl    d(%rip), %edx
        movl    f(%rip), %eax
        movb    $1, %r9b
        xorl    %r11d, %r11d
        movl    $10, %r10d
.L2:
        testb   %dil, %dil
        je      .L5
        movl    %eax, %ecx
.L6:
        cmpl    $9, %ecx
        jg      .L25
        testl   %edx, %edx
        movb    $1, %r8b
        cmovne  %r9d, %esi
        incl    %ecx
        jmp     .L6
.L25:
        movl    %r10d, %edx
        movb    $1, %cl
        subl    %eax, %edx
        cmpl    $10, %eax
        cmovg   %r11d, %edx
        decl    %edi
        addl    %edx, %eax
        xorl    %edx, %edx
        jmp     .L2
.L5:
        testb   %cl, %cl
        je      .L7
        xorl    %edi, %edi
        movb    $0, h(%rip)
        movl    %edi, e(%rip)
.L7:
        testb   %r8b, %r8b
        je      .L8
        movl    %eax, f(%rip)
.L8:
        testb   %sil, %sil
        je      .L9
        xorl    %eax, %eax
        movl    %eax, g(%rip)
.L9:
        testb   %cl, %cl
        je      .L10
        movl    %edx, d(%rip)
.L10:
        ret


Bisects to r14-4089-gd45ddc2c04e

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug tree-optimization/112508] [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
  2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
@ 2023-11-13 11:15 ` rguenth at gcc dot gnu.org
  2024-02-15 23:08 ` roger at nextmovesoftware dot com
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-11-13 11:15 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |missed-optimization
   Target Milestone|---                         |14.0

--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
The change is expected to alter jump threading, so unfortunately this
kind of fallout is not surprising.

Needs analysis (looks like we're now unrolling?)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug tree-optimization/112508] [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
  2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
  2023-11-13 11:15 ` [Bug tree-optimization/112508] " rguenth at gcc dot gnu.org
@ 2024-02-15 23:08 ` roger at nextmovesoftware dot com
  2024-02-16  8:11 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: roger at nextmovesoftware dot com @ 2024-02-15 23:08 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

Roger Sayle <roger at nextmovesoftware dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever confirmed|0                           |1
                 CC|                            |roger at nextmovesoftware dot com
   Last reconfirmed|                            |2024-02-15

--- Comment #2 from Roger Sayle <roger at nextmovesoftware dot com> ---
The issue appears to be with (poor costing in) loop invariant store motion. 
Adding the command line option "-fno-move-loop-stores" reduces the .s file from
149 lines to 54 lines, and the size of main (as reported by objdump -d) from
317 bytes to 73 bytes.   To confirm that this isn't specific to this (possibly
pathological/obscure) test case, I ran the CSiBE benchmark on x86_64, comparing
"-Os" to "-Os -fno-move-loop-stores", which shows a net saving of 1606 bytes
with -fno-move-loop-stores.  There are cases where -fmove-loop-stores itself
reduces code size (on x86_64, and I've not investigated other targets), so I
guess it would be preferable to use more accurate size costs instead of just
disabling this sub-pass. Note that the bigger hammer, -fno-tree-loop-im, also
avoids the code growth, but the more precise/specific -fno-move-loop-stores is
sufficient.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug tree-optimization/112508] [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
  2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
  2023-11-13 11:15 ` [Bug tree-optimization/112508] " rguenth at gcc dot gnu.org
  2024-02-15 23:08 ` roger at nextmovesoftware dot com
@ 2024-02-16  8:11 ` rguenth at gcc dot gnu.org
  2024-02-16  8:20 ` rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-02-16  8:11 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
Loop store-motion is a difficult thing to cost - it's a critical enabler for
many of our loop optimizations, including scalar evolution analysis.

Now, this might not hold true so much for the cases where we end up
using an extra flag to avoid store data races and this example also shows
we're doing a bad job in trying to unify flags for variables stored in the
same blocks (we don't try to do this at all ...).

Value-numbering has difficulties getting from zero flags to "same flags",
it only manages to elide one flag (but maybe that's all we can do - I
didn't exactly analyze).

Conditionally set vars (conditionally within a loop, not so much conditionally
executed subloops) are at least less likely to help SCEV, so cost
modeling (aka estimating register pressure in a simplistic way, like
counting the number of IVs) of store-motion of those might be a way to
combat this.

Or, for example, disable conditional store-motion for -Os entirely.

For targets where -Os matters, -fallow-store-data-races would likely be
a way to rescue this.  With that I get on x86_64

main1:
.LFB1:
        .cfi_startproc
        movb    h(%rip), %sil
        movl    d(%rip), %edx
        movl    g(%rip), %edi
        movl    e(%rip), %ecx
        movl    f(%rip), %eax
.L2:
        testb   %sil, %sil
        je      .L5
        movl    %eax, %ecx
.L6:
        movl    %ecx, %eax
        cmpl    $9, %ecx
        jg      .L9
        testl   %edx, %edx
        je      .L3
        xorl    %edi, %edi
.L3:
        incl    %ecx
        jmp     .L6
.L9:
        decl    %esi
        xorl    %ecx, %ecx
        xorl    %edx, %edx
        jmp     .L2
.L5:
        movb    $0, h(%rip)
        movl    %eax, f(%rip)
        movl    %ecx, e(%rip)
        movl    %edi, g(%rip)
        movl    %edx, d(%rip)
        ret

Actionable items:

 a) disable flag store motion for cold loops (or stores only happening in
    cold parts of the loop)
 b) optimize flag variable allocation (try to use the same flag for multiple
    vars)
 c) some kind of register pressure estimation, possibly only for non-innermost
    loops

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug tree-optimization/112508] [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
  2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
                   ` (2 preceding siblings ...)
  2024-02-16  8:11 ` rguenth at gcc dot gnu.org
@ 2024-02-16  8:20 ` rguenth at gcc dot gnu.org
  2024-03-07 21:02 ` law at gcc dot gnu.org
  2024-05-07  7:42 ` [Bug tree-optimization/112508] [14/15 " rguenth at gcc dot gnu.org
  5 siblings, 0 replies; 7+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-02-16  8:20 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
We are already doing

  /* Verify whether the candidate is hot for LOOP.  Only do store motion if the
    candidate's profile count is hot.  Statement in cold BB shouldn't be moved
    out of it's loop_father.  */
  if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
    return false;

but

/* Check the coldest loop between loop L and innermost loop.  If there is one
   cold loop between L and INNER_LOOP, store motion can be performed, otherwise
   no cold loop means no store motion.  get_coldest_out_loop also handles cases
   when l is inner_loop.  */
bool
ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
{
  basic_block curr_bb = gimple_bb (loc->stmt);
  class loop *inner_loop = curr_bb->loop_father;
  return get_coldest_out_loop (l, inner_loop, curr_bb);
}

checks that there's a good place to move a store to but it doesn't verify
whether there's a store that's likely to be executed in its contained loop.
The for_all_locs_in_loop is also happy if _any_ of the stores for the var
can be moved to a colder place, likely the intent was to check that for
all of the stores (though it's unlikely to differ in simple cases).

This all doesn't distinguish between always and not always executed refs.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug tree-optimization/112508] [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
  2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
                   ` (3 preceding siblings ...)
  2024-02-16  8:20 ` rguenth at gcc dot gnu.org
@ 2024-03-07 21:02 ` law at gcc dot gnu.org
  2024-05-07  7:42 ` [Bug tree-optimization/112508] [14/15 " rguenth at gcc dot gnu.org
  5 siblings, 0 replies; 7+ messages in thread
From: law at gcc dot gnu.org @ 2024-03-07 21:02 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

Jeffrey A. Law <law at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |law at gcc dot gnu.org
           Priority|P3                          |P2

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug tree-optimization/112508] [14/15 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e
  2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
                   ` (4 preceding siblings ...)
  2024-03-07 21:02 ` law at gcc dot gnu.org
@ 2024-05-07  7:42 ` rguenth at gcc dot gnu.org
  5 siblings, 0 replies; 7+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-05-07  7:42 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112508

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|14.0                        |14.2

--- Comment #5 from Richard Biener <rguenth at gcc dot gnu.org> ---
GCC 14.1 is being released, retargeting bugs to GCC 14.2.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-05-07  7:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-13 11:11 [Bug tree-optimization/112508] New: [14 Regression] Size regression when using -Os starting with r14-4089-gd45ddc2c04e theodort at inf dot ethz.ch
2023-11-13 11:15 ` [Bug tree-optimization/112508] " rguenth at gcc dot gnu.org
2024-02-15 23:08 ` roger at nextmovesoftware dot com
2024-02-16  8:11 ` rguenth at gcc dot gnu.org
2024-02-16  8:20 ` rguenth at gcc dot gnu.org
2024-03-07 21:02 ` law at gcc dot gnu.org
2024-05-07  7:42 ` [Bug tree-optimization/112508] [14/15 " rguenth at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).