public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/108751] New: Removing dead code results in worse optimization at -Os
@ 2023-02-10 10:25 theodort at inf dot ethz.ch
  2023-02-10 10:29 ` [Bug tree-optimization/108751] Removing dead code results in worse generated target code " theodort at inf dot ethz.ch
  2023-02-10 17:03 ` jakub at gcc dot gnu.org
  0 siblings, 2 replies; 3+ messages in thread
From: theodort at inf dot ethz.ch @ 2023-02-10 10:25 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108751

            Bug ID: 108751
           Summary: Removing dead code results in worse optimization at
                    -Os
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: theodort at inf dot ethz.ch
  Target Milestone: ---

I found this case where slight changes in the program that, in theory, should
not affect the output (or affect it trivially) cause the compiler to generate
worse code: 

static int a = 0;
static int b = 1;
int main() {
  char c = 0;
  for (;;) {
    if (c)
      break;
    for (; a; a++) { // a is 0, this loop is dead
      if (b) // this is always true
        continue;
      else
        return 2; // this program will never return 2
    }
    c = 10;
  }
  return 3;
}

compiled with gcc-trunk -Os: 

main:
.L2:
        movl    a(%rip), %eax
        testl   %eax, %eax
        je      .L6
        incl    %eax
        movl    %eax, a(%rip)
        jmp     .L2
.L6:
        movl    $3, %eax
        ret

Clearly, the compiler has figured out that "return 2;" will never be executed.
But if I remove it from the source:

static int a = 0;
static int b = 1;
int main() {
  char c = 0;
  for (;;) {
    if (c)
      break;
    for (; a; a++) {
      if (b)
        continue;
      //else
      // return 2;
    }
    c = 10;
  }
  return 3;
}

and compile with gcc-trunk -Os again:

main:
        movl    a(%rip), %eax
        xorl    %edx, %edx
.L2:
        testl   %eax, %eax
        jne     .L4
        testb   %dl, %dl
        je      .L7
        xorl    %eax, %eax
        movl    %eax, a(%rip)
        jmp     .L7
.L4:
        incl    %eax
        movb    $1, %dl
        jmp     .L2
.L7:
        movl    $3, %eax
        ret

the generated code is worse. 

The same thing happens if the return value is changed:

static int a = 0;
static int b = 1;
int main() {
  char c = 0;
  for (;;) {
    if (c)
      break;
    for (; a; a++) {
      if (b)
        continue;
      else
        return 2;
    }
    c = 10;
  }
  return 1; // changed from return 3
}

gcc-trunk -Os: 

main:
        movl    a(%rip), %eax
        xorl    %edx, %edx
.L2:
        testl   %eax, %eax
        jne     .L4
        testb   %dl, %dl
        je      .L7
        xorl    %eax, %eax
        movl    %eax, a(%rip)
        jmp     .L7
.L4:
        incl    %eax
        movb    $1, %dl
        jmp     .L2
.L7:
        movl    $1, %eax
        ret

and if we constant propagate b:

static int a = 0;
int main() {
  char c = 0;
  for (;;) {
    if (c)
      break;
    for (; a; a++) {
      if (1) // this was if (b) before
        continue;
      else
        return 2;
    }
    c = 10;
  }
  return 1;
}

gcc-trunk -Os:

main:
        movl    a(%rip), %eax
        xorl    %edx, %edx
.L2:
        testl   %eax, %eax
        jne     .L12
        testb   %dl, %dl
        je      .L7
        xorl    %eax, %eax
        movl    %eax, a(%rip)
        jmp     .L7
.L12:
        incl    %eax
        movb    $1, %dl
        jmp     .L2
.L7:
        movl    $1, %eax
        ret

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug tree-optimization/108751] Removing dead code results in worse generated target code at -Os
  2023-02-10 10:25 [Bug tree-optimization/108751] New: Removing dead code results in worse optimization at -Os theodort at inf dot ethz.ch
@ 2023-02-10 10:29 ` theodort at inf dot ethz.ch
  2023-02-10 17:03 ` jakub at gcc dot gnu.org
  1 sibling, 0 replies; 3+ messages in thread
From: theodort at inf dot ethz.ch @ 2023-02-10 10:29 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108751

--- Comment #1 from Theodoros Theodoridis <theodort at inf dot ethz.ch> ---
I am not sure if this qualifies as a "bug"/missed optimization but I'd be
interested in understanding why these changes cause such a difference. Thanks!

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug tree-optimization/108751] Removing dead code results in worse generated target code at -Os
  2023-02-10 10:25 [Bug tree-optimization/108751] New: Removing dead code results in worse optimization at -Os theodort at inf dot ethz.ch
  2023-02-10 10:29 ` [Bug tree-optimization/108751] Removing dead code results in worse generated target code " theodort at inf dot ethz.ch
@ 2023-02-10 17:03 ` jakub at gcc dot gnu.org
  1 sibling, 0 replies; 3+ messages in thread
From: jakub at gcc dot gnu.org @ 2023-02-10 17:03 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108751

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jakub at gcc dot gnu.org

--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
The code isn't smaller, which indeed for -Os is important, though many GIMPLE
decisions have to be made purely from heuristics about whether a particular
transformation typically results in smaller or larger code, because the sizes
can't be compared until much later, only estimated.
What happens in this testcase is that b is determined to be constant only
during IPA optimizations, ccp2 after IPA then propagates the value of 1 into b
users and before lim2 we have pretty much the same IL (if I rename ssa name
versions and temporary suffixes), the only difference between the case where b
has been discovered to be constant 1 after IPA and the case where it has been
determined to be 1 earlier is in the counts and branch probabilities:
-  <bb 2> [local count: 1018865821]:
+  <bb 2> [local count: 536870913]:
   goto <bb 5>; [100.00%]

-  <bb 3> [local count: 54876003]:
+  <bb 3> [local count: 536870911]:
   return 3;

-  <bb 4> [local count: 460874625]:
+  <bb 4> [local count: 264428955]:
   _2 = a.2_3 + 1;
   a = _2;

-  <bb 5> [local count: 997745539]:
+  <bb 5> [local count: 801299868]:
   a.2_3 = a;
   if (a.2_3 != 0)
-    goto <bb 4>; [94.50%]
+    goto <bb 4>; [33.00%]
   else
-    goto <bb 3>; [5.50%]
+    goto <bb 3>; [67.00%]
Later on, lim2 decides to perform invariant motion in the latter case and not
in the former based on the probabilities.
In the first assembly
        movl    %eax, a(%rip)
is done in an inner loop, while in the latter case it is done only after the
loop finishes.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-02-10 17:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-10 10:25 [Bug tree-optimization/108751] New: Removing dead code results in worse optimization at -Os theodort at inf dot ethz.ch
2023-02-10 10:29 ` [Bug tree-optimization/108751] Removing dead code results in worse generated target code " theodort at inf dot ethz.ch
2023-02-10 17:03 ` jakub at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).