public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be
@ 2023-10-18 14:10 theodort at inf dot ethz.ch
  2023-10-18 15:09 ` [Bug tree-optimization/111864] " pinskia at gcc dot gnu.org
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: theodort at inf dot ethz.ch @ 2023-10-18 14:10 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111864

            Bug ID: 111864
           Summary: [14 Regression] Dead Code Elimination Regression since
                    r14-4038-gb975c0dc3be
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: theodort at inf dot ethz.ch
  Target Milestone: ---

https://godbolt.org/z/PoYvWMG7T

Given the following code:

void foo(void);
static int d, g = 4, k;
static int *h, *j, *s;
static int **i = &h, **t = &s;
static int ***l = &i;
static unsigned short *m, *n;
static unsigned short **o = &n, **u = &m;
void __assert_fail() __attribute__((__noreturn__));
static short(a)(short b, int c) { return b << c; }
static int(e)(int f) {
    if (!(((f) >= 0) && ((f) <= 0))) {
        __builtin_unreachable();
    }
    return d;
}
static int ***p(int **q) {
    j = *q;
    return &i;
}
static int *r() {
    short v = a((g && p(t)) <= 0, 5);
    e(v);
    if (s) o = u;
    if (o == &m || o == &n)
        ;
    else
        __assert_fail();
    return &k;
}
int main() {
    **l = r();
    if (h)
        ;
    else
        __assert_fail();
    if (j == &g == 0)
        ;
    else
        __assert_fail();
    if (o == &m || o == &n)
        ;
    else
        foo();
    ;
}

gcc-trunk -O2 does not eliminate the call to foo:

main:
        subq    $8, %rsp
        movq    s(%rip), %rax
        movq    i(%rip), %rdx
        movq    %rax, j(%rip)
        testq   %rax, %rax
        je      .L17
        movq    $k, (%rdx)
        cmpq    $0, h(%rip)
        movq    $m, o(%rip)
        je      .L5
        cmpq    $g, %rax
        je      .L5
        movl    $m, %eax
.L10:
        cmpq    $n, %rax
        je      .L12
        cmpq    $m, %rax
        je      .L12
        call    foo
.L12:
        xorl    %eax, %eax
        addq    $8, %rsp
        ret
.L17:
        movq    o(%rip), %rax
        cmpq    $m, %rax
        je      .L18
        cmpq    $n, %rax
        jne     .L5
        movq    $k, (%rdx)
        cmpq    $0, h(%rip)
        jne     .L10
.L5:
        xorl    %eax, %eax
        call    __assert_fail
.L18:
        movq    $k, (%rdx)
        cmpq    $0, h(%rip)
        jne     .L12
        jmp     .L5

gcc-13.2.0 -O2 eliminates the call to foo:

main:
        movq    s(%rip), %rax
        movq    i(%rip), %rdx
        movq    %rax, j(%rip)
        testq   %rax, %rax
        je      .L2
        movq    $k, (%rdx)
        cmpq    $0, h(%rip)
        movq    $m, o(%rip)
        je      .L4
        cmpq    $g, %rax
        je      .L4
.L8:
        xorl    %eax, %eax
        ret
.L2:
        movq    o(%rip), %rax
        cmpq    $m, %rax
        je      .L5
        cmpq    $n, %rax
        jne     .L4
.L5:
        movq    $k, (%rdx)
        cmpq    $0, h(%rip)
        jne     .L8
.L4:
        pushq   %rax
        xorl    %eax, %eax
        call    __assert_fail

Bisects to r14-4038-gb975c0dc3be

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug tree-optimization/111864] [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be
  2023-10-18 14:10 [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be theodort at inf dot ethz.ch
@ 2023-10-18 15:09 ` pinskia at gcc dot gnu.org
  2023-10-18 16:15 ` [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression pinskia at gcc dot gnu.org
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-10-18 15:09 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111864

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |missed-optimization
   Target Milestone|---                         |14.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression
  2023-10-18 14:10 [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be theodort at inf dot ethz.ch
  2023-10-18 15:09 ` [Bug tree-optimization/111864] " pinskia at gcc dot gnu.org
@ 2023-10-18 16:15 ` pinskia at gcc dot gnu.org
  2024-03-07 21:04 ` law at gcc dot gnu.org
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-10-18 16:15 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111864

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
     Ever confirmed|0                           |1
   Last reconfirmed|                            |2023-10-18
           Keywords|                            |needs-bisection
             Status|UNCONFIRMED                 |NEW
   Target Milestone|14.0                        |12.4
            Summary|[14 Regression] Dead Code   |[12/13/14 Regression] Dead
                   |Elimination Regression      |Code Elimination Regression
                   |since r14-4038-gb975c0dc3be |

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Confirmed but this is just happened to be a side effect.

In forwprop1 we can change:
```
  # iftmp.5_11 = PHI <1(3), 0(4)>
  _4 = iftmp.5_11 <= 0;
  _5 = (short int) _4;
  _25 = (int) _4;
  _26 = _25 << 5;
  _29 = (short int) _4;
  _27 = _29 << 5;
  _6 = (int) _27;
  if (_27 != 0)
```

Into just:
```
  # iftmp.5_11 = PHI <1(3), 0(4)>
  if (iftmp.5_11 <= 0)
```

And then ethread is able to thread through that bb.

You get the same missed optimization with removing the call to a.
that is changing:
```
    short v = a((g && p(t)) <= 0, 5);
```
to
```
    short v = ((g && p(t)) <= 0);
```

Which then becomes a regression between GCC 11 and GCC 12.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression
  2023-10-18 14:10 [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be theodort at inf dot ethz.ch
  2023-10-18 15:09 ` [Bug tree-optimization/111864] " pinskia at gcc dot gnu.org
  2023-10-18 16:15 ` [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression pinskia at gcc dot gnu.org
@ 2024-03-07 21:04 ` law at gcc dot gnu.org
  2024-03-09  5:31 ` law at gcc dot gnu.org
  2024-03-15 13:25 ` aldyh at gcc dot gnu.org
  4 siblings, 0 replies; 6+ messages in thread
From: law at gcc dot gnu.org @ 2024-03-07 21:04 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111864

Jeffrey A. Law <law at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |law at gcc dot gnu.org
           Priority|P3                          |P2

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression
  2023-10-18 14:10 [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be theodort at inf dot ethz.ch
                   ` (2 preceding siblings ...)
  2024-03-07 21:04 ` law at gcc dot gnu.org
@ 2024-03-09  5:31 ` law at gcc dot gnu.org
  2024-03-15 13:25 ` aldyh at gcc dot gnu.org
  4 siblings, 0 replies; 6+ messages in thread
From: law at gcc dot gnu.org @ 2024-03-09  5:31 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111864

Jeffrey A. Law <law at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |aldyh at gcc dot gnu.org

--- Comment #2 from Jeffrey A. Law <law at gcc dot gnu.org> ---
It almost looks like a costing issue.  The threaders find opportunities to
thread all the incoming edges in the key block to the path which avoids the
call..  But all the paths get rejected.  


This is the  key block:

;;   basic block 11, loop depth 0, count 976284897 (estimated locally, freq
0.9092), maybe hot
;;    prev block 10, next block 12, flags: (NEW, REACHABLE, VISITED)
;;    pred:       10 [99.8% (guessed)]  count:225266786 (estimated locally,
freq 0.2098) (TRUE_VALUE,EXECUTABLE)
;;                14 [100.0% (guessed)]  count:751018112 (estimated locally,
freq 0.6994) (TRUE_VALUE,EXECUTABLE)
  # o.10_11 = PHI <&m(10), o.10_28(14)>
  _17 = o.10_11 == &n;
  _20 = o.10_11 == &m;
  _27 = _20 | _17;
  if (_27 != 0)
    goto <bb 6>; [58.87%]
  else
    goto <bb 12>; [41.13%]

It's pretty obvious that 10->11 can thread to 6.  If we look at the other
incoming edge we need o.10_28 which comes from bb14 with the value &n.  So that
path should be 14->10->11 threading to 6.

But they get rejected during threadfull2.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression
  2023-10-18 14:10 [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be theodort at inf dot ethz.ch
                   ` (3 preceding siblings ...)
  2024-03-09  5:31 ` law at gcc dot gnu.org
@ 2024-03-15 13:25 ` aldyh at gcc dot gnu.org
  4 siblings, 0 replies; 6+ messages in thread
From: aldyh at gcc dot gnu.org @ 2024-03-15 13:25 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111864

--- Comment #3 from Aldy Hernandez <aldyh at gcc dot gnu.org> ---
(In reply to Jeffrey A. Law from comment #2)
> It almost looks like a costing issue.  The threaders find opportunities to
> thread all the incoming edges in the key block to the path which avoids the
> call..  But all the paths get rejected.  
> 
> 
> This is the  key block:
> 
> ;;   basic block 11, loop depth 0, count 976284897 (estimated locally, freq
> 0.9092), maybe hot
> ;;    prev block 10, next block 12, flags: (NEW, REACHABLE, VISITED)
> ;;    pred:       10 [99.8% (guessed)]  count:225266786 (estimated locally,
> freq 0.2098) (TRUE_VALUE,EXECUTABLE)
> ;;                14 [100.0% (guessed)]  count:751018112 (estimated locally,
> freq 0.6994) (TRUE_VALUE,EXECUTABLE)
>   # o.10_11 = PHI <&m(10), o.10_28(14)>
>   _17 = o.10_11 == &n;
>   _20 = o.10_11 == &m;
>   _27 = _20 | _17;
>   if (_27 != 0)
>     goto <bb 6>; [58.87%]
>   else
>     goto <bb 12>; [41.13%]
> 
> It's pretty obvious that 10->11 can thread to 6.  If we look at the other
> incoming edge we need o.10_28 which comes from bb14 with the value &n.  So
> that path should be 14->10->11 threading to 6.
> 
> But they get rejected during threadfull2.

In order for threadfull2 to thread 10->11->6, it would have to handle pointer
equivalences, but the new threader doesn't currently handle them.  In VRP we
are able to handle equivs through the pointer_equiv_analyzer class which
pushes/pops state.  It only works when traversing in dominator order, which I
suppose is technically the case when going down a path.

So...I think you could wire pointer_equiv_analyzer to the new threader and get
this, though it may need to be also taught about PHIs.  The class is very
simplistic, and was only a stop gap until we got pointer ranges (with points-to
info).

Question though, why didn't the old threader get this?  I see bb11 looks
exactly the same in DOM3, and the old threader does handle pointer equivalences
through its scoped tables.  May be the IL is too complicated?

If we want to fix this in this release, I think we could do it with
pointer_equiv_analyzer, but for future releases we should be able to get it for
free when pranges are implemented.  Half the work is already done...let's see
how far I get considering I'll be going on paternity leave for a few months
this year.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-03-15 13:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-18 14:10 [Bug tree-optimization/111864] New: [14 Regression] Dead Code Elimination Regression since r14-4038-gb975c0dc3be theodort at inf dot ethz.ch
2023-10-18 15:09 ` [Bug tree-optimization/111864] " pinskia at gcc dot gnu.org
2023-10-18 16:15 ` [Bug tree-optimization/111864] [12/13/14 Regression] Dead Code Elimination Regression pinskia at gcc dot gnu.org
2024-03-07 21:04 ` law at gcc dot gnu.org
2024-03-09  5:31 ` law at gcc dot gnu.org
2024-03-15 13:25 ` aldyh at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).