public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
@ 2023-10-29 22:39 zhendong.su at inf dot ethz.ch
  2023-10-29 22:41 ` [Bug tree-optimization/112282] [14 Regression] " pinskia at gcc dot gnu.org
                   ` (18 more replies)
  0 siblings, 19 replies; 20+ messages in thread
From: zhendong.su at inf dot ethz.ch @ 2023-10-29 22:39 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

            Bug ID: 112282
           Summary: wrong code (generated code hangs) at -O3 on
                    x86_64-linux-gnu
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: zhendong.su at inf dot ethz.ch
  Target Milestone: ---

The code is very tough to reduce, but it appears to be a recent regression. 

Although the reduced code is still quite hairy, it does seem valid.

Compiler Explorer: https://godbolt.org/z/TxGdP87EW


[621] % gcctk -v
Using built-in specs.
COLLECT_GCC=gcctk
COLLECT_LTO_WRAPPER=/local/suz-local/software/local/gcc-trunk/libexec/gcc/x86_64-pc-linux-gnu/14.0.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../gcc-trunk/configure --disable-bootstrap
--enable-checking=yes --prefix=/local/suz-local/software/local/gcc-trunk
--enable-sanitizers --enable-languages=c,c++ --disable-werror --enable-multilib
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 14.0.0 20231029 (experimental) (GCC) 
[622] % 
[622] % gcctk -O2 small.c; ./a.out
[623] % 
[623] % gcctk -O3 small.c
[624] % timeout -s 9 10 ./a.out
Killed
[625] % cat small.c
int printf(const char *, ...);
void __assert_fail();
int a, g, h, i, v, w = 2, x, y, ab, ac, ad, ae, af, ag;
static int f, j, m, n, p, r, u, aa;
struct b {
  int c : 20;
  int d : 20;
  int e : 10;
};
static struct b l, o, q = {3, 3, 5};
int s(int z) {
  struct b ah;
  int ai = 1, aj[7] = {1, 1, 1, 1, 1, 1, 1};
ak:
  for (u = -22; u < 2; ++u) {
    struct b al[8] = {{2, 7, 9}, {8, 7, 1}, {2, 7, 9}, {8, 7, 1}, {2, 7, 9},
{8, 7, 1}, {2, 7, 9}};
    y = z = 0;
    for (; z < 2; z++) {
      int am[18], k;
      ab = ac = 0;
      for (; ac < 1; ac++)
        for (k = 0; k < 9; k++)
          am[k] = 0;
      n = 0;
      while (1) {
        v = u < 0 || a;
        h = z < ~u && 4 & q.c;
        if ((aa <= l.c) > q.d && p)
          return o.c;
        if (w)
          break;
        return q.e;
      }
      a = j;
    }
  }
  for (x = 0; x < 2; x++) {
    struct b an = {1, 8, 4};
    int ao[28] = {5, 0, 0, 9, 0, 3, 0, 5, 0, 0, 9, 0, 3, 0, 5, 0, 0, 9, 0, 3,
0, 5, 0, 0, 9, 0, 3, 0};
    if (q.e) {
      int ap = ai || l.c + q.c, aq = q.d, ar = p & f;
      q.d = q.d || ar || ap;
      p = 0;
      if (!j && ai)
        goto as;
      if (q.d) {
        printf("", l);
        q.d = f >> j;
      }
      p = l.c = aq;
      an = q;
    } else {
      int at[12][1] = {{9}, {9}, {5}, {9}, {9}, {5}, {9}, {9}, {5}, {9}, {9},
{5}};
      struct b au;
      if (o.c)
        aa = ah.e;
      if (an.d)
        ah.e = (j & (aa * m)) ^ au.d;
      o.c = m + aa;
      int av = o.c || 0, aw = ai || q.c & l.c, ax = n;
      if (q.e < ai)
        q = an;
      if (r)
        break;
      ai = aw - av;
      an.e = 0;
      if (ai) {
        an.e = l.c || 0;
        f = q.c;
        ah.e = l.c % q.d;
        q.c = au.e;
        if ((q.d && q.c) || ah.e)
          __assert_fail();
        q.c = 0;
        if (au.d > m || ah.e)
          w = au.c | (n & ah.c);
      as:
        ae = af = ah.c;
        int ay = au.d & q.e & au.c || o.c, az = 0 || o.c, ba = m & ah.d;
        if (n)
          au.c = au.e = (q.e || ah.d) ^ (o.c + (az / au.e));
        n = au.c || au.e;
        if (ba) {
          printf("", ax);
          x = q.e | m;
          continue;
        }
        m = ay;
        n = printf("", au);
      }
      if (ah.d)
        o.c = l.c & o.c & q.c;
      if (q.d)
        __assert_fail();
      printf("", an);
      printf("", q);
      printf("", au);
      if (ah.e)
        while (u++) {
          struct b al[7] = {{7, 9, 8}, {7, 1, 2}, {7, 9, 8}, {7, 1, 2}, {7, 9,
8}, {7, 1, 2}, {7, 9, 0}};
          if (an.d) {
            int d[8] = {0, 1, 0, 1, 0, 1, 0, 1};
            if (ad)
              goto ak;
            while (ag)
              g = an.d = i = m;
            f = j;
          }
          n++;
        }
      f = q.d;
    }
    if (l.c && m) {
      int d[7] = {1, 0, 1, 0, 1, 0, 1};
      if (x)
        h = an.d;
      else
        g = 0;
    }
  }
  int bb = (q.d ^ ah.c) | aa | (q.e & q.c) | (f & ah.d);
  if (bb)
    return x;
  return 0;
}
int main() {
  j = 1;
  s(0);
  return 0;
}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
@ 2023-10-29 22:41 ` pinskia at gcc dot gnu.org
  2023-10-29 22:51 ` pinskia at gcc dot gnu.org
                   ` (17 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-10-29 22:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|wrong code (generated code  |[14 Regression] wrong code
                   |hangs) at -O3 on            |(generated code hangs) at
                   |x86_64-linux-gnu            |-O3 on x86_64-linux-gnu
   Target Milestone|---                         |14.0
           Keywords|                            |wrong-code

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
  2023-10-29 22:41 ` [Bug tree-optimization/112282] [14 Regression] " pinskia at gcc dot gnu.org
@ 2023-10-29 22:51 ` pinskia at gcc dot gnu.org
  2023-10-30  0:40 ` sjames at gcc dot gnu.org
                   ` (16 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-10-29 22:51 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|                            |2023-10-29
     Ever confirmed|0                           |1
             Status|UNCONFIRMED                 |NEW

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Confirmed. Looks related to bitfields and ifcvt.
GCC 13 didn't vectorize the code and using -fno-tree-vectorize fixes the
infinite loop.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
  2023-10-29 22:41 ` [Bug tree-optimization/112282] [14 Regression] " pinskia at gcc dot gnu.org
  2023-10-29 22:51 ` pinskia at gcc dot gnu.org
@ 2023-10-30  0:40 ` sjames at gcc dot gnu.org
  2023-10-30  0:41 ` sjames at gcc dot gnu.org
                   ` (15 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: sjames at gcc dot gnu.org @ 2023-10-30  0:40 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Sam James <sjames at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |sjames at gcc dot gnu.org

--- Comment #2 from Sam James <sjames at gcc dot gnu.org> ---
I will have a go at a bisect.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (2 preceding siblings ...)
  2023-10-30  0:40 ` sjames at gcc dot gnu.org
@ 2023-10-30  0:41 ` sjames at gcc dot gnu.org
  2023-10-30  1:28 ` sjames at gcc dot gnu.org
                   ` (14 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: sjames at gcc dot gnu.org @ 2023-10-30  0:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #3 from Sam James <sjames at gcc dot gnu.org> ---
I can't reproduce with 20231022, so I'll wait until today's snapshot finishes.
That gives us a nice narrow window anyway...

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (3 preceding siblings ...)
  2023-10-30  0:41 ` sjames at gcc dot gnu.org
@ 2023-10-30  1:28 ` sjames at gcc dot gnu.org
  2023-10-30  1:48 ` pinskia at gcc dot gnu.org
                   ` (13 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: sjames at gcc dot gnu.org @ 2023-10-30  1:28 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #4 from Sam James <sjames at gcc dot gnu.org> ---
(In reply to Sam James from comment #3)
> I can't reproduce with 20231022, so I'll wait until today's snapshot
> finishes. That gives us a nice narrow window anyway...

Sorry, I was wrong there. -ftrivial-auto-var-init=zero (which I've been playing
with locally) was masking it.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (4 preceding siblings ...)
  2023-10-30  1:28 ` sjames at gcc dot gnu.org
@ 2023-10-30  1:48 ` pinskia at gcc dot gnu.org
  2023-10-30  1:53 ` pinskia at gcc dot gnu.org
                   ` (12 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-10-30  1:48 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #5 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Hmm it is not ah:
struct b ah = {};
still causes the infinite loop.



But initializing au allows it to work. That is:
struct b au = {};

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (5 preceding siblings ...)
  2023-10-30  1:48 ` pinskia at gcc dot gnu.org
@ 2023-10-30  1:53 ` pinskia at gcc dot gnu.org
  2023-10-30  3:49 ` [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309 sjames at gcc dot gnu.org
                   ` (11 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-10-30  1:53 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #6 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
(In reply to Andrew Pinski from comment #5)
> But doing intializing au allows it work. that is:
> struct b au = {};

But I put a `__builtin_trap();` any place where au might be used
uninitialized, and we don't reach those sections of code.  So ....

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (6 preceding siblings ...)
  2023-10-30  1:53 ` pinskia at gcc dot gnu.org
@ 2023-10-30  3:49 ` sjames at gcc dot gnu.org
  2023-10-30  9:13 ` tnfchris at gcc dot gnu.org
                   ` (10 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: sjames at gcc dot gnu.org @ 2023-10-30  3:49 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Sam James <sjames at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|[14 Regression] wrong code  |[14 Regression] wrong code
                   |(generated code hangs) at   |(generated code hangs) at
                   |-O3 on x86_64-linux-gnu     |-O3 on x86_64-linux-gnu
                   |                            |since
                   |                            |r14-4777-g88c27070c25309
                 CC|                            |tnfchris at gcc dot gnu.org

--- Comment #7 from Sam James <sjames at gcc dot gnu.org> ---
88c27070c253094fb7e366583fbe09cec2371e8b is the first bad commit
commit 88c27070c253094fb7e366583fbe09cec2371e8b
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Fri Oct 20 08:09:45 2023 +0100

    ifcvt: Support bitfield lowering of multiple-exit loops

i.e. r14-4777-g88c27070c25309

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (7 preceding siblings ...)
  2023-10-30  3:49 ` [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309 sjames at gcc dot gnu.org
@ 2023-10-30  9:13 ` tnfchris at gcc dot gnu.org
  2023-10-30 14:21 ` rguenth at gcc dot gnu.org
                   ` (9 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: tnfchris at gcc dot gnu.org @ 2023-10-30  9:13 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #8 from Tamar Christina <tnfchris at gcc dot gnu.org> ---
Thanks for the report, that's very odd..

It looks like loop control is broken and `u` never gets incremented.  It's even
more strange since the structures getting lowered are both unused so should not
have had any effect at all..

will take a look.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (8 preceding siblings ...)
  2023-10-30  9:13 ` tnfchris at gcc dot gnu.org
@ 2023-10-30 14:21 ` rguenth at gcc dot gnu.org
  2023-10-30 20:21 ` avieira at gcc dot gnu.org
                   ` (8 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-10-30 14:21 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P3                          |P1
            Version|unknown                     |14.0

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (9 preceding siblings ...)
  2023-10-30 14:21 ` rguenth at gcc dot gnu.org
@ 2023-10-30 20:21 ` avieira at gcc dot gnu.org
  2023-10-31  9:12 ` avieira at gcc dot gnu.org
                   ` (7 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: avieira at gcc dot gnu.org @ 2023-10-30 20:21 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

avieira at gcc dot gnu.org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |avieira at gcc dot gnu.org

--- Comment #9 from avieira at gcc dot gnu.org ---
So I had a look at this and this is as far as I got.
It seems to get stuck in the 'for (u = -22; u < 2; ++u)' loop. It looks like
the loop IV never gets updated and it keeps looping.

Looking at the codegen it seems that cunroll decides to remove A LOT of code
and there is now:
bb 4:
..
# ivtmp_1055 = PHI <ivtmp_1056(24), 24(2)>
..
bb 24:
...
ivtmp_1056 = ivtmp_1055 - 1;
  goto <bb 4>; [100.00%]

I've not yet been able to figure out why this happens, the dumps weren't very
helpful. So I tried -fdisable-tree-cunroll, it was still failing. So I looked
at the dumps to try and see what was turning this loop into an infinite loop
and vrp2 shows me:
Global Exported: _19 = [irange] int [-21, 0]
Folding predicate _19 != 2 to 1

and in the dump before vrp2 we see:
 <bb 4> [local count: 7354175]:
  # u.13_485 = PHI <_19(105), -22(3)>
  # u_lsm.72_510 = PHI <_19(105), _497(D)(3)>
  # u_lsm_flag.73_235 = PHI <1(105), 0(3)>
...
  <bb 27> [local count: 6634488]:
  al ={v} {CLOBBER(eol)};
  _19 = u.13_485 + 1;
  if (_19 != 2)
    goto <bb 105>; [96.34%]
  else
    goto <bb 28>; [3.66%]

  <bb 105> [local count: 6391666]:
  goto <bb 4>; [100.00%]

Something to point out here: u_lsm.72_510 seems odd. It is used to set
global 'u', but it's initialized with _497(D), which is undefined... So that
itself seems wrong to me too... I'll try and find out what's causing that
codegen next. Maybe that can explain why the irange for _19 is so wrong here.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (10 preceding siblings ...)
  2023-10-30 20:21 ` avieira at gcc dot gnu.org
@ 2023-10-31  9:12 ` avieira at gcc dot gnu.org
  2023-10-31  9:12 ` avieira at gcc dot gnu.org
                   ` (6 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: avieira at gcc dot gnu.org @ 2023-10-31  9:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #10 from avieira at gcc dot gnu.org ---
So I had a look at that u_lsm.72_510 variable and it's only undefined if we
don't loop, but if we don't loop then u_lsm_flag is set to 0 and we don't use
u_lsm. So it's OK. I also checked and the early exits are covered by the same
mechanism.
So really the question is, why does irange think the range is [-21, 0]. Anyone
have an idea of how to debug this?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (11 preceding siblings ...)
  2023-10-31  9:12 ` avieira at gcc dot gnu.org
@ 2023-10-31  9:12 ` avieira at gcc dot gnu.org
  2023-11-14 14:48 ` rguenth at gcc dot gnu.org
                   ` (5 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: avieira at gcc dot gnu.org @ 2023-10-31  9:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #11 from avieira at gcc dot gnu.org ---
So I had a look at that u_lsm.72_510 variable and it's only undefined if we
don't loop, but if we don't loop then u_lsm_flag is set to 0 and we don't use
u_lsm. So it's OK. I also checked and the early exits are covered by the same
mechanism.
So really the question is, why does irange think the range is [-21, 0]. Anyone
have an idea of how to debug this?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (12 preceding siblings ...)
  2023-10-31  9:12 ` avieira at gcc dot gnu.org
@ 2023-11-14 14:48 ` rguenth at gcc dot gnu.org
  2023-11-15 10:36 ` rguenth at gcc dot gnu.org
                   ` (4 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-11-14 14:48 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #12 from Richard Biener <rguenth at gcc dot gnu.org> ---
+  _ifc__797 = al[0].D.2794;
+  _ifc__796 = BIT_INSERT_EXPR <_ifc__797, 7, 0 (20 bits)>;
+  _ifc__798 = BIT_INSERT_EXPR <_ifc__796, 9, 32 (20 bits)>;
+  _ifc__800 = BIT_INSERT_EXPR <_ifc__798, 8, 52 (10 bits)>;
+  al[0].D.2794 = _ifc__800;

from

  al[0].c = 2;
  al[0].d = 7;
  al[0].e = 9;

which looks OK.  We then vectorize the first part:

  vect__ifc__797.86_709 = MEM <vector(2) long unsigned int> [(struct b *)&al];
  vect__ifc__797.87_708 = MEM <vector(2) long unsigned int> [(struct b *)&al +
16B];
  vect__ifc__797.88_707 = MEM <vector(2) long unsigned int> [(struct b *)&al +
32B];
  vect_patt_751.89_706 = vect__ifc__797.86_709 & { 18446744073708503040,
18446744073708503040 };
...
  _ifc__830 = BIT_INSERT_EXPR <_ifc__828, 2, 52 (10 bits)>;
  MEM <vector(2) long unsigned int> [(struct b *)&al] = vect_patt_746.94_691;
  MEM <vector(2) long unsigned int> [(struct b *)&al + 16B] =
vect_patt_746.94_690;
  MEM <vector(2) long unsigned int> [(struct b *)&al + 32B] =
vect_patt_746.94_689;
  _ifc__833 = al[6].D.2794;
  _ifc__832 = BIT_INSERT_EXPR <_ifc__833, 7, 0 (20 bits)>;
  _ifc__834 = BIT_INSERT_EXPR <_ifc__832, 9, 32 (20 bits)>;
  _ifc__836 = BIT_INSERT_EXPR <_ifc__834, 0, 52 (10 bits)>;
  al[6].D.2794 = _ifc__836;

leaving around the last element unvectorized.  But later this is all
dead code it seems?  IL can be simplified with -fno-unswitch-loops.

Btw, I see we lower bitfields in a loop we are not if-converting fully
in the end - was that desired?  Looks like so, this is a multi-exit loop
which we don't if-convert but lower bitfields in?  It has complicated
control flow that would make early-break vectorization not successful
at least.

Not sure what goes wrong.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (13 preceding siblings ...)
  2023-11-14 14:48 ` rguenth at gcc dot gnu.org
@ 2023-11-15 10:36 ` rguenth at gcc dot gnu.org
  2023-11-15 10:56 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-11-15 10:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Assignee|unassigned at gcc dot gnu.org      |rguenth at gcc dot gnu.org
             Status|NEW                         |ASSIGNED

--- Comment #13 from Richard Biener <rguenth at gcc dot gnu.org> ---
So I can cut off bitfield lowering completely, the important part is that we
version the loop and thus try to BB vectorize the loop header (yeah, we don't
BB vectorize the whole body - or rather, we think the header _is_ the full
body).

But a key to the failure seems to be that we BB vectorize the unrolled

      for (; ac < 1; ac++)
        for (k = 0; k < 9; k++)
          am[k] = 0;

and doing that not from SLP but from loop vectorization of if-conversion
versioned (but otherwise unchanged) loop.

It's also solely triggered by unrolling the 'z' loop.  Disabling all
following passes will still reproduce it.  The region VN triggered by
ifconversion/vectorization/unrolling isn't needed either (I disabled it).

Maybe PR111572 is related (but it doesn't change unrolling and disabling
ch_vect doesn't avoid the problem).

Unrolling does

Analyzing # of iterations of loop 2
  exit condition [23, + , 4294967295] != 0
  bounds on difference of bases: -23 ... -23
  result:
    # of iterations 23, bounded by 23
Removed pointless exit: if (ivtmp_1055 != 0)

because we computed loop->nb_iterations_upper_bound to 21:

Statement (exit)if (ivtmp_1055 != 0)
 is executed at most 23 (bounded by 23) + 1 times in loop 2.
Induction variable (int) 21 + -1 * iteration does not wrap in statement _4 =
~u.13_485;
 in loop 2.
Statement _4 = ~u.13_485;
 is executed at most 21 (bounded by 21) + 1 times in loop 2.
Induction variable (int) -21 + 1 * iteration does not wrap in statement _19 =
u.13_485 + 1;
 in loop 2.
Statement _19 = u.13_485 + 1;
 is executed at most 23 (bounded by 23) + 1 times in loop 2.
Reducing loop iteration estimate by 1; undefined statement must be executed at
the last iteration.

we're SCEV analyzing _4 here, computing {21, +, -1}_2 and VRP1 computed
[irange] int [0, +INF] somehow for it.  u.13_485 has a global range of
[-2147483647, 1], so obviously it must infer something else here somehow and
wrongly so?

That very same def also appears with plain -O3.

Global Exported: _4 = [irange] int [0, +INF]

Hmm.  We have

Folding statement: _64 = ~u.13_20;
Global Exported: _64 = [irange] int [-2, -1] MASK 0x1 VALUE 0xfffffffe

Folding statement: _4 = ~u.13_20;
Global Exported: _4 = [irange] int [0, +INF]

but the if-conversion pass hoists that before the .LOOP_VECTORIZED

properly resetting flow-sensitive info on stmts hoisted fixes this.

Meh.

Premature duplicate transforms ...

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (14 preceding siblings ...)
  2023-11-15 10:36 ` rguenth at gcc dot gnu.org
@ 2023-11-15 10:56 ` rguenth at gcc dot gnu.org
  2023-11-15 12:12 ` cvs-commit at gcc dot gnu.org
                   ` (2 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-11-15 10:56 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #14 from Richard Biener <rguenth at gcc dot gnu.org> ---
Well, but then I still question the causing rev. - we are only performing
bitfield lowering but not any if-conversion.  IMHO the rev is a hack.

Not sure if I want to add another hack to fix this miscompile?

The change as-is behaves totally un-intuitively, we can't easily detect
half-if-converted cases and require loop vectorization like we can when
.MASK_LOAD/STORE appear.

Testing a patch.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (15 preceding siblings ...)
  2023-11-15 10:56 ` rguenth at gcc dot gnu.org
@ 2023-11-15 12:12 ` cvs-commit at gcc dot gnu.org
  2023-11-15 12:12 ` rguenth at gcc dot gnu.org
  2023-11-16  7:05 ` cvs-commit at gcc dot gnu.org
  18 siblings, 0 replies; 20+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-11-15 12:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #15 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Richard Biener <rguenth@gcc.gnu.org>:

https://gcc.gnu.org/g:5cb8610d3a8f8849a4bb6a0f81a2934484d6a15a

commit r14-5493-g5cb8610d3a8f8849a4bb6a0f81a2934484d6a15a
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Nov 15 12:24:46 2023 +0100

    tree-optimization/112282 - wrong-code with ifcvt hoisting

    The following avoids hoisting of invariants from conditionally
    executed parts of an if-converted loop.  That now makes a difference
    since we perform bitfield lowering even when we do not actually
    if-convert the loop.  if-conversion deals with resetting flow-sensitive
    info when necessary already.

            PR tree-optimization/112282
            * tree-if-conv.cc (ifcvt_hoist_invariants): Only hoist from
            the loop header.

            * gcc.dg/torture/pr112282.c: New testcase.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (16 preceding siblings ...)
  2023-11-15 12:12 ` cvs-commit at gcc dot gnu.org
@ 2023-11-15 12:12 ` rguenth at gcc dot gnu.org
  2023-11-16  7:05 ` cvs-commit at gcc dot gnu.org
  18 siblings, 0 replies; 20+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-11-15 12:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |RESOLVED
         Resolution|---                         |FIXED

--- Comment #16 from Richard Biener <rguenth at gcc dot gnu.org> ---
Fixed.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309
  2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
                   ` (17 preceding siblings ...)
  2023-11-15 12:12 ` rguenth at gcc dot gnu.org
@ 2023-11-16  7:05 ` cvs-commit at gcc dot gnu.org
  18 siblings, 0 replies; 20+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-11-16  7:05 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112282

--- Comment #17 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Richard Biener <rguenth@gcc.gnu.org>:

https://gcc.gnu.org/g:31bf21c78029434b7515a94477ce3565bff0743f

commit r14-5517-g31bf21c78029434b7515a94477ce3565bff0743f
Author: Richard Biener <rguenther@suse.de>
Date:   Thu Nov 16 08:03:55 2023 +0100

    tree-optimization/112282 - fix testcase

    Avoid requiring a glibc specific symbol.

            PR tree-optimization/112282
            * gcc.dg/torture/pr112282.c: Do not use __assert_fail.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2023-11-16  7:05 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-29 22:39 [Bug tree-optimization/112282] New: wrong code (generated code hangs) at -O3 on x86_64-linux-gnu zhendong.su at inf dot ethz.ch
2023-10-29 22:41 ` [Bug tree-optimization/112282] [14 Regression] " pinskia at gcc dot gnu.org
2023-10-29 22:51 ` pinskia at gcc dot gnu.org
2023-10-30  0:40 ` sjames at gcc dot gnu.org
2023-10-30  0:41 ` sjames at gcc dot gnu.org
2023-10-30  1:28 ` sjames at gcc dot gnu.org
2023-10-30  1:48 ` pinskia at gcc dot gnu.org
2023-10-30  1:53 ` pinskia at gcc dot gnu.org
2023-10-30  3:49 ` [Bug tree-optimization/112282] [14 Regression] wrong code (generated code hangs) at -O3 on x86_64-linux-gnu since r14-4777-g88c27070c25309 sjames at gcc dot gnu.org
2023-10-30  9:13 ` tnfchris at gcc dot gnu.org
2023-10-30 14:21 ` rguenth at gcc dot gnu.org
2023-10-30 20:21 ` avieira at gcc dot gnu.org
2023-10-31  9:12 ` avieira at gcc dot gnu.org
2023-10-31  9:12 ` avieira at gcc dot gnu.org
2023-11-14 14:48 ` rguenth at gcc dot gnu.org
2023-11-15 10:36 ` rguenth at gcc dot gnu.org
2023-11-15 10:56 ` rguenth at gcc dot gnu.org
2023-11-15 12:12 ` cvs-commit at gcc dot gnu.org
2023-11-15 12:12 ` rguenth at gcc dot gnu.org
2023-11-16  7:05 ` cvs-commit at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).