From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id 4DAE4385829C; Thu, 14 Mar 2024 20:52:59 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4DAE4385829C
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1710449579;
	bh=nhYbfOInCamqvGS8qkjbCY9MIk/eixpun1kOY3DsyV0=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=S19knBfRKPBrw3wGahgVTemmiFQodJXtUWRC/90QEsbRmKCc4/J4/xlEGkDadKXov
	 9/acqVMRje2W7nPKSBxZ2wTVGXLoNxb05sprToevdkyFKU1BRjaNjO/eudBh4T55OG
	 oLPidw1sR7GioUpvwBzjyj6Z85mPOA+U77L43viw=
From: "tnfchris at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/114339] [14 regression] Tor miscompiled with
 -O2 -mavx -fno-vect-cost-model since r14-6822
Date: Thu, 14 Mar 2024 20:52:58 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: tree-optimization
X-Bugzilla-Version: 14.0
X-Bugzilla-Keywords: wrong-code
X-Bugzilla-Severity: normal
X-Bugzilla-Who: tnfchris at gcc dot gnu.org
X-Bugzilla-Status: NEW
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P1
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: 14.0
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-114339-4-1bNBGXs6oq@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-114339-4@http.gcc.gnu.org/bugzilla/>
References: <bug-114339-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D114339
--- Comment #6 from Tamar Christina <tnfchris at gcc dot gnu.org> ---
vectorizer generates:

  mask_patt_21.19_58 =3D vect_perm_even_49 >=3D vect_cst__57;
  mask_patt_21.19_59 =3D vect_perm_even_55 >=3D vect_cst__57;
  vexit_reduc_63 =3D mask_patt_21.19_58 | mask_patt_21.19_59;
  if (vexit_reduc_63 !=3D { 0, 0 })
    goto <bb 14>; [20.00%]
  else
    goto <bb 5>; [80.00%]

This is changed at loopdone into:

  delays[3].nonprimary_delay =3D 129600;
  vect_cst__57 =3D {tdiff_6, tdiff_6};
  mask_patt_21.19_58 =3D vect_cst__57 <=3D { 0, 0 };
  mask_patt_21.19_59 =3D vect_cst__57 <=3D { 0, 0x7FFFFFFFFFFFFFFF };
  vexit_reduc_63 =3D mask_patt_21.19_58 | mask_patt_21.19_59;
  if (vexit_reduc_63 !=3D { 0, 0 })
    goto <bb 3>; [20.00%]
  else
    goto <bb 7>; [80.00%]

or in other words, if there's any value where the compare succeeds, find it=
 and
return.
This looks correct to me.

It could be that my AVX is rusty, but this generates:

   vmovdqa 0xf9c(%rip),%xmm1        # 0x402010=20
   mov    $0x1,%eax
   vmovq  %rcx,%xmm3
   vmovdqa %xmm0,(%rsp)
   vpunpcklqdq %xmm3,%xmm3,%xmm2
   vmovdqa %xmm0,0x10(%rsp)
   vpcmpgtq %xmm2,%xmm1,%xmm1#
   vmovdqa %xmm0,0x20(%rsp)
   vmovq  %rax,%xmm0
   vpunpcklqdq %xmm0,%xmm0,%xmm0
   movl   $0x1fa40,0x38(%rsp)
   vpcmpgtq %xmm2,%xmm0,%xmm0
   vpor   %xmm1,%xmm0,%xmm0
   vptest %xmm0,%xmm0

which looks off; in particular, for the second compare it looks like it
doesn't do a load but instead just duplicates the constant 1.
gdb seems to confirm this. At the first compare:

(gdb) p $xmm2.v2_int64
$4 =3D {10412095, 10412095}
(gdb) p $xmm0.v2_int64
$5 =3D {0, 0}

which is what's expected, but at the second compare:

(gdb) p $xmm2.v2_int64
$7 =3D {10412095, 10412095}
(gdb) p $xmm0.v2_int64
$6 =3D {1, 1}

at the second it's comparing {1, 1} instead of {0, 0x7FFFFFFFFFFFFFFF}.

On AArch64, where it doesn't fail, the comparison is:

   movi    v29.4s, 0
   add     x1, sp, 16
   ldr     x5, [x0, 8]
   mov     w0, 64064
   movk    w0, 0x1, lsl 16
   add     x3, sp, 48
   str     q29, [sp, 64]
   mov     x2, 57407
   mov     x4, 9223372036854775807
   str     x4, [sp, 64]
   movk    x2, 0x9e, lsl 16
   str     w0, [sp, 72]
   sub     x2, x2, x5
   stp     q29, q29, [x1]
   dup     v27.2d, x2
   ld2     {v30.2d - v31.2d}, [x1]
   str     q29, [sp, 48]
   ld2     {v28.2d - v29.2d}, [x3]
   cmge    v30.2d, v30.2d, v27.2d
   cmge    v28.2d, v28.2d, v27.2d
   orr     v30.16b, v30.16b, v28.16b
   umaxp   v30.4s, v30.4s, v30.4s
   fmov    x0, d30
   cbnz    x0, .L12

which has v30.2d being {0, 0} and v28.2d being {0, 0x7FFFFFFFFFFFFFFF} as
expected...

On AArch64 we don't inline the constants because whatever is propagating the
constants can't understand the LOAD_LANES:

  mask_patt_19.21_50 =3D vect__2.16_44 >=3D vect_cst__49;
  mask_patt_19.21_51 =3D vect__2.19_47 >=3D vect_cst__49;
  vexit_reduc_55 =3D mask_patt_19.21_50 | mask_patt_19.21_51;
  if (vexit_reduc_55 !=3D { 0, 0 })
    goto <bb 3>; [20.00%]
  else
    goto <bb 7>; [80.00%]

so could this be another expansion bug?

Note that a simpler reproducer is this:

---
long tdiff =3D 10412095;

int main() {
  struct {
    long maximum;
    int nonprimary_delay;
  } delays[] =3D {{}, {}, {}, {9223372036854775807, 36 * 60 * 60}};

  for (unsigned i =3D 0; i < sizeof(delays) / sizeof(delays[0]); ++i)
    if (tdiff <=3D delays[i].maximum)
      return delays[i].nonprimary_delay;

  __builtin_abort();
}
---

The key point is that the compiler must not be able to constprop tdiff at
GIMPLE. If it can, e.g.:

int main() {
  struct {
    long maximum;
    int nonprimary_delay;
  } delays[] =3D {{}, {}, {}, {9223372036854775807, 36 * 60 * 60}};
  long tdiff =3D 10412095;

  for (unsigned i =3D 0; i < sizeof(delays) / sizeof(delays[0]); ++i)
    if (tdiff <=3D delays[i].maximum)
      return delays[i].nonprimary_delay;

  __builtin_abort();
}

then after vectorization and const prop the entire expression is evaluated
at GIMPLE and it gets the right result.

This makes me believe it's a target expansion bug.=