From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id 88B823858410; Tue, 21 May 2024 15:50:49 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 88B823858410
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1716306649;
	bh=C79DItUYlzx9vVBWs4VOhkG8W1fipZILNglmqyDsaPY=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=uYlZc3bebxh8Ph2m7XnFBfTgI0T3L0nu3OBOKnQiTQ02Ixedb2+Ng67J9fx/LuAxx
	 wsnxveZSHPO58gPW1JrvVxRnLheeCnWXXTLcFCv86QNUrrExzTnwNPSVoEg7kiFiVR
	 SI4XdOpo+itMb2NjBfeHmSQ3QNGr1RNUKMRKSW/U=
From: "slyfox at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/115161] [15 Regression] highway-1.0.7 miscompilation of
 some SSE2 intrinsics
Date: Tue, 21 May 2024 15:50:49 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: target
X-Bugzilla-Version: 15.0
X-Bugzilla-Keywords: wrong-code
X-Bugzilla-Severity: normal
X-Bugzilla-Who: slyfox at gcc dot gnu.org
X-Bugzilla-Status: NEW
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: 15.0
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-115161-4-WDfEqb4IJQ@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-115161-4@http.gcc.gnu.org/bugzilla/>
References: <bug-115161-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D115161
--- Comment #8 from Sergei Trofimovich <slyfox at gcc dot gnu.org> ---
Thank you, Jakub!=20

> The reason the testcase FAILs is the same as in the other PRs, it is tryi=
ng to convert {0x0.8p+33f, 0x0.8p+33f, 0x0.8p+33f, 0x0.8p+33f} V4SFmode vec=
tor to V4SImode, and because the backend sees the constant operand of the f=
ix, it folds it to the unspecified value as with scalar conversion.

To be super-clear: the problem is the out-of-range value, not just any
V4SFmode->V4SImode conversion, right?

Specifically, float32{INT32_MAX} -> int32_t should be fine, right?

I was trying to extract the following example (and likely failed):

It tries very hard not to pass anything outside float32{INT32_MAX} to
(a different) `PromoteTo()` at the end of the function from
https://github.com/google/highway/blob/2270e77d0d0ccc1d6bc7393f0ebb0b6352dd=
fd00/hwy/ops/x86_128-inl.h#L10275

 HWY_API VFromD<D> PromoteTo(D di64, VFromD<Rebind<float, D>> v) {
  const Rebind<int32_t, decltype(di64)> di32;
  const RebindToFloat<decltype(di32)> df32;
  const RebindToUnsigned<decltype(di32)> du32;
  const Repartition<uint8_t, decltype(du32)> du32_as_du8;

  const auto exponent_adj =3D BitCast(
      du32,
      Min(SaturatedSub(BitCast(du32_as_du8, ShiftRight<23>(BitCast(du32, v)=
)),
                       BitCast(du32_as_du8, Set(du32, uint32_t{157}))),
          BitCast(du32_as_du8, Set(du32, uint32_t{32}))));
  const auto adj_v =3D
      BitCast(df32, BitCast(du32, v) - ShiftLeft<23>(exponent_adj));

  const auto f32_to_i32_result =3D ConvertTo(di32, adj_v);
  const auto lo64_or_mask =3D PromoteTo(
      di64,
      BitCast(du32, VecFromMask(di32, Eq(f32_to_i32_result,
                                         Set(di32, LimitsMax<int32_t>()))))=
);

  return Or(PromoteTo(di64, BitCast(di32, f32_to_i32_result))
                << PromoteTo(di64, exponent_adj),
            lo64_or_mask);
 }

Specifically, `const auto f32_to_i32_result =3D ConvertTo(di32, adj_v);`
hits overflow, and the masking below should account for that (I tried to
preserve masking in the original sample):

https://github.com/google/highway/blob/2270e77d0d0ccc1d6bc7393f0ebb0b6352dd=
fd00/hwy/ops/x86_128-inl.h#L10870

  template <class D, HWY_IF_I32_D(D)>
  HWY_API VFromD<D> ConvertTo(D di, VFromD<RebindToFloat<D>> v) {
    const RebindToFloat<decltype(di)> df;
    // See comment at the first occurrence of "IfThenElse(overflow,".
    const MFromD<D> overflow =3D RebindMask(di, Ge(v, Set(df, 2147483648.0f=
)));
    return IfThenElse(overflow, Set(di, LimitsMax<int32_t>()),
                      ConvertInRangeTo(di, v));
  }

Is it obvious from the minimized C code where I got it into an overflow
condition? Or does constant folding propagate through masks here?

I'll try to minimize it again.=