From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id C172F3858D39; Tue,  7 Mar 2023 06:01:35 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org C172F3858D39
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1678168895;
	bh=OSHnk2hWAzqUYA145kves/A0LOquwVD7FWYRXMhEWOc=;
	h=From:To:Subject:Date:From;
	b=FwytJ++ofA7fN7mnhYCsP+hZgFLO2JHSJi3m6pOkWbkwYm0DNFMVrT1NC9OHbvNfp
	 NJKqgfIlCbYMrCEuotlYlGnbh8X7lTTL4GHDyiN9I047AaI9ZPrZFfnREnxBwQkNGv
	 nSMnY1wwohFKsuSnn/jeGd8OpjHtM5l4wuXE4MNE=
From: "crazylht at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/109048] New: [13 regression] redundant mask
 compare generated by vectorizer.
Date: Tue, 07 Mar 2023 06:01:35 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: new
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: tree-optimization
X-Bugzilla-Version: 13.0
X-Bugzilla-Keywords: 
X-Bugzilla-Severity: normal
X-Bugzilla-Who: crazylht at gmail dot com
X-Bugzilla-Status: UNCONFIRMED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status
 bug_severity priority component assigned_to reporter target_milestone
Message-ID: <bug-109048-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D109048

            Bug ID: 109048
           Summary: [13 regression] redundant mask compare generated by
                    vectorizer.
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: crazylht at gmail dot com
  Target Milestone: ---

#include <math.h>

/* Reduced test case for this report.  For each of the first 256
   elements of af, adds x plus a compare-selected term to res[i].
   type and type2 only feed the loop-invariant values computed
   before the loop.  */
void tmp2 (float *af, int type, int type2, float *res)
{
    /* Loop-invariant selections derived from type and type2.  */
    const int Etype =3D (type =3D=3D 1 || type2 =3D=3D 2);
    const float f1 =3D (type =3D=3D 3 || type2 =3D=3D 4) ? 4.f : 2.f;
    const float f2 =3D (type =3D=3D 3 || type2 =3D=3D 4) ? 0.25f : 0.5f;

    for (int i =3D 0; i < 256; i++)
    {
        float x =3D af[i];
        /* z records whether x is below zero.  */
        int z =3D (x < 0.f);
        /* Sum of two selects, each driven by a compare on x.  */
        float t1 =3D (z ? 1.f : f2) + (x < f1 ? 1.f : 0.f);
        /* Negated magnitude of t1.  */
        float neg_t1 =3D -fabsf(t1);
        /* Etype is loop-invariant and picks between neg_t1 and t1.  */
        float t2 =3D Etype ? neg_t1 : t1;
        res[i] +=3D t2 + x;
    }
}

GCC trunk now generates the following GIMPLE for the vectorized loop:


<bb 58> [local count: 5368707]:
  vect_cst__110 =3D {iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.=
0_34,
iftmp.0_34, iftmp.0_34, iftmp.0_34};
  vect_cst__119 =3D {prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41,
prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41};
  vect_cst__123 =3D {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.=
1_16,
iftmp.1_16, iftmp.1_16, iftmp.1_16};

  <bb 17> [local count: 53687070]:
  # i_18 =3D PHI <i_47(26), 0(58)>
  # ivtmp_15 =3D PHI <ivtmp_43(26), 256(58)>
  # vectp_af.11_105 =3D PHI <vectp_af.11_106(26), af_24(D)(58)>
  # vectp_res.23_125 =3D PHI <vectp_res.23_126(26), res_28(D)(58)>
  # vectp_res.28_130 =3D PHI <vectp_res.28_131(26), res_28(D)(58)>
  # ivtmp_133 =3D PHI <ivtmp_134(26), 0(58)>
  # DEBUG i =3D> NULL
  # DEBUG BEGIN_STMT
  _38 =3D (long unsigned int) i_18;
  _37 =3D _38 * 4;
  _36 =3D af_24(D) + _37;
  vect_x_20.13_107 =3D MEM <vector(8) float> [(float *)vectp_af.11_105];
  x_20 =3D *_36;
  # DEBUG x =3D> NULL
  # DEBUG BEGIN_STMT
  # DEBUG D#1 =3D> NULL
  # DEBUG z =3D> NULL
  # DEBUG BEGIN_STMT
  mask__50.14_109 =3D vect_x_20.13_107 >=3D { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,=
 0.0,
0.0 };
  _50 =3D x_20 >=3D 0.0;
  mask__52.15_111 =3D vect_x_20.13_107 < vect_cst__110;
  _52 =3D x_20 < iftmp.0_34;
  mask__53.16_112 =3D mask__50.14_109 & mask__52.15_111;
  _53 =3D _50 & _52;
  mask__55.17_114 =3D vect_x_20.13_107 >=3D vect_cst__110;
  _55 =3D x_20 >=3D iftmp.0_34;
  mask__56.18_115 =3D mask__50.14_109 & mask__55.17_114;
  _56 =3D _50 & _55;
  mask__74.19_117 =3D vect_x_20.13_107 < { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.=
0, 0.0
};
  _74 =3D x_20 < 0.0;
  vect__ifc__73.20_120 =3D VEC_COND_EXPR <mask__74.19_117, { 2.0e+0, 2.0e+0,
2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>;
  _ifc__73 =3D _74 ? 2.0e+0 : prephitmp_41;
  _174 =3D ~mask__53.16_112;
  _175 =3D mask__74.19_117 & _174;
  vect__ifc__75.21_122 =3D VEC_COND_EXPR <_175, { 2.0e+0, 2.0e+0, 2.0e+0, 2=
.0e+0,
2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>;
  _ifc__75 =3D _53 ? prephitmp_41 : _ifc__73;
  vect_prephitmp_17.22_124 =3D VEC_COND_EXPR <mask__56.18_115, vect_cst__12=
3,
vect__ifc__75.21_122>;
  prephitmp_17 =3D _56 ? iftmp.1_16 : _ifc__75;
  # DEBUG t1 =3D> D#2
  # DEBUG BEGIN_STMT
  # DEBUG neg_t1 =3D> -D#2
  # DEBUG BEGIN_STMT
  # DEBUG t2 =3D> prephitmp_17
  # DEBUG BEGIN_STMT
  _12 =3D res_28(D) + _37;
  vect__26.25_127 =3D MEM <vector(8) float> [(float *)vectp_res.23_125];
  _26 =3D *_12;
  vect__27.26_128 =3D vect__26.25_127 + vect_x_20.13_107;
  _27 =3D _26 + x_20;
  vect__45.27_129 =3D vect_prephitmp_17.22_124 + vect__27.26_128;
  _45 =3D prephitmp_17 + _27;
  MEM <vector(8) float> [(float *)vectp_res.28_130] =3D vect__45.27_129;
  # DEBUG BEGIN_STMT
  i_47 =3D i_18 + 1;
  # DEBUG i =3D> i_47
  # DEBUG BEGIN_STMT
  ivtmp_43 =3D ivtmp_15 - 1;
  vectp_af.11_106 =3D vectp_af.11_105 + 32;
  vectp_res.23_126 =3D vectp_res.23_125 + 32;
  vectp_res.28_131 =3D vectp_res.28_130 + 32;
  ivtmp_134 =3D ivtmp_133 + 1;
  if (ivtmp_134 < 32)
    goto <bb 26>; [90.00%]
  else
    goto <bb 56>; [10.00%]

versus GCC 12.2, which generates:


<bb 57> [local count: 5368707]:
  vect_cst__128 =3D {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.=
1_16,
iftmp.1_16, iftmp.1_16, iftmp.1_16};
  vect_cst__134 =3D {iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.=
0_33,
iftmp.0_33, iftmp.0_33, iftmp.0_33};

  <bb 5> [local count: 53687070]:
  # i_15 =3D PHI <i_30(13), 0(57)>
  # ivtmp_20 =3D PHI <ivtmp_49(13), 256(57)>
  # vectp_af.24_124 =3D PHI <vectp_af.24_125(13), af_24(D)(57)>
  # vectp_res.31_138 =3D PHI <vectp_res.31_139(13), res_28(D)(57)>
  # vectp_res.36_143 =3D PHI <vectp_res.36_144(13), res_28(D)(57)>
  # ivtmp_146 =3D PHI <ivtmp_147(13), 0(57)>
  # DEBUG i =3D> NULL
  # DEBUG BEGIN_STMT
  _7 =3D (long unsigned int) i_15;
  _8 =3D _7 * 4;
  _9 =3D af_24(D) + _8;
  vect_x_25.26_126 =3D MEM <vector(8) float> [(float *)vectp_af.24_124];
  x_25 =3D *_9;
  # DEBUG x =3D> NULL
  # DEBUG BEGIN_STMT
  # DEBUG D#1 =3D> NULL
  # DEBUG z =3D> NULL
  # DEBUG BEGIN_STMT
  _130 =3D vect_x_25.26_126 >=3D { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  vect_iftmp.27_131 =3D VEC_COND_EXPR <_130, vect_cst__128, { 1.0e+0, 1.0e+=
0,
1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }>;
  iftmp.2_17 =3D x_25 >=3D 0.0 ? iftmp.1_16 : 1.0e+0;
  vect__41.28_133 =3D vect_iftmp.27_131 + { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0,
1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 };
  _41 =3D iftmp.2_17 + 1.0e+0;
  _135 =3D vect_x_25.26_126 >=3D vect_cst__134;
  vect_prephitmp_42.29_136 =3D VEC_COND_EXPR <_135, vect_iftmp.27_131,
vect__41.28_133>;
  prephitmp_42 =3D x_25 >=3D iftmp.0_33 ? iftmp.2_17 : _41;
  # DEBUG t1 =3D> NULL
  # DEBUG BEGIN_STMT
  # DEBUG neg_t1 =3D> -prephitmp_42
  # DEBUG BEGIN_STMT
  vect_neg_t1_27.30_137 =3D -vect_prephitmp_42.29_136;
  neg_t1_27 =3D -prephitmp_42;
  # DEBUG t2 =3D> neg_t1_27
  # DEBUG BEGIN_STMT
  _10 =3D res_28(D) + _8;
  vect__11.33_140 =3D MEM <vector(8) float> [(float *)vectp_res.31_138];
  _11 =3D *_10;
  vect__35.34_141 =3D vect__11.33_140 + vect_x_25.26_126;
  _35 =3D _11 + x_25;
  vect__13.35_142 =3D vect_neg_t1_27.30_137 + vect__35.34_141;
  _13 =3D neg_t1_27 + _35;
  MEM <vector(8) float> [(float *)vectp_res.36_143] =3D vect__13.35_142;
  # DEBUG BEGIN_STMT
  i_30 =3D i_15 + 1;
  # DEBUG i =3D> i_30
  # DEBUG BEGIN_STMT
  ivtmp_49 =3D ivtmp_20 - 1;
  vectp_af.24_125 =3D vectp_af.24_124 + 32;
  vectp_res.31_139 =3D vectp_res.31_138 + 32;
  vectp_res.36_144 =3D vectp_res.36_143 + 32;
  ivtmp_147 =3D ivtmp_146 + 1;
  if (ivtmp_147 < 32)=