From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id C172F3858D39; Tue, 7 Mar 2023 06:01:35 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org C172F3858D39 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1678168895; bh=OSHnk2hWAzqUYA145kves/A0LOquwVD7FWYRXMhEWOc=; h=From:To:Subject:Date:From; b=FwytJ++ofA7fN7mnhYCsP+hZgFLO2JHSJi3m6pOkWbkwYm0DNFMVrT1NC9OHbvNfp NJKqgfIlCbYMrCEuotlYlGnbh8X7lTTL4GHDyiN9I047AaI9ZPrZFfnREnxBwQkNGv nSMnY1wwohFKsuSnn/jeGd8OpjHtM5l4wuXE4MNE= From: "crazylht at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer. Date: Tue, 07 Mar 2023 06:01:35 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: tree-optimization X-Bugzilla-Version: 13.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: crazylht at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status bug_severity priority component assigned_to reporter target_milestone Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048 Bug ID: 109048 Summary: [13 regression] redundant mask compare generated by vectorizer. 
Product: gcc Version: 13.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: crazylht at gmail dot com Target Milestone: --- #include <math.h> void tmp2 (float *af, int type, int type2, float *res) { const int Etype = (type == 1 || type2 == 2); const float f1 = (type == 3 || type2 == 4) ? 4.f : 2.f; const float f2 = (type == 3 || type2 == 4) ? 0.25f : 0.5f; for (int i = 0; i < 256; i++) { float x = af[i]; int z = (x < 0.f); float t1 = (z ? 1.f : f2) + (x < f1 ? 1.f : 0.f); float neg_t1 = -fabsf(t1); float t2 = Etype ? neg_t1 : t1; res[i] += t2 + x; } } gcc trunk now generates [local count: 5368707]: vect_cst__110 = {iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34}; vect_cst__119 = {prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41}; vect_cst__123 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16}; [local count: 53687070]: # i_18 = PHI # ivtmp_15 = PHI # vectp_af.11_105 = PHI # vectp_res.23_125 = PHI # vectp_res.28_130 = PHI # ivtmp_133 = PHI # DEBUG i => NULL # DEBUG BEGIN_STMT _38 = (long unsigned int) i_18; _37 = _38 * 4; _36 = af_24(D) + _37; vect_x_20.13_107 = MEM [(float *)vectp_af.11_105]; x_20 = *_36; # DEBUG x => NULL # DEBUG BEGIN_STMT # DEBUG D#1 => NULL # DEBUG z => NULL # DEBUG BEGIN_STMT mask__50.14_109 = vect_x_20.13_107 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; _50 = x_20 >= 0.0; mask__52.15_111 = vect_x_20.13_107 < vect_cst__110; _52 = x_20 < iftmp.0_34; mask__53.16_112 = mask__50.14_109 & mask__52.15_111; _53 = _50 & _52; mask__55.17_114 = vect_x_20.13_107 >= vect_cst__110; _55 = x_20 >= iftmp.0_34; mask__56.18_115 = mask__50.14_109 & mask__55.17_114; _56 = _50 & _55; mask__74.19_117 = vect_x_20.13_107 < { 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; _74 = x_20 < 0.0; vect__ifc__73.20_120 = VEC_COND_EXPR ; _ifc__73 = _74 ? 2.0e+0 : prephitmp_41; _174 = ~mask__53.16_112; _175 = mask__74.19_117 & _174; vect__ifc__75.21_122 = VEC_COND_EXPR <_175, { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>; _ifc__75 = _53 ? prephitmp_41 : _ifc__73; vect_prephitmp_17.22_124 = VEC_COND_EXPR ; prephitmp_17 = _56 ? iftmp.1_16 : _ifc__75; # DEBUG t1 => D#2 # DEBUG BEGIN_STMT # DEBUG neg_t1 => -D#2 # DEBUG BEGIN_STMT # DEBUG t2 => prephitmp_17 # DEBUG BEGIN_STMT _12 = res_28(D) + _37; vect__26.25_127 = MEM [(float *)vectp_res.23_125]; _26 = *_12; vect__27.26_128 = vect__26.25_127 + vect_x_20.13_107; _27 = _26 + x_20; vect__45.27_129 = vect_prephitmp_17.22_124 + vect__27.26_128; _45 = prephitmp_17 + _27; MEM [(float *)vectp_res.28_130] = vect__45.27_129; # DEBUG BEGIN_STMT i_47 = i_18 + 1; # DEBUG i => i_47 # DEBUG BEGIN_STMT ivtmp_43 = ivtmp_15 - 1; vectp_af.11_106 = vectp_af.11_105 + 32; vectp_res.23_126 = vectp_res.23_125 + 32; vectp_res.28_131 = vectp_res.28_130 + 32; ivtmp_134 = ivtmp_133 + 1; if (ivtmp_134 < 32) goto ; [90.00%] else goto ; [10.00%] vs gcc12.2 [local count: 5368707]: vect_cst__128 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16}; vect_cst__134 = {iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33}; [local count: 53687070]: # i_15 = PHI # ivtmp_20 = PHI # vectp_af.24_124 = PHI # vectp_res.31_138 = PHI # vectp_res.36_143 = PHI # ivtmp_146 = PHI # DEBUG i => NULL # DEBUG BEGIN_STMT _7 = (long unsigned int) i_15; _8 = _7 * 4; _9 = af_24(D) + _8; vect_x_25.26_126 = MEM [(float *)vectp_af.24_124]; x_25 = *_9; # DEBUG x => NULL # DEBUG BEGIN_STMT # DEBUG D#1 => NULL # DEBUG z => NULL # DEBUG BEGIN_STMT _130 = vect_x_25.26_126 >= { 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0 }; vect_iftmp.27_131 = VEC_COND_EXPR <_130, vect_cst__128, { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }>; iftmp.2_17 = x_25 >= 0.0 ? iftmp.1_16 : 1.0e+0; vect__41.28_133 = vect_iftmp.27_131 + { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }; _41 = iftmp.2_17 + 1.0e+0; _135 = vect_x_25.26_126 >= vect_cst__134; vect_prephitmp_42.29_136 = VEC_COND_EXPR <_135, vect_iftmp.27_131, vect__41.28_133>; prephitmp_42 = x_25 >= iftmp.0_33 ? iftmp.2_17 : _41; # DEBUG t1 => NULL # DEBUG BEGIN_STMT # DEBUG neg_t1 => -prephitmp_42 # DEBUG BEGIN_STMT vect_neg_t1_27.30_137 = -vect_prephitmp_42.29_136; neg_t1_27 = -prephitmp_42; # DEBUG t2 => neg_t1_27 # DEBUG BEGIN_STMT _10 = res_28(D) + _8; vect__11.33_140 = MEM [(float *)vectp_res.31_138]; _11 = *_10; vect__35.34_141 = vect__11.33_140 + vect_x_25.26_126; _35 = _11 + x_25; vect__13.35_142 = vect_neg_t1_27.30_137 + vect__35.34_141; _13 = neg_t1_27 + _35; MEM [(float *)vectp_res.36_143] = vect__13.35_142; # DEBUG BEGIN_STMT i_30 = i_15 + 1; # DEBUG i => i_30 # DEBUG BEGIN_STMT ivtmp_49 = ivtmp_20 - 1; vectp_af.24_125 = vectp_af.24_124 + 32; vectp_res.31_139 = vectp_res.31_138 + 32; vectp_res.36_144 = vectp_res.36_143 + 32; ivtmp_147 = ivtmp_146 + 1; if (ivtmp_147 < 32)