public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer.
@ 2023-03-07 6:01 crazylht at gmail dot com
2023-03-07 6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
` (12 more replies)
0 siblings, 13 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07 6:01 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
Bug ID: 109048
Summary: [13 regression] redundant mask compare generated by
vectorizer.
Product: gcc
Version: 13.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: crazylht at gmail dot com
Target Milestone: ---
#include <math.h>
/* Reproducer for PR tree-optimization/109048.
   For each i in [0, 256) reads af[i] and accumulates into res[i] the sum of
   that value and a term selected by comparisons of the value against 0 and
   against f1.  Both af and res are indexed from 0 to 255.  */
void tmp2 (float *af, int type, int type2, float *res)
{
/* Loop-invariant values derived only from the two type arguments.  */
const int Etype = (type == 1 || type2 == 2);
const float f1 = (type == 3 || type2 == 4) ? 4.f : 2.f;
const float f2 = (type == 3 || type2 == 4) ? 0.25f : 0.5f;
for (int i = 0; i < 256; i++)
{
float x = af[i];
int z = (x < 0.f);
/* Sum of two selects on x: (1 or f2) depending on x < 0, plus (1 or 0)
   depending on x < f1.  */
float t1 = (z ? 1.f : f2) + (x < f1 ? 1.f : 0.f);
float neg_t1 = -fabsf(t1);
/* Etype does not change inside the loop, so the same arm is taken on
   every iteration.  */
float t2 = Etype ? neg_t1 : t1;
res[i] += t2 + x;
}
}
gcc trunk now generates
<bb 58> [local count: 5368707]:
vect_cst__110 = {iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34,
iftmp.0_34, iftmp.0_34, iftmp.0_34};
vect_cst__119 = {prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41,
prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41};
vect_cst__123 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16,
iftmp.1_16, iftmp.1_16, iftmp.1_16};
<bb 17> [local count: 53687070]:
# i_18 = PHI <i_47(26), 0(58)>
# ivtmp_15 = PHI <ivtmp_43(26), 256(58)>
# vectp_af.11_105 = PHI <vectp_af.11_106(26), af_24(D)(58)>
# vectp_res.23_125 = PHI <vectp_res.23_126(26), res_28(D)(58)>
# vectp_res.28_130 = PHI <vectp_res.28_131(26), res_28(D)(58)>
# ivtmp_133 = PHI <ivtmp_134(26), 0(58)>
# DEBUG i => NULL
# DEBUG BEGIN_STMT
_38 = (long unsigned int) i_18;
_37 = _38 * 4;
_36 = af_24(D) + _37;
vect_x_20.13_107 = MEM <vector(8) float> [(float *)vectp_af.11_105];
x_20 = *_36;
# DEBUG x => NULL
# DEBUG BEGIN_STMT
# DEBUG D#1 => NULL
# DEBUG z => NULL
# DEBUG BEGIN_STMT
mask__50.14_109 = vect_x_20.13_107 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0 };
_50 = x_20 >= 0.0;
mask__52.15_111 = vect_x_20.13_107 < vect_cst__110;
_52 = x_20 < iftmp.0_34;
mask__53.16_112 = mask__50.14_109 & mask__52.15_111;
_53 = _50 & _52;
mask__55.17_114 = vect_x_20.13_107 >= vect_cst__110;
_55 = x_20 >= iftmp.0_34;
mask__56.18_115 = mask__50.14_109 & mask__55.17_114;
_56 = _50 & _55;
mask__74.19_117 = vect_x_20.13_107 < { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
};
_74 = x_20 < 0.0;
vect__ifc__73.20_120 = VEC_COND_EXPR <mask__74.19_117, { 2.0e+0, 2.0e+0,
2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>;
_ifc__73 = _74 ? 2.0e+0 : prephitmp_41;
_174 = ~mask__53.16_112;
_175 = mask__74.19_117 & _174;
vect__ifc__75.21_122 = VEC_COND_EXPR <_175, { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0,
2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>;
_ifc__75 = _53 ? prephitmp_41 : _ifc__73;
vect_prephitmp_17.22_124 = VEC_COND_EXPR <mask__56.18_115, vect_cst__123,
vect__ifc__75.21_122>;
prephitmp_17 = _56 ? iftmp.1_16 : _ifc__75;
# DEBUG t1 => D#2
# DEBUG BEGIN_STMT
# DEBUG neg_t1 => -D#2
# DEBUG BEGIN_STMT
# DEBUG t2 => prephitmp_17
# DEBUG BEGIN_STMT
_12 = res_28(D) + _37;
vect__26.25_127 = MEM <vector(8) float> [(float *)vectp_res.23_125];
_26 = *_12;
vect__27.26_128 = vect__26.25_127 + vect_x_20.13_107;
_27 = _26 + x_20;
vect__45.27_129 = vect_prephitmp_17.22_124 + vect__27.26_128;
_45 = prephitmp_17 + _27;
MEM <vector(8) float> [(float *)vectp_res.28_130] = vect__45.27_129;
# DEBUG BEGIN_STMT
i_47 = i_18 + 1;
# DEBUG i => i_47
# DEBUG BEGIN_STMT
ivtmp_43 = ivtmp_15 - 1;
vectp_af.11_106 = vectp_af.11_105 + 32;
vectp_res.23_126 = vectp_res.23_125 + 32;
vectp_res.28_131 = vectp_res.28_130 + 32;
ivtmp_134 = ivtmp_133 + 1;
if (ivtmp_134 < 32)
goto <bb 26>; [90.00%]
else
goto <bb 56>; [10.00%]
vs gcc12.2
<bb 57> [local count: 5368707]:
vect_cst__128 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16,
iftmp.1_16, iftmp.1_16, iftmp.1_16};
vect_cst__134 = {iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33,
iftmp.0_33, iftmp.0_33, iftmp.0_33};
<bb 5> [local count: 53687070]:
# i_15 = PHI <i_30(13), 0(57)>
# ivtmp_20 = PHI <ivtmp_49(13), 256(57)>
# vectp_af.24_124 = PHI <vectp_af.24_125(13), af_24(D)(57)>
# vectp_res.31_138 = PHI <vectp_res.31_139(13), res_28(D)(57)>
# vectp_res.36_143 = PHI <vectp_res.36_144(13), res_28(D)(57)>
# ivtmp_146 = PHI <ivtmp_147(13), 0(57)>
# DEBUG i => NULL
# DEBUG BEGIN_STMT
_7 = (long unsigned int) i_15;
_8 = _7 * 4;
_9 = af_24(D) + _8;
vect_x_25.26_126 = MEM <vector(8) float> [(float *)vectp_af.24_124];
x_25 = *_9;
# DEBUG x => NULL
# DEBUG BEGIN_STMT
# DEBUG D#1 => NULL
# DEBUG z => NULL
# DEBUG BEGIN_STMT
_130 = vect_x_25.26_126 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
vect_iftmp.27_131 = VEC_COND_EXPR <_130, vect_cst__128, { 1.0e+0, 1.0e+0,
1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }>;
iftmp.2_17 = x_25 >= 0.0 ? iftmp.1_16 : 1.0e+0;
vect__41.28_133 = vect_iftmp.27_131 + { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0,
1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 };
_41 = iftmp.2_17 + 1.0e+0;
_135 = vect_x_25.26_126 >= vect_cst__134;
vect_prephitmp_42.29_136 = VEC_COND_EXPR <_135, vect_iftmp.27_131,
vect__41.28_133>;
prephitmp_42 = x_25 >= iftmp.0_33 ? iftmp.2_17 : _41;
# DEBUG t1 => NULL
# DEBUG BEGIN_STMT
# DEBUG neg_t1 => -prephitmp_42
# DEBUG BEGIN_STMT
vect_neg_t1_27.30_137 = -vect_prephitmp_42.29_136;
neg_t1_27 = -prephitmp_42;
# DEBUG t2 => neg_t1_27
# DEBUG BEGIN_STMT
_10 = res_28(D) + _8;
vect__11.33_140 = MEM <vector(8) float> [(float *)vectp_res.31_138];
_11 = *_10;
vect__35.34_141 = vect__11.33_140 + vect_x_25.26_126;
_35 = _11 + x_25;
vect__13.35_142 = vect_neg_t1_27.30_137 + vect__35.34_141;
_13 = neg_t1_27 + _35;
MEM <vector(8) float> [(float *)vectp_res.36_143] = vect__13.35_142;
# DEBUG BEGIN_STMT
i_30 = i_15 + 1;
# DEBUG i => i_30
# DEBUG BEGIN_STMT
ivtmp_49 = ivtmp_20 - 1;
vectp_af.24_125 = vectp_af.24_124 + 32;
vectp_res.31_139 = vectp_res.31_138 + 32;
vectp_res.36_144 = vectp_res.36_143 + 32;
ivtmp_147 = ivtmp_146 + 1;
if (ivtmp_147 < 32)
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
@ 2023-03-07 6:03 ` crazylht at gmail dot com
2023-03-07 6:13 ` crazylht at gmail dot com
` (11 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07 6:03 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #1 from Hongtao.liu <crazylht at gmail dot com> ---
There's also a backend misoptimization for x86 AVX512 mask compares, which is
the same as PR88570.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
2023-03-07 6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
@ 2023-03-07 6:13 ` crazylht at gmail dot com
2023-03-07 6:22 ` crazylht at gmail dot com
` (10 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07 6:13 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
Lots of logic operations for the mask in ifcvt
<bb 17> [local count: 531502205]:
# i_18 = PHI <i_47(26), 0(42)>
# ivtmp_15 = PHI <ivtmp_43(26), 256(42)>
# DEBUG i => NULL
# DEBUG BEGIN_STMT
_38 = (long unsigned int) i_18;
_37 = _38 * 4;
_36 = af_24(D) + _37;
x_20 = *_36;
# DEBUG x => NULL
# DEBUG BEGIN_STMT
# DEBUG D#1 => NULL
# DEBUG z => NULL
# DEBUG BEGIN_STMT
_50 = x_20 >= 0.0;
_52 = x_20 < iftmp.0_34;
_53 = _50 & _52;
_55 = x_20 >= iftmp.0_34;
_56 = _50 & _55;
_74 = x_20 < 0.0;
_ifc__73 = _74 ? 2.0e+0 : prephitmp_41;
_ifc__75 = _53 ? prephitmp_41 : _ifc__73;
prephitmp_17 = _56 ? iftmp.1_16 : _ifc__75;
# DEBUG t1 => D#2
# DEBUG BEGIN_STMT
# DEBUG neg_t1 => -D#2
# DEBUG BEGIN_STMT
# DEBUG t2 => prephitmp_17
# DEBUG BEGIN_STMT
_12 = res_28(D) + _37;
_26 = *_12;
_27 = _26 + x_20;
_45 = prephitmp_17 + _27;
*_12 = _45;
# DEBUG BEGIN_STMT
i_47 = i_18 + 1;
# DEBUG i => i_47
# DEBUG BEGIN_STMT
ivtmp_43 = ivtmp_15 - 1;
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
2023-03-07 6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
2023-03-07 6:13 ` crazylht at gmail dot com
@ 2023-03-07 6:22 ` crazylht at gmail dot com
2023-03-07 6:34 ` crazylht at gmail dot com
` (9 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07 6:22 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> ---
A three-operand phi makes ifcvt generate worse code
<bb 4> [local count: 10737414]:
# iftmp.1_16 = PHI <2.5e-1(13), 5.0e-1(2)>
# iftmp.0_34 = PHI <4.0e+0(13), 2.0e+0(2)>
# prephitmp_40 = PHI <-2.5e-1(13), -5.0e-1(2)>
# prephitmp_41 = PHI <1.25e+0(13), 1.5e+0(2)>
# prephitmp_42 = PHI <-1.25e+0(13), -1.5e+0(2)>
# DEBUG f1 => NULL
# DEBUG f2 => iftmp.1_16
# DEBUG BEGIN_STMT
# DEBUG i => 0
# DEBUG BEGIN_STMT
if (_3 != 0)
goto <bb 28>; [50.00%]
else
goto <bb 29>; [50.00%]
<bb 29> [local count: 5368707]:
<bb 17> [local count: 531502204]:
# i_18 = PHI <0(29), i_47(26)>
# ivtmp_15 = PHI <256(29), ivtmp_43(26)>
# DEBUG i => i_18
# DEBUG BEGIN_STMT
_38 = (long unsigned int) i_18;
_37 = _38 * 4;
_36 = af_24(D) + _37;
x_20 = *_36;
# DEBUG x => x_20
# DEBUG BEGIN_STMT
# DEBUG D#1 => x_20 < 0.0
# DEBUG z => (int) D#1
# DEBUG BEGIN_STMT
if (x_20 >= 0.0)
goto <bb 19>; [59.00%]
else
goto <bb 22>; [41.00%]
<bb 19> [local count: 313586303]:
if (x_20 < iftmp.0_34)
goto <bb 22>; [20.00%]
else
goto <bb 21>; [80.00%]
<bb 21> [local count: 250869041]:
<bb 22> [local count: 531502205]:
# prephitmp_17 = PHI <2.0e+0(17), prephitmp_41(19), iftmp.1_16(21)>
# DEBUG t1 => D#2
# DEBUG BEGIN_STMT
# DEBUG neg_t1 => -D#2
# DEBUG BEGIN_STMT
# DEBUG t2 => prephitmp_17
# DEBUG BEGIN_STMT
_12 = res_28(D) + _37;
_26 = *_12;
_27 = _26 + x_20;
_45 = prephitmp_17 + _27;
*_12 = _45;
# DEBUG BEGIN_STMT
i_47 = i_18 + 1;
# DEBUG i => i_47
# DEBUG BEGIN_STMT
ivtmp_43 = ivtmp_15 - 1;
if (ivtmp_43 != 0)
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (2 preceding siblings ...)
2023-03-07 6:22 ` crazylht at gmail dot com
@ 2023-03-07 6:34 ` crazylht at gmail dot com
2023-03-07 9:17 ` rguenth at gcc dot gnu.org
` (8 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07 6:34 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #4 from Hongtao.liu <crazylht at gmail dot com> ---
(In reply to Hongtao.liu from comment #3)
> A three-operand phi make ifcvt generate worse code
>
It saves 1 add for the potential 1.f + 0.f/1.f since it's constant, but generates
more logic operations for predicates.
Hmm, it looks good for scalar code since it's just compare and jump, but it hurts
vectorization due to more logical operations for predicates.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (3 preceding siblings ...)
2023-03-07 6:34 ` crazylht at gmail dot com
@ 2023-03-07 9:17 ` rguenth at gcc dot gnu.org
2023-03-07 9:38 ` jakub at gcc dot gnu.org
` (7 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-07 9:17 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Keywords| |missed-optimization,
| |needs-bisection
Target Milestone|--- |13.0
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (4 preceding siblings ...)
2023-03-07 9:17 ` rguenth at gcc dot gnu.org
@ 2023-03-07 9:38 ` jakub at gcc dot gnu.org
2023-03-07 10:09 ` rguenther at suse dot de
` (6 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: jakub at gcc dot gnu.org @ 2023-03-07 9:38 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
Jakub Jelinek <jakub at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Ever confirmed|0 |1
Keywords|needs-bisection |
Last reconfirmed| |2023-03-07
CC| |aldyh at gcc dot gnu.org,
| |jakub at gcc dot gnu.org
--- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
In bisection r13-1 through r13-6513 I see only one important change,
r13-2267-g4fbe3e6aa74dae5c75a
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (5 preceding siblings ...)
2023-03-07 9:38 ` jakub at gcc dot gnu.org
@ 2023-03-07 10:09 ` rguenther at suse dot de
2023-03-27 8:26 ` rguenth at gcc dot gnu.org
` (5 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: rguenther at suse dot de @ 2023-03-07 10:09 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #6 from rguenther at suse dot de <rguenther at suse dot de> ---
On Tue, 7 Mar 2023, jakub at gcc dot gnu.org wrote:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
>
> Jakub Jelinek <jakub at gcc dot gnu.org> changed:
>
> What |Removed |Added
> ----------------------------------------------------------------------------
> Status|UNCONFIRMED |NEW
> Ever confirmed|0 |1
> Keywords|needs-bisection |
> Last reconfirmed| |2023-03-07
> CC| |aldyh at gcc dot gnu.org,
> | |jakub at gcc dot gnu.org
>
> --- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
> In bisection r13-1 through r13-6513 I see only one important change,
> r13-2267-g4fbe3e6aa74dae5c75a
That would probably trigger extra jump threading here.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (6 preceding siblings ...)
2023-03-07 10:09 ` rguenther at suse dot de
@ 2023-03-27 8:26 ` rguenth at gcc dot gnu.org
2023-03-27 9:18 ` rguenth at gcc dot gnu.org
` (4 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-27 8:26 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #7 from Richard Biener <rguenth at gcc dot gnu.org> ---
The first threadfull pass ends up producing the three-argument PHI:
<bb 7> [local count: 1063004408]:
_7 = (long unsigned int) i_14;
_8 = _7 * 4;
_9 = af_24(D) + _8;
x_25 = *_9;
if (x_25 >= 0.0)
goto <bb 9>; [59.00%]
else
goto <bb 8>; [41.00%]
<bb 8> [local count: 435831803]:
<bb 9> [local count: 1063004408]:
# iftmp.2_17 = PHI <iftmp.1_16(7), 1.0e+0(8)>
if (iftmp.0_15 > x_25)
goto <bb 11>; [50.00%]
else
goto <bb 10>; [50.00%]
and we thread 9->11 from 8->9 (so when !(x_25 >= 0.0)). Like a related PR
this is a threading that turns a "conditional move" diamond into something more
complicated. We could heuristically decide that we don't want to do this
[early?]. But it might be more honest to more aggressively represent
those as COND_EXPRs then.
So I think we have a duplicate for this.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (7 preceding siblings ...)
2023-03-27 8:26 ` rguenth at gcc dot gnu.org
@ 2023-03-27 9:18 ` rguenth at gcc dot gnu.org
2023-03-27 10:37 ` rguenth at gcc dot gnu.org
` (3 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-27 9:18 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #8 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 54762
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54762&action=edit
heuristic
I am testing this heuristic
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (8 preceding siblings ...)
2023-03-27 9:18 ` rguenth at gcc dot gnu.org
@ 2023-03-27 10:37 ` rguenth at gcc dot gnu.org
2023-03-28 12:20 ` rguenth at gcc dot gnu.org
` (2 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-27 10:37 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #8)
> Created attachment 54762 [details]
> heuristic
>
> I am testing this heuristic
FAIL: gcc.dg/tree-ssa/ranger-threader-3.c scan-tree-dump-times ethread "Registering.*jump thread" 1
FAIL: gcc.dg/tree-ssa/ssa-dom-thread-14.c scan-tree-dump-times dom2 "Threaded" 2
is the fallout - both are the same testcase, both can be mitigated by adjusting
the dump to scan for and making sure PROP_loop_opts_done is also provided
for functions without loops.
But it also shows a case where we optimize things significantly later
after the change.
if ((!unsignedp || (!left && methods == OPTAB_WIDEN)))
{
enum optab_methods methods1 = methods;
if (unsignedp)
methods1 = OPTAB_MUST_WIDEN;
expand_binop (left ? lshift_optab : rshift_arith_optab,
unsignedp, methods1);
used to be threaded in early threading but now would be delayed to after
loop opts.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (9 preceding siblings ...)
2023-03-27 10:37 ` rguenth at gcc dot gnu.org
@ 2023-03-28 12:20 ` rguenth at gcc dot gnu.org
2023-04-13 7:30 ` rguenth at gcc dot gnu.org
2023-04-13 8:14 ` crazylht at gmail dot com
12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-28 12:20 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
See Also| |https://gcc.gnu.org/bugzill
| |a/show_bug.cgi?id=109154
--- Comment #10 from Richard Biener <rguenth at gcc dot gnu.org> ---
PR109154 is another of these cases. We might want to de-dup them.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (10 preceding siblings ...)
2023-03-28 12:20 ` rguenth at gcc dot gnu.org
@ 2023-04-13 7:30 ` rguenth at gcc dot gnu.org
2023-04-13 8:14 ` crazylht at gmail dot com
12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-04-13 7:30 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
The recent patch improved this to avoid some of the compares. We still have
the three-argument PHI and thus three VEC_CONDs.
.L10:
vmovups (%rdi,%rdx), %ymm0
vcmpltps %ymm6, %ymm0, %ymm3
vcmpltps %ymm2, %ymm0, %ymm1
vpandn %ymm1, %ymm3, %ymm1
vblendvps %ymm1, %ymm5, %ymm4, %ymm1
vblendvps %ymm3, %ymm7, %ymm1, %ymm1
vaddps %ymm1, %ymm0, %ymm0
vaddps (%rax,%rdx), %ymm0, %ymm0
vmovups %ymm0, (%rax,%rdx)
addq $32, %rdx
cmpq $1024, %rdx
jne .L10
vs. GCC 12
.L6:
vmovups (%rdi,%rdx), %ymm1
vcmpltps %ymm5, %ymm1, %ymm0
vcmpltps %ymm6, %ymm1, %ymm4
vblendvps %ymm0, %ymm3, %ymm2, %ymm0
vandps %ymm3, %ymm4, %ymm4
vaddps %ymm4, %ymm0, %ymm0
vaddps %ymm1, %ymm0, %ymm0
vaddps (%rax,%rdx), %ymm0, %ymm0
vmovups %ymm0, (%rax,%rdx)
addq $32, %rdx
cmpq $1024, %rdx
jne .L6
which at least overall looks comparable.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
` (11 preceding siblings ...)
2023-04-13 7:30 ` rguenth at gcc dot gnu.org
@ 2023-04-13 8:14 ` crazylht at gmail dot com
12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-04-13 8:14 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
Hongtao.liu <crazylht at gmail dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution|--- |FIXED
--- Comment #12 from Hongtao.liu <crazylht at gmail dot com> ---
So marked as fixed.
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2023-04-13 8:14 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-07 6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
2023-03-07 6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
2023-03-07 6:13 ` crazylht at gmail dot com
2023-03-07 6:22 ` crazylht at gmail dot com
2023-03-07 6:34 ` crazylht at gmail dot com
2023-03-07 9:17 ` rguenth at gcc dot gnu.org
2023-03-07 9:38 ` jakub at gcc dot gnu.org
2023-03-07 10:09 ` rguenther at suse dot de
2023-03-27 8:26 ` rguenth at gcc dot gnu.org
2023-03-27 9:18 ` rguenth at gcc dot gnu.org
2023-03-27 10:37 ` rguenth at gcc dot gnu.org
2023-03-28 12:20 ` rguenth at gcc dot gnu.org
2023-04-13 7:30 ` rguenth at gcc dot gnu.org
2023-04-13 8:14 ` crazylht at gmail dot com
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).