public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer.
@ 2023-03-07  6:01 crazylht at gmail dot com
  2023-03-07  6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
                   ` (12 more replies)
  0 siblings, 13 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07  6:01 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

            Bug ID: 109048
           Summary: [13 regression] redundant mask compare generated by
                    vectorizer.
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: crazylht at gmail dot com
  Target Milestone: ---

#include <math.h>

/* Reproducer: for each of the first 256 elements of AF, compute a value
   selected by two float comparisons and accumulate it, plus the element
   itself, into the matching element of RES.

   af     input array; elements 0..255 are read.
   type, type2
          selectors; they only influence the loop-invariant values
          Etype, f1 and f2 computed before the loop.
   res    output array; elements 0..255 are updated in place with +=.  */
void tmp2 (float *af, int type, int type2, float *res)
{
    /* Loop-invariant settings derived from the two selectors.  f1 and f2
       share the same condition, so they are either (4.0, 0.25) or
       (2.0, 0.5).  */
    const int Etype = (type == 1 || type2 == 2);
    const float f1 = (type == 3 || type2 == 4) ? 4.f : 2.f;
    const float f2 = (type == 3 || type2 == 4) ? 0.25f : 0.5f;

    for (int i = 0; i < 256; i++)
    {
        float x = af[i];
        /* z is 1 when x is negative, 0 otherwise.  */
        int z = (x < 0.f);
        /* Sum of two selects: (1.0 or f2) depending on x < 0, plus
           (1.0 or 0.0) depending on x < f1.  */
        float t1 = (z ? 1.f : f2) + (x < f1 ? 1.f : 0.f);
        float neg_t1 = -fabsf(t1);
        /* Etype chooses between -|t1| and t1 itself.  */
        float t2 = Etype ? neg_t1 : t1;
        res[i] += t2 + x;
    }
}

gcc trunk now generates


<bb 58> [local count: 5368707]:
  vect_cst__110 = {iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34,
iftmp.0_34, iftmp.0_34, iftmp.0_34};
  vect_cst__119 = {prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41,
prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41};
  vect_cst__123 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16,
iftmp.1_16, iftmp.1_16, iftmp.1_16};

  <bb 17> [local count: 53687070]:
  # i_18 = PHI <i_47(26), 0(58)>
  # ivtmp_15 = PHI <ivtmp_43(26), 256(58)>
  # vectp_af.11_105 = PHI <vectp_af.11_106(26), af_24(D)(58)>
  # vectp_res.23_125 = PHI <vectp_res.23_126(26), res_28(D)(58)>
  # vectp_res.28_130 = PHI <vectp_res.28_131(26), res_28(D)(58)>
  # ivtmp_133 = PHI <ivtmp_134(26), 0(58)>
  # DEBUG i => NULL
  # DEBUG BEGIN_STMT
  _38 = (long unsigned int) i_18;
  _37 = _38 * 4;
  _36 = af_24(D) + _37;
  vect_x_20.13_107 = MEM <vector(8) float> [(float *)vectp_af.11_105];
  x_20 = *_36;
  # DEBUG x => NULL
  # DEBUG BEGIN_STMT
  # DEBUG D#1 => NULL
  # DEBUG z => NULL
  # DEBUG BEGIN_STMT
  mask__50.14_109 = vect_x_20.13_107 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0 };
  _50 = x_20 >= 0.0;
  mask__52.15_111 = vect_x_20.13_107 < vect_cst__110;
  _52 = x_20 < iftmp.0_34;
  mask__53.16_112 = mask__50.14_109 & mask__52.15_111;
  _53 = _50 & _52;
  mask__55.17_114 = vect_x_20.13_107 >= vect_cst__110;
  _55 = x_20 >= iftmp.0_34;
  mask__56.18_115 = mask__50.14_109 & mask__55.17_114;
  _56 = _50 & _55;
  mask__74.19_117 = vect_x_20.13_107 < { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
};
  _74 = x_20 < 0.0;
  vect__ifc__73.20_120 = VEC_COND_EXPR <mask__74.19_117, { 2.0e+0, 2.0e+0,
2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>;
  _ifc__73 = _74 ? 2.0e+0 : prephitmp_41;
  _174 = ~mask__53.16_112;
  _175 = mask__74.19_117 & _174;
  vect__ifc__75.21_122 = VEC_COND_EXPR <_175, { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0,
2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>;
  _ifc__75 = _53 ? prephitmp_41 : _ifc__73;
  vect_prephitmp_17.22_124 = VEC_COND_EXPR <mask__56.18_115, vect_cst__123,
vect__ifc__75.21_122>;
  prephitmp_17 = _56 ? iftmp.1_16 : _ifc__75;
  # DEBUG t1 => D#2
  # DEBUG BEGIN_STMT
  # DEBUG neg_t1 => -D#2
  # DEBUG BEGIN_STMT
  # DEBUG t2 => prephitmp_17
  # DEBUG BEGIN_STMT
  _12 = res_28(D) + _37;
  vect__26.25_127 = MEM <vector(8) float> [(float *)vectp_res.23_125];
  _26 = *_12;
  vect__27.26_128 = vect__26.25_127 + vect_x_20.13_107;
  _27 = _26 + x_20;
  vect__45.27_129 = vect_prephitmp_17.22_124 + vect__27.26_128;
  _45 = prephitmp_17 + _27;
  MEM <vector(8) float> [(float *)vectp_res.28_130] = vect__45.27_129;
  # DEBUG BEGIN_STMT
  i_47 = i_18 + 1;
  # DEBUG i => i_47
  # DEBUG BEGIN_STMT
  ivtmp_43 = ivtmp_15 - 1;
  vectp_af.11_106 = vectp_af.11_105 + 32;
  vectp_res.23_126 = vectp_res.23_125 + 32;
  vectp_res.28_131 = vectp_res.28_130 + 32;
  ivtmp_134 = ivtmp_133 + 1;
  if (ivtmp_134 < 32)
    goto <bb 26>; [90.00%]
  else
    goto <bb 56>; [10.00%]

vs gcc12.2 


<bb 57> [local count: 5368707]:
  vect_cst__128 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16,
iftmp.1_16, iftmp.1_16, iftmp.1_16};
  vect_cst__134 = {iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33,
iftmp.0_33, iftmp.0_33, iftmp.0_33};

  <bb 5> [local count: 53687070]:
  # i_15 = PHI <i_30(13), 0(57)>
  # ivtmp_20 = PHI <ivtmp_49(13), 256(57)>
  # vectp_af.24_124 = PHI <vectp_af.24_125(13), af_24(D)(57)>
  # vectp_res.31_138 = PHI <vectp_res.31_139(13), res_28(D)(57)>
  # vectp_res.36_143 = PHI <vectp_res.36_144(13), res_28(D)(57)>
  # ivtmp_146 = PHI <ivtmp_147(13), 0(57)>
  # DEBUG i => NULL
  # DEBUG BEGIN_STMT
  _7 = (long unsigned int) i_15;
  _8 = _7 * 4;
  _9 = af_24(D) + _8;
  vect_x_25.26_126 = MEM <vector(8) float> [(float *)vectp_af.24_124];
  x_25 = *_9;
  # DEBUG x => NULL
  # DEBUG BEGIN_STMT
  # DEBUG D#1 => NULL
  # DEBUG z => NULL
  # DEBUG BEGIN_STMT
  _130 = vect_x_25.26_126 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  vect_iftmp.27_131 = VEC_COND_EXPR <_130, vect_cst__128, { 1.0e+0, 1.0e+0,
1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }>;
  iftmp.2_17 = x_25 >= 0.0 ? iftmp.1_16 : 1.0e+0;
  vect__41.28_133 = vect_iftmp.27_131 + { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0,
1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 };
  _41 = iftmp.2_17 + 1.0e+0;
  _135 = vect_x_25.26_126 >= vect_cst__134;
  vect_prephitmp_42.29_136 = VEC_COND_EXPR <_135, vect_iftmp.27_131,
vect__41.28_133>;
  prephitmp_42 = x_25 >= iftmp.0_33 ? iftmp.2_17 : _41;
  # DEBUG t1 => NULL
  # DEBUG BEGIN_STMT
  # DEBUG neg_t1 => -prephitmp_42
  # DEBUG BEGIN_STMT
  vect_neg_t1_27.30_137 = -vect_prephitmp_42.29_136;
  neg_t1_27 = -prephitmp_42;
  # DEBUG t2 => neg_t1_27
  # DEBUG BEGIN_STMT
  _10 = res_28(D) + _8;
  vect__11.33_140 = MEM <vector(8) float> [(float *)vectp_res.31_138];
  _11 = *_10;
  vect__35.34_141 = vect__11.33_140 + vect_x_25.26_126;
  _35 = _11 + x_25;
  vect__13.35_142 = vect_neg_t1_27.30_137 + vect__35.34_141;
  _13 = neg_t1_27 + _35;
  MEM <vector(8) float> [(float *)vectp_res.36_143] = vect__13.35_142;
  # DEBUG BEGIN_STMT
  i_30 = i_15 + 1;
  # DEBUG i => i_30
  # DEBUG BEGIN_STMT
  ivtmp_49 = ivtmp_20 - 1;
  vectp_af.24_125 = vectp_af.24_124 + 32;
  vectp_res.31_139 = vectp_res.31_138 + 32;
  vectp_res.36_144 = vectp_res.36_143 + 32;
  ivtmp_147 = ivtmp_146 + 1;
  if (ivtmp_147 < 32)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
@ 2023-03-07  6:03 ` crazylht at gmail dot com
  2023-03-07  6:13 ` crazylht at gmail dot com
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07  6:03 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #1 from Hongtao.liu <crazylht at gmail dot com> ---
There's also a backend misoptimization for x86 avx512 mask compare which is
the same as PR88570.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
  2023-03-07  6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
@ 2023-03-07  6:13 ` crazylht at gmail dot com
  2023-03-07  6:22 ` crazylht at gmail dot com
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07  6:13 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
Lots of logic operations on masks in ifcvt

  <bb 17> [local count: 531502205]:
  # i_18 = PHI <i_47(26), 0(42)>
  # ivtmp_15 = PHI <ivtmp_43(26), 256(42)>
  # DEBUG i => NULL
  # DEBUG BEGIN_STMT
  _38 = (long unsigned int) i_18;
  _37 = _38 * 4;
  _36 = af_24(D) + _37;
  x_20 = *_36;
  # DEBUG x => NULL
  # DEBUG BEGIN_STMT
  # DEBUG D#1 => NULL
  # DEBUG z => NULL
  # DEBUG BEGIN_STMT
  _50 = x_20 >= 0.0;
  _52 = x_20 < iftmp.0_34;
  _53 = _50 & _52;
  _55 = x_20 >= iftmp.0_34;
  _56 = _50 & _55;
  _74 = x_20 < 0.0;
  _ifc__73 = _74 ? 2.0e+0 : prephitmp_41;
  _ifc__75 = _53 ? prephitmp_41 : _ifc__73;
  prephitmp_17 = _56 ? iftmp.1_16 : _ifc__75;
  # DEBUG t1 => D#2
  # DEBUG BEGIN_STMT
  # DEBUG neg_t1 => -D#2
  # DEBUG BEGIN_STMT
  # DEBUG t2 => prephitmp_17
  # DEBUG BEGIN_STMT
  _12 = res_28(D) + _37;
  _26 = *_12;
  _27 = _26 + x_20;
  _45 = prephitmp_17 + _27;
  *_12 = _45;
  # DEBUG BEGIN_STMT
  i_47 = i_18 + 1;
  # DEBUG i => i_47
  # DEBUG BEGIN_STMT
  ivtmp_43 = ivtmp_15 - 1;

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
  2023-03-07  6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
  2023-03-07  6:13 ` crazylht at gmail dot com
@ 2023-03-07  6:22 ` crazylht at gmail dot com
  2023-03-07  6:34 ` crazylht at gmail dot com
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07  6:22 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> ---
A three-operand phi makes ifcvt generate worse code

 <bb 4> [local count: 10737414]:
  # iftmp.1_16 = PHI <2.5e-1(13), 5.0e-1(2)>
  # iftmp.0_34 = PHI <4.0e+0(13), 2.0e+0(2)>
  # prephitmp_40 = PHI <-2.5e-1(13), -5.0e-1(2)>
  # prephitmp_41 = PHI <1.25e+0(13), 1.5e+0(2)>
  # prephitmp_42 = PHI <-1.25e+0(13), -1.5e+0(2)>
  # DEBUG f1 => NULL
  # DEBUG f2 => iftmp.1_16
  # DEBUG BEGIN_STMT
  # DEBUG i => 0
  # DEBUG BEGIN_STMT
  if (_3 != 0)
    goto <bb 28>; [50.00%]
  else
    goto <bb 29>; [50.00%]

  <bb 29> [local count: 5368707]:

  <bb 17> [local count: 531502204]:
  # i_18 = PHI <0(29), i_47(26)>
  # ivtmp_15 = PHI <256(29), ivtmp_43(26)>
  # DEBUG i => i_18
  # DEBUG BEGIN_STMT
  _38 = (long unsigned int) i_18;
  _37 = _38 * 4;
  _36 = af_24(D) + _37;
  x_20 = *_36;
  # DEBUG x => x_20
  # DEBUG BEGIN_STMT
  # DEBUG D#1 => x_20 < 0.0
  # DEBUG z => (int) D#1
  # DEBUG BEGIN_STMT
  if (x_20 >= 0.0)
    goto <bb 19>; [59.00%]
  else
    goto <bb 22>; [41.00%]

  <bb 19> [local count: 313586303]:
  if (x_20 < iftmp.0_34)
    goto <bb 22>; [20.00%]
  else
    goto <bb 21>; [80.00%]

  <bb 21> [local count: 250869041]:

  <bb 22> [local count: 531502205]:
  # prephitmp_17 = PHI <2.0e+0(17), prephitmp_41(19), iftmp.1_16(21)>
  # DEBUG t1 => D#2
  # DEBUG BEGIN_STMT
  # DEBUG neg_t1 => -D#2
  # DEBUG BEGIN_STMT
  # DEBUG t2 => prephitmp_17
  # DEBUG BEGIN_STMT
  _12 = res_28(D) + _37;
  _26 = *_12;
  _27 = _26 + x_20;
  _45 = prephitmp_17 + _27;
  *_12 = _45;
  # DEBUG BEGIN_STMT
  i_47 = i_18 + 1;
  # DEBUG i => i_47
  # DEBUG BEGIN_STMT
  ivtmp_43 = ivtmp_15 - 1;
  if (ivtmp_43 != 0)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (2 preceding siblings ...)
  2023-03-07  6:22 ` crazylht at gmail dot com
@ 2023-03-07  6:34 ` crazylht at gmail dot com
  2023-03-07  9:17 ` rguenth at gcc dot gnu.org
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-03-07  6:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #4 from Hongtao.liu <crazylht at gmail dot com> ---
(In reply to Hongtao.liu from comment #3)
> A three-operand phi make ifcvt generate worse code
> 
It saved 1 add for potential 1.f + 0.f/1.f since it's constant, but generates
more logic operations for predicates.

Hmm, it looks good for scalar since it's just compare and jump, but hurts
vectorization due to more logical operations for predicates.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (3 preceding siblings ...)
  2023-03-07  6:34 ` crazylht at gmail dot com
@ 2023-03-07  9:17 ` rguenth at gcc dot gnu.org
  2023-03-07  9:38 ` jakub at gcc dot gnu.org
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-07  9:17 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |missed-optimization,
                   |                            |needs-bisection
   Target Milestone|---                         |13.0

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (4 preceding siblings ...)
  2023-03-07  9:17 ` rguenth at gcc dot gnu.org
@ 2023-03-07  9:38 ` jakub at gcc dot gnu.org
  2023-03-07 10:09 ` rguenther at suse dot de
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: jakub at gcc dot gnu.org @ 2023-03-07  9:38 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever confirmed|0                           |1
           Keywords|needs-bisection             |
   Last reconfirmed|                            |2023-03-07
                 CC|                            |aldyh at gcc dot gnu.org,
                   |                            |jakub at gcc dot gnu.org

--- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
In bisection r13-1 through r13-6513 I see only one important change,
r13-2267-g4fbe3e6aa74dae5c75a

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (5 preceding siblings ...)
  2023-03-07  9:38 ` jakub at gcc dot gnu.org
@ 2023-03-07 10:09 ` rguenther at suse dot de
  2023-03-27  8:26 ` rguenth at gcc dot gnu.org
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: rguenther at suse dot de @ 2023-03-07 10:09 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #6 from rguenther at suse dot de <rguenther at suse dot de> ---
On Tue, 7 Mar 2023, jakub at gcc dot gnu.org wrote:

> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
> 
> Jakub Jelinek <jakub at gcc dot gnu.org> changed:
> 
>            What    |Removed                     |Added
> ----------------------------------------------------------------------------
>              Status|UNCONFIRMED                 |NEW
>      Ever confirmed|0                           |1
>            Keywords|needs-bisection             |
>    Last reconfirmed|                            |2023-03-07
>                  CC|                            |aldyh at gcc dot gnu.org,
>                    |                            |jakub at gcc dot gnu.org
> 
> --- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
> In bisection r13-1 through r13-6513 I see only one important change,
> r13-2267-g4fbe3e6aa74dae5c75a

That would probably trigger extra jump threading here.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (6 preceding siblings ...)
  2023-03-07 10:09 ` rguenther at suse dot de
@ 2023-03-27  8:26 ` rguenth at gcc dot gnu.org
  2023-03-27  9:18 ` rguenth at gcc dot gnu.org
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-27  8:26 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #7 from Richard Biener <rguenth at gcc dot gnu.org> ---
The first threadfull pass ends up producing the three-argument PHI:

<bb 7> [local count: 1063004408]:
_7 = (long unsigned int) i_14;
_8 = _7 * 4;
_9 = af_24(D) + _8;
x_25 = *_9;
if (x_25 >= 0.0)
  goto <bb 9>; [59.00%]
else
  goto <bb 8>; [41.00%]

<bb 8> [local count: 435831803]:

<bb 9> [local count: 1063004408]:
# iftmp.2_17 = PHI <iftmp.1_16(7), 1.0e+0(8)>
if (iftmp.0_15 > x_25)
  goto <bb 11>; [50.00%]
else
  goto <bb 10>; [50.00%]

and we thread 9->11 from 8->9 (so when !(x_25 >= 0.0)).  Like a related PR
this is a threading that turns a "conditional move" diamond into something more
complicated.  We could heuristically decide that we don't want to do this
[early?].  But it might be more honest to more aggressively represent
those as COND_EXPRs then.

So I think we have a duplicate for this.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (7 preceding siblings ...)
  2023-03-27  8:26 ` rguenth at gcc dot gnu.org
@ 2023-03-27  9:18 ` rguenth at gcc dot gnu.org
  2023-03-27 10:37 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-27  9:18 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #8 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 54762
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54762&action=edit
heuristic

I am testing this heuristic

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (8 preceding siblings ...)
  2023-03-27  9:18 ` rguenth at gcc dot gnu.org
@ 2023-03-27 10:37 ` rguenth at gcc dot gnu.org
  2023-03-28 12:20 ` rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-27 10:37 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #8)
> Created attachment 54762 [details]
> heuristic
> 
> I am testing this heuristic

FAIL: gcc.dg/tree-ssa/ranger-threader-3.c scan-tree-dump-times ethread "Registering.*jump thread" 1
FAIL: gcc.dg/tree-ssa/ssa-dom-thread-14.c scan-tree-dump-times dom2 "Threaded"
2

is the fallout - both are the same testcase, both can be mitigated by adjusting
the dump to scan for and making sure PROP_loop_opts_done is also provided
for functions without loops.

But it also shows a case where we optimize things significantly later
after the change.

  if ((!unsignedp || (!left && methods == OPTAB_WIDEN)))
    {
      enum optab_methods methods1 = methods;
      if (unsignedp)
        methods1 = OPTAB_MUST_WIDEN;
      expand_binop (left ? lshift_optab : rshift_arith_optab,
                           unsignedp, methods1);

used to be threaded in early threading but now would be delayed to after
loop opts.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (9 preceding siblings ...)
  2023-03-27 10:37 ` rguenth at gcc dot gnu.org
@ 2023-03-28 12:20 ` rguenth at gcc dot gnu.org
  2023-04-13  7:30 ` rguenth at gcc dot gnu.org
  2023-04-13  8:14 ` crazylht at gmail dot com
  12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-03-28 12:20 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           See Also|                            |https://gcc.gnu.org/bugzill
                   |                            |a/show_bug.cgi?id=109154

--- Comment #10 from Richard Biener <rguenth at gcc dot gnu.org> ---
PR109154 is another of these cases.  We might want to de-dup them.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (10 preceding siblings ...)
  2023-03-28 12:20 ` rguenth at gcc dot gnu.org
@ 2023-04-13  7:30 ` rguenth at gcc dot gnu.org
  2023-04-13  8:14 ` crazylht at gmail dot com
  12 siblings, 0 replies; 14+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-04-13  7:30 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
The recent patch improved this to avoid some of the compares.  We still have
the three-argument PHI and thus three VEC_CONDs.

.L10:
        vmovups (%rdi,%rdx), %ymm0
        vcmpltps        %ymm6, %ymm0, %ymm3
        vcmpltps        %ymm2, %ymm0, %ymm1
        vpandn  %ymm1, %ymm3, %ymm1
        vblendvps       %ymm1, %ymm5, %ymm4, %ymm1
        vblendvps       %ymm3, %ymm7, %ymm1, %ymm1
        vaddps  %ymm1, %ymm0, %ymm0
        vaddps  (%rax,%rdx), %ymm0, %ymm0
        vmovups %ymm0, (%rax,%rdx)
        addq    $32, %rdx
        cmpq    $1024, %rdx
        jne     .L10

vs. GCC 12

.L6:
        vmovups (%rdi,%rdx), %ymm1
        vcmpltps        %ymm5, %ymm1, %ymm0
        vcmpltps        %ymm6, %ymm1, %ymm4
        vblendvps       %ymm0, %ymm3, %ymm2, %ymm0
        vandps  %ymm3, %ymm4, %ymm4
        vaddps  %ymm4, %ymm0, %ymm0
        vaddps  %ymm1, %ymm0, %ymm0
        vaddps  (%rax,%rdx), %ymm0, %ymm0
        vmovups %ymm0, (%rax,%rdx)
        addq    $32, %rdx
        cmpq    $1024, %rdx
        jne     .L6

which at least overall looks comparable.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Bug tree-optimization/109048] [13 regression] redundant mask compare generated by vectorizer.
  2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
                   ` (11 preceding siblings ...)
  2023-04-13  7:30 ` rguenth at gcc dot gnu.org
@ 2023-04-13  8:14 ` crazylht at gmail dot com
  12 siblings, 0 replies; 14+ messages in thread
From: crazylht at gmail dot com @ 2023-04-13  8:14 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048

Hongtao.liu <crazylht at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |FIXED

--- Comment #12 from Hongtao.liu <crazylht at gmail dot com> ---
So marked as fixed.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2023-04-13  8:14 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-07  6:01 [Bug tree-optimization/109048] New: [13 regression] redundant mask compare generated by vectorizer crazylht at gmail dot com
2023-03-07  6:03 ` [Bug tree-optimization/109048] " crazylht at gmail dot com
2023-03-07  6:13 ` crazylht at gmail dot com
2023-03-07  6:22 ` crazylht at gmail dot com
2023-03-07  6:34 ` crazylht at gmail dot com
2023-03-07  9:17 ` rguenth at gcc dot gnu.org
2023-03-07  9:38 ` jakub at gcc dot gnu.org
2023-03-07 10:09 ` rguenther at suse dot de
2023-03-27  8:26 ` rguenth at gcc dot gnu.org
2023-03-27  9:18 ` rguenth at gcc dot gnu.org
2023-03-27 10:37 ` rguenth at gcc dot gnu.org
2023-03-28 12:20 ` rguenth at gcc dot gnu.org
2023-04-13  7:30 ` rguenth at gcc dot gnu.org
2023-04-13  8:14 ` crazylht at gmail dot com

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).