From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
 id 07C3A3858007; Tue, 10 Aug 2021 10:22:55 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 07C3A3858007
From: "rguenth at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/101842] Vectorizer doesn't vectorize when
 loop bound depends on two independent variables that are unknown
Date: Tue, 10 Aug 2021 10:22:54 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: tree-optimization
X-Bugzilla-Version: 12.0
X-Bugzilla-Keywords: missed-optimization
X-Bugzilla-Severity: normal
X-Bugzilla-Who: rguenth at gcc dot gnu.org
X-Bugzilla-Status: ASSIGNED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: rguenth at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-101842-4-PufBDPCyGj@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-101842-4@http.gcc.gnu.org/bugzilla/>
References: <bug-101842-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
X-BeenThere: gcc-bugs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-bugs mailing list <gcc-bugs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Tue, 10 Aug 2021 10:22:55 -0000

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D101842

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
OK, so with a hack like the following we vectorize the BB as

  vect__1.10_62 =3D MEM <vector(4) float> [(float *)p_34];
  vect_powmult_9.11_61 =3D vect__1.10_62 * vect__1.10_62;
  _60 =3D .REDUC_PLUS (vect_powmult_9.11_61);
  d_25 =3D d_35 - _60;
  p_26 =3D p_34 + 16;
  i_27 =3D i_37 + 4;
  _10 =3D len_20(D) > i_27;
  _11 =3D lim_21(D) <=3D d_25;
  _12 =3D _10 & _11;
  if (_12 !=3D 0)

and on x86_64 we get

.L3:
        movups  (%rdi), %xmm2
        addl    $4, %eax
        addq    $16, %rdi
        mulps   %xmm2, %xmm2
        movaps  %xmm2, %xmm3
        movhlps %xmm2, %xmm3
        addps   %xmm2, %xmm3
        movaps  %xmm3, %xmm2
        shufps  $85, %xmm3, %xmm2
        addps   %xmm3, %xmm2
        subss   %xmm2, %xmm0
        cmpl    %eax, %esi
        jle     .L2
        comiss  %xmm1, %xmm0
        jnb     .L3
.L2:
        ret

or with AVX

.L3:
        vmovups (%rdi), %xmm4
        addl    $4, %eax
        addq    $16, %rdi
        vmulps  %xmm4, %xmm4, %xmm2
        vmovhlps        %xmm2, %xmm2, %xmm3
        vaddps  %xmm2, %xmm3, %xmm3
        vshufps $85, %xmm3, %xmm3, %xmm2
        vaddps  %xmm3, %xmm2, %xmm2
        vsubss  %xmm2, %xmm0, %xmm0
        cmpl    %eax, %esi
        jle     .L2
        vcomiss %xmm1, %xmm0
        jnb     .L3
.L2:
        ret

diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f9ca24415a2..0e14c164635 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -5637,6 +5637,11 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinf=
o)
                           || (gimple_assign_rhs_code (use_stmt)
                               !=3D (code =3D=3D PLUS_EXPR ? MINUS_EXPR :
PLUS_EXPR))))))
        {
+         gassign *next_stmt =3D assign;
+         while (next_stmt)
+           {
+             assign =3D next_stmt;
+             next_stmt =3D NULL;
          /* We start the match at the end of a possible association
             chain.  */
          auto_vec<chain_op_t> chain;
@@ -5666,10 +5671,12 @@ vect_slp_check_for_constructors (bb_vec_info bb_vin=
fo)
                {
                  if (chain[i].dt !=3D vect_internal_def)
                    invalid_cst =3D true;
-                 else if (chain[i].code !=3D code)
-                   invalid_op =3D true;
                  else
-                   valid_lanes++;
+                   {
+                     valid_lanes++;
+                     if (chain[i].code !=3D code)
+                       invalid_op =3D true;
+                   }
                }
              if (!invalid_op && !invalid_cst)
                {
@@ -5707,8 +5714,13 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinf=
o)
                    statistics_counter_event (cfun, "BB reduction missed
(cst)", 1);
                  statistics_histogram_event (cfun, "BB reduction missed
lanes",
                                              valid_lanes);
+
+                 /* Try again.  */
+                 if (valid_lanes > 2)
+                   next_stmt =3D as_a <gassign *> (chain_stmts[1]);
                }
            }
+           }
        }
     }
 }


The hack simply restarts reduction discovery at the "previous" stmt
(this eventually breaks down after skipping the first stmt).  As said,
it's a hack.  But is that the kind of vectorization you expect?=