public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "rguenth at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/101842] Vectorizer doesn't vectorize when loop bound depends on two independent variables that are unknown
Date: Tue, 10 Aug 2021 10:22:54 +0000	[thread overview]
Message-ID: <bug-101842-4-PufBDPCyGj@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-101842-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101842

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
OK, so with a hack like the following we vectorize the BB as

  vect__1.10_62 = MEM <vector(4) float> [(float *)p_34];
  vect_powmult_9.11_61 = vect__1.10_62 * vect__1.10_62;
  _60 = .REDUC_PLUS (vect_powmult_9.11_61);
  d_25 = d_35 - _60;
  p_26 = p_34 + 16;
  i_27 = i_37 + 4;
  _10 = len_20(D) > i_27;
  _11 = lim_21(D) <= d_25;
  _12 = _10 & _11;
  if (_12 != 0)

and on x86_64 we get

.L3:
        movups  (%rdi), %xmm2
        addl    $4, %eax
        addq    $16, %rdi
        mulps   %xmm2, %xmm2
        movaps  %xmm2, %xmm3
        movhlps %xmm2, %xmm3
        addps   %xmm2, %xmm3
        movaps  %xmm3, %xmm2
        shufps  $85, %xmm3, %xmm2
        addps   %xmm3, %xmm2
        subss   %xmm2, %xmm0
        cmpl    %eax, %esi
        jle     .L2
        comiss  %xmm1, %xmm0
        jnb     .L3
.L2:
        ret

or with AVX

.L3:
        vmovups (%rdi), %xmm4
        addl    $4, %eax
        addq    $16, %rdi
        vmulps  %xmm4, %xmm4, %xmm2
        vmovhlps        %xmm2, %xmm2, %xmm3
        vaddps  %xmm2, %xmm3, %xmm3
        vshufps $85, %xmm3, %xmm3, %xmm2
        vaddps  %xmm3, %xmm2, %xmm2
        vsubss  %xmm2, %xmm0, %xmm0
        cmpl    %eax, %esi
        jle     .L2
        vcomiss %xmm1, %xmm0
        jnb     .L3
.L2:
        ret


diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f9ca24415a2..0e14c164635 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -5637,6 +5637,11 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
                           || (gimple_assign_rhs_code (use_stmt)
                               != (code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR))))))
        {
+         gassign *next_stmt = assign;
+         while (next_stmt)
+           {
+             assign = next_stmt;
+             next_stmt = NULL;
          /* We start the match at the end of a possible association
             chain.  */
          auto_vec<chain_op_t> chain;
@@ -5666,10 +5671,12 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
                {
                  if (chain[i].dt != vect_internal_def)
                    invalid_cst = true;
-                 else if (chain[i].code != code)
-                   invalid_op = true;
                  else
-                   valid_lanes++;
+                   {
+                     valid_lanes++;
+                     if (chain[i].code != code)
+                       invalid_op = true;
+                   }
                }
              if (!invalid_op && !invalid_cst)
                {
@@ -5707,8 +5714,13 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
                    statistics_counter_event (cfun, "BB reduction missed (cst)", 1);
                  statistics_histogram_event (cfun, "BB reduction missed lanes",
                                              valid_lanes);
+
+                 /* Try again.  */
+                 if (valid_lanes > 2)
+                   next_stmt = as_a <gassign *> (chain_stmts[1]);
                }
            }
+           }
        }
     }
 }


The hack simply restarts reduction discovery at the "previous" stmt
(this eventually breaks down after skipping the first stmt).  As said,
it's a hack.  But is that the kind of vectorization you expect?

  parent reply	other threads:[~2021-08-10 10:22 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-10  9:41 [Bug tree-optimization/101842] New: " tnfchris at gcc dot gnu.org
2021-08-10 10:06 ` [Bug tree-optimization/101842] " rguenth at gcc dot gnu.org
2021-08-10 10:22 ` rguenth at gcc dot gnu.org [this message]
2021-08-10 10:44 ` tnfchris at gcc dot gnu.org
2021-08-10 10:54 ` rguenth at gcc dot gnu.org
2021-08-10 11:16 ` tnfchris at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-101842-4-PufBDPCyGj@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).