From: "juzhe.zhong at rivai dot ai" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/109088] GCC does not always vectorize conditional reduction
Date: Fri, 10 Nov 2023 12:02:58 +0000	[thread overview]
Message-ID: <bug-109088-4-9VCvyq69d1@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-109088-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109088

--- Comment #15 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
Hi Richard,

I confirmed that Robin's patch doesn't help with this issue.

The root cause of this issue is that if-conversion fails to recognize
the statement chain as a possible conditional reduction, i.e.
is_cond_scalar_reduction returns FALSE.
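
For concreteness, here is a reduced example of the loop shape involved
(my own illustration, assuming it matches the chain described in the
patch below; it is not necessarily the exact testcase of this PR):

/* The guarded update gimplifies into a chain
     tmp1 = sum + a[i];  sum = tmp1 + b[i];
   so the header PHI result is not a direct operand of the final
   reduction statement, and is_cond_scalar_reduction gives up.  */
int
foo (int *a, int *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    if (a[i] > 0)
      sum = sum + a[i] + b[i];
  return sum;
}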

I have the following patch, which bootstraps and passes regression
testing on x86 and also passes on aarch64. It enhances if-conversion's
recognition of conditional reductions.

diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index a8c915913ae..2bdd3710a65 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1784,14 +1784,119 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
   r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

   /* Make R_OP1 to hold reduction variable.  */
+  gimple *nonphi_use_stmt = NULL;
   if (r_nop2 == PHI_RESULT (header_phi)
       && commutative_tree_code (reduction_op))
     {
       std::swap (r_op1, r_op2);
       std::swap (r_nop1, r_nop2);
     }
-  else if (r_nop1 != PHI_RESULT (header_phi))
-    return false;
+  else if (r_nop1 == PHI_RESULT (header_phi))
+    ;
+  else
+    {
+      /* Analyze the statement chain of STMT so that we can generate a
+        better if-converted code sequence.  We are trying to catch the
+        following situation:
+
+          loop-header:
+          reduc_1 = PHI <..., reduc_2>
+          ...
+          if (...)
+          tmp1 = reduc_1 + rhs1;
+          tmp2 = tmp1 + rhs2;
+          tmp3 = tmp2 + rhs3;
+          ...
+          reduc_3 = tmpN-1 + rhsN-1;
+
+          reduc_2 = PHI <reduc_1, reduc_3>
+
+          and convert to
+
+          reduc_2 = PHI <0, reduc_1>
+          tmp1 = rhs1;
+          tmp2 = tmp1 + rhs2;
+          tmp3 = tmp2 + rhs3;
+          ...
+          reduc_3 = tmpN-1 + rhsN-1;
+          ifcvt = cond_expr ? reduc_3 : 0;
+          reduc_1 = reduc_1 +/- ifcvt;  */
+      if (num_imm_uses (PHI_RESULT (header_phi)) != 2)
+       return false;
+      if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+         && !(FLOAT_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+              && !HONOR_SIGNED_ZEROS (TREE_TYPE (PHI_RESULT (phi)))
+              && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (PHI_RESULT (phi)))
+              && !HONOR_NANS (TREE_TYPE (PHI_RESULT (phi)))))
+       return false;
+      gimple *prev_use_stmt, *curr_use_stmt;
+      use_operand_p use;
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (header_phi))
+       {
+         prev_use_stmt = curr_use_stmt = USE_STMT (use_p);
+         if (is_gimple_assign (curr_use_stmt))
+           {
+             if (TREE_CODE (gimple_assign_lhs (curr_use_stmt)) != SSA_NAME)
+               return false;
+             if (*has_nop)
+               {
+                 if (!CONVERT_EXPR_CODE_P (
+                       gimple_assign_rhs_code (curr_use_stmt)))
+                   return false;
+               }
+             else
+               {
+                 if (gimple_assign_rhs_code (curr_use_stmt) != reduction_op)
+                   return false;
+               }
+
+             bool visited_p = false;
+             nonphi_use_stmt = curr_use_stmt;
+             while (!visited_p)
+               {
+                 if (!single_imm_use (gimple_assign_lhs (prev_use_stmt), &use,
+                                      &curr_use_stmt)
+                     || gimple_bb (curr_use_stmt) != gimple_bb (stmt)
+                     || !is_gimple_assign (curr_use_stmt)
+                     || TREE_CODE (gimple_assign_lhs (curr_use_stmt))
+                          != SSA_NAME
+                     || gimple_assign_rhs_code (curr_use_stmt) != reduction_op)
+                   return false;
+                 if (curr_use_stmt == stmt)
+                   {
+                     if (*has_nop)
+                       {
+                         if (!single_imm_use (gimple_assign_lhs (
+                                                nonphi_use_stmt),
+                                              &use, &curr_use_stmt))
+                           return false;
+                         r_op1 = gimple_assign_lhs (nonphi_use_stmt);
+                         r_nop1 = PHI_RESULT (header_phi);
+                         nonphi_use_stmt = curr_use_stmt;
+                       }
+                     else
+                       r_op1 = PHI_RESULT (header_phi);
+
+                     if (*has_nop)
+                       {
+                         if (!single_imm_use (gimple_assign_lhs (stmt), &use,
+                                              &curr_use_stmt))
+                           return false;
+                         r_op2 = gimple_assign_lhs (stmt);
+                         r_nop2 = gimple_assign_lhs (curr_use_stmt);
+                       }
+                     else
+                       r_op2 = gimple_assign_lhs (stmt);
+                     visited_p = true;
+                   }
+                 else
+                   prev_use_stmt = curr_use_stmt;
+               }
+           }
+         else if (curr_use_stmt != phi)
+           return false;
+       }
+    }

   if (*has_nop)
     {
@@ -1816,12 +1921,41 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
        continue;
       if (use_stmt == stmt)
        continue;
+      if (use_stmt == nonphi_use_stmt)
+       continue;
       if (gimple_code (use_stmt) != GIMPLE_PHI)
        return false;
     }

   *op0 = r_op1; *op1 = r_op2;
   *reduc = stmt;
+
+  if (nonphi_use_stmt)
+    {
+      /* Transform:
+
+       if (...)
+          tmp1 = reduc_1 + rhs1;
+          tmp2 = tmp1 + rhs2;
+          tmp3 = tmp2 + rhs3;
+
+       into:
+
+          tmp1 = rhs1 + 0;   ---> we replace reduc_1 with '0'
+          tmp2 = tmp1 + rhs2;
+          tmp3 = tmp2 + rhs3;
+          ...
+          reduc_3 = tmpN-1 + rhsN-1;
+          ifcvt = cond_expr ? reduc_3 : 0;  */
+      gcc_assert (gimple_assign_rhs_code (nonphi_use_stmt) == reduction_op);
+      if (gimple_assign_rhs1 (nonphi_use_stmt) == r_op1)
+       gimple_assign_set_rhs1 (nonphi_use_stmt,
+                               build_zero_cst (TREE_TYPE (r_op1)));
+      else if (gimple_assign_rhs2 (nonphi_use_stmt) == r_op1)
+       gimple_assign_set_rhs2 (nonphi_use_stmt,
+                               build_zero_cst (TREE_TYPE (r_op1)));
+      update_stmt (nonphi_use_stmt);
+    }
   return true;
 }

@@ -1886,12 +2020,17 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
       gsi_remove (&stmt_it, true);
       release_defs (nop_reduc);
     }
+
   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

   /* Delete original reduction stmt.  */
-  stmt_it = gsi_for_stmt (reduc);
-  gsi_remove (&stmt_it, true);
-  release_defs (reduc);
+  if (op1 != gimple_assign_lhs (reduc))
+    {
+      stmt_it = gsi_for_stmt (reduc);
+      gsi_remove (&stmt_it, true);
+      release_defs (reduc);
+    }
+
   return rhs;
 }
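
For reference, here is a hand-written C sketch of the scalar form the
patched if-conversion aims to produce for the example loop above (my
reading of the patch; names are illustrative, not compiler output):

int
foo_ifcvt (int *a, int *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    {
      int tmp1 = 0 + a[i];              /* reduc_1 replaced with 0 */
      int tmp2 = tmp1 + b[i];           /* rest of the chain unchanged */
      int ifcvt = a[i] > 0 ? tmp2 : 0;  /* the guard becomes a select */
      sum = sum + ifcvt;                /* unconditional reduction */
    }
  return sum;
}

The single unconditional reduction at the end of the body is the form
the vectorizer already handles as a conditional reduction.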

I have fully tested it with different kinds of conditional reductions;
all of them can be vectorized.

I am not sure whether it is a feasible solution.


Thread overview: 23+ messages
2023-03-10  9:24 [Bug c/109088] New: GCC fail auto-vectorization juzhe.zhong at rivai dot ai
2023-03-10 10:39 ` [Bug c/109088] " ubizjak at gmail dot com
2023-03-10 12:39 ` [Bug tree-optimization/109088] " rguenth at gcc dot gnu.org
2023-03-10 13:16 ` juzhe.zhong at rivai dot ai
2023-03-10 14:04 ` pinskia at gcc dot gnu.org
2023-03-10 14:09 ` [Bug tree-optimization/109088] GCC does not always vectorize conditional reduction pinskia at gcc dot gnu.org
2023-09-26 12:14 ` juzhe.zhong at rivai dot ai
2023-09-27  2:45 ` juzhe.zhong at rivai dot ai
2023-09-27  2:58 ` juzhe.zhong at rivai dot ai
2023-09-27  7:15 ` rguenth at gcc dot gnu.org
2023-09-27  7:34 ` juzhe.zhong at rivai dot ai
2023-09-27  9:06 ` rguenth at gcc dot gnu.org
2023-09-27  9:27 ` juzhe.zhong at rivai dot ai
2023-09-27 14:11 ` juzhe.zhong at rivai dot ai
2023-10-06  9:44 ` rguenth at gcc dot gnu.org
2023-11-10 12:02 ` juzhe.zhong at rivai dot ai [this message]
2023-11-10 13:10 ` rguenth at gcc dot gnu.org
2023-11-10 13:42 ` juzhe.zhong at rivai dot ai
2023-11-15 14:09 ` rguenth at gcc dot gnu.org
2023-11-15 14:38 ` juzhe.zhong at rivai dot ai
2023-11-15 14:42 ` rguenther at suse dot de
2023-11-16  1:06 ` juzhe.zhong at rivai dot ai
2023-11-16  6:50 ` rguenther at suse dot de
