public inbox for gcc-patches@gcc.gnu.org
* [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
@ 2021-05-24  9:56 Hongtao Liu
  2021-05-25 10:24 ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Hongtao Liu @ 2021-05-24  9:56 UTC (permalink / raw)
  To: GCC Patches, Richard Biener

[-- Attachment #1: Type: text/plain, Size: 586 bytes --]

Hi:
  Details are described in the PR: is_cond_scalar_reduction fails to
match conditional char/short reductions because of the sign-changing
nop conversions around the increment, so such loops are not vectorized.
  Bootstrapped and regtested on
x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
-march=cascadelake,-march=cascadelake}
  Ok for trunk?
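
  For reference, the patch recognizes both the PLUS_EXPR and the
MINUS_EXPR form of the pattern; a minimal decrement variant (purely
illustrative, not taken from the PR or the testcase) would be:

short foo3 (short *a, short *b, short *c, int n)
{
  /* After narrowing, the conditional update is performed in unsigned
     short with nop conversions around it, which is the shape
     is_cond_scalar_reduction learns to match.  */
  short sum = 0;
  for (int i = 0; i != n; i++)
    if (a[i] == c[i])
      sum -= b[i];
  return sum;
}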

  gcc/ChangeLog:

        PR tree-optimization/98365
        * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
        (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
        (convert_scalar_cond_reduction): Ditto.
        (predicate_scalar_phi): Ditto.

gcc/testsuite/ChangeLog:

        PR tree-optimization/98365
        * gcc.target/i386/pr98365.c: New test.

-- 
BR,
Hongtao

[-- Attachment #2: 0001-Extend-is_cond_scalar_reduction-to-handle-nop_expr-a.patch --]
[-- Type: text/x-patch, Size: 8810 bytes --]

From 0248efe310454a31e4bbeb1430f907ec663c39ae Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Wed, 6 Jan 2021 16:33:27 +0800
Subject: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr
 after/before scalar reduction.[PR98365]

gcc/ChangeLog:

	PR tree-optimization/98365
	* tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
	(is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
	(convert_scalar_cond_reduction): Ditto.
	(predicate_scalar_phi): Ditto.

gcc/testsuite/ChangeLog:

	PR tree-optimization/98365
	* gcc.target/i386/pr98365.c: New test.
---
 gcc/testsuite/gcc.target/i386/pr98365.c |  22 ++++
 gcc/tree-if-conv.c                      | 131 +++++++++++++++++++++---
 2 files changed, 141 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98365.c

diff --git a/gcc/testsuite/gcc.target/i386/pr98365.c b/gcc/testsuite/gcc.target/i386/pr98365.c
new file mode 100644
index 00000000000..652210dcdd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98365.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+short foo1 (short* a, short* c, int n)
+{
+  int i;
+  short cnt=0;
+  for (int i = 0; i != n; i++)
+    if (a[i] == c[i])
+      cnt++;
+  return cnt;
+}
+
+char foo2 (char* a, char* c, int n)
+{
+  int i;
+  char cnt=0;
+  for (int i = 0; i != n; i++)
+    if (a[i] == c[i])
+      cnt++;
+  return cnt;
+}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 716eae44a21..5811eb00478 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -1579,6 +1579,31 @@ if_convertible_loop_p (class loop *loop)
   return res;
 }
 
+/* If HAS_NOP, strip the conversion defining OP and return its source;
+   e.g. given the sequence below, return reduc_1 when OP is tmp1:
+   if (...)
+     tmp1 = (unsigned type) reduc_1;
+     tmp2 = tmp1 + rhs2;
+     reduc_3 = (signed type) tmp2.  */
+static tree
+strip_nop_cond_scalar_reduction (bool has_nop, tree op)
+{
+  if (!has_nop)
+    return op;
+
+  if (TREE_CODE (op) != SSA_NAME)
+    return NULL_TREE;
+
+  gimple* stmt = SSA_NAME_DEF_STMT (op);
+  if (!stmt
+      || gimple_code (stmt) != GIMPLE_ASSIGN
+      || gimple_has_volatile_ops (stmt)
+      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
+    return NULL_TREE;
+
+  return gimple_assign_rhs1 (stmt);
+}
+
 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
    which is in predicated basic block.
    In fact, the following PHI pattern is searching:
@@ -1595,9 +1620,10 @@ if_convertible_loop_p (class loop *loop)
 
 static bool
 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
-			  tree *op0, tree *op1, bool extended)
+			  tree *op0, tree *op1, bool extended, bool* has_nop,
+			  gimple **nop_reduc)
 {
-  tree lhs, r_op1, r_op2;
+  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
   gimple *stmt;
   gimple *header_phi = NULL;
   enum tree_code reduction_op;
@@ -1608,7 +1634,7 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
   use_operand_p use_p;
   edge e;
   edge_iterator ei;
-  bool result = false;
+  bool result = *has_nop = false;
   if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
     return false;
 
@@ -1656,18 +1682,78 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
     return false;
 
   reduction_op = gimple_assign_rhs_code (stmt);
+
+  /* Catch something like the following:
+
+     loop-header:
+     reduc_1 = PHI <..., reduc_2>
+     ...
+     if (...)
+       tmp1 = (unsigned type) reduc_1;
+       tmp2 = tmp1 + rhs2;
+       reduc_3 = (signed type) tmp2;
+
+     reduc_2 = PHI <reduc_1, reduc_3>
+
+     and convert to
+
+     reduc_2 = PHI <0, reduc_3>
+     tmp1 = (unsigned type) reduc_1;
+     ifcvt = cond_expr ? rhs2 : 0;
+     tmp2 = tmp1 +/- ifcvt;
+     reduc_1 = (signed type) tmp2;  */
+
+  if (reduction_op == NOP_EXPR)
+    {
+      lhs = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (lhs) != SSA_NAME
+	  || !has_single_use (lhs))
+	return false;
+
+      *nop_reduc = stmt;
+      stmt = SSA_NAME_DEF_STMT (lhs);
+      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
+	  || gimple_code (stmt) != GIMPLE_ASSIGN
+	  || gimple_has_volatile_ops (stmt))
+	return false;
+
+      *has_nop = true;
+      reduction_op = gimple_assign_rhs_code (stmt);
+    }
+
   if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
     return false;
   r_op1 = gimple_assign_rhs1 (stmt);
   r_op2 = gimple_assign_rhs2 (stmt);
 
+  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
+  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
+
   /* Make R_OP1 to hold reduction variable.  */
-  if (r_op2 == PHI_RESULT (header_phi)
+  if (r_nop2 == PHI_RESULT (header_phi)
       && reduction_op == PLUS_EXPR)
-    std::swap (r_op1, r_op2);
-  else if (r_op1 != PHI_RESULT (header_phi))
+    {
+      std::swap (r_op1, r_op2);
+      std::swap (r_nop1, r_nop2);
+    }
+  else if (r_nop1 != PHI_RESULT (header_phi))
     return false;
 
+  if (*has_nop)
+    {
+      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (is_gimple_debug (use_stmt))
+	    continue;
+	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
+	    continue;
+	  if (gimple_code (use_stmt) != GIMPLE_PHI)
+	    return false;
+	}
+    }
+
   /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
     {
@@ -1705,7 +1791,8 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
 
 static tree
 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
-			       tree cond, tree op0, tree op1, bool swap)
+			       tree cond, tree op0, tree op1, bool swap,
+			       bool has_nop, gimple* nop_reduc)
 {
   gimple_stmt_iterator stmt_it;
   gimple *new_assign;
@@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
   rhs = fold_build2 (gimple_assign_rhs_code (reduc),
 		     TREE_TYPE (rhs1), op0, tmp);
 
+  if (has_nop)
+    {
+      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
+      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
+      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
+      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
+      /* Rebuild rhs for nop_expr.  */
+      rhs = fold_build1 (NOP_EXPR,
+			 TREE_TYPE (gimple_assign_lhs (nop_reduc)),
+			 nop_rhs);
+
+      /* Delete nop_reduc.  */
+      stmt_it = gsi_for_stmt (nop_reduc);
+      gsi_remove (&stmt_it, true);
+      release_defs (nop_reduc);
+    }
+
   /* Delete original reduction stmt.  */
   stmt_it = gsi_for_stmt (reduc);
   gsi_remove (&stmt_it, true);
@@ -1808,7 +1912,7 @@ ifcvt_follow_ssa_use_edges (tree val)
 static void
 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 {
-  gimple *new_stmt = NULL, *reduc;
+  gimple *new_stmt = NULL, *reduc, *nop_reduc;
   tree rhs, res, arg0, arg1, op0, op1, scev;
   tree cond;
   unsigned int index0;
@@ -1816,6 +1920,7 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
   edge e;
   basic_block bb;
   unsigned int i;
+  bool has_nop;
 
   res = gimple_phi_result (phi);
   if (virtual_operand_p (res))
@@ -1876,10 +1981,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 	  arg1 = gimple_phi_arg_def (phi, 1);
 	}
       if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
-				    &op0, &op1, false))
+				    &op0, &op1, false, &has_nop,
+				    &nop_reduc))
 	/* Convert reduction stmt into vectorizable form.  */
 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					     true_bb != gimple_bb (reduc));
+					     true_bb != gimple_bb (reduc),
+					     has_nop, nop_reduc);
       else
 	/* Build new RHS using selected condition and arguments.  */
 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
@@ -1961,14 +2068,14 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 					 is_gimple_condexpr, NULL_TREE,
 					 true, GSI_SAME_STMT);
       if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
-				      &op0, &op1, true)))
+				      &op0, &op1, true, &has_nop, &nop_reduc)))
 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
 				    swap? arg1 : arg0,
 				    swap? arg0 : arg1);
       else
 	/* Convert reduction stmt into vectorizable form.  */
 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					     swap);
+					     swap, has_nop, nop_reduc);
       new_stmt = gimple_build_assign (res, rhs);
       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
       update_stmt (new_stmt);
-- 
2.18.1



* Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
  2021-05-24  9:56 [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365] Hongtao Liu
@ 2021-05-25 10:24 ` Richard Biener
  2021-05-26  5:10   ` Hongtao Liu
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2021-05-25 10:24 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: GCC Patches

On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Hi:
>   Details described in PR.
>   Bootstrapped and regtest on
> x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> -march=cascadelake,-march=cascadelake}
>   Ok for trunk?

+static tree
+strip_nop_cond_scalar_reduction (bool has_nop, tree op)
+{
+  if (!has_nop)
+    return op;
+
+  if (TREE_CODE (op) != SSA_NAME)
+    return NULL_TREE;
+
+  gimple* stmt = SSA_NAME_DEF_STMT (op);
+  if (!stmt
+      || gimple_code (stmt) != GIMPLE_ASSIGN
+      || gimple_has_volatile_ops (stmt)
+      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
+    return NULL_TREE;
+
+  return gimple_assign_rhs1 (stmt);

this allows arbitrary conversions where the comment suggests you
only want to allow conversions to the same precision but different sign.
Sth like

  gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
  if (!stmt
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
      || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
(gimple_assign_rhs1 (stmt))))
    return NULL_TREE;
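
(Concretely: for the char reduction in the testcase the stripped
statement is tmp1 = (unsigned char) reduc_1, an 8-bit to 8-bit
conversion, so tree_nop_conversion_p holds; a widening cast such as
(int) reduc_1 changes the precision and the helper would then return
NULL_TREE.)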

+      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
+         || gimple_code (stmt) != GIMPLE_ASSIGN
+         || gimple_has_volatile_ops (stmt))
+       return false;

!is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN

the gimple_has_volatile_ops check is superfluous given you restrict
the assign code.

+      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
+       {
+         gimple *use_stmt = USE_STMT (use_p);
+         if (is_gimple_debug (use_stmt))
+           continue;
+         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
+           continue;
+         if (gimple_code (use_stmt) != GIMPLE_PHI)
+           return false;

can the last check be use_stmt == phi since we should have the
PHI readily available?

@@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
gimple_stmt_iterator *gsi,
   rhs = fold_build2 (gimple_assign_rhs_code (reduc),
                     TREE_TYPE (rhs1), op0, tmp);

+  if (has_nop)
+    {
+      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
+      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
+      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
+      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
+      /* Rebuild rhs for nop_expr.  */
+      rhs = fold_build1 (NOP_EXPR,
+                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
+                        nop_rhs);
+
+      /* Delete nop_reduc.  */
+      stmt_it = gsi_for_stmt (nop_reduc);
+      gsi_remove (&stmt_it, true);
+      release_defs (nop_reduc);
+    }
+

hmm, the whole function could be cleaned up with sth like

     /* Build rhs for unconditional increment/decrement.  */
     gimple_seq stmts = NULL;
     rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc),
                                    TREE_TYPE (rhs1), op0, tmp);
     if (has_nop)
       rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
(nop_reduc)), rhs);
     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

plus in the caller moving the

      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

to the else branch as well as the folding done on new_stmt (maybe return
new_stmt instead of rhs from convert_scalar_cond_reduction).

Richard.

>   gcc/ChangeLog:
>
>         PR tree-optimization/pr98365
>         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
>         (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
>         (convert_scalar_cond_reduction): Ditto.
>         (predicate_scalar_phi): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/pr98365
>         * gcc.target/i386/pr98365.c: New test.
>
> --
> BR,
> Hongtao


* Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
  2021-05-25 10:24 ` Richard Biener
@ 2021-05-26  5:10   ` Hongtao Liu
  2021-05-26 12:41     ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Hongtao Liu @ 2021-05-26  5:10 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

On Tue, May 25, 2021 at 6:24 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > Hi:
> >   Details described in PR.
> >   Bootstrapped and regtest on
> > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> > -march=cascadelake,-march=cascadelake}
> >   Ok for trunk?
>
> +static tree
> +strip_nop_cond_scalar_reduction (bool has_nop, tree op)
> +{
> +  if (!has_nop)
> +    return op;
> +
> +  if (TREE_CODE (op) != SSA_NAME)
> +    return NULL_TREE;
> +
> +  gimple* stmt = SSA_NAME_DEF_STMT (op);
> +  if (!stmt
> +      || gimple_code (stmt) != GIMPLE_ASSIGN
> +      || gimple_has_volatile_ops (stmt)
> +      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
> +    return NULL_TREE;
> +
> +  return gimple_assign_rhs1 (stmt);
>
> this allows arbitrary conversions where the comment suggests you
> only want to allow conversions to the same precision but different sign.
> Sth like
>
>   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
>   if (!stmt
>       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
>       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
> (gimple_assign_rhs1 (stmt))))
>     return NULL_TREE;
>
> +      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
> +         || gimple_code (stmt) != GIMPLE_ASSIGN
> +         || gimple_has_volatile_ops (stmt))
> +       return false;
>
> !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN
>
> the gimple_has_volatile_ops check is superfluous given you restrict
> the assign code.
>
> +      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
> +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
> +       {
> +         gimple *use_stmt = USE_STMT (use_p);
> +         if (is_gimple_debug (use_stmt))
> +           continue;
> +         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
> +           continue;
> +         if (gimple_code (use_stmt) != GIMPLE_PHI)
> +           return false;
>
> can the last check be use_stmt == phi since we should have the
> PHI readily available?
>
> @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
> gimple_stmt_iterator *gsi,
>    rhs = fold_build2 (gimple_assign_rhs_code (reduc),
>                      TREE_TYPE (rhs1), op0, tmp);
>
> +  if (has_nop)
> +    {
> +      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
> +      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
> +      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
> +      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
> +      /* Rebuild rhs for nop_expr.  */
> +      rhs = fold_build1 (NOP_EXPR,
> +                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
> +                        nop_rhs);
> +
> +      /* Delete nop_reduc.  */
> +      stmt_it = gsi_for_stmt (nop_reduc);
> +      gsi_remove (&stmt_it, true);
> +      release_defs (nop_reduc);
> +    }
> +
>
> hmm, the whole function could be cleaned up with sth like
>
>      /* Build rhs for unconditional increment/decrement.  */
>      gimple_seq stmts = NULL;
>      rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc),
>                                     TREE_TYPE (rhs1), op0, tmp);
>      if (has_nop)
>        rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
> (nop_reduc)), rhs);
>      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>
> plus in the caller moving the
>
>       new_stmt = gimple_build_assign (res, rhs);
>       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
>
> to the else branch as well as the folding done on new_stmt (maybe return
> new_stmt instead of rhs from convert_scalar_cond_reduction.
Eventually we still need to assign rhs to res, and with that extra move
stmt from rhs to res the vectorizer fails.
The only difference in 166t.ifcvt between successful vectorization
and failed vectorization is below:
   char * _24;
   char _25;
   unsigned char _ifc__29;
+  unsigned char _30;

   <bb 2> [local count: 118111600]:
   if (n_10(D) != 0)
@@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n)
   _5 = c_14(D) + _1;
   _6 = *_5;
   _ifc__29 = _3 == _6 ? 1 : 0;
-  cnt_7 = cnt_18 + _ifc__29;
+  _30 = cnt_18 + _ifc__29;
+  cnt_7 = _30;
   i_16 = i_20 + 1;
   if (n_10(D) != i_16)
     goto <bb 9>; [89.00%]
@@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n)
   goto <bb 12>; [100.00%]

   <bb 6> [local count: 105119324]:
-  # cnt_19 = PHI <cnt_7(3), cnt_27(15)>
+  # cnt_19 = PHI <_30(3), cnt_27(15)>
   _21 = (char) cnt_19;

If we want to eliminate the extra move, gimple_build and
gimple_convert are not suitable since they create a new lhs.  Is there
any interface like gimple_build_assign that accepts a stmt sequence?
>
> Richard.
>
> >   gcc/ChangeLog:
> >
> >         PR tree-optimization/pr98365
> >         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
> >         (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
> >         (convert_scalar_cond_reduction): Ditto.
> >         (predicate_scalar_phi): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR tree-optimization/pr98365
> >         * gcc.target/i386/pr98365.c: New test.
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao


* Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
  2021-05-26  5:10   ` Hongtao Liu
@ 2021-05-26 12:41     ` Richard Biener
  2021-05-27  7:09       ` Hongtao Liu
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2021-05-26 12:41 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: GCC Patches

On Wed, May 26, 2021 at 7:06 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, May 25, 2021 at 6:24 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > Hi:
> > >   Details described in PR.
> > >   Bootstrapped and regtest on
> > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> > > -march=cascadelake,-march=cascadelake}
> > >   Ok for trunk?
> >
> > +static tree
> > +strip_nop_cond_scalar_reduction (bool has_nop, tree op)
> > +{
> > +  if (!has_nop)
> > +    return op;
> > +
> > +  if (TREE_CODE (op) != SSA_NAME)
> > +    return NULL_TREE;
> > +
> > +  gimple* stmt = SSA_NAME_DEF_STMT (op);
> > +  if (!stmt
> > +      || gimple_code (stmt) != GIMPLE_ASSIGN
> > +      || gimple_has_volatile_ops (stmt)
> > +      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
> > +    return NULL_TREE;
> > +
> > +  return gimple_assign_rhs1 (stmt);
> >
> > this allows arbitrary conversions where the comment suggests you
> > only want to allow conversions to the same precision but different sign.
> > Sth like
> >
> >   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
> >   if (!stmt
> >       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
> >       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
> > (gimple_assign_rhs1 (stmt))))
> >     return NULL_TREE;
> >
> > +      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
> > +         || gimple_code (stmt) != GIMPLE_ASSIGN
> > +         || gimple_has_volatile_ops (stmt))
> > +       return false;
> >
> > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN
> >
> > the gimple_has_volatile_ops check is superfluous given you restrict
> > the assign code.
> >
> > +      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
> > +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
> > +       {
> > +         gimple *use_stmt = USE_STMT (use_p);
> > +         if (is_gimple_debug (use_stmt))
> > +           continue;
> > +         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
> > +           continue;
> > +         if (gimple_code (use_stmt) != GIMPLE_PHI)
> > +           return false;
> >
> > can the last check be use_stmt == phi since we should have the
> > PHI readily available?
> >
> > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
> > gimple_stmt_iterator *gsi,
> >    rhs = fold_build2 (gimple_assign_rhs_code (reduc),
> >                      TREE_TYPE (rhs1), op0, tmp);
> >
> > +  if (has_nop)
> > +    {
> > +      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
> > +      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
> > +      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
> > +      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
> > +      /* Rebuild rhs for nop_expr.  */
> > +      rhs = fold_build1 (NOP_EXPR,
> > +                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
> > +                        nop_rhs);
> > +
> > +      /* Delete nop_reduc.  */
> > +      stmt_it = gsi_for_stmt (nop_reduc);
> > +      gsi_remove (&stmt_it, true);
> > +      release_defs (nop_reduc);
> > +    }
> > +
> >
> > hmm, the whole function could be cleaned up with sth like
> >
> >      /* Build rhs for unconditional increment/decrement.  */
> >      gimple_seq stmts = NULL;
> >      rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc),
> >                                     TREE_TYPE (rhs1), op0, tmp);
> >      if (has_nop)
> >        rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
> > (nop_reduc)), rhs);
> >      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >
> > plus in the caller moving the
> >
> >       new_stmt = gimple_build_assign (res, rhs);
> >       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> >
> > to the else branch as well as the folding done on new_stmt (maybe return
> > new_stmt instead of rhs from convert_scalar_cond_reduction.
> Eventually, we needed to assign rhs to res, and with an extra mov stmt
> from rhs to res, the vectorizer failed.
> the only difference in 166t.ifcvt between successfully vectorization
> and failed vectorization is below
>    char * _24;
>    char _25;
>    unsigned char _ifc__29;
> +  unsigned char _30;
>
>    <bb 2> [local count: 118111600]:
>    if (n_10(D) != 0)
> @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n)
>    _5 = c_14(D) + _1;
>    _6 = *_5;
>    _ifc__29 = _3 == _6 ? 1 : 0;
> -  cnt_7 = cnt_18 + _ifc__29;
> +  _30 = cnt_18 + _ifc__29;
> +  cnt_7 = _30;
>    i_16 = i_20 + 1;
>    if (n_10(D) != i_16)
>      goto <bb 9>; [89.00%]
> @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n)
>    goto <bb 12>; [100.00%]
>
>    <bb 6> [local count: 105119324]:
> -  # cnt_19 = PHI <cnt_7(3), cnt_27(15)>
> +  # cnt_19 = PHI <_30(3), cnt_27(15)>
>    _21 = (char) cnt_19;
>
> if we want to eliminate the extra move, gimple_build and
> gimple_convert is not suitable since they create a new lhs, is there
> any interface like gimple_build_assign but accept stmts?

Hmm, OK.  So what you could do is replace all uses of 'res'
with 'rhs', or alternatively replace the last generated stmt LHS
by 'res'.  Both are a bit hackish of course.  Usually the vectorizer
just ignores copies like this but the reduction matching is
unnecessarily strict here (but it's also a bit awkward to fix there).

There's redundant_ssa_names which seems to be designed to
handle propagating those out and there's the do_rpo_vn run,
so I wonder why the stmt remains.  Now, for redundant_ssa_names
you'd need to push a std::pair (res, rhs) to it in case rhs is an
SSA name - does that help?
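
A sketch of that suggestion, using the names from the patch (this is
essentially what the v2 patch later in the thread does in
predicate_scalar_phi):

      rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
                                           true_bb != gimple_bb (reduc),
                                           has_nop, nop_reduc);
      /* Record the copy res = rhs so if-conversion's own cleanup
         propagates it out again.  */
      redundant_ssa_names.safe_push (std::make_pair (res, rhs));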

Richard.
> >
> > Richard.
> >
> > >   gcc/ChangeLog:
> > >
> > >         PR tree-optimization/pr98365
> > >         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
> > >         (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
> > >         (convert_scalar_cond_reduction): Ditto.
> > >         (predicate_scalar_phi): Ditto.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         PR tree-optimization/pr98365
> > >         * gcc.target/i386/pr98365.c: New test.
> > >
> > > --
> > > BR,
> > > Hongtao
>
>
>
> --
> BR,
> Hongtao


* Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
  2021-05-26 12:41     ` Richard Biener
@ 2021-05-27  7:09       ` Hongtao Liu
  2021-05-31 10:14         ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Hongtao Liu @ 2021-05-27  7:09 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 6776 bytes --]

On Wed, May 26, 2021 at 8:41 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Wed, May 26, 2021 at 7:06 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Tue, May 25, 2021 at 6:24 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > > >
> > > > Hi:
> > > >   Details described in PR.
> > > >   Bootstrapped and regtest on
> > > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> > > > -march=cascadelake,-march=cascadelake}
> > > >   Ok for trunk?
> > >
> > > +static tree
> > > +strip_nop_cond_scalar_reduction (bool has_nop, tree op)
> > > +{
> > > +  if (!has_nop)
> > > +    return op;
> > > +
> > > +  if (TREE_CODE (op) != SSA_NAME)
> > > +    return NULL_TREE;
> > > +
> > > +  gimple* stmt = SSA_NAME_DEF_STMT (op);
> > > +  if (!stmt
> > > +      || gimple_code (stmt) != GIMPLE_ASSIGN
> > > +      || gimple_has_volatile_ops (stmt)
> > > +      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
> > > +    return NULL_TREE;
> > > +
> > > +  return gimple_assign_rhs1 (stmt);
> > >
> > > this allows arbitrary conversions where the comment suggests you
> > > only want to allow conversions to the same precision but different sign.
> > > Sth like
> > >
> > >   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
> > >   if (!stmt
> > >       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
> > >       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
> > > (gimple_assign_rhs1 (stmt))))
> > >     return NULL_TREE;
> > >
Changed.
> > > +      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
> > > +         || gimple_code (stmt) != GIMPLE_ASSIGN
> > > +         || gimple_has_volatile_ops (stmt))
> > > +       return false;
> > >
> > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN
> > >
Changed.
> > > the gimple_has_volatile_ops check is superfluous given you restrict
> > > the assign code.
> > >
Changed.
> > > +      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
> > > +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
> > > +       {
> > > +         gimple *use_stmt = USE_STMT (use_p);
> > > +         if (is_gimple_debug (use_stmt))
> > > +           continue;
> > > +         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
> > > +           continue;
> > > +         if (gimple_code (use_stmt) != GIMPLE_PHI)
> > > +           return false;
> > >
> > > can the last check be use_stmt == phi since we should have the
> > > PHI readily available?
> > >
Yes, changed.
> > > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
> > > gimple_stmt_iterator *gsi,
> > >    rhs = fold_build2 (gimple_assign_rhs_code (reduc),
> > >                      TREE_TYPE (rhs1), op0, tmp);
> > >
> > > +  if (has_nop)
> > > +    {
> > > +      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
> > > +      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
> > > +      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
> > > +      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
> > > +      /* Rebuild rhs for nop_expr.  */
> > > +      rhs = fold_build1 (NOP_EXPR,
> > > +                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
> > > +                        nop_rhs);
> > > +
> > > +      /* Delete nop_reduc.  */
> > > +      stmt_it = gsi_for_stmt (nop_reduc);
> > > +      gsi_remove (&stmt_it, true);
> > > +      release_defs (nop_reduc);
> > > +    }
> > > +
> > >
> > > hmm, the whole function could be cleaned up with sth like
> > >
> > >      /* Build rhs for unconditional increment/decrement.  */
> > >      gimple_seq stmts = NULL;
> > >      rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc),
> > >                                     TREE_TYPE (rhs1), op0, tmp);
> > >      if (has_nop)
> > >        rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
> > > (nop_reduc)), rhs);
> > >      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > >
> > > plus in the caller moving the
> > >
> > >       new_stmt = gimple_build_assign (res, rhs);
> > >       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> > >
> > > to the else branch as well as the folding done on new_stmt (maybe return
> > > new_stmt instead of rhs from convert_scalar_cond_reduction.
> > Eventually, we needed to assign rhs to res, and with an extra mov stmt
> > from rhs to res, the vectorizer failed.
> > the only difference in 166t.ifcvt between successfully vectorization
> > and failed vectorization is below
> >    char * _24;
> >    char _25;
> >    unsigned char _ifc__29;
> > +  unsigned char _30;
> >
> >    <bb 2> [local count: 118111600]:
> >    if (n_10(D) != 0)
> > @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n)
> >    _5 = c_14(D) + _1;
> >    _6 = *_5;
> >    _ifc__29 = _3 == _6 ? 1 : 0;
> > -  cnt_7 = cnt_18 + _ifc__29;
> > +  _30 = cnt_18 + _ifc__29;
> > +  cnt_7 = _30;
> >    i_16 = i_20 + 1;
> >    if (n_10(D) != i_16)
> >      goto <bb 9>; [89.00%]
> > @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n)
> >    goto <bb 12>; [100.00%]
> >
> >    <bb 6> [local count: 105119324]:
> > -  # cnt_19 = PHI <cnt_7(3), cnt_27(15)>
> > +  # cnt_19 = PHI <_30(3), cnt_27(15)>
> >    _21 = (char) cnt_19;
> >
> > if we want to eliminate the extra move, gimple_build and
> > gimple_convert is not suitable since they create a new lhs, is there
> > any interface like gimple_build_assign but accept stmts?
>
> Hmm, OK.  So what you could do is replacing all uses of 'res'
> with 'rhs', alternatively replacing the last generated stmt LHS
> by 'res'.  Both is a bit hackish of course.  Usually the vectorizer
> just ignores copies like this but the reduction matching is
> unnecessarily strict here (but it's also a bit awkward to fix there).
>
> There's redundant_ssa_names which seems to be designed to
> handle propagating those out and there's the do_rpo_vn run,
> so I wonder why the stmt remains.  Now, for redundant_ssa_names
> you'd need to push a std::pair (res, rhs) to it in case rhs is an
> SSA name - does that help?
Yes, it worked.
>
> Richard.
> > >
> > > Richard.
> > >
> > > >   gcc/ChangeLog:
> > > >
> > > >         PR tree-optimization/pr98365
> > > >         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
> > > >         (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
> > > >         (convert_scalar_cond_reduction): Ditto.
> > > >         (predicate_scalar_phi): Ditto.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         PR tree-optimization/pr98365
> > > >         * gcc.target/i386/pr98365.c: New test.
> > > >
> > > > --
> > > > BR,
> > > > Hongtao
> >
> >
> >
> > --
> > BR,
> > Hongtao


Updated patch attached.

-- 
BR,
Hongtao

[-- Attachment #2: 0001-Extend-is_cond_scalar_reduction-to-handle-nop_expr-a_v2.patch --]
[-- Type: text/x-patch, Size: 9445 bytes --]

From f0593d1866af6f752e70e2ef2f1477471c252536 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Wed, 6 Jan 2021 16:33:27 +0800
Subject: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr
 after/before scalar reduction.[PR98365]

gcc/ChangeLog:

	PR tree-optimization/98365
	* tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
	(is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
	(convert_scalar_cond_reduction): Ditto.
	(predicate_scalar_phi): Ditto.

gcc/testsuite/ChangeLog:

	PR tree-optimization/98365
	* gcc.target/i386/pr98365.c: New test.
---
 gcc/testsuite/gcc.target/i386/pr98365.c |  22 ++++
 gcc/tree-if-conv.c                      | 142 +++++++++++++++++++++---
 2 files changed, 146 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98365.c

diff --git a/gcc/testsuite/gcc.target/i386/pr98365.c b/gcc/testsuite/gcc.target/i386/pr98365.c
new file mode 100644
index 00000000000..652210dcdd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98365.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+short foo1 (short* a, short* c, int n)
+{
+  int i;
+  short cnt=0;
+  for (int i = 0; i != n; i++)
+    if (a[i] == c[i])
+      cnt++;
+  return cnt;
+}
+
+char foo2 (char* a, char* c, int n)
+{
+  int i;
+  char cnt=0;
+  for (int i = 0; i != n; i++)
+    if (a[i] == c[i])
+      cnt++;
+  return cnt;
+}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 716eae44a21..5eed087de36 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -1579,6 +1579,31 @@ if_convertible_loop_p (class loop *loop)
   return res;
 }
 
+/* If HAS_NOP, strip the conversion defining OP and return its source;
+   e.g. given the sequence below, return reduc_1 when OP is tmp1:
+   if (...)
+     tmp1 = (unsigned type) reduc_1;
+     tmp2 = tmp1 + rhs2;
+     reduc_3 = (signed type) tmp2.  */
+static tree
+strip_nop_cond_scalar_reduction (bool has_nop, tree op)
+{
+  if (!has_nop)
+    return op;
+
+  if (TREE_CODE (op) != SSA_NAME)
+    return NULL_TREE;
+
+  gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
+  if (!stmt
+      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
+      || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
+				 (gimple_assign_rhs1 (stmt))))
+    return NULL_TREE;
+
+  return gimple_assign_rhs1 (stmt);
+}
+
 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
    which is in predicated basic block.
    In fact, the following PHI pattern is searching:
@@ -1595,9 +1620,10 @@ if_convertible_loop_p (class loop *loop)
 
 static bool
 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
-			  tree *op0, tree *op1, bool extended)
+			  tree *op0, tree *op1, bool extended, bool* has_nop,
+			  gimple **nop_reduc)
 {
-  tree lhs, r_op1, r_op2;
+  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
   gimple *stmt;
   gimple *header_phi = NULL;
   enum tree_code reduction_op;
@@ -1608,7 +1634,7 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
   use_operand_p use_p;
   edge e;
   edge_iterator ei;
-  bool result = false;
+  bool result = *has_nop = false;
   if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
     return false;
 
@@ -1656,18 +1682,77 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
     return false;
 
   reduction_op = gimple_assign_rhs_code (stmt);
+
+  /* Catch something like the following:
+
+     loop-header:
+     reduc_1 = PHI <..., reduc_2>
+     ...
+     if (...)
+       tmp1 = (unsigned type) reduc_1;
+       tmp2 = tmp1 + rhs2;
+       reduc_3 = (signed type) tmp2;
+
+     reduc_2 = PHI <reduc_1, reduc_3>
+
+     and convert to
+
+     reduc_2 = PHI <0, reduc_3>
+     tmp1 = (unsigned type) reduc_1;
+     ifcvt = cond_expr ? rhs2 : 0;
+     tmp2 = tmp1 +/- ifcvt;
+     reduc_1 = (signed type) tmp2;  */
+
+  if (reduction_op == NOP_EXPR)
+    {
+      lhs = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (lhs) != SSA_NAME
+	  || !has_single_use (lhs))
+	return false;
+
+      *nop_reduc = stmt;
+      stmt = SSA_NAME_DEF_STMT (lhs);
+      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
+	  || !is_gimple_assign (stmt))
+	return false;
+
+      *has_nop = true;
+      reduction_op = gimple_assign_rhs_code (stmt);
+    }
+
   if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
     return false;
   r_op1 = gimple_assign_rhs1 (stmt);
   r_op2 = gimple_assign_rhs2 (stmt);
 
+  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
+  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
+
   /* Make R_OP1 to hold reduction variable.  */
-  if (r_op2 == PHI_RESULT (header_phi)
+  if (r_nop2 == PHI_RESULT (header_phi)
       && reduction_op == PLUS_EXPR)
-    std::swap (r_op1, r_op2);
-  else if (r_op1 != PHI_RESULT (header_phi))
+    {
+      std::swap (r_op1, r_op2);
+      std::swap (r_nop1, r_nop2);
+    }
+  else if (r_nop1 != PHI_RESULT (header_phi))
     return false;
 
+  if (*has_nop)
+    {
+      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (is_gimple_debug (use_stmt))
+	    continue;
+	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
+	    continue;
+	  if (use_stmt != phi)
+	    return false;
+	}
+    }
+
   /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
     {
@@ -1705,7 +1790,8 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
 
 static tree
 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
-			       tree cond, tree op0, tree op1, bool swap)
+			       tree cond, tree op0, tree op1, bool swap,
+			       bool has_nop, gimple* nop_reduc)
 {
   gimple_stmt_iterator stmt_it;
   gimple *new_assign;
@@ -1714,6 +1800,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
   tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
   tree c;
   tree zero = build_zero_cst (TREE_TYPE (rhs1));
+  gimple_seq stmts = NULL;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1732,8 +1819,18 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
   new_assign = gimple_build_assign (tmp, c);
   gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
   /* Build rhs for unconditional increment/decrement.  */
-  rhs = fold_build2 (gimple_assign_rhs_code (reduc),
-		     TREE_TYPE (rhs1), op0, tmp);
+  rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc),
+		      TREE_TYPE (rhs1), op0, tmp);
+
+  if (has_nop)
+    {
+      rhs = gimple_convert (&stmts,
+			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
+      stmt_it = gsi_for_stmt (nop_reduc);
+      gsi_remove (&stmt_it, true);
+      release_defs (nop_reduc);
+    }
+  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 
   /* Delete original reduction stmt.  */
   stmt_it = gsi_for_stmt (reduc);
@@ -1808,7 +1905,7 @@ ifcvt_follow_ssa_use_edges (tree val)
 static void
 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 {
-  gimple *new_stmt = NULL, *reduc;
+  gimple *new_stmt = NULL, *reduc, *nop_reduc;
   tree rhs, res, arg0, arg1, op0, op1, scev;
   tree cond;
   unsigned int index0;
@@ -1816,6 +1913,7 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
   edge e;
   basic_block bb;
   unsigned int i;
+  bool has_nop;
 
   res = gimple_phi_result (phi);
   if (virtual_operand_p (res))
@@ -1876,10 +1974,15 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 	  arg1 = gimple_phi_arg_def (phi, 1);
 	}
       if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
-				    &op0, &op1, false))
-	/* Convert reduction stmt into vectorizable form.  */
-	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					     true_bb != gimple_bb (reduc));
+				    &op0, &op1, false, &has_nop,
+				    &nop_reduc))
+	{
+	  /* Convert reduction stmt into vectorizable form.  */
+	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
+					       true_bb != gimple_bb (reduc),
+					       has_nop, nop_reduc);
+	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
+	}
       else
 	/* Build new RHS using selected condition and arguments.  */
 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
@@ -1961,14 +2064,17 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 					 is_gimple_condexpr, NULL_TREE,
 					 true, GSI_SAME_STMT);
       if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
-				      &op0, &op1, true)))
+				      &op0, &op1, true, &has_nop, &nop_reduc)))
 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
 				    swap? arg1 : arg0,
 				    swap? arg0 : arg1);
       else
-	/* Convert reduction stmt into vectorizable form.  */
-	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					     swap);
+	{
+	  /* Convert reduction stmt into vectorizable form.  */
+	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
+					       swap, has_nop, nop_reduc);
+	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
+	}
       new_stmt = gimple_build_assign (res, rhs);
       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
       update_stmt (new_stmt);
-- 
2.18.1



* Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
  2021-05-27  7:09       ` Hongtao Liu
@ 2021-05-31 10:14         ` Richard Biener
  2021-06-01  2:04           ` Hongtao Liu
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2021-05-31 10:14 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: GCC Patches

On Thu, May 27, 2021 at 9:05 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Wed, May 26, 2021 at 8:41 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Wed, May 26, 2021 at 7:06 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Tue, May 25, 2021 at 6:24 PM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > > > >
> > > > > Hi:
> > > > >   Details described in PR.
> > > > >   Bootstrapped and regtest on
> > > > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> > > > > -march=cascadelake,-march=cascadelake}
> > > > >   Ok for trunk?
> > > >
> > > > +static tree
> > > > +strip_nop_cond_scalar_reduction (bool has_nop, tree op)
> > > > +{
> > > > +  if (!has_nop)
> > > > +    return op;
> > > > +
> > > > +  if (TREE_CODE (op) != SSA_NAME)
> > > > +    return NULL_TREE;
> > > > +
> > > > +  gimple* stmt = SSA_NAME_DEF_STMT (op);
> > > > +  if (!stmt
> > > > +      || gimple_code (stmt) != GIMPLE_ASSIGN
> > > > +      || gimple_has_volatile_ops (stmt)
> > > > +      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
> > > > +    return NULL_TREE;
> > > > +
> > > > +  return gimple_assign_rhs1 (stmt);
> > > >
> > > > this allows arbitrary conversions where the comment suggests you
> > > > only want to allow conversions to the same precision but different sign.
> > > > Sth like
> > > >
> > > >   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
> > > >   if (!stmt
> > > >       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
> > > >       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
> > > > (gimple_assign_rhs1 (stmt))))
> > > >     return NULL_TREE;
> > > >
> Changed.
> > > > +      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
> > > > +         || gimple_code (stmt) != GIMPLE_ASSIGN
> > > > +         || gimple_has_volatile_ops (stmt))
> > > > +       return false;
> > > >
> > > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN
> > > >
> Changed.
> > > > the gimple_has_volatile_ops check is superfluous given you restrict
> > > > the assign code.
> > > >
> Changed.
> > > > +      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
> > > > +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
> > > > +       {
> > > > +         gimple *use_stmt = USE_STMT (use_p);
> > > > +         if (is_gimple_debug (use_stmt))
> > > > +           continue;
> > > > +         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
> > > > +           continue;
> > > > +         if (gimple_code (use_stmt) != GIMPLE_PHI)
> > > > +           return false;
> > > >
> > > > can the last check be use_stmt == phi since we should have the
> > > > PHI readily available?
> > > >
> Yes, changed.
> > > > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
> > > > gimple_stmt_iterator *gsi,
> > > >    rhs = fold_build2 (gimple_assign_rhs_code (reduc),
> > > >                      TREE_TYPE (rhs1), op0, tmp);
> > > >
> > > > +  if (has_nop)
> > > > +    {
> > > > +      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
> > > > +      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
> > > > +      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
> > > > +      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
> > > > +      /* Rebuild rhs for nop_expr.  */
> > > > +      rhs = fold_build1 (NOP_EXPR,
> > > > +                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
> > > > +                        nop_rhs);
> > > > +
> > > > +      /* Delete nop_reduc.  */
> > > > +      stmt_it = gsi_for_stmt (nop_reduc);
> > > > +      gsi_remove (&stmt_it, true);
> > > > +      release_defs (nop_reduc);
> > > > +    }
> > > > +
> > > >
> > > > hmm, the whole function could be cleaned up with sth like
> > > >
> > > >      /* Build rhs for unconditional increment/decrement.  */
> > > >      gimple_seq stmts = NULL;
> > > >      rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc),
> > > >                                     TREE_TYPE (rhs1), op0, tmp);
> > > >      if (has_nop)
> > > >        rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
> > > > (nop_reduc)), rhs);
> > > >      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > > >
> > > > plus in the caller moving the
> > > >
> > > >       new_stmt = gimple_build_assign (res, rhs);
> > > >       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> > > >
> > > > to the else branch as well as the folding done on new_stmt (maybe return
> > > > new_stmt instead of rhs from convert_scalar_cond_reduction.
> > > Eventually, we needed to assign rhs to res, and with an extra mov stmt
> > > from rhs to res, the vectorizer failed.
> > > the only difference in 166t.ifcvt between successfully vectorization
> > > and failed vectorization is below
> > >    char * _24;
> > >    char _25;
> > >    unsigned char _ifc__29;
> > > +  unsigned char _30;
> > >
> > >    <bb 2> [local count: 118111600]:
> > >    if (n_10(D) != 0)
> > > @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n)
> > >    _5 = c_14(D) + _1;
> > >    _6 = *_5;
> > >    _ifc__29 = _3 == _6 ? 1 : 0;
> > > -  cnt_7 = cnt_18 + _ifc__29;
> > > +  _30 = cnt_18 + _ifc__29;
> > > +  cnt_7 = _30;
> > >    i_16 = i_20 + 1;
> > >    if (n_10(D) != i_16)
> > >      goto <bb 9>; [89.00%]
> > > @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n)
> > >    goto <bb 12>; [100.00%]
> > >
> > >    <bb 6> [local count: 105119324]:
> > > -  # cnt_19 = PHI <cnt_7(3), cnt_27(15)>
> > > +  # cnt_19 = PHI <_30(3), cnt_27(15)>
> > >    _21 = (char) cnt_19;
> > >
> > > if we want to eliminate the extra move, gimple_build and
> > > gimple_convert is not suitable since they create a new lhs, is there
> > > any interface like gimple_build_assign but accept stmts?
> >
> > Hmm, OK.  So what you could do is replacing all uses of 'res'
> > with 'rhs', alternatively replacing the last generated stmt LHS
> > by 'res'.  Both is a bit hackish of course.  Usually the vectorizer
> > just ignores copies like this but the reduction matching is
> > unnecessarily strict here (but it's also a bit awkward to fix there).
> >
> > There's redundant_ssa_names which seems to be designed to
> > handle propagating those out and there's the do_rpo_vn run,
> > so I wonder why the stmt remains.  Now, for redundant_ssa_names
> > you'd need to push a std::pair (res, rhs) to it in case rhs is an
> > SSA name - does that help?
> Yes, it worked.
> >
> > Richard.
> > > >
> > > > Richard.
> > > >
> > > > >   gcc/ChangeLog:
> > > > >
> > > > >         PR tree-optimization/pr98365
> > > > >         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
> > > > >         (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
> > > > >         (convert_scalar_cond_reduction): Ditto.
> > > > >         (predicate_scalar_phi): Ditto.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >         PR tree-optimization/pr98365
> > > > >         * gcc.target/i386/pr98365.c: New test.
> > > > >
> > > > > --
> > > > > BR,
> > > > > Hongtao
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
>
>
> Update patch.

+  if (reduction_op == NOP_EXPR)
+    {

CONVERT_EXPR_CODE_P (reduction_op)

OK with that change.

Thanks,
Richard.

> --
> BR,
> Hongtao
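
For completeness, the v2 hunk with the requested change applied - a
sketch of what the final version presumably looks like:

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      /* Strip the outer sign-changing conversion and match the
         reduction on the conversion's operand instead.  */
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
          || !has_single_use (lhs))
        return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
          || !is_gimple_assign (stmt))
        return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }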


* Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
  2021-05-31 10:14         ` Richard Biener
@ 2021-06-01  2:04           ` Hongtao Liu
  0 siblings, 0 replies; 7+ messages in thread
From: Hongtao Liu @ 2021-06-01  2:04 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

On Mon, May 31, 2021 at 6:14 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, May 27, 2021 at 9:05 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Wed, May 26, 2021 at 8:41 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Wed, May 26, 2021 at 7:06 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > > >
> > > > On Tue, May 25, 2021 at 6:24 PM Richard Biener
> > > > <richard.guenther@gmail.com> wrote:
> > > > >
> > > > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > > > > >
> > > > > > Hi:
> > > > > >   Details described in PR.
> > > > > >   Bootstrapped and regtest on
> > > > > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> > > > > > -march=cascadelake,-march=cascadelake}
> > > > > >   Ok for trunk?
> > > > >
> > > > > +static tree
> > > > > +strip_nop_cond_scalar_reduction (bool has_nop, tree op)
> > > > > +{
> > > > > +  if (!has_nop)
> > > > > +    return op;
> > > > > +
> > > > > +  if (TREE_CODE (op) != SSA_NAME)
> > > > > +    return NULL_TREE;
> > > > > +
> > > > > +  gimple* stmt = SSA_NAME_DEF_STMT (op);
> > > > > +  if (!stmt
> > > > > +      || gimple_code (stmt) != GIMPLE_ASSIGN
> > > > > +      || gimple_has_volatile_ops (stmt)
> > > > > +      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
> > > > > +    return NULL_TREE;
> > > > > +
> > > > > +  return gimple_assign_rhs1 (stmt);
> > > > >
> > > > > this allows arbitrary conversions where the comment suggests you
> > > > > only want to allow conversions to the same precision but different sign.
> > > > > Sth like
> > > > >
> > > > >   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
> > > > >   if (!stmt
> > > > >       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
> > > > >       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
> > > > > (gimple_assign_rhs1 (stmt))))
> > > > >     return NULL_TREE;
> > > > >
> > Changed.
> > > > > +      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
> > > > > +         || gimple_code (stmt) != GIMPLE_ASSIGN
> > > > > +         || gimple_has_volatile_ops (stmt))
> > > > > +       return false;
> > > > >
> > > > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN
> > > > >
> > Changed.
> > > > > the gimple_has_volatile_ops check is superfluous given you restrict
> > > > > the assign code.
> > > > >
> > Changed.
> > > > > +      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
> > > > > +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
> > > > > +       {
> > > > > +         gimple *use_stmt = USE_STMT (use_p);
> > > > > +         if (is_gimple_debug (use_stmt))
> > > > > +           continue;
> > > > > +         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
> > > > > +           continue;
> > > > > +         if (gimple_code (use_stmt) != GIMPLE_PHI)
> > > > > +           return false;
> > > > >
> > > > > can the last check be use_stmt == phi since we should have the
> > > > > PHI readily available?
> > > > >
> > Yes, changed.
> > > > > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
> > > > > gimple_stmt_iterator *gsi,
> > > > >    rhs = fold_build2 (gimple_assign_rhs_code (reduc),
> > > > >                      TREE_TYPE (rhs1), op0, tmp);
> > > > >
> > > > > +  if (has_nop)
> > > > > +    {
> > > > > +      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
> > > > > +      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
> > > > > +      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
> > > > > +      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
> > > > > +      /* Rebuild rhs for nop_expr.  */
> > > > > +      rhs = fold_build1 (NOP_EXPR,
> > > > > +                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
> > > > > +                        nop_rhs);
> > > > > +
> > > > > +      /* Delete nop_reduc.  */
> > > > > +      stmt_it = gsi_for_stmt (nop_reduc);
> > > > > +      gsi_remove (&stmt_it, true);
> > > > > +      release_defs (nop_reduc);
> > > > > +    }
> > > > > +
> > > > >
> > > > > hmm, the whole function could be cleaned up with sth like
> > > > >
> > > > >      /* Build rhs for unconditional increment/decrement.  */
> > > > >      gimple_seq stmts = NULL;
> > > > >      rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc),
> > > > >                                     TREE_TYPE (rhs1), op0, tmp);
> > > > >      if (has_nop)
> > > > >        rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
> > > > > (nop_reduc)), rhs);
> > > > >      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > > > >
> > > > > plus in the caller moving the
> > > > >
> > > > >       new_stmt = gimple_build_assign (res, rhs);
> > > > >       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> > > > >
> > > > > to the else branch as well as the folding done on new_stmt (maybe return
> > > > > new_stmt instead of rhs from convert_scalar_cond_reduction.
> > > > Eventually, we needed to assign rhs to res, and with an extra mov stmt
> > > > from rhs to res, the vectorizer failed.
> > > > the only difference in 166t.ifcvt between successfully vectorization
> > > > and failed vectorization is below
> > > >    char * _24;
> > > >    char _25;
> > > >    unsigned char _ifc__29;
> > > > +  unsigned char _30;
> > > >
> > > >    <bb 2> [local count: 118111600]:
> > > >    if (n_10(D) != 0)
> > > > @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n)
> > > >    _5 = c_14(D) + _1;
> > > >    _6 = *_5;
> > > >    _ifc__29 = _3 == _6 ? 1 : 0;
> > > > -  cnt_7 = cnt_18 + _ifc__29;
> > > > +  _30 = cnt_18 + _ifc__29;
> > > > +  cnt_7 = _30;
> > > >    i_16 = i_20 + 1;
> > > >    if (n_10(D) != i_16)
> > > >      goto <bb 9>; [89.00%]
> > > > @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n)
> > > >    goto <bb 12>; [100.00%]
> > > >
> > > >    <bb 6> [local count: 105119324]:
> > > > -  # cnt_19 = PHI <cnt_7(3), cnt_27(15)>
> > > > +  # cnt_19 = PHI <_30(3), cnt_27(15)>
> > > >    _21 = (char) cnt_19;
> > > >
> > > > if we want to eliminate the extra move, gimple_build and
> > > > gimple_convert is not suitable since they create a new lhs, is there
> > > > any interface like gimple_build_assign but accept stmts?
> > >
> > > Hmm, OK.  So what you could do is replacing all uses of 'res'
> > > with 'rhs', alternatively replacing the last generated stmt LHS
> > > by 'res'.  Both is a bit hackish of course.  Usually the vectorizer
> > > just ignores copies like this but the reduction matching is
> > > unnecessarily strict here (but it's also a bit awkward to fix there).
> > >
> > > There's redundant_ssa_names which seems to be designed to
> > > handle propagating those out and there's the do_rpo_vn run,
> > > so I wonder why the stmt remains.  Now, for redundant_ssa_names
> > > you'd need to push a std::pair (res, rhs) to it in case rhs is an
> > > SSA name - does that help?
> > Yes, it worked.
> > >
> > > Richard.
> > > > >
> > > > > Richard.
> > > > >
> > > > > >   gcc/ChangeLog:
> > > > > >
> > > > > >         PR tree-optimization/pr98365
> > > > > >         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
> > > > > >         (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
> > > > > >         (convert_scalar_cond_reduction): Ditto.
> > > > > >         (predicate_scalar_phi): Ditto.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > >         PR tree-optimization/pr98365
> > > > > >         * gcc.target/i386/pr98365.c: New test.
> > > > > >
> > > > > > --
> > > > > > BR,
> > > > > > Hongtao
> > > >
> > > >
> > > >
> > > > --
> > > > BR,
> > > > Hongtao
> >
> >
> > Update patch.
>
> +  if (reduction_op == NOP_EXPR)
> +    {
>
> CONVERT_EXPR_CODE_P (reduction_op)
>
> OK with that change.
>

Thanks for the review.

> Thanks,
> Richard.
>
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao


end of thread

Thread overview: 7+ messages
2021-05-24  9:56 [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365] Hongtao Liu
2021-05-25 10:24 ` Richard Biener
2021-05-26  5:10   ` Hongtao Liu
2021-05-26 12:41     ` Richard Biener
2021-05-27  7:09       ` Hongtao Liu
2021-05-31 10:14         ` Richard Biener
2021-06-01  2:04           ` Hongtao Liu
