public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-8195] tree-optimization/104010 - fix SLP scalar costing with patterns
@ 2022-04-19 14:42 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2022-04-19 14:42 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:353434b65ef7972172597d232ae17022d9a57244

commit r12-8195-g353434b65ef7972172597d232ae17022d9a57244
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Apr 13 13:49:45 2022 +0200

    tree-optimization/104010 - fix SLP scalar costing with patterns
    
    When doing BB vectorization, the scalar cost computation is derailed
    by patterns, causing lanes to be considered live and thus not
    costed on the scalar side.  For the testcase in PR104010 this
    prevents vectorization which was done by GCC 11.  PR103941
    shows similar cases of missed optimizations that are fixed by
    this patch.
    
    2022-04-13  Richard Biener  <rguenther@suse.de>
    
            PR tree-optimization/104010
            PR tree-optimization/103941
            * tree-vect-slp.cc (vect_bb_slp_scalar_cost): When
            we run into stmts in patterns, continue walking those
            for uses outside of the vectorized region instead of
            marking the lane live.
    
            * gcc.target/i386/pr103941-1.c: New testcase.
            * gcc.target/i386/pr103941-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr103941-1.c | 14 +++++++++
 gcc/testsuite/gcc.target/i386/pr103941-2.c | 12 ++++++++
 gcc/tree-vect-slp.cc                       | 48 ++++++++++++++++++++++--------
 3 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr103941-1.c b/gcc/testsuite/gcc.target/i386/pr103941-1.c
new file mode 100644
index 00000000000..524fdd0b4b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103941-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+unsigned char ur[16], ua[16], ub[16];
+
+void avgu_v2qi (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    ur[i] = (ua[i] + ub[i] + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler "pavgb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103941-2.c b/gcc/testsuite/gcc.target/i386/pr103941-2.c
new file mode 100644
index 00000000000..972a32be997
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103941-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+void foo (int *c, float *x, float *y)
+{
+  c[0] = x[0] < y[0];
+  c[1] = x[1] < y[1];
+  c[2] = x[2] < y[2];
+  c[3] = x[3] < y[3];
+}
+
+/* { dg-final { scan-assembler "cmpltps" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4ac2b70303c..805dd7e10e2 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5185,22 +5185,46 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
 	 the scalar cost.  */
       if (!STMT_VINFO_LIVE_P (stmt_info))
 	{
-	  FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+	  auto_vec<gimple *, 8> worklist;
+	  hash_set<gimple *> *worklist_visited = NULL;
+	  worklist.quick_push (orig_stmt);
+	  do
 	    {
-	      imm_use_iterator use_iter;
-	      gimple *use_stmt;
-	      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
-		if (!is_gimple_debug (use_stmt))
-		  {
-		    stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
-		    if (!use_stmt_info
-			|| !vectorized_scalar_stmts.contains (use_stmt_info))
+	      gimple *work_stmt = worklist.pop ();
+	      FOR_EACH_PHI_OR_STMT_DEF (def_p, work_stmt, op_iter, SSA_OP_DEF)
+		{
+		  imm_use_iterator use_iter;
+		  gimple *use_stmt;
+		  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter,
+					 DEF_FROM_PTR (def_p))
+		    if (!is_gimple_debug (use_stmt))
 		      {
-			(*life)[i] = true;
-			break;
+			stmt_vec_info use_stmt_info
+			  = vinfo->lookup_stmt (use_stmt);
+			if (!use_stmt_info
+			    || !vectorized_scalar_stmts.contains (use_stmt_info))
+			  {
+			    if (use_stmt_info
+				&& STMT_VINFO_IN_PATTERN_P (use_stmt_info))
+			      {
+				/* For stmts participating in patterns we have
+				   to check its uses recursively.  */
+				if (!worklist_visited)
+				  worklist_visited = new hash_set<gimple *> ();
+				if (!worklist_visited->add (use_stmt))
+				  worklist.safe_push (use_stmt);
+				continue;
+			      }
+			    (*life)[i] = true;
+			    goto next_lane;
+			  }
 		      }
-		  }
+		}
 	    }
+	  while (!worklist.is_empty ());
+next_lane:
+	  if (worklist_visited)
+	    delete worklist_visited;
 	  if ((*life)[i])
 	    continue;
 	}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-04-19 14:42 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-19 14:42 [gcc r12-8195] tree-optimization/104010 - fix SLP scalar costing with patterns Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).