public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] tree-optimization/97832 - avoid breaking linearization in reassoc
@ 2020-11-17 10:49 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2020-11-17 10:49 UTC (permalink / raw)
  To: gcc-patches; +Cc: jakub

The first reassoc pass is supposed to strictly linearize expressions
but re-propagation of negates can break this in case it is faced
with (-a + -b) + c which it turns into c - (a + b).  The root-cause
is swap_ops_for_binary_stmt which prematurely associates two negates
together.  The following patch avoids this and allows the two new
testcases to be SLP vectorized - previously reassoc made the
expression trees not match up for SLP discovery.

This fixes the reassoc part of the PR.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

OK?

Thanks,
Richard.

2020-11-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/97832
	* tree-ssa-reassoc.c (op_negated_p): New function.
	(swap_ops_for_binary_stmt): Avoid swapping two negated operands
	together.
	(get_rank): Simplify.

	* gcc.dg/vect/pr97832-1.c: New testcase.
	* gcc.dg/vect/pr97832-2.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/pr97832-1.c | 17 +++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr97832-2.c | 29 ++++++++++++++++++++++++++
 gcc/tree-ssa-reassoc.c                | 30 ++++++++++++++++++++++-----
 3 files changed, 71 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr97832-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr97832-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-1.c b/gcc/testsuite/gcc.dg/vect/pr97832-1.c
new file mode 100644
index 00000000000..063fc7bd717
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97832-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-require-effective-target vect_double } */
+
+double a[1024], b[1024], c[1024];
+
+void foo()
+{
+  for (int i = 0; i < 256; ++i)
+    {
+      a[2*i] = a[2*i] + b[2*i] - c[2*i];
+      a[2*i+1] = a[2*i+1] - b[2*i+1] - c[2*i+1];
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-2.c b/gcc/testsuite/gcc.dg/vect/pr97832-2.c
new file mode 100644
index 00000000000..4f0578120ee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97832-2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-require-effective-target vect_double } */
+
+void foo1x1(double* restrict y, const double* restrict x, int clen)
+{
+  int xi = clen & 2;
+  double f_re = x[0+xi+0];
+  double f_im = x[4+xi+0];
+  int clen2 = (clen+xi) * 2;
+#pragma GCC unroll 0
+  for (int c = 0; c < clen2; c += 8) {
+    // y[c] = y[c] - x[c]*conj(f);
+#pragma GCC unroll 4
+    for (int k = 0; k < 4; ++k) {
+      double x_re = x[c+0+k];
+      double x_im = x[c+4+k];
+      double y_re = y[c+0+k];
+      double y_im = y[c+4+k];
+      y_re = y_re - x_re * f_re - x_im * f_im;;
+      y_im = y_im + x_re * f_im - x_im * f_re;
+      y[c+0+k] = y_re;
+      y[c+4+k] = y_im;
+    }
+  }
+}
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index a2ca1713d4b..4b728ee69de 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -450,16 +450,18 @@ get_rank (tree e)
       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
 	rank = propagate_rank (rank, op);
 
+      rank += 1;
+
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file, "Rank for ");
 	  print_generic_expr (dump_file, e);
-	  fprintf (dump_file, " is %ld\n", (rank + 1));
+	  fprintf (dump_file, " is %ld\n", rank);
 	}
 
       /* Note the rank in the hashtable so we don't recompute it.  */
-      insert_operand_rank (e, (rank + 1));
-      return (rank + 1);
+      insert_operand_rank (e, rank);
+      return rank;
     }
 
   /* Constants, globals, etc., are rank 0 */
@@ -4890,6 +4892,18 @@ remove_visited_stmt_chain (tree var)
     }
 }
 
+/* Return true if OE is negated.  */
+
+static bool
+op_negated_p (operand_entry *oe)
+{
+  if (TREE_CODE (oe->op) != SSA_NAME)
+    return false;
+  if (gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (oe->op)))
+    return gimple_assign_rhs_code (ass) == NEGATE_EXPR;
+  return false;
+}
+
 /* This function checks three consequtive operands in
    passed operands vector OPS starting from OPINDEX and
    swaps two operands if it is profitable for binary operation
@@ -4921,13 +4935,19 @@ swap_ops_for_binary_stmt (vec<operand_entry *> ops,
   oe3 = ops[opindex + 2];
 
   if ((oe1->rank == oe2->rank
-       && oe2->rank != oe3->rank)
+       && oe2->rank != oe3->rank
+       /* Avoid associating two negated operands together, this
+	  undoes linearization during negate repropagation.  */
+       && (!(op_negated_p (oe1) && op_negated_p (oe2))
+	   || op_negated_p (oe3)))
       || (stmt && is_phi_for_stmt (stmt, oe3->op)
 	  && !is_phi_for_stmt (stmt, oe1->op)
 	  && !is_phi_for_stmt (stmt, oe2->op)))
     std::swap (*oe1, *oe3);
   else if ((oe1->rank == oe3->rank
-	    && oe2->rank != oe3->rank)
+	    && oe2->rank != oe3->rank
+	    && (!(op_negated_p (oe1) && op_negated_p (oe3))
+		|| op_negated_p (oe2)))
 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
 	       && !is_phi_for_stmt (stmt, oe1->op)
 	       && !is_phi_for_stmt (stmt, oe3->op)))
-- 
2.26.2

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-11-17 10:49 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-17 10:49 [PATCH] tree-optimization/97832 - avoid breaking linearization in reassoc Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).