* [PATCH] tree-optimization/99412 - reassoc and reduction chains
@ 2023-01-12 13:03 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-01-12 13:03 UTC (permalink / raw)
To: gcc-patches
With -ffast-math we end up associating reduction chains and break
them - this is because of old code that tries to rectify reductions
into a shape likened by the vectorizer. Nowadays the rank compute
produces correct association for reduction chains and the vectorizer
has robust support to fall back to a regular reductions (via
reduction path) when it turns out to be not a proper reduction chain.
So this patch removes the special code in reassoc which makes
the TSVC s352 vectorized with -Ofast (it is already without
-ffast-math).
Bootstrapped and tested on x86_64-unknown-linux-gnu, I'm going to
push this with the option to revert if it turns out my vectorizer
expectation is wrong.
PR tree-optimization/99412
* tree-ssa-reassoc.cc (is_phi_for_stmt): Remove.
(swap_ops_for_binary_stmt): Remove reduction handling.
(rewrite_expr_tree_parallel): Adjust.
(reassociate_bb): Likewise.
* tree-parloops.cc (build_new_reduction): Handle MINUS_EXPR.
* gcc.dg/vect/pr99412.c: New testcase.
* gcc.dg/tree-ssa/reassoc-47.c: Adjust comment.
* gcc.dg/tree-ssa/reassoc-48.c: Remove.
---
gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c | 4 +-
gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c | 9 ----
gcc/testsuite/gcc.dg/vect/pr99412.c | 24 +++++++++
gcc/tree-parloops.cc | 3 ++
gcc/tree-ssa-reassoc.cc | 62 +++-------------------
5 files changed, 35 insertions(+), 67 deletions(-)
delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr99412.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c
index 1b0f0fdabe1..cd2cc740f6d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c
@@ -4,6 +4,6 @@
#define MODIFY
#include "reassoc-46.h"
-/* Check that if the loop accumulator is saved into a global variable, it's
- still added last. */
+/* Check that if the loop accumulator is modified using a chain of operations
+ other than addition, its new value is still added last. */
/* { dg-final { scan-tree-dump-times {(?:vect_)?sum_[\d._]+ = (?:(?:vect_)?_[\d._]+ \+ (?:vect_)?sum_[\d._]+|(?:vect_)?sum_[\d._]+ \+ (?:vect_)?_[\d._]+)} 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c
deleted file mode 100644
index 13836ebe8e6..00000000000
--- a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized -ftree-vectorize" } */
-
-#define STORE
-#include "reassoc-46.h"
-
-/* Check that if the loop accumulator is modified using a chain of operations
- other than addition, its new value is still added last. */
-/* { dg-final { scan-tree-dump-times {(?:vect_)?sum_[\d._]+ = (?:(?:vect_)?_[\d._]+ \+ (?:vect_)?sum_[\d._]+|(?:vect_)?sum_[\d._]+ \+ (?:vect_)?_[\d._]+)} 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr99412.c b/gcc/testsuite/gcc.dg/vect/pr99412.c
new file mode 100644
index 00000000000..e3e94a052ca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr99412.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast --param vect-epilogues-nomask=0" } */
+/* { dg-require-effective-target vect_float } */
+
+/* From TSVC s352. */
+
+typedef float real_t;
+
+#define LEN_1D 32000
+#define LEN_2D 256
+
+real_t a[LEN_1D],b[LEN_1D];
+real_t foo ()
+{
+ real_t dot = (real_t)0.;
+ for (int i = 0; i < LEN_1D; i += 5) {
+ dot = dot + a[i] * b[i] + a[i + 1] * b[i + 1] + a[i + 2]
+ * b[i + 2] + a[i + 3] * b[i + 3] + a[i + 4] * b[i + 4];
+ }
+
+ return dot;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/tree-parloops.cc b/gcc/tree-parloops.cc
index 4f92c4be7d3..dfb75c369d6 100644
--- a/gcc/tree-parloops.cc
+++ b/gcc/tree-parloops.cc
@@ -3228,6 +3228,9 @@ build_new_reduction (reduction_info_table_type *reduction_list,
/* Check for OpenMP supported reduction. */
switch (reduction_code)
{
+ case MINUS_EXPR:
+ reduction_code = PLUS_EXPR;
+ /* Fallthru. */
case PLUS_EXPR:
case MULT_EXPR:
case MAX_EXPR:
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 580ec0ee0eb..5522a3ada8e 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -5117,35 +5117,6 @@ maybe_optimize_range_tests (gimple *stmt)
return cfg_cleanup_needed;
}
-/* Return true if OPERAND is defined by a PHI node which uses the LHS
- of STMT in it's operands. This is also known as a "destructive
- update" operation. */
-
-static bool
-is_phi_for_stmt (gimple *stmt, tree operand)
-{
- gimple *def_stmt;
- gphi *def_phi;
- tree lhs;
- use_operand_p arg_p;
- ssa_op_iter i;
-
- if (TREE_CODE (operand) != SSA_NAME)
- return false;
-
- lhs = gimple_assign_lhs (stmt);
-
- def_stmt = SSA_NAME_DEF_STMT (operand);
- def_phi = dyn_cast <gphi *> (def_stmt);
- if (!def_phi)
- return false;
-
- FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
- if (lhs == USE_FROM_PTR (arg_p))
- return true;
- return false;
-}
-
/* Remove def stmt of VAR if VAR has zero uses and recurse
on rhs1 operand if so. */
@@ -5177,24 +5148,11 @@ remove_visited_stmt_chain (tree var)
swaps two operands if it is profitable for binary operation
consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
- We pair ops with the same rank if possible.
-
- The alternative we try is to see if STMT is a destructive
- update style statement, which is like:
- b = phi (a, ...)
- a = c + b;
- In that case, we want to use the destructive update form to
- expose the possible vectorizer sum reduction opportunity.
- In that case, the third operand will be the phi node. This
- check is not performed if STMT is null.
-
- We could, of course, try to be better as noted above, and do a
- lot of work to try to find these opportunities in >3 operand
- cases, but it is unlikely to be worth it. */
+ We pair ops with the same rank if possible. */
static void
swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
- unsigned int opindex, gimple *stmt)
+ unsigned int opindex)
{
operand_entry *oe1, *oe2, *oe3;
@@ -5202,17 +5160,9 @@ swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
oe2 = ops[opindex + 1];
oe3 = ops[opindex + 2];
- if ((oe1->rank == oe2->rank
- && oe2->rank != oe3->rank)
- || (stmt && is_phi_for_stmt (stmt, oe3->op)
- && !is_phi_for_stmt (stmt, oe1->op)
- && !is_phi_for_stmt (stmt, oe2->op)))
+ if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
std::swap (*oe1, *oe3);
- else if ((oe1->rank == oe3->rank
- && oe2->rank != oe3->rank)
- || (stmt && is_phi_for_stmt (stmt, oe2->op)
- && !is_phi_for_stmt (stmt, oe1->op)
- && !is_phi_for_stmt (stmt, oe3->op)))
+ else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
std::swap (*oe1, *oe2);
}
@@ -5561,7 +5511,7 @@ rewrite_expr_tree_parallel (gassign *stmt, int width,
else
{
if (op_index > 1)
- swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
+ swap_ops_for_binary_stmt (ops, op_index - 2);
operand_entry *oe2 = ops[op_index--];
operand_entry *oe1 = ops[op_index--];
op2 = oe2->op;
@@ -6877,7 +6827,7 @@ reassociate_bb (basic_block bb)
binary op are chosen wisely. */
int len = ops.length ();
if (len >= 3)
- swap_ops_for_binary_stmt (ops, len - 3, stmt);
+ swap_ops_for_binary_stmt (ops, len - 3);
new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
powi_result != NULL
--
2.35.3
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-01-12 13:03 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-12 13:03 [PATCH] tree-optimization/99412 - reassoc and reduction chains Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).