From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1666) id E57DE3858C66; Thu, 12 Jan 2023 13:30:45 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E57DE3858C66 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1673530245; bh=P5ZQr1lCEZAr2lHo0yNIdJrM78R4IWWywn6ret5xRZk=; h=From:To:Subject:Date:From; b=EObRVI9BDZqMhwpHOvQ2mX1lJ0Lvwq4jGmSi6ojmBa1vl1xLxvGcVhpaDcKBo07V6 W07cqogCes1X0HXRWiIelbwlnf0v6KjlDyXmi8ImL7z4IVWC0H0WOaNmD5GHeSq1XL qlsT8/DKAnWrgMFUCUCnR21dSW2PUpDIZWFg3fDg= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Richard Biener To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-5122] tree-optimization/99412 - reassoc and reduction chains X-Act-Checkin: gcc X-Git-Author: Richard Biener X-Git-Refname: refs/heads/master X-Git-Oldrev: 117be79bd84ed21b47588d0cd86d72d5d1757cae X-Git-Newrev: b073f2b098ba7819450d6c14a0fb96cb1c09f242 Message-Id: <20230112133045.E57DE3858C66@sourceware.org> Date: Thu, 12 Jan 2023 13:30:45 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b073f2b098ba7819450d6c14a0fb96cb1c09f242 commit r13-5122-gb073f2b098ba7819450d6c14a0fb96cb1c09f242 Author: Richard Biener Date: Thu Jan 12 11:18:22 2023 +0100 tree-optimization/99412 - reassoc and reduction chains With -ffast-math we end up associating reduction chains and break them - this is because of old code that tries to rectify reductions into a shape likened by the vectorizer. Nowadays the rank compute produces correct association for reduction chains and the vectorizer has robust support to fall back to a regular reductions (via reduction path) when it turns out to be not a proper reduction chain. So this patch removes the special code in reassoc which makes the TSVC s352 vectorized with -Ofast (it is already without -ffast-math). PR tree-optimization/99412 * tree-ssa-reassoc.cc (is_phi_for_stmt): Remove. (swap_ops_for_binary_stmt): Remove reduction handling. (rewrite_expr_tree_parallel): Adjust. (reassociate_bb): Likewise. * tree-parloops.cc (build_new_reduction): Handle MINUS_EXPR. * gcc.dg/vect/pr99412.c: New testcase. * gcc.dg/tree-ssa/reassoc-47.c: Adjust comment. * gcc.dg/tree-ssa/reassoc-48.c: Remove. Diff: --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c | 9 ----- gcc/testsuite/gcc.dg/vect/pr99412.c | 24 ++++++++++++ gcc/tree-parloops.cc | 3 ++ gcc/tree-ssa-reassoc.cc | 62 +++--------------------------- 5 files changed, 35 insertions(+), 67 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c index 1b0f0fdabe1..cd2cc740f6d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-47.c @@ -4,6 +4,6 @@ #define MODIFY #include "reassoc-46.h" -/* Check that if the loop accumulator is saved into a global variable, it's - still added last. */ +/* Check that if the loop accumulator is modified using a chain of operations + other than addition, its new value is still added last. */ /* { dg-final { scan-tree-dump-times {(?:vect_)?sum_[\d._]+ = (?:(?:vect_)?_[\d._]+ \+ (?:vect_)?sum_[\d._]+|(?:vect_)?sum_[\d._]+ \+ (?:vect_)?_[\d._]+)} 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c deleted file mode 100644 index 13836ebe8e6..00000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-48.c +++ /dev/null @@ -1,9 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-optimized -ftree-vectorize" } */ - -#define STORE -#include "reassoc-46.h" - -/* Check that if the loop accumulator is modified using a chain of operations - other than addition, its new value is still added last. */ -/* { dg-final { scan-tree-dump-times {(?:vect_)?sum_[\d._]+ = (?:(?:vect_)?_[\d._]+ \+ (?:vect_)?sum_[\d._]+|(?:vect_)?sum_[\d._]+ \+ (?:vect_)?_[\d._]+)} 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr99412.c b/gcc/testsuite/gcc.dg/vect/pr99412.c new file mode 100644 index 00000000000..e3e94a052ca --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr99412.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast --param vect-epilogues-nomask=0" } */ +/* { dg-require-effective-target vect_float } */ + +/* From TSVC s352. */ + +typedef float real_t; + +#define LEN_1D 32000 +#define LEN_2D 256 + +real_t a[LEN_1D],b[LEN_1D]; +real_t foo () +{ + real_t dot = (real_t)0.; + for (int i = 0; i < LEN_1D; i += 5) { + dot = dot + a[i] * b[i] + a[i + 1] * b[i + 1] + a[i + 2] + * b[i + 2] + a[i + 3] * b[i + 3] + a[i + 4] * b[i + 4]; + } + + return dot; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/tree-parloops.cc b/gcc/tree-parloops.cc index 4f92c4be7d3..dfb75c369d6 100644 --- a/gcc/tree-parloops.cc +++ b/gcc/tree-parloops.cc @@ -3228,6 +3228,9 @@ build_new_reduction (reduction_info_table_type *reduction_list, /* Check for OpenMP supported reduction. */ switch (reduction_code) { + case MINUS_EXPR: + reduction_code = PLUS_EXPR; + /* Fallthru. */ case PLUS_EXPR: case MULT_EXPR: case MAX_EXPR: diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index 580ec0ee0eb..5522a3ada8e 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -5117,35 +5117,6 @@ maybe_optimize_range_tests (gimple *stmt) return cfg_cleanup_needed; } -/* Return true if OPERAND is defined by a PHI node which uses the LHS - of STMT in it's operands. This is also known as a "destructive - update" operation. */ - -static bool -is_phi_for_stmt (gimple *stmt, tree operand) -{ - gimple *def_stmt; - gphi *def_phi; - tree lhs; - use_operand_p arg_p; - ssa_op_iter i; - - if (TREE_CODE (operand) != SSA_NAME) - return false; - - lhs = gimple_assign_lhs (stmt); - - def_stmt = SSA_NAME_DEF_STMT (operand); - def_phi = dyn_cast (def_stmt); - if (!def_phi) - return false; - - FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) - if (lhs == USE_FROM_PTR (arg_p)) - return true; - return false; -} - /* Remove def stmt of VAR if VAR has zero uses and recurse on rhs1 operand if so. */ @@ -5177,24 +5148,11 @@ remove_visited_stmt_chain (tree var) swaps two operands if it is profitable for binary operation consuming OPINDEX + 1 abnd OPINDEX + 2 operands. - We pair ops with the same rank if possible. - - The alternative we try is to see if STMT is a destructive - update style statement, which is like: - b = phi (a, ...) - a = c + b; - In that case, we want to use the destructive update form to - expose the possible vectorizer sum reduction opportunity. - In that case, the third operand will be the phi node. This - check is not performed if STMT is null. - - We could, of course, try to be better as noted above, and do a - lot of work to try to find these opportunities in >3 operand - cases, but it is unlikely to be worth it. */ + We pair ops with the same rank if possible. */ static void swap_ops_for_binary_stmt (const vec &ops, - unsigned int opindex, gimple *stmt) + unsigned int opindex) { operand_entry *oe1, *oe2, *oe3; @@ -5202,17 +5160,9 @@ swap_ops_for_binary_stmt (const vec &ops, oe2 = ops[opindex + 1]; oe3 = ops[opindex + 2]; - if ((oe1->rank == oe2->rank - && oe2->rank != oe3->rank) - || (stmt && is_phi_for_stmt (stmt, oe3->op) - && !is_phi_for_stmt (stmt, oe1->op) - && !is_phi_for_stmt (stmt, oe2->op))) + if (oe1->rank == oe2->rank && oe2->rank != oe3->rank) std::swap (*oe1, *oe3); - else if ((oe1->rank == oe3->rank - && oe2->rank != oe3->rank) - || (stmt && is_phi_for_stmt (stmt, oe2->op) - && !is_phi_for_stmt (stmt, oe1->op) - && !is_phi_for_stmt (stmt, oe3->op))) + else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank) std::swap (*oe1, *oe2); } @@ -5561,7 +5511,7 @@ rewrite_expr_tree_parallel (gassign *stmt, int width, else { if (op_index > 1) - swap_ops_for_binary_stmt (ops, op_index - 2, NULL); + swap_ops_for_binary_stmt (ops, op_index - 2); operand_entry *oe2 = ops[op_index--]; operand_entry *oe1 = ops[op_index--]; op2 = oe2->op; @@ -6877,7 +6827,7 @@ reassociate_bb (basic_block bb) binary op are chosen wisely. */ int len = ops.length (); if (len >= 3) - swap_ops_for_binary_stmt (ops, len - 3, stmt); + swap_ops_for_binary_stmt (ops, len - 3); new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops, powi_result != NULL