From: Richard Guenther <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Cc: Diego Novillo <dnovillo@google.com>,
Daniel Berlin <dberlin@dberlin.org>
Subject: [PATCH] Implement un-distribution of multiplication and division in the re-association pass
Date: Tue, 29 Apr 2008 16:38:00 -0000 [thread overview]
Message-ID: <Pine.LNX.4.64.0804291659390.4130@zhemvz.fhfr.qr> (raw)
Currently only fold can optimize a * b + c * b to (a + c) * b saving one
multiplication. The tree re-association pass can be extended to do this
"un-distribution" thereby integrating re-association and un-distribution
tightly enough to also optimize cases like
int test1 (int x, int y, int z, int weight)
{
int tmp1 = x * weight;
int tmp2 = y * weight;
int tmp3 = (x - y) * weight;
return tmp1 + (tmp2 + tmp3);
}
I realize this patch makes tree-ssa-reassoc.c more like a weak
tree-combiner, but un-distribution is closely enough related to
re-association that I didn't care. The integration of this (and further
similar) optimization might also benefit from an onverhaul of the
passes internal data structures, but it fits the current implementation
of the existing optimizations well enough.
Bootstrapped and tested on x86_64-unknown-linux-gnu, ok for mainline?
Thanks,
Richard.
2008-04-29 Richard Guenther <rguenther@suse.de>
PR tree-optimization/15255
* tree-ssa-reassoc.c (undistribute_ops_list): Perform
un-distribution of multiplication and division on the
chain of summands.
(should_break_up_subtract): Also break up subtracts for
factors.
(reassociate_bb): Call undistribute_ops_list.
* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
Index: gcc/tree-ssa-reassoc.c
===================================================================
*** gcc/tree-ssa-reassoc.c.orig 2008-04-28 17:23:24.000000000 +0200
--- gcc/tree-ssa-reassoc.c 2008-04-29 11:32:03.000000000 +0200
*************** eliminate_using_constants (enum tree_cod
*** 722,727 ****
--- 722,848 ----
}
}
+ /* Perform un-distribution of operations. A * X + B * X -> (A + B) * X. */
+
+ static void
+ undistribute_ops_list (enum tree_code opcode,
+ VEC (operand_entry_t, heap) **ops)
+ {
+ unsigned int length = VEC_length (operand_entry_t, *ops);
+ operand_entry_t oe1, oe2;
+ unsigned i, j;
+
+ if (length <= 1
+ || opcode != PLUS_EXPR)
+ return;
+
+ for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
+ {
+ enum tree_code dcode;
+ tree oe1def;
+
+ if (TREE_CODE (oe1->op) != SSA_NAME)
+ continue;
+ oe1def = SSA_NAME_DEF_STMT (oe1->op);
+ if (TREE_CODE (oe1def) != GIMPLE_MODIFY_STMT)
+ continue;
+ dcode = TREE_CODE (GIMPLE_STMT_OPERAND (oe1def, 1));
+ if (dcode != MULT_EXPR
+ && dcode != RDIV_EXPR)
+ continue;
+
+ for (j = i + 1; VEC_iterate (operand_entry_t, *ops, j, oe2); ++j)
+ {
+ tree oe2def;
+ tree op10, op11, op20, op21, tmp, tmpvar, op;
+ block_stmt_iterator bsi;
+
+ if (TREE_CODE (oe2->op) != SSA_NAME)
+ continue;
+ oe2def = SSA_NAME_DEF_STMT (oe2->op);
+ if (TREE_CODE (oe2def) != GIMPLE_MODIFY_STMT)
+ continue;
+ if (TREE_CODE (GIMPLE_STMT_OPERAND (oe2def, 1)) != dcode)
+ continue;
+
+ op10 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 0);
+ op11 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 1);
+ op20 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 0);
+ op21 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 1);
+ if (op10 == op20
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op10; op10 = op11; op11 = tmp;
+ tmp = op20; op20 = op21; op21 = tmp;
+ }
+ else if (op10 == op21
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op10; op10 = op11; op11 = tmp;
+ }
+ else if (op11 == op20
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op20; op20 = op21; op21 = tmp;
+ }
+ else if (op11 == op21)
+ ;
+ else
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "applying reverse distributive law to ");
+ print_generic_expr (dump_file, oe1def, 0);
+ fprintf (dump_file, " and ");
+ print_generic_expr (dump_file, oe2def, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Remove the immediate uses of the old stmts. */
+ GIMPLE_STMT_OPERAND (oe1def, 1)
+ = fold_convert (TREE_TYPE (op10), integer_zero_node);
+ update_stmt (oe1def);
+ GIMPLE_STMT_OPERAND (oe2def, 1)
+ = fold_convert (TREE_TYPE (op20), integer_zero_node);
+ update_stmt (oe2def);
+
+ /* Build two new stmts to avoid assigning different values to
+ names than in the original program (for debugging) and to
+ make sure not to break dominator relationships. */
+ tmpvar = create_tmp_var (TREE_TYPE (op10), NULL);
+ add_referenced_var (tmpvar);
+ if (stmt_dominates_stmt_p (oe1def, oe2def))
+ bsi = bsi_for_stmt (oe2def);
+ else
+ bsi = bsi_for_stmt (oe1def);
+
+ tmp = build_gimple_modify_stmt (NULL_TREE,
+ fold_build2 (opcode, TREE_TYPE (op10),
+ op10, op20));
+ op = make_ssa_name (tmpvar, tmp);
+ GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ update_stmt (tmp);
+
+ tmp = build_gimple_modify_stmt (NULL_TREE,
+ fold_build2 (dcode, TREE_TYPE (op10),
+ op, op11));
+ op = make_ssa_name (tmpvar, tmp);
+ GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ update_stmt (tmp);
+
+ /* Finally recurse with the operands removed. */
+ VEC_unordered_remove (operand_entry_t, *ops, j);
+ VEC_unordered_remove (operand_entry_t, *ops, i);
+ add_to_ops_vec (ops, op);
+ undistribute_ops_list (opcode, ops);
+ return;
+ }
+ }
+ }
+
/* Perform various identities and other optimizations on the list of
operand entries, stored in OPS. The tree code for the binary
operation between all the operands is OPCODE. */
*************** should_break_up_subtract (tree stmt)
*** 1093,1099 ****
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
! && TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR)
return true;
return false;
--- 1214,1221 ----
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
! && (TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR
! || TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == MULT_EXPR))
return true;
return false;
*************** reassociate_bb (basic_block bb)
*** 1355,1366 ****
--- 1477,1491 ----
TREE_VISITED (stmt) = 1;
linearize_expr_tree (&ops, stmt);
+
qsort (VEC_address (operand_entry_t, ops),
VEC_length (operand_entry_t, ops),
sizeof (operand_entry_t),
sort_by_operand_rank);
optimize_ops_list (TREE_CODE (rhs), &ops);
+ undistribute_ops_list (TREE_CODE (rhs), &ops);
+
if (VEC_length (operand_entry_t, ops) == 1)
{
if (dump_file && (dump_flags & TDF_DETAILS))
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c 2008-04-28 17:31:41.000000000 +0200
***************
*** 0 ****
--- 1,23 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+ int test1 (int x, int y, int z, int weight)
+ {
+ int tmp1 = x * weight;
+ int tmp2 = y * weight;
+ int tmp3 = (x - y) * weight;
+ return tmp1 + (tmp2 + tmp3);
+ }
+
+ int test2 (int x, int y, int z, int weight)
+ {
+ int tmp1 = x * weight;
+ int tmp2 = y * weight * weight;
+ int tmp3 = z * weight * weight * weight;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* There should be one multiplication left in test1 and three in test2. */
+
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c 2008-04-29 11:28:38.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+ int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
+ {
+ int wtmp1 = w1 * weight;
+ int wtmp2 = w2 * weight;
+ int wtmp3 = w3 * weight;
+ int tmp1 = x * wtmp1;
+ int tmp2 = y * wtmp2;
+ int tmp3 = z * wtmp3;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* The multiplication with weight should be un-distributed.
+ ??? This pattern is not recognized currently. */
+
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" { xfail *-*-* } } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c 2008-04-29 11:37:09.000000000 +0200
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+ double test1 (double x, double y, double z, double weight)
+ {
+ double tmp1 = x / weight;
+ double tmp2 = y / weight;
+ double tmp3 = -x / weight;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* The division should be un-distributed and all references to x should
+ be gone. */
+
+ /* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
next reply other threads:[~2008-04-29 15:06 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-29 16:38 Richard Guenther [this message]
2008-05-08 8:18 ` Richard Guenther
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Pine.LNX.4.64.0804291659390.4130@zhemvz.fhfr.qr \
--to=rguenther@suse.de \
--cc=dberlin@dberlin.org \
--cc=dnovillo@google.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).