* [PATCH] CSE sin and cos as sincos using the cexpi builtin
@ 2006-12-27 23:20 Richard Guenther
2007-01-09 10:39 ` [PING][PATCH] " Richard Guenther
2007-01-16 5:31 ` [PATCH] " Ian Lance Taylor
0 siblings, 2 replies; 4+ messages in thread
From: Richard Guenther @ 2006-12-27 23:20 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 977 bytes --]
This is the 2nd version of the patch to tree-ssa-math-opts.c CSEing sin
and cos. It now uses a simple dominator heuristic to identify profitable
sin/cos sequences and the insertion point.
Bootstrapped and tested on i686-pc-linux-gnu. Performance testing
(together with a fortran enabling patch) will be on the suse polyhedron
and spec testers tonight.
Ok for mainline?
Thanks,
Richard.
2006-12-27 Richard Guenther <rguenther@suse.de>
PR tree-optimization/30028
* tree-ssa-math-opts.c (maybe_record_sincos): New static helper
function.
(execute_cse_sincos_1): Likewise.
(execute_cse_sincos): Likewise.
(gate_cse_sincos): Likewise.
(pass_cse_sincos): New pass CSEing sin() and cos() calls using
the cexpi() canonicalization of sincos().
* tree-pass.h (pass_cse_sincos): Declare.
* passes.c (init_optimization_passes): New pass pas_cse_sincos.
* gcc.dg/builtins-62.c: New testcase.
[-- Attachment #2: sincos.diff --]
[-- Type: text/x-patch, Size: 9259 bytes --]
2006-12-27 Richard Guenther <rguenther@suse.de>
PR tree-optimization/30028
* tree-ssa-math-opts.c (maybe_record_sincos): New static helper
function.
(execute_cse_sincos_1): Likewise.
(execute_cse_sincos): Likewise.
(gate_cse_sincos): Likewise.
(pass_cse_sincos): New pass CSEing sin() and cos() calls using
the cexpi() canonicalization of sincos().
* tree-pass.h (pass_cse_sincos): Declare.
* passes.c (init_optimization_passes): New pass pas_cse_sincos.
* gcc.dg/builtins-62.c: New testcase.
Index: gcc/testsuite/gcc.dg/builtins-62.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc/testsuite/gcc.dg/builtins-62.c 2006-12-27 21:07:25.000000000 +0100
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O -ffinite-math-only -fdump-tree-optimized" } */
+
+double test1 (double x)
+{
+ double s, c;
+ s = __builtin_sin (x);
+ c = __builtin_cos (x);
+ return s + c;
+}
+
+double test2 (double x)
+{
+ double s, c;
+ x = x * 2;
+ s = __builtin_sin (x);
+ c = __builtin_cos (x);
+ return s + c;
+}
+
+double test3 (double x, int b)
+{
+ double s, c;
+ if (b)
+ x = x * 2;
+ s = __builtin_sin (x);
+ c = __builtin_cos (x);
+ return s + c;
+}
+
+double test4 (double x)
+{
+ double s;
+ x = x * 2;
+ s = __builtin_sin (x);
+ return s;
+}
+
+/* { dg-final { scan-tree-dump-times "cexpi" 3 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc.orig/tree-ssa-math-opts.c 2006-12-27 21:06:50.000000000 +0100
+++ gcc/tree-ssa-math-opts.c 2006-12-27 23:03:49.000000000 +0100
@@ -440,14 +440,12 @@
occ_head = NULL;
}
-
static bool
gate_cse_reciprocals (void)
{
return optimize && !optimize_size && flag_unsafe_math_optimizations;
}
-
/* Go through all the floating-point SSA_NAMEs, and call
execute_cse_reciprocals_1 on each of them. */
static unsigned int
@@ -490,6 +488,7 @@
for (bsi = bsi_after_labels (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
tree stmt = bsi_stmt (bsi);
+
if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
&& (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
&& FLOAT_TYPE_P (TREE_TYPE (def))
@@ -521,3 +520,205 @@
| TODO_verify_stmts, /* todo_flags_finish */
0 /* letter */
};
+
+/* Records an occurance at statement USE_STMT in the vector of trees
+ STMTS if it is dominated by *TOP_BB or dominates it or this basic block
+ is not yet initialized. Returns true if the occurance was pushed on
+ the vector. Adjusts *TOP_BB to be the basic block dominating all
+ statements in the vector. */
+
+static bool
+maybe_record_sincos (VEC(tree, heap) **stmts,
+ basic_block *top_bb, tree use_stmt)
+{
+ basic_block use_bb = bb_for_stmt (use_stmt);
+ if (*top_bb
+ && (*top_bb == use_bb
+ || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
+ VEC_safe_push (tree, heap, *stmts, use_stmt);
+ else if (!*top_bb
+ || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
+ {
+ VEC_safe_push (tree, heap, *stmts, use_stmt);
+ *top_bb = use_bb;
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/* Look for sin, cos and cexpi calls with the same argument NAME and
+ create a single call to cexpi CSEing the result in this case.
+ We first walk over all immediate uses of the argument collecting
+ statements that we can CSE in a vector and in a second pass replace
+ the statement rhs with a REALPART or IMAGPART expression on the
+ result of the cexpi call we insert before the use statement that
+ dominates all other candidates. */
+
+static void
+execute_cse_sincos_1 (tree name)
+{
+ block_stmt_iterator bsi;
+ imm_use_iterator use_iter;
+ tree def_stmt, use_stmt, fndecl, res, call, stmt, type;
+ bool seen_cos = false, seen_sin = false, seen_cexpi = false;
+ VEC(tree, heap) *stmts = NULL;
+ basic_block top_bb = NULL;
+ int i;
+
+ type = TREE_TYPE (name);
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
+ {
+ if (TREE_CODE (use_stmt) != GIMPLE_MODIFY_STMT
+ || TREE_CODE (GIMPLE_STMT_OPERAND (use_stmt, 1)) != CALL_EXPR
+ || !(fndecl = get_callee_fndecl (GIMPLE_STMT_OPERAND (use_stmt, 1)))
+ || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
+ continue;
+
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ CASE_FLT_FN (BUILT_IN_COS):
+ seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
+ break;
+
+ CASE_FLT_FN (BUILT_IN_SIN):
+ seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
+ break;
+
+ CASE_FLT_FN (BUILT_IN_CEXPI):
+ seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
+ break;
+
+ default:;
+ }
+ }
+
+ if (seen_cos + seen_sin + seen_cexpi <= 1)
+ {
+ VEC_free(tree, heap, stmts);
+ return;
+ }
+
+ /* Simply insert cexpi at the beginning of top_bb but not earlier than
+ the name def statement. */
+ fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
+ if (!fndecl)
+ return;
+ res = make_rename_temp (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
+ call = build_function_call_expr (fndecl, build_tree_list (NULL_TREE, name));
+ stmt = build2 (GIMPLE_MODIFY_STMT, NULL_TREE, res, call);
+ def_stmt = SSA_NAME_DEF_STMT (name);
+ if (bb_for_stmt (def_stmt) == top_bb
+ && TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
+ {
+ bsi = bsi_for_stmt (def_stmt);
+ bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
+ }
+ else
+ {
+ bsi = bsi_after_labels (top_bb);
+ bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
+ }
+ update_stmt (stmt);
+
+ /* And adjust the recorded old call sites. */
+ for (i = 0; VEC_iterate(tree, stmts, i, use_stmt); ++i)
+ {
+ fndecl = get_callee_fndecl (GIMPLE_STMT_OPERAND (use_stmt, 1));
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ CASE_FLT_FN (BUILT_IN_COS):
+ GIMPLE_STMT_OPERAND (use_stmt, 1) = fold_build1 (REALPART_EXPR,
+ type, res);
+ break;
+
+ CASE_FLT_FN (BUILT_IN_SIN):
+ GIMPLE_STMT_OPERAND (use_stmt, 1) = fold_build1 (IMAGPART_EXPR,
+ type, res);
+ break;
+
+ CASE_FLT_FN (BUILT_IN_CEXPI):
+ GIMPLE_STMT_OPERAND (use_stmt, 1) = res;
+ break;
+
+ default:;
+ gcc_unreachable ();
+ }
+
+ update_stmt (use_stmt);
+ }
+
+ VEC_free(tree, heap, stmts);
+}
+
+/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
+ on the SSA_NAME argument of each of them. */
+
+static unsigned int
+execute_cse_sincos (void)
+{
+ basic_block bb;
+
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ FOR_EACH_BB (bb)
+ {
+ block_stmt_iterator bsi;
+
+ for (bsi = bsi_after_labels (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+ tree fndecl;
+
+ if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
+ && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == CALL_EXPR
+ && (fndecl = get_callee_fndecl (GIMPLE_STMT_OPERAND (stmt, 1)))
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ tree arg;
+
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ CASE_FLT_FN (BUILT_IN_COS):
+ CASE_FLT_FN (BUILT_IN_SIN):
+ CASE_FLT_FN (BUILT_IN_CEXPI):
+ arg = GIMPLE_STMT_OPERAND (stmt, 1);
+ arg = TREE_VALUE (TREE_OPERAND (arg, 1));
+ if (TREE_CODE (arg) == SSA_NAME)
+ execute_cse_sincos_1 (arg);
+ break;
+
+ default:;
+ }
+ }
+ }
+ }
+
+ free_dominance_info (CDI_DOMINATORS);
+ return 0;
+}
+
+static bool
+gate_cse_sincos (void)
+{
+ return optimize;
+}
+
+struct tree_opt_pass pass_cse_sincos =
+{
+ "sincos", /* name */
+ gate_cse_sincos, /* gate */
+ execute_cse_sincos, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
+ | TODO_verify_stmts, /* todo_flags_finish */
+ 0 /* letter */
+};
Index: gcc/passes.c
===================================================================
--- gcc.orig/passes.c 2006-12-27 22:10:24.000000000 +0100
+++ gcc/passes.c 2006-12-27 22:11:17.000000000 +0100
@@ -544,6 +544,7 @@
NEXT_PASS (pass_store_ccp);
NEXT_PASS (pass_store_copy_prop);
NEXT_PASS (pass_fold_builtins);
+ NEXT_PASS (pass_cse_sincos);
/* FIXME: May alias should a TODO but for 4.0.0,
we add may_alias right after fold builtins
which can create arbitrary GIMPLE. */
Index: gcc/tree-pass.h
===================================================================
--- gcc.orig/tree-pass.h 2006-12-27 22:10:16.000000000 +0100
+++ gcc/tree-pass.h 2006-12-27 22:10:36.000000000 +0100
@@ -281,6 +281,7 @@
extern struct tree_opt_pass pass_early_warn_uninitialized;
extern struct tree_opt_pass pass_late_warn_uninitialized;
extern struct tree_opt_pass pass_cse_reciprocals;
+extern struct tree_opt_pass pass_cse_sincos;
extern struct tree_opt_pass pass_warn_function_return;
extern struct tree_opt_pass pass_warn_function_noreturn;
extern struct tree_opt_pass pass_phiopt;
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PING][PATCH] CSE sin and cos as sincos using the cexpi builtin
2006-12-27 23:20 [PATCH] CSE sin and cos as sincos using the cexpi builtin Richard Guenther
@ 2007-01-09 10:39 ` Richard Guenther
2007-01-16 5:31 ` [PATCH] " Ian Lance Taylor
1 sibling, 0 replies; 4+ messages in thread
From: Richard Guenther @ 2007-01-09 10:39 UTC (permalink / raw)
To: gcc-patches
On 12/28/06, Richard Guenther <richard.guenther@gmail.com> wrote:
> This is the 2nd version of the patch to tree-ssa-math-opts.c CSEing sin
> and cos. It now uses a simple dominator heuristic to identify profitable
> sin/cos sequences and the insertion point.
>
> Bootstrapped and tested on i686-pc-linux-gnu. Performance testing
> (together with a fortran enabling patch) will be on the suse polyhedron
> and spec testers tonight.
As expected fatigue improves a lot.
> Ok for mainline?
Ping!
> Thanks,
> Richard.
>
> 2006-12-27 Richard Guenther <rguenther@suse.de>
>
> PR tree-optimization/30028
> * tree-ssa-math-opts.c (maybe_record_sincos): New static helper
> function.
> (execute_cse_sincos_1): Likewise.
> (execute_cse_sincos): Likewise.
> (gate_cse_sincos): Likewise.
> (pass_cse_sincos): New pass CSEing sin() and cos() calls using
> the cexpi() canonicalization of sincos().
> * tree-pass.h (pass_cse_sincos): Declare.
> * passes.c (init_optimization_passes): New pass pas_cse_sincos.
>
> * gcc.dg/builtins-62.c: New testcase.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] CSE sin and cos as sincos using the cexpi builtin
2006-12-27 23:20 [PATCH] CSE sin and cos as sincos using the cexpi builtin Richard Guenther
2007-01-09 10:39 ` [PING][PATCH] " Richard Guenther
@ 2007-01-16 5:31 ` Ian Lance Taylor
2007-01-17 21:37 ` Richard Guenther
1 sibling, 1 reply; 4+ messages in thread
From: Ian Lance Taylor @ 2007-01-16 5:31 UTC (permalink / raw)
To: Richard Guenther; +Cc: gcc-patches
"Richard Guenther" <richard.guenther@gmail.com> writes:
> 2006-12-27 Richard Guenther <rguenther@suse.de>
>
> PR tree-optimization/30028
> * tree-ssa-math-opts.c (maybe_record_sincos): New static helper
> function.
> (execute_cse_sincos_1): Likewise.
> (execute_cse_sincos): Likewise.
> (gate_cse_sincos): Likewise.
> (pass_cse_sincos): New pass CSEing sin() and cos() calls using
> the cexpi() canonicalization of sincos().
> * tree-pass.h (pass_cse_sincos): Declare.
> * passes.c (init_optimization_passes): New pass pas_cse_sincos.
>
> * gcc.dg/builtins-62.c: New testcase.
This code is going to insert calls to the builtin function cexpi for
some code which calls just sin and cos. When I look at
expand_builtin_cexpi, I see that it assumes that the sincos function
exists, and justifies that on the basis that cexpi is only generated
from sincos.
sincos is not a standard function. So what will happen on a target
which does not provide sincos in -lm, and for which the processor does
not have a sincos instruction?
> + bool seen_cos = false, seen_sin = false, seen_cexpi = false;
> + VEC(tree, heap) *stmts = NULL;
> + basic_block top_bb = NULL;
> + int i;
> +
> + type = TREE_TYPE (name);
> + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
> + {
> + if (TREE_CODE (use_stmt) != GIMPLE_MODIFY_STMT
> + || TREE_CODE (GIMPLE_STMT_OPERAND (use_stmt, 1)) != CALL_EXPR
> + || !(fndecl = get_callee_fndecl (GIMPLE_STMT_OPERAND (use_stmt, 1)))
> + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
> + continue;
> +
> + switch (DECL_FUNCTION_CODE (fndecl))
> + {
> + CASE_FLT_FN (BUILT_IN_COS):
> + seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
> + break;
> +
> + CASE_FLT_FN (BUILT_IN_SIN):
> + seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
> + break;
> +
> + CASE_FLT_FN (BUILT_IN_CEXPI):
> + seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
> + break;
> +
> + default:;
> + }
> + }
> +
> + if (seen_cos + seen_sin + seen_cexpi <= 1)
I think it's confusing to the reader to do math with boolean values.
I think it would be clearer to change the types of seen_cos, seen_sin,
and seen_cexpi to be int. And then change maybe_record_sincos to
return int, or alternatively change the lines to
seen_cos |= maybe_record_sincos (...) ? 1 : 0;
Ian
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] CSE sin and cos as sincos using the cexpi builtin
2007-01-16 5:31 ` [PATCH] " Ian Lance Taylor
@ 2007-01-17 21:37 ` Richard Guenther
0 siblings, 0 replies; 4+ messages in thread
From: Richard Guenther @ 2007-01-17 21:37 UTC (permalink / raw)
To: Ian Lance Taylor; +Cc: gcc-patches
On 15 Jan 2007 21:31:14 -0800, Ian Lance Taylor <iant@google.com> wrote:
> "Richard Guenther" <richard.guenther@gmail.com> writes:
>
> > 2006-12-27 Richard Guenther <rguenther@suse.de>
> >
> > PR tree-optimization/30028
> > * tree-ssa-math-opts.c (maybe_record_sincos): New static helper
> > function.
> > (execute_cse_sincos_1): Likewise.
> > (execute_cse_sincos): Likewise.
> > (gate_cse_sincos): Likewise.
> > (pass_cse_sincos): New pass CSEing sin() and cos() calls using
> > the cexpi() canonicalization of sincos().
> > * tree-pass.h (pass_cse_sincos): Declare.
> > * passes.c (init_optimization_passes): New pass pas_cse_sincos.
> >
> > * gcc.dg/builtins-62.c: New testcase.
>
> This code is going to insert calls to the builtin function cexpi for
> some code which calls just sin and cos. When I look at
> expand_builtin_cexpi, I see that it assumes that the sincos function
> exists, and justifies that on the basis that cexpi is only generated
> from sincos.
>
> sincos is not a standard function. So what will happen on a target
> which does not provide sincos in -lm, and for which the processor does
> not have a sincos instruction?
I am aware of this problem and hoped to use libgcc-math to fix it. As
that one is delayed I will be posting a patch to add TARGET_GNU_SINCOS
target macro in a minute. I will then update the pass guard to only
run if either sincos or cexp are available (I will also post a patch to
expand cexpi via cexp as I expect cexp to be nearly as fast as sincos
if the realpart of the argument is zero).
>
> > + bool seen_cos = false, seen_sin = false, seen_cexpi = false;
> > + VEC(tree, heap) *stmts = NULL;
> > + basic_block top_bb = NULL;
> > + int i;
> > +
> > + type = TREE_TYPE (name);
> > + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
> > + {
> > + if (TREE_CODE (use_stmt) != GIMPLE_MODIFY_STMT
> > + || TREE_CODE (GIMPLE_STMT_OPERAND (use_stmt, 1)) != CALL_EXPR
> > + || !(fndecl = get_callee_fndecl (GIMPLE_STMT_OPERAND (use_stmt, 1)))
> > + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
> > + continue;
> > +
> > + switch (DECL_FUNCTION_CODE (fndecl))
> > + {
> > + CASE_FLT_FN (BUILT_IN_COS):
> > + seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
> > + break;
> > +
> > + CASE_FLT_FN (BUILT_IN_SIN):
> > + seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
> > + break;
> > +
> > + CASE_FLT_FN (BUILT_IN_CEXPI):
> > + seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt);
> > + break;
> > +
> > + default:;
> > + }
> > + }
> > +
> > + if (seen_cos + seen_sin + seen_cexpi <= 1)
>
> I think it's confusing to the reader to do math with boolean values.
> I think it would be clearer to change the types of seen_cos, seen_sin,
> and seen_cexpi to be int. And then change maybe_record_sincos to
> return int, or alternatively change the lines to
> seen_cos |= maybe_record_sincos (...) ? 1 : 0;
ok, I'll adjust this.
Thanks,
Richard.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-01-17 21:37 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-12-27 23:20 [PATCH] CSE sin and cos as sincos using the cexpi builtin Richard Guenther
2007-01-09 10:39 ` [PING][PATCH] " Richard Guenther
2007-01-16 5:31 ` [PATCH] " Ian Lance Taylor
2007-01-17 21:37 ` Richard Guenther
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).