From: Tehila Meyzels <TEHILA@il.ibm.com>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] conditional load hoisting
Date: Tue, 31 Jul 2007 12:49:00 -0000 [thread overview]
Message-ID: <OFE5A60DFA.54B0BCE4-ONC2257329.0044ED2D-C2257329.0044FC2C@il.ibm.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1309 bytes --]
Hi,
This patch is aimed to perform conditional load hoisting (in tree level),
to increase opportunities for if-conversion (in RTL, ifcvt.c),
It was written for a GCC 4.1.1 based compiler, and I'm posting it, as a
reference, for a discussion I want to raise
with respect tree-ssa-if-conversion improvements (coming up) .
I'm not submitting this for inclusion in GCC as is, since I think it
should be rewritten to be part of tree-if-conv (details will followed).
This patch performs hoisting of a load insn from the THEN or the ELSE block
(one of them, where it appears) to the IF block above.
It has several restrictions when to hoist, including a checking that the
BBs are if-convertable.
For example:
Replace:
bb0:
if (c) goto bb1; else goto bb2;
bb1:
a = arr[index];
bb2:
with
bb0:
a' = arr[index];
if (c) goto bb1; else goto bb2;
bb1:
tmp = a';
bb2:
In order to activate this transformation, 2 flags should be enabled:
1. -ftree-clhoist (the pass flag)
2. -fsched-spec-load-dangerous - since we create speculative loads, which
is dangerous.
Tehila.
(See attached file: cond_load_hoist.diff)
-----------------------------------------------------------------------------
Tehila Meyzels
Tel: 972-4-829-6190
Email: tehila@il.ibm.com
[-- Attachment #2: cond_load_hoist.diff --]
[-- Type: application/octet-stream, Size: 11931 bytes --]
Index: gcc/gcc/doc/passes.texi
===================================================================
--- gcc/gcc/doc/passes.texi (revision 833)
+++ gcc/gcc/doc/passes.texi (working copy)
@@ -307,6 +307,13 @@
It is located in @file{tree-ssa-phiopt.c} and is described by
@code{pass_phiopt}.
+@item Conditional load hoist optimizations
+This pass recognizes a conditional load, where there is a potential for
+if-conversion transformation afterwards and hoist it to the condition
+basic block.
+It is located in @file{tree-ssa-phiopt.c} and is described by
+@code{pass_clhoist}.
+
@item May-alias optimization
This pass performs a flow sensitive SSA-based points-to analysis.
Index: gcc/gcc/doc/invoke.texi
===================================================================
--- gcc/gcc/doc/invoke.texi (revision 833)
+++ gcc/gcc/doc/invoke.texi (working copy)
@@ -336,6 +336,7 @@
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
-ftree-dominator-opts -ftree-dse -ftree-copyrename -ftree-sink @gol
+-ftree-clhoist @gol
-ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
-ftree-vect-loop-version -ftree-salias -fweb @gol
-ftree-copy-prop -ftree-store-ccp -ftree-store-copy-prop -fwhole-program @gol
@@ -4990,6 +4991,9 @@
Perform forward store motion on trees. This flag is
enabled by default at @option{-O} and higher.
+@item -ftree-clhoist
+Perform conditional load hoisting on trees.
+
@item -ftree-ccp
Perform sparse conditional constant propagation (CCP) on trees. This
pass only operates on local scalar variables and is enabled by default
Index: gcc/gcc/tree-pass.h
===================================================================
--- gcc/gcc/tree-pass.h (revision 833)
+++ gcc/gcc/tree-pass.h (working copy)
@@ -262,6 +262,7 @@
extern struct tree_opt_pass pass_cse_reciprocals;
extern struct tree_opt_pass pass_warn_function_return;
extern struct tree_opt_pass pass_warn_function_noreturn;
+extern struct tree_opt_pass pass_clhoist;
extern struct tree_opt_pass pass_phiopt;
extern struct tree_opt_pass pass_forwprop;
extern struct tree_opt_pass pass_redundant_phi;
Index: gcc/gcc/tree-ssa-phiopt.c
===================================================================
--- gcc/gcc/tree-ssa-phiopt.c (revision 834)
+++ gcc/gcc/tree-ssa-phiopt.c (working copy)
@@ -35,7 +35,10 @@
#include "tree-dump.h"
#include "langhooks.h"
+static bool array_already_defined_p (tree, basic_block);
+static bool bb_ok_for_ifcvt_p (basic_block);
static void tree_ssa_phiopt (void);
+static void tree_ssa_phiopt_worker (bool);
static bool conditional_replacement (basic_block, basic_block,
edge, edge, tree, tree, tree);
static bool value_replacement (basic_block, basic_block,
@@ -44,9 +47,95 @@
edge, edge, tree, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, tree, tree, tree);
+static void cond_load_replacement (basic_block, basic_block);
static void replace_phi_edge_with_variable (basic_block, edge, tree, tree);
static basic_block *blocks_in_phiopt_order (void);
+
+/* Returns TRUE if the array ARR_REF and its index already defined in previous
+ basic block. Otherwise, returns FALSE. */
+
+static bool
+array_already_defined_p (tree arr_ref, basic_block bb)
+{
+ tree index, index_def_stmt;
+ basic_block index_bb;
+
+ gcc_assert (arr_ref != NULL_TREE);
+
+ index = TREE_OPERAND (arr_ref, 1);
+
+ if (index == NULL)
+ return true;
+
+ index_def_stmt = SSA_NAME_DEF_STMT (index);
+ index_bb = bb_for_stmt (index_def_stmt);
+ if (bb == index_bb)
+ return false;
+ return true;
+}
+
+
+/* Goes over the statements in BB. Returns FALSE if there are stores, function
+ calls, loads from non-array memory or load from array that can't be removed.
+ True, otherwise. */
+
+static bool
+bb_ok_for_ifcvt_p (basic_block bb)
+{
+ block_stmt_iterator bsi;
+
+ /* BB must have no stores or function calls statements in order. We count on
+ the fact that store-sink pass is prior to this pass, so all the stores that
+ could be removed from BB are already sunk. In addition, if there is a load
+ (from an array, that we may want to hoist, we check that it is safe to move
+ it. */
+
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt;
+
+ stmt = bsi_stmt (bsi);
+
+ if (get_call_expr_in (stmt))
+ return false;
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ continue;
+
+ if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)) /* store */
+ return false;
+
+ if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_USES)) /* load */
+ {
+ tree lhs, rhs;
+ stmt_ann_t ann;
+
+ ann = stmt_ann (stmt);
+ lhs = TREE_OPERAND (stmt, 0);
+ rhs = TREE_OPERAND (stmt, 1);
+
+ /* We don't support load-hoisting of loads that aren't from an
+ array. */
+ if (TREE_CODE (rhs) != ARRAY_REF)
+ return false;
+
+ /* Verify that this load-from-array can be moved. */
+ else
+ {
+ if (TREE_SIDE_EFFECTS (lhs)
+ || TREE_CODE (lhs) == EXC_PTR_EXPR
+ || TREE_CODE (lhs) == FILTER_EXPR
+ || ann->has_volatile_ops
+ || !array_already_defined_p (rhs, bb))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
/* This pass tries to replace an if-then-else block with an
assignment. We have four kinds of transformations. Some of these
transformations are also performed by the ifcvt RTL optimizer.
@@ -136,10 +225,51 @@
static void
tree_ssa_phiopt (void)
{
+ tree_ssa_phiopt_worker (false);
+}
+
+/* This pass tries to transform conditional loads into unconditional
+ ones, by hoisting the load to the if bb. In particular, it replaces this:
+
+ bb0:
+ if (cond) goto bb1; else goto bb2;
+ bb1:
+ LHS = arr[index];
+ bb2:
+
+ with
+
+ bb0:
+ LHS' = arr[index];
+ if (cond) goto bb1; else goto bb2;
+ bb1:
+ tmp = LHS';
+ bb2:
+
+ This transformation can only be done under several constraints,
+ documented below. */
+
+static void
+tree_ssa_cl_hoist (void)
+{
+ tree_ssa_phiopt_worker (true);
+}
+
+
+/* The core routine of conditional load replacement and normal
+ phi optimizations. Both share much of the infrastructure in how
+ to match applicable basic block patterns. DO_LOAD_HOIST is true
+ when we want to do conditional load replacement, false otherwise. */
+static void
+tree_ssa_phiopt_worker (bool do_load_hoist)
+{
basic_block bb;
basic_block *bb_order;
unsigned n, i;
+ bool if_then_else;
+ if_then_else = false;
+
/* Search every basic block for COND_EXPR we may be able to optimize.
We walk the blocks in order that guarantees that a block with
@@ -194,6 +324,9 @@
e1 = e2;
e2 = e_tmp;
}
+ else if ((EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
+ && do_load_hoist)
+ if_then_else = true;
else
continue;
@@ -210,6 +343,26 @@
|| single_pred (bb1) != bb)
continue;
+ if (if_then_else) /* Check also bb2. */
+ {
+ if (!single_succ_p (bb2))
+ continue;
+ if (!single_pred_p (bb2)
+ || single_pred (bb2) != bb)
+ continue;
+ }
+
+ if ((do_load_hoist) && (flag_schedule_speculative_load_dangerous))
+ {
+ if (!bb_ok_for_ifcvt_p (bb1)
+ || (if_then_else && !bb_ok_for_ifcvt_p (bb2)))
+ continue;
+ cond_load_replacement (bb, bb1);
+ if (if_then_else)
+ cond_load_replacement (bb, bb2);
+ continue;
+ }
+
phi = phi_nodes (bb2);
/* Check to make sure that there is only one PHI node.
@@ -987,6 +1140,98 @@
}
+/* Do the main work of conditional load hoisting. We already know that the
+ recognized pattern looks like one of the options:
+
+ COND_BB:
+ if (cond) goto MIDDLE_BB1; else goto JOIN_BB (edge E1)
+ MIDDLE_BB1:
+ something
+ fallthrough (edge E0)
+ JOIN_BB:
+ some more
+
+ or
+
+ COND_BB:
+ if (cond) goto MIDDLE_BB1(edge E0); else goto MIDDLE_BB2 (edge E1)
+ MIDDLE_BB1:
+ something (including load)
+ goto JOIN_BB
+ MIDDLE_BB2:
+ something
+ fallthrough
+ JOIN_BB:
+ some more
+
+ or
+
+ COND_BB:
+ if (cond) goto MIDDLE_BB2 (edge E1); else goto MIDDLE_BB1 (edge E0)
+ MIDDLE_BB1:
+ something (including load)
+ goto JOIN_BB
+ MIDDLE_BB2:
+ something
+ fallthrough
+ JOIN_BB:
+ some more
+
+ We check that MIDDLE_BB1 contains a load, and the load has a "simple" LHS.
+ Since there are no stores or function calls in the basic_block, there is a
+ potential for if-conversion transformation. Therefre, we hoist the load to
+ COND_BB. */
+
+static void
+cond_load_replacement (basic_block cond_bb, basic_block middle_bb1)
+{
+ bool load_hoist = false;
+ block_stmt_iterator bsi;
+
+ for (bsi = bsi_start (middle_bb1); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt, rhs;
+
+ stmt = bsi_stmt (bsi);
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ continue;
+
+ rhs = TREE_OPERAND (stmt, 1);
+
+ if (TREE_CODE (rhs) == ARRAY_REF) /* load */
+ {
+ tree tmp_var, type, new_load_stmt, new_rhs;
+ block_stmt_iterator bsi_cond;
+
+ /* Do load-hoist of bsi. */
+ /* 1) Create new temporary variable TMP_VAR to load into */
+ type = TREE_TYPE (rhs);
+ tmp_var = create_tmp_var (type, "_clh_");
+ add_referenced_tmp_var (tmp_var);
+ mark_sym_for_renaming (tmp_var);
+
+ /* 2) Replace the MEM operand with tmp_var in the original statement. */
+ TREE_OPERAND (stmt, 1) = tmp_var;
+ mark_sym_for_renaming (TREE_OPERAND (stmt, 1));
+ update_stmt (stmt);
+
+ /* 3) Build the new load (hoisted) statement and insert before
+ last statement of the COND_BB (i.e., if-statement). */
+ new_rhs = unshare_expr (rhs);
+ new_load_stmt = build2 (MODIFY_EXPR, type, tmp_var, new_rhs);
+ bsi_cond = bsi_last (cond_bb);
+ bsi_insert_before (&bsi_cond, new_load_stmt, BSI_NEW_STMT);
+
+ load_hoist = true;
+ continue;
+ }
+ }
+
+ return;
+}
+
+
/* Always do these optimizations if we have SSA
trees to work on. */
static bool
@@ -1016,3 +1261,30 @@
| TODO_verify_stmts, /* todo_flags_finish */
0 /* letter */
};
+
+static bool
+gate_clhoist (void)
+{
+ return flag_tree_clhoist;
+}
+
+struct tree_opt_pass pass_clhoist = {
+ "clhoist", /* name */
+ gate_clhoist, /* gate */
+ tree_ssa_cl_hoist, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_PHIOPT, /* tv_id */
+ PROP_cfg | PROP_ssa | PROP_alias, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa
+ | TODO_dump_func
+ | TODO_ggc_collect
+ | TODO_verify_ssa
+ | TODO_verify_flow
+ | TODO_verify_stmts, /* todo_flags_finish */
+ 0 /* letter */
+};
Index: gcc/gcc/common.opt
===================================================================
--- gcc/gcc/common.opt (revision 833)
+++ gcc/gcc/common.opt (working copy)
@@ -929,6 +929,10 @@
Common Report Var(flag_tree_store_copy_prop)
Enable copy propagation for stores and loads
+ftree-clhoist
+Common Report Var(flag_tree_clhoist)
+Hoist conditional loads
+
ftree-dce
Common Report Var(flag_tree_dce)
Enable SSA dead code elimination optimization on trees
Index: gcc/gcc/passes.c
===================================================================
--- gcc/gcc/passes.c (revision 833)
+++ gcc/gcc/passes.c (working copy)
@@ -562,6 +562,7 @@
NEXT_PASS (pass_late_warn_uninitialized);
NEXT_PASS (pass_dse);
NEXT_PASS (pass_forwprop);
+ NEXT_PASS (pass_clhoist);
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_tail_calls);
NEXT_PASS (pass_rename_ssa_copies);
reply other threads:[~2007-07-31 12:37 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=OFE5A60DFA.54B0BCE4-ONC2257329.0044ED2D-C2257329.0044FC2C@il.ibm.com \
--to=tehila@il.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).