Index: gcc/tree-ssa-tail-merge.c =================================================================== --- gcc/tree-ssa-tail-merge.c (revision 0) +++ gcc/tree-ssa-tail-merge.c (revision 0) @@ -0,0 +1,1530 @@ +/* Tail merging for gimple. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Tom de Vries (tom@codesourcery.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Pass overview. + + + MOTIVATIONAL EXAMPLE + + gimple representation of gcc/testsuite/gcc.dg/pr43864.c at + + hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601) + { + struct FILED.1638 * fpD.2605; + charD.1 fileNameD.2604[1000]; + intD.0 D.3915; + const charD.1 * restrict outputFileName.0D.3914; + + # BLOCK 2 freq:10000 + # PRED: ENTRY [100.0%] (fallthru,exec) + # PT = nonlocal { D.3926 } (restr) + outputFileName.0D.3914_3 + = (const charD.1 * restrict) outputFileNameD.2600_2(D); + # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)> + # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) + # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) + sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3); + # .MEMD.3923_14 = VDEF <.MEMD.3923_13> + # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) + # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) + D.3915_4 = accessD.2606 (&fileNameD.2604, 1); + if (D.3915_4 == 0) + goto ; + else + goto ; + # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec) + + # BLOCK 3 
freq:1000 + # PRED: 2 [10.0%] (true,exec) + # .MEMD.3923_15 = VDEF <.MEMD.3923_14> + # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) + # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) + freeD.898 (ctxD.2601_5(D)); + goto ; + # SUCC: 7 [100.0%] (fallthru,exec) + + # BLOCK 4 freq:9000 + # PRED: 2 [90.0%] (false,exec) + # .MEMD.3923_16 = VDEF <.MEMD.3923_14> + # PT = nonlocal escaped + # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) + # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) + fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B); + if (fpD.2605_8 == 0B) + goto ; + else + goto ; + # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec) + + # BLOCK 5 freq:173 + # PRED: 4 [1.9%] (true,exec) + # .MEMD.3923_17 = VDEF <.MEMD.3923_16> + # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) + # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) + freeD.898 (ctxD.2601_5(D)); + goto ; + # SUCC: 7 [100.0%] (fallthru,exec) + + # BLOCK 6 freq:8827 + # PRED: 4 [98.1%] (false,exec) + # .MEMD.3923_18 = VDEF <.MEMD.3923_16> + # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) + # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) + fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8); + # SUCC: 7 [100.0%] (fallthru,exec) + + # BLOCK 7 freq:10000 + # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec) + 6 [100.0%] (fallthru,exec) + # PT = nonlocal null + + # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)> + # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5), + .MEMD.3923_18(6)> + # VUSE <.MEMD.3923_11> + return ctxD.2601_1; + # SUCC: EXIT [100.0%] + } + + bb 3 and bb 5 can be merged. The blocks have different predecessors, but the + same successors, and the same operations. + + + CONTEXT + + A technique called tail merging (or cross jumping) can fix the example + above. For a block, we look for common code at the end (the tail) of the + predecessor blocks, and insert jumps from one block to the other. 
+ The example is a special case for tail merging, in that 2 whole blocks + can be merged, rather than just the end parts of it. + We currently only focus on whole block merging, so in that sense + calling this pass tail merge is a bit of a misnomer. + + We distinguish 2 kinds of situations in which blocks can be merged: + - same operations, same predecessors. The successor edges coming from one + block are redirected to come from the other block. + - same operations, same successors. The predecessor edges entering one block + are redirected to enter the other block. Note that this operation might + involve introducing phi operations. + + For efficient implementation, we would like to value numbers the blocks, and + have a comparison operator that tells us whether the blocks are equal. + Besides being runtime efficient, block value numbering should also abstract + from irrelevant differences in order of operations, much like normal value + numbering abstracts from irrelevant order of operations. + + For the first situation (same_operations, same predecessors), normal value + numbering fits well. We can calculate a block value number based on the + value numbers of the defs and vdefs. + + For the second situation (same operations, same successors), this approach + doesn't work so well. We can illustrate this using the example. The calls + to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will + remain different in value numbering, since they represent different memory + states. So the resulting vdefs of the frees will be different in value + numbering, so the block value numbers will be different. + + The reason why we call the blocks equal is not because they define the same + values, but because uses in the blocks use (possibly different) defs in the + same way. 
To be able to detect this efficiently, we need to do some kind of + reverse value numbering, meaning number the uses rather than the defs, and + calculate a block value number based on the value number of the uses. + Ideally, a block comparison operator will also indicate which phis are needed + to merge the blocks. + + For the moment, we don't do block value numbering, but we do insn-by-insn + matching, using scc value numbers to match operations with results, and + structural comparison otherwise, while ignoring vop mismatches. + + + IMPLEMENTATION + + 1. The pass first determines all groups of blocks with the same successor + blocks. + 2. Within each group, it tries to determine clusters of equal basic blocks. + 3. The clusters are applied. + 4. The same successor groups are updated. + 5. This process is repeated from 2 onwards, until no more changes. + + + LIMITATIONS/TODO + + - block only + - handles only 'same operations, same successors'. + It handles same predecessors as a special subcase though. + - does not implement the reverse value numbering and block value numbering. + - improve memory allocation: use garbage collected memory, obstacks, + allocpools where appropriate. + - no insertion of phis. We only introduce vop-phis, and not explicitly, but + by TODO_update_ssa_only_virtuals. + + + SWITCHES + + - ftree-tail-merge. On at -O2. We might have make the pass less aggressive + for -O2, and only keep maximum at -Os. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tm_p.h" +#include "basic-block.h" +#include "output.h" +#include "flags.h" +#include "function.h" +#include "tree-flow.h" +#include "timevar.h" +#include "tree-pass.h" +#include "bitmap.h" +#include "tree-ssa-alias.h" +#include "params.h" +#include "tree-pretty-print.h" +#include "hashtab.h" +#include "gimple-pretty-print.h" +#include "tree-ssa-sccvn.h" +#include "tree-dump.h" + +/* Returns true if BB1 is dominated by BB2. 
Robust against + arguments being NULL, where NULL means entry bb. */ + +static bool +bb_dominated_by_p (basic_block bb1, basic_block bb2) +{ + if (!bb1) + return false; + + if (!bb2) + return true; + + return dominated_by_p (CDI_DOMINATORS, bb1, bb2); +} + +/* Indicates whether we can use scc_vn info. */ + +static bool scc_vn_ok; + +/* Initializes scc_vn info. */ + +static void +init_gvn (void) +{ + scc_vn_ok = run_scc_vn (VN_NOWALK); +} + +/* Deletes scc_vn info. */ + +static void +delete_gvn (void) +{ + if (!scc_vn_ok) + return; + + free_scc_vn (); +} + +/* Return the canonical scc_vn tree for X, if we can use scc_vn_info. + Otherwise, return X. */ + +static tree +gvn_val (tree x) +{ + return ((scc_vn_ok && x != NULL && TREE_CODE (x) == SSA_NAME) + ? VN_INFO ((x))->valnum : x); +} + +/* VAL1 and VAL2 are either: + - uses in BB1 and BB2, or + - phi alternatives for BB1 and BB2. + SAME_PREDS indicates whether BB1 and BB2 have the same predecessors. + Return true if the uses have the same gvn value, and if the corresponding + defs can be used in both BB1 and BB2. */ + +static bool +gvn_uses_equal (tree val1, tree val2, basic_block bb1, + basic_block bb2, bool same_preds) +{ + gimple def1, def2; + basic_block def1_bb, def2_bb; + + if (val1 == NULL_TREE || val2 == NULL_TREE) + return false; + + if (gvn_val (val1) != gvn_val (val2)) + return false; + + /* If BB1 and BB2 have the same predecessors, the same values are defined at + entry of BB1 and BB2. Otherwise, we need to check. 
*/ + + if (TREE_CODE (val1) == SSA_NAME) + { + if (!same_preds) + { + def1 = SSA_NAME_DEF_STMT (val1); + def1_bb = gimple_bb (def1); + if (!bb_dominated_by_p (bb2, def1_bb)) + return false; + } + } + else if (!CONSTANT_CLASS_P (val1)) + return false; + + if (TREE_CODE (val2) == SSA_NAME) + { + if (!same_preds) + { + def2 = SSA_NAME_DEF_STMT (val2); + def2_bb = gimple_bb (def2); + if (bb_dominated_by_p (bb1, def2_bb)) + return false; + } + } + else if (!CONSTANT_CLASS_P (val2)) + return false; + + return true; +} + +/* Size of each bb. */ + +static int *bb_size; + +/* Init bb_size administration. */ + +static void +init_bb_size (void) +{ + int i; + int size; + gimple_stmt_iterator gsi; + basic_block bb; + + bb_size = XNEWVEC (int, last_basic_block); + for (i = 0; i < last_basic_block; ++i) + { + bb = BASIC_BLOCK (i); + size = 0; + bb_size[i] = size; + if (bb == NULL) + continue; + for (gsi = gsi_start_nondebug_bb (bb); + !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) + size++; + bb_size[i] = size; + } +} + +/* Delete bb_size administration. */ + +static void +delete_bb_size (void) +{ + XDELETEVEC (bb_size); + bb_size = NULL; +} + +struct same_succ +{ + /* The bbs that have the same successor bbs. */ + bitmap bbs; + /* The successor bbs. */ + bitmap succs; + /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for + bb. */ + bitmap inverse; + /* The edge flags for each of the successor bbs. */ + VEC (int, heap) *succ_flags; + /* Indicates whether the struct is in the worklist. */ + bool in_worklist; +}; +typedef struct same_succ *same_succ_t; +typedef const struct same_succ *const_same_succ_t; + +/* Prints E to FILE. 
*/ + +static void +same_succ_print (FILE *file, const same_succ_t e) +{ + unsigned int i; + bitmap_print (file, e->bbs, "bbs:", "\n"); + bitmap_print (file, e->succs, "succs:", "\n"); + bitmap_print (file, e->inverse, "inverse:", "\n"); + fprintf (file, "flags:"); + for (i = 0; i < VEC_length (int, e->succ_flags); ++i) + fprintf (file, " %x", VEC_index (int, e->succ_flags, i)); + fprintf (file, "\n"); +} + +/* Prints same_succ VE to VFILE. */ + +static int +same_succ_print_traverse (void **ve, void *vfile) +{ + const same_succ_t e = *((const same_succ_t *)ve); + FILE *file = ((FILE*)vfile); + same_succ_print (file, e); + return 1; +} + +/* Calculates hash value for same_succ VE. */ + +static hashval_t +same_succ_hash (const void *ve) +{ + const_same_succ_t e = (const_same_succ_t)ve; + hashval_t hashval = bitmap_hash (e->succs); + int flags; + unsigned int i; + unsigned int first = bitmap_first_set_bit (e->bbs); + int size = bb_size [first]; + gimple_stmt_iterator gsi; + gimple stmt; + basic_block bb = BASIC_BLOCK (first); + + hashval = iterative_hash_hashval_t (size, hashval); + for (gsi = gsi_start_nondebug_bb (bb); + !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) + { + stmt = gsi_stmt (gsi); + hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval); + if (!is_gimple_call (stmt)) + continue; + if (gimple_call_internal_p (stmt)) + hashval = iterative_hash_hashval_t + ((hashval_t) gimple_call_internal_fn (stmt), hashval); + else + hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval); + } + for (i = 0; i < VEC_length (int, e->succ_flags); ++i) + { + flags = VEC_index (int, e->succ_flags, i); + flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); + hashval = iterative_hash_hashval_t (flags, hashval); + } + return hashval; +} + +/* Returns true if E1 and E2 have 2 successors, and if the successor flags + are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for + the other edge flags. 
*/ + +static bool +inverse_flags (const_same_succ_t e1, const_same_succ_t e2) +{ + int f1a, f1b, f2a, f2b; + int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); + + if (VEC_length (int, e1->succ_flags) != 2) + return false; + + f1a = VEC_index (int, e1->succ_flags, 0); + f1b = VEC_index (int, e1->succ_flags, 1); + f2a = VEC_index (int, e2->succ_flags, 0); + f2b = VEC_index (int, e2->succ_flags, 1); + + if (f1a == f2a && f1b == f2b) + return false; + + return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask); +} + +/* Compares SAME_SUCCs VE1 and VE2. */ + +static int +same_succ_equal (const void *ve1, const void *ve2) +{ + const_same_succ_t e1 = (const_same_succ_t)ve1; + const_same_succ_t e2 = (const_same_succ_t)ve2; + unsigned int i, first1, first2; + gimple_stmt_iterator gsi1, gsi2; + gimple s1, s2; + + if (bitmap_bit_p (e1->bbs, ENTRY_BLOCK) + || bitmap_bit_p (e1->bbs, EXIT_BLOCK) + || bitmap_bit_p (e2->bbs, ENTRY_BLOCK) + || bitmap_bit_p (e2->bbs, EXIT_BLOCK)) + return 0; + + if (VEC_length (int, e1->succ_flags) != VEC_length (int, e2->succ_flags)) + return 0; + + if (!bitmap_equal_p (e1->succs, e2->succs)) + return 0; + + if (!inverse_flags (e1, e2)) + { + for (i = 0; i < VEC_length (int, e1->succ_flags); ++i) + if (VEC_index (int, e1->succ_flags, i) + != VEC_index (int, e1->succ_flags, i)) + return 0; + } + + first1 = bitmap_first_set_bit (e1->bbs); + first2 = bitmap_first_set_bit (e2->bbs); + + if (bb_size [first1] + != bb_size [first2]) + return 0; + + gsi1 = gsi_start_nondebug_bb (BASIC_BLOCK (first1)); + gsi2 = gsi_start_nondebug_bb (BASIC_BLOCK (first1)); + while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) + { + s1 = gsi_stmt (gsi1); + s2 = gsi_stmt (gsi2); + if (gimple_code (s1) != gimple_code (s2)) + return 0; + if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) + return 0; + gsi_next_nondebug (&gsi1); + gsi_next_nondebug (&gsi2); + } + + return 1; +} + +/* Alloc and init a new SAME_SUCC. 
*/ + +static same_succ_t +same_succ_alloc (void) +{ + same_succ_t same = XNEW (struct same_succ); + + same->bbs = BITMAP_ALLOC (NULL); + same->succs = BITMAP_ALLOC (NULL); + same->inverse = BITMAP_ALLOC (NULL); + same->succ_flags = VEC_alloc (int, heap, 10); + same->in_worklist = false; + + return same; +} + +/* Delete same_succ VE. */ + +static void +same_succ_delete (void *ve) +{ + same_succ_t e = (same_succ_t)ve; + + bitmap_clear (e->bbs); + bitmap_clear (e->succs); + bitmap_clear (e->inverse); + VEC_free (int, heap, e->succ_flags); + + XDELETE (ve); +} + +/* Reset same_succ SAME. */ + +static void +same_succ_reset (same_succ_t same) +{ + bitmap_clear (same->bbs); + bitmap_clear (same->succs); + bitmap_clear (same->inverse); + VEC_truncate (int, same->succ_flags, 0); +} + +/* Hash table with all same_succ entries. */ + +static htab_t same_succ_htab; + +/* Array that indicates the same_succ for each bb. */ + +static same_succ_t *bb_to_same_succ; + +/* Array that is used to store the edge flags for a successor. */ + +static int *same_succ_edge_flags; + +/* Bitmap that is used to mark bbs that are recently deleted. */ + +static bitmap deleted_bbs; + +/* Bitmap that is used to mark predecessors of bbs that are + deleted. */ + +static bitmap deleted_bb_preds; + +DEF_VEC_P (same_succ_t); +DEF_VEC_ALLOC_P (same_succ_t, heap); + +/* Prints same_succ_htab to stderr. */ + +extern void debug_same_succ (void); +DEBUG_FUNCTION void +debug_same_succ ( void) +{ + htab_traverse (same_succ_htab, same_succ_print_traverse, stderr); +} + +/* Vector of bbs to process. */ + +static VEC (same_succ_t, heap) *worklist; + +/* Prints worklist to FILE. */ + +static void +print_worklist (FILE *file) +{ + unsigned int i; + for (i = 0; i < VEC_length (same_succ_t, worklist); ++i) + same_succ_print (file, VEC_index (same_succ_t, worklist, i)); +} + +/* Adds SAME to worklist. 
*/ + +static void +add_to_worklist (same_succ_t same) +{ + if (same->in_worklist) + return; + + if (bitmap_count_bits (same->bbs) < 2) + return; + + same->in_worklist = true; + VEC_safe_push (same_succ_t, heap, worklist, same); +} + +/* Add BB to same_succ_htab. */ + +static void +find_same_succ_bb (basic_block bb, same_succ_t *same_p) +{ + unsigned int j; + bitmap_iterator bj; + same_succ_t same = *same_p; + same_succ_t *slot; + + if (bb == NULL) + return; + bitmap_set_bit (same->bbs, bb->index); + for (j = 0; j < EDGE_COUNT (bb->succs); ++j) + { + edge e = EDGE_SUCC (bb, j); + int index = e->dest->index; + bitmap_set_bit (same->succs, index); + same_succ_edge_flags[index] = e->flags; + } + EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj) + VEC_safe_push (int, heap, same->succ_flags, same_succ_edge_flags[j]); + + slot = (same_succ_t *) htab_find_slot (same_succ_htab, same, INSERT); + if (*slot == NULL) + { + *slot = same; + bb_to_same_succ[bb->index] = same; + add_to_worklist (same); + *same_p = NULL; + } + else + { + bitmap_set_bit ((*slot)->bbs, bb->index); + bb_to_same_succ[bb->index] = *slot; + add_to_worklist (*slot); + if (inverse_flags (same, *slot)) + bitmap_set_bit ((*slot)->inverse, bb->index); + same_succ_reset (same); + } +} + +/* Find bbs with same successors. */ + +static void +find_same_succ (void) +{ + int i; + same_succ_t same = same_succ_alloc (); + + for (i = 0; i < last_basic_block; ++i) + { + find_same_succ_bb (BASIC_BLOCK (i), &same); + if (same == NULL) + same = same_succ_alloc (); + } + + same_succ_delete (same); +} + +/* Initializes worklist administration. 
*/ + +static void +init_worklist (void) +{ + init_bb_size (); + same_succ_htab + = htab_create (1024, same_succ_hash, same_succ_equal, same_succ_delete); + bb_to_same_succ = XCNEWVEC (same_succ_t, last_basic_block); + same_succ_edge_flags = XCNEWVEC (int, last_basic_block); + deleted_bbs = BITMAP_ALLOC (NULL); + deleted_bb_preds = BITMAP_ALLOC (NULL); + worklist = VEC_alloc (same_succ_t, heap, last_basic_block); + find_same_succ (); + + if (dump_file) + { + fprintf (dump_file, "initial worklist:\n"); + print_worklist (dump_file); + } +} + +/* Deletes worklist administration. */ + +static void +delete_worklist (void) +{ + delete_bb_size (); + htab_delete (same_succ_htab); + same_succ_htab = NULL; + XDELETEVEC (bb_to_same_succ); + bb_to_same_succ = NULL; + XDELETEVEC (same_succ_edge_flags); + same_succ_edge_flags = NULL; + BITMAP_FREE (deleted_bbs); + BITMAP_FREE (deleted_bb_preds); + VEC_free (same_succ_t, heap, worklist); +} + +/* Mark BB as deleted, and mark its predecessors. */ + +static void +delete_basic_block_same_succ (basic_block bb) +{ + int pred_i, i = bb->index; + unsigned int j; + edge e; + + bitmap_set_bit (deleted_bbs, i); + + for (j = 0; j < EDGE_COUNT (bb->preds); ++j) + { + e = EDGE_PRED (bb, j); + pred_i = e->src->index; + bitmap_set_bit (deleted_bb_preds, pred_i); + } +} + +/* For deleted_bb_preds, find bbs with same successors. 
*/ + +static void +update_worklist (void) +{ + unsigned int i; + bitmap_iterator bi; + basic_block bb; + same_succ_t same; + + EXECUTE_IF_SET_IN_BITMAP (deleted_bbs, 0, i, bi) + { + same = bb_to_same_succ[i]; + bb_to_same_succ[i] = NULL; + bitmap_clear_bit (same->bbs, i); + } + + same = same_succ_alloc (); + bitmap_and_compl_into (deleted_bb_preds, deleted_bbs); + EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi) + { + bb = BASIC_BLOCK (i); + gcc_assert (bb != NULL); + bb_to_same_succ[i] = NULL; + find_same_succ_bb (bb, &same); + if (same == NULL) + same = same_succ_alloc (); + } + + same_succ_delete (same); + + bitmap_clear (deleted_bbs); + bitmap_clear (deleted_bb_preds); +} + +struct bb_cluster +{ + /* The bbs in the cluster. */ + bitmap bbs; + /* The preds of the bbs in the cluster. */ + bitmap preds; + /* index in all_clusters vector. */ + int index; +}; +typedef struct bb_cluster *bb_cluster_t; +typedef const struct bb_cluster *const_bb_cluster_t; + +/* Prints cluster C to FILE. */ + +static void +print_cluster (FILE *file, bb_cluster_t c) +{ + if (c == NULL) + return; + bitmap_print (file, c->bbs, "bbs:", "\n"); + bitmap_print (file, c->preds, "preds:", "\n"); +} + +/* Prints cluster C to stderr. */ + +extern void debug_cluster (bb_cluster_t); +DEBUG_FUNCTION void +debug_cluster (bb_cluster_t c) +{ + print_cluster (stderr, c); +} + +/* Returns true if bb1 and bb2 have the same predecessors. */ + +static bool +same_predecessors (basic_block bb1, basic_block bb2) +{ + unsigned int i, j; + edge ei, ej; + unsigned int n1 = EDGE_COUNT (bb1->preds), n2 = EDGE_COUNT (bb2->preds); + unsigned int nr_matches = 0; + + if (n1 != n2) + return false; + + for (i = 0; i < n1; ++i) + { + ei = EDGE_PRED (bb1, i); + for (j = 0; j < n2; ++j) + { + ej = EDGE_PRED (bb2, j); + if (ei->src != ej->src) + continue; + nr_matches++; + break; + } + } + + return nr_matches == n1; +} + +/* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. 
*/ + +static void +add_bb_to_cluster (bb_cluster_t c, basic_block bb) +{ + int index = bb->index; + unsigned int i; + bitmap_set_bit (c->bbs, index); + + for (i = 0; i < EDGE_COUNT (bb->preds); ++i) + bitmap_set_bit (c->preds, EDGE_PRED (bb, i)->src->index); +} + +/* Allocate and init new cluster. */ + +static bb_cluster_t +new_cluster (void) +{ + bb_cluster_t c; + c = XCNEW (struct bb_cluster); + c->bbs = BITMAP_ALLOC (NULL); + c->preds = BITMAP_ALLOC (NULL); + return c; +} + +/* Delete clusters. */ + +static void +delete_cluster (bb_cluster_t c) +{ + if (c == NULL) + return; + BITMAP_FREE (c->bbs); + BITMAP_FREE (c->preds); + XDELETE (c); +} + +/* Array that indicates which bbs have clusters that can be merged. */ + +static bb_cluster_t *merge_cluster; + +DEF_VEC_P (bb_cluster_t); +DEF_VEC_ALLOC_P (bb_cluster_t, heap); + +/* Array that contains all clusters. */ + +static VEC (bb_cluster_t, heap) *all_clusters; + +/* Allocate all cluster vectors. */ + +static void +alloc_cluster_vectors (void) +{ + merge_cluster = XCNEWVEC (bb_cluster_t, last_basic_block); + all_clusters = VEC_alloc (bb_cluster_t, heap, last_basic_block); +} + +/* Reset all cluster vectors. */ + +static void +reset_cluster_vectors (void) +{ + unsigned int i; + unsigned size = last_basic_block * sizeof (bb_cluster_t); + memset (merge_cluster, 0, size); + for (i = 0; i < VEC_length (bb_cluster_t, all_clusters); ++i) + delete_cluster (VEC_index (bb_cluster_t, all_clusters, i)); + VEC_truncate (bb_cluster_t, all_clusters, 0); +} + +/* Delete all cluster vectors. */ + +static void +delete_cluster_vectors (void) +{ + unsigned int i; + XDELETEVEC (merge_cluster); + merge_cluster = NULL; + for (i = 0; i < VEC_length (bb_cluster_t, all_clusters); ++i) + delete_cluster (VEC_index (bb_cluster_t, all_clusters, i)); + VEC_free (bb_cluster_t, heap, all_clusters); +} + +/* Merge cluster C2 into C1. 
*/ + +static void +merge_clusters (bb_cluster_t c1, bb_cluster_t c2) +{ + bitmap_ior_into (c1->bbs, c2->bbs); + bitmap_ior_into (c1->preds, c2->preds); +} + +/* Register equivalence of BB1 and BB2 (members of cluster C). Store c in + all_clusters, or merge c with existing cluster. */ + +static void +set_cluster (bb_cluster_t c, basic_block bb1, basic_block bb2) +{ + int i1 = bb1->index; + int i2 = bb2->index; + int old_index, other_index; + bb_cluster_t old; + + if (merge_cluster[i1] == NULL && merge_cluster[i2] == NULL) + { + merge_cluster[i1] = c; + merge_cluster[i2] = c; + c->index = VEC_length (bb_cluster_t, all_clusters); + VEC_safe_push (bb_cluster_t, heap, all_clusters, c); + } + else if (merge_cluster[i1] == NULL || merge_cluster[i2] == NULL) + { + old_index = merge_cluster[i1] == NULL ? i2 : i1; + other_index = merge_cluster[i1] == NULL ? i1 : i2; + old = merge_cluster[old_index]; + merge_clusters (old, c); + merge_cluster[other_index] = old; + delete_cluster (c); + } + else if (merge_cluster[i1] != merge_cluster[i2]) + { + unsigned int j; + bitmap_iterator bj; + delete_cluster (c); + old = merge_cluster[i2]; + merge_clusters (merge_cluster[i1], old); + EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, j, bj) + merge_cluster[j] = merge_cluster[i1]; + VEC_replace (bb_cluster_t, all_clusters, old->index, NULL); + delete_cluster (old); + } + else + gcc_unreachable (); +} + +/* Returns true if + - the gimple subcodes of S1 and S2 match, or + - the gimple subcodes do not matter given the gimple code, or + - the gimple subcodes are an inverse comparison and INV_COND + is true. 
*/ + +static bool +gimple_subcode_equal_p (gimple s1, gimple s2, bool inv_cond) +{ + tree var, var_type; + bool honor_nans; + + if (is_gimple_assign (s1) + && gimple_assign_rhs_class (s1) == GIMPLE_SINGLE_RHS) + return true; + + if (gimple_code (s1) == GIMPLE_COND && inv_cond) + { + var = gimple_cond_lhs (s1); + var_type = TREE_TYPE (var); + honor_nans = HONOR_NANS (TYPE_MODE (var_type)); + + if (gimple_expr_code (s1) + == invert_tree_comparison (gimple_expr_code (s2), honor_nans)) + return true; + } + + return s1->gsbase.subcode == s2->gsbase.subcode; +} + +/* Check whether S1 and S2 are equal, considering the fields in + gimple_statement_base. Ignores fields uid, location, bb, and block, and the + pass-local flags visited and plf. */ + +static bool +gimple_base_equal_p (gimple s1, gimple s2, bool inv_cond) +{ + if (gimple_code (s1) != gimple_code (s2)) + return false; + + if (gimple_no_warning_p (s1) != gimple_no_warning_p (s2)) + return false; + + if (is_gimple_assign (s1) + && (gimple_assign_nontemporal_move_p (s1) + != gimple_assign_nontemporal_move_p (s2))) + return false; + + gcc_assert (!gimple_modified_p (s1) && !gimple_modified_p (s2)); + + if (gimple_has_volatile_ops (s1) != gimple_has_volatile_ops (s2)) + return false; + + if (!gimple_subcode_equal_p (s1, s2, inv_cond)) + return false; + + if (gimple_num_ops (s1) != gimple_num_ops (s2)) + return false; + + return true; +} + +/* Return true if gimple statements S1 and S2 are equal. SAME_PREDS indicates + whether gimple_bb (s1) and gimple_bb (s2) have the same predecessors. */ + +static bool +gimple_equal_p (gimple s1, gimple s2, bool same_preds, bool inv_cond) +{ + unsigned int i; + enum gimple_statement_structure_enum gss; + tree lhs1, lhs2; + basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); + + /* Handle omp gimples conservatively. */ + if (is_gimple_omp (s1) || is_gimple_omp (s2)) + return false; + + /* Handle lhs. 
*/ + lhs1 = gimple_get_lhs (s1); + lhs2 = gimple_get_lhs (s2); + if (lhs1 != NULL_TREE && lhs2 != NULL_TREE) + return (same_preds && TREE_CODE (lhs1) == SSA_NAME + && TREE_CODE (lhs2) == SSA_NAME + && gvn_val (lhs1) == gvn_val (lhs2)); + else if (!(lhs1 == NULL_TREE && lhs2 == NULL_TREE)) + return false; + + if (!gimple_base_equal_p (s1, s2, inv_cond)) + return false; + + gss = gimple_statement_structure (s1); + switch (gss) + { + case GSS_CALL: + /* Ignore gimple_call_use_set and gimple_call_clobber_set, and let + TODO_rebuild_alias deal with this. */ + if (!gimple_call_same_target_p (s1, s2)) + return false; + /* Falthru. */ + + case GSS_WITH_MEM_OPS_BASE: + case GSS_WITH_MEM_OPS: + /* Ignore gimple_vdef and gimpe_vuse mismatches, and let + TODO_update_ssa_only_virtuals deal with this. */ + /* Falthru. */ + + case GSS_WITH_OPS: + /* Ignore gimple_def_ops and gimple_use_ops. They are duplicates of + gimple_vdef, gimple_vuse and gimple_ops, which are checked + elsewhere. */ + /* Falthru. */ + + case GSS_BASE: + break; + + default: + return false; + } + + /* Handle ops. */ + for (i = 0; i < gimple_num_ops (s1); ++i) + { + tree t1 = gimple_op (s1, i); + tree t2 = gimple_op (s2, i); + + if (t1 == NULL_TREE && t2 == NULL_TREE) + continue; + if (t1 == NULL_TREE || t2 == NULL_TREE) + return false; + /* Skip lhs. */ + if (lhs1 == t1 && i == 0) + continue; + + if (operand_equal_p (t1, t2, 0)) + continue; + if (gvn_uses_equal (t1, t2, bb1, bb2, same_preds)) + continue; + + return false; + } + + return true; +} + +/* Return true if BB1 and BB2 contain the same non-debug gimple statements. + SAME_PREDS indicates whether BB1 and BB2 have the same predecessors. 
+ */
+
+static bool
+bb_gimple_equal_p (basic_block bb1, basic_block bb2, bool same_preds,
+                   bool inv_cond)
+{
+  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
+  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
+  bool end1 = gsi_end_p (gsi1);
+  bool end2 = gsi_end_p (gsi2);
+
+  /* Walk both blocks backwards in lock-step, comparing stmt by stmt.  */
+  while (!end1 && !end2)
+    {
+      if (!gimple_equal_p (gsi_stmt (gsi1), gsi_stmt (gsi2),
+                           same_preds, inv_cond))
+        return false;
+
+      gsi_prev_nondebug (&gsi1);
+      gsi_prev_nondebug (&gsi2);
+      end1 = gsi_end_p (gsi1);
+      end2 = gsi_end_p (gsi2);
+    }
+
+  /* Only equal if both blocks ran out of stmts at the same time.  */
+  return end1 && end2;
+}
+
+/* Return true if BB1 and BB2 (members of cluster C) are duplicates.  SAME_PREDS
+   indicates whether BB1 and BB2 have the same predecessors.  */
+
+static bool
+find_duplicate (bb_cluster_t c, basic_block bb1,
+                basic_block bb2, bool same_preds, bool inv_cond)
+{
+  if (!bb_gimple_equal_p (bb1, bb2, same_preds, inv_cond))
+    return false;
+
+  if (dump_file)
+    {
+      /* The format string must consume both bb indices; it lacked the
+         conversion specifiers, which is undefined behaviour for fprintf.  */
+      fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
+               bb1->index, bb2->index);
+      print_cluster (dump_file, c);
+    }
+
+  set_cluster (c, bb1, bb2);
+  return true;
+}
+
+/* Returns whether for all phis in DEST the phi alternatives for E1 and
+   E2 are equal.  SAME_PREDS indicates whether BB1 and BB2 have the same
+   predecessors.
*/ + +static bool +same_phi_alternatives_1 (basic_block dest, edge e1, edge e2, bool same_preds) +{ + int n1 = e1->dest_idx, n2 = e2->dest_idx; + basic_block bb1 = e1->src, bb2 = e2->src; + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple phi = gsi_stmt (gsi); + tree lhs = gimple_phi_result (phi); + tree val1 = gimple_phi_arg_def (phi, n1); + tree val2 = gimple_phi_arg_def (phi, n2); + + if (VOID_TYPE_P (TREE_TYPE (lhs))) + continue; + + if (operand_equal_for_phi_arg_p (val1, val2)) + continue; + if (gvn_uses_equal (val1, val2, bb1, bb2, same_preds)) + continue; + + return false; + } + + return true; +} + +/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the + phi alternatives for BB1 and BB2 are equal. SAME_PREDS indicates whether BB1 + and BB2 have the same predecessors. */ + +static bool +same_phi_alternatives (same_succ_t same_succ, basic_block bb1, basic_block bb2, + bool same_preds) +{ + unsigned int s; + bitmap_iterator bs; + edge e1, e2; + basic_block succ; + + EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs) + { + succ = BASIC_BLOCK (s); + e1 = find_edge (bb1, succ); + e2 = find_edge (bb2, succ); + if (e1->flags & EDGE_COMPLEX + || e2->flags & EDGE_COMPLEX) + return false; + + /* For all phis in bb, the phi alternatives for e1 and e2 need to have + the same value. */ + if (!same_phi_alternatives_1 (succ, e1, e2, same_preds)) + return false; + } + + return true; +} + +/* Return true if BB has non-vop phis. */ + +static bool +bb_has_non_vop_phi (basic_block bb) +{ + gimple_seq phis = phi_nodes (bb); + gimple phi; + + if (phis == NULL) + return false; + + if (!gimple_seq_singleton_p (phis)) + return true; + + phi = gimple_seq_first_stmt (phis); + return !VOID_TYPE_P (TREE_TYPE (gimple_phi_result (phi))); +} + +/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. 
*/ + +static void +find_clusters_1 (same_succ_t same_succ) +{ + bb_cluster_t c; + basic_block bb1, bb2; + unsigned int i, j; + bitmap_iterator bi, bj; + bool same_preds, inv_cond; + int nr_comparisons; + int max_comparisons = PARAM_VALUE (PARAM_TAIL_MERGE_MAX_COMPARISONS); + + if (same_succ == NULL) + return; + + EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi) + { + bb1 = BASIC_BLOCK (i); + + /* TODO: handle blocks with phi-nodes. We'll have find corresponding + phi-nodes in bb1 and bb2, with the same alternatives for the same + preds. */ + if (bb_has_non_vop_phi (bb1)) + continue; + + nr_comparisons = 0; + EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj) + { + bb2 = BASIC_BLOCK (j); + + if (bb_has_non_vop_phi (bb2)) + continue; + + if (merge_cluster[bb1->index] != NULL + && merge_cluster[bb1->index] == merge_cluster[bb2->index]) + continue; + + /* Limit quadratic behaviour. */ + nr_comparisons++; + if (nr_comparisons > max_comparisons) + break; + + c = new_cluster (); + add_bb_to_cluster (c, bb1); + add_bb_to_cluster (c, bb2); + same_preds = same_predecessors (bb1, bb2); + inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index) + != bitmap_bit_p (same_succ->inverse, bb2->index)); + if (!(same_phi_alternatives (same_succ, bb1, bb2, + same_preds) + && find_duplicate (c, bb1, bb2, same_preds, + inv_cond))) + delete_cluster (c); + } + } +} + +/* Find clusters of bbs which can be merged. */ + +static void +find_clusters (void) +{ + same_succ_t same; + + while (!VEC_empty (same_succ_t, worklist)) + { + same = VEC_pop (same_succ_t, worklist); + same->in_worklist = false; + if (dump_file) + { + fprintf (dump_file, "processing worklist entry\n"); + same_succ_print (dump_file, same); + } + find_clusters_1 (same); + } +} + +/* Redirect all edges from BB1 to BB2, remove BB1, and insert C->phis into + BB2. 
*/
+
+static void
+replace_block_by (basic_block bb1, basic_block bb2)
+{
+  edge pred_edge;
+  unsigned int i;
+
+  /* Unregister bb1 from the same-successor bookkeeping (helper defined
+     earlier in this file) before the cfg changes.  */
+  delete_basic_block_same_succ (bb1);
+
+  /* Redirect the incoming edges of bb1 to bb2.  Walk the edge vector
+     downwards: each redirection removes the edge from bb1->preds.  */
+  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
+    {
+      pred_edge = EDGE_PRED (bb1, i - 1);
+      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
+      gcc_assert (pred_edge != NULL);
+    }
+
+  /* bb1 has no incoming edges anymore, and has become unreachable.  */
+  delete_basic_block (bb1);
+
+  /* Update dominator info.  */
+  set_immediate_dominator (CDI_DOMINATORS, bb2,
+			   recompute_dominator (CDI_DOMINATORS, bb2));
+}
+
+/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
+   number of bbs removed.  */
+
+static int
+apply_clusters (void)
+{
+  basic_block bb1, bb2;
+  bb_cluster_t c;
+  unsigned int i, j;
+  bitmap_iterator bj;
+  int nr_bbs_removed = 0;
+
+  for (i = 0; i < VEC_length (bb_cluster_t, all_clusters); ++i)
+    {
+      c = VEC_index (bb_cluster_t, all_clusters, i);
+      if (c == NULL)
+	continue;
+
+      /* Keep the lowest-numbered bb of the cluster, merge the rest into
+	 it.  */
+      bb2 = BASIC_BLOCK (bitmap_first_set_bit (c->bbs));
+      gcc_assert (bb2 != NULL);
+
+      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
+	{
+	  bb1 = BASIC_BLOCK (j);
+	  gcc_assert (bb1 != NULL);
+	  if (bb1 == bb2)
+	    continue;
+
+	  replace_block_by (bb1, bb2);
+	  nr_bbs_removed++;
+	}
+    }
+
+  return nr_bbs_removed;
+}
+
+/* Resets debug statement STMT if it has uses that are not dominated by their
+   defs.
*/
+
+static void
+update_debug_stmt (gimple stmt)
+{
+  use_operand_p use_p;
+  ssa_op_iter oi;
+  basic_block bbdef, bbuse;
+  gimple def_stmt;
+  tree name;
+
+  /* Only debug binds carry a value that can go stale.  */
+  if (!gimple_debug_bind_p (stmt))
+    return;
+
+  bbuse = gimple_bb (stmt);
+  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
+    {
+      name = USE_FROM_PTR (use_p);
+      gcc_assert (TREE_CODE (name) == SSA_NAME);
+
+      def_stmt = SSA_NAME_DEF_STMT (name);
+      gcc_assert (def_stmt != NULL);
+
+      /* A NULL def bb (e.g. a def not placed in any block), a same-block
+	 def, or a dominating def is still valid at the use.  */
+      bbdef = gimple_bb (def_stmt);
+      if (bbdef == NULL || bbuse == bbdef
+	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
+	continue;
+
+      /* The def no longer dominates this debug bind after merging; drop
+	 the bound value rather than keep a wrong one.  */
+      gimple_debug_bind_reset_value (stmt);
+      update_stmt (stmt);
+      /* NOTE(review): after the reset the bind has no uses left; a break
+	 here would be clearer — confirm the use iterator tolerates the
+	 operand update mid-walk.  */
+    }
+}
+
+/* Resets all debug statements that have uses that are not
+   dominated by their defs.  */
+
+static void
+update_debug_stmts (void)
+{
+  int i;
+  basic_block bb;
+
+  for (i = 0; i < last_basic_block; ++i)
+    {
+      gimple stmt;
+      gimple_stmt_iterator gsi;
+
+      bb = BASIC_BLOCK (i);
+
+      /* Indices of bbs removed by the merging leave NULL holes.  */
+      if (bb == NULL)
+	continue;
+
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  stmt = gsi_stmt (gsi);
+	  if (!is_gimple_debug (stmt))
+	    continue;
+	  update_debug_stmt (stmt);
+	}
+    }
+}
+
+/* Runs tail merge optimization.
*/
+
+static unsigned int
+tail_merge_optimize (void)
+{
+  int nr_bbs_removed_total = 0;
+  int nr_bbs_removed;
+  bool loop_entered = false;
+  int iteration_nr = 0;
+
+  init_worklist ();
+
+  /* Iterate to a fixpoint: merging bbs can expose new merging
+     opportunities, which update_worklist requeues.  */
+  while (!VEC_empty (same_succ_t, worklist))
+    {
+      /* Set up dominators, value numbering and cluster vectors lazily,
+	 only once there is actual work to do.  */
+      if (!loop_entered)
+	{
+	  loop_entered = true;
+	  calculate_dominance_info (CDI_DOMINATORS);
+	  init_gvn ();
+	  alloc_cluster_vectors ();
+	}
+      else
+	reset_cluster_vectors ();
+
+      iteration_nr++;
+      if (dump_file)
+	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
+
+      find_clusters ();
+      gcc_assert (VEC_empty (same_succ_t, worklist));
+      if (VEC_empty (bb_cluster_t, all_clusters))
+	break;
+
+      nr_bbs_removed = apply_clusters ();
+      nr_bbs_removed_total += nr_bbs_removed;
+      if (nr_bbs_removed == 0)
+	break;
+
+      update_worklist ();
+    }
+
+  if (nr_bbs_removed_total > 0)
+    {
+      /* Merging may have left debug binds whose defs no longer dominate
+	 them; reset those.  */
+      update_debug_stmts ();
+
+      /* Mark vops for updating.  Without this, TODO_update_ssa_only_virtuals
+	 won't do anything.  */
+      mark_sym_for_renaming (gimple_vop (cfun));
+
+      if (dump_file)
+	{
+	  fprintf (dump_file, "Before TODOs.\n");
+	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
+	}
+    }
+
+  /* Tear down only what the lazy initialization above set up.  */
+  delete_worklist ();
+  if (loop_entered)
+    {
+      free_dominance_info (CDI_DOMINATORS);
+      delete_gvn ();
+      delete_cluster_vectors ();
+    }
+
+  return 0;
+}
+
+/* Returns true if tail merge pass should be run.
*/
+
+static bool
+gate_tail_merge (void)
+{
+  return flag_tree_tail_merge;
+}
+
+struct gimple_opt_pass pass_tail_merge =
+{
+ {
+  GIMPLE_PASS,
+  "tailmerge",			/* name */
+  gate_tail_merge,		/* gate */
+  tail_merge_optimize,		/* execute */
+  NULL,				/* sub */
+  NULL,				/* next */
+  0,				/* static_pass_number */
+  TV_TREE_TAIL_MERGE,		/* tv_id */
+  PROP_ssa | PROP_cfg,		/* properties_required */
+  0,				/* properties_provided */
+  0,				/* properties_destroyed */
+  0,				/* todo_flags_start */
+  /* NOTE(review): TODO_rebuild_alias is an expensive TODO; confirm a full
+     alias rebuild is really needed after tail merging.  */
+  TODO_verify_ssa | TODO_verify_stmts
+    | TODO_verify_flow | TODO_update_ssa_only_virtuals
+    | TODO_rebuild_alias
+    | TODO_cleanup_cfg | TODO_dump_func	/* todo_flags_finish */
+ }
+};
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 175801)
+++ gcc/tree-pass.h	(working copy)
@@ -447,6 +447,7 @@ extern struct gimple_opt_pass pass_trace
 extern struct gimple_opt_pass pass_warn_unused_result;
 extern struct gimple_opt_pass pass_split_functions;
 extern struct gimple_opt_pass pass_feedback_split_functions;
+extern struct gimple_opt_pass pass_tail_merge;
 
 /* IPA Passes */
 extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
Index: gcc/opts.c
===================================================================
--- gcc/opts.c	(revision 175801)
+++ gcc/opts.c	(working copy)
@@ -484,6 +484,7 @@ static const struct default_options defa
   { OPT_LEVELS_2_PLUS, OPT_falign_jumps, NULL, 1 },
   { OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
   { OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
+  { OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
 
   /* -O3 optimizations.
*/ { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, Index: gcc/timevar.def =================================================================== --- gcc/timevar.def (revision 175801) +++ gcc/timevar.def (working copy) @@ -127,6 +127,7 @@ DEFTIMEVAR (TV_TREE_GIMPLIFY , "tre DEFTIMEVAR (TV_TREE_EH , "tree eh") DEFTIMEVAR (TV_TREE_CFG , "tree CFG construction") DEFTIMEVAR (TV_TREE_CLEANUP_CFG , "tree CFG cleanup") +DEFTIMEVAR (TV_TREE_TAIL_MERGE , "tree tail merge") DEFTIMEVAR (TV_TREE_VRP , "tree VRP") DEFTIMEVAR (TV_TREE_COPY_PROP , "tree copy propagation") DEFTIMEVAR (TV_FIND_REFERENCED_VARS , "tree find ref. vars") Index: gcc/common.opt =================================================================== --- gcc/common.opt (revision 175801) +++ gcc/common.opt (working copy) @@ -1937,6 +1937,10 @@ ftree-dominator-opts Common Report Var(flag_tree_dom) Optimization Enable dominator optimizations +ftree-tail-merge +Common Report Var(flag_tree_tail_merge) Optimization +Enable tail merging on trees + ftree-dse Common Report Var(flag_tree_dse) Optimization Enable dead store elimination Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (revision 175801) +++ gcc/Makefile.in (working copy) @@ -1466,6 +1466,7 @@ OBJS = \ tree-ssa-sccvn.o \ tree-ssa-sink.o \ tree-ssa-structalias.o \ + tree-ssa-tail-merge.o \ tree-ssa-ter.o \ tree-ssa-threadedge.o \ tree-ssa-threadupdate.o \ @@ -2427,6 +2428,13 @@ stor-layout.o : stor-layout.c $(CONFIG_H $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \ $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \ $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H) +tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \ + $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \ + $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \ + $(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) \ + $(GIMPLE_H) $(FUNCTION_H) \ + 
$(TREE_PASS_H) $(TIMEVAR_H) tree-ssa-sccvn.h \ + $(CGRAPH_H) gimple-pretty-print.h tree-pretty-print.h $(PARAMS_H) tree-ssa-structalias.o: tree-ssa-structalias.c \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(GGC_H) $(OBSTACK_H) $(BITMAP_H) \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \ Index: gcc/passes.c =================================================================== --- gcc/passes.c (revision 175801) +++ gcc/passes.c (working copy) @@ -1292,6 +1292,7 @@ init_optimization_passes (void) NEXT_PASS (pass_fre); NEXT_PASS (pass_copy_prop); NEXT_PASS (pass_merge_phi); + NEXT_PASS (pass_tail_merge); NEXT_PASS (pass_vrp); NEXT_PASS (pass_dce); NEXT_PASS (pass_cselim); Index: gcc/params.def =================================================================== --- gcc/params.def (revision 175801) +++ gcc/params.def (working copy) @@ -892,6 +892,11 @@ DEFPARAM (PARAM_MAX_STORES_TO_SINK, "Maximum number of conditional store pairs that can be sunk", 2, 0, 0) +DEFPARAM (PARAM_TAIL_MERGE_MAX_COMPARISONS, + "tail-merge-max-comparisons", + "Maximum amount of similar bbs to compare bb with", + 10, 0, 0) + /* Local variables: