2011-01-12 H.J. Lu PR target/46519 * config/i386/i386.c: Include sbitmap.h and fibheap.h. (move_or_delete_vzeroupper): Visit basic blocks using the work-list based algorithm based on vt_find_locations in var-tracking.c. * config/i386/t-i386: Also depend on sbitmap.h and $(FIBHEAP_H). diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a26314b..5afc1ae 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -56,6 +56,8 @@ along with GCC; see the file COPYING3. If not see #include "debug.h" #include "dwarf2out.h" #include "sched-int.h" +#include "sbitmap.h" +#include "fibheap.h" enum upper_128bits_state { @@ -338,14 +340,18 @@ move_or_delete_vzeroupper (void) edge e; edge_iterator ei; basic_block bb; - unsigned int count; + fibheap_t worklist, pending, fibheap_swap; + sbitmap visited, in_worklist, in_pending, sbitmap_swap; + int *bb_order; + int *rc_order; + int i; /* Set up block info for each basic block. */ alloc_aux_for_blocks (sizeof (struct block_info_def)); - /* Process successor blocks of all entry points. */ + /* Process outgoing edges of entry point. */ if (dump_file) - fprintf (dump_file, "Process all entry points\n"); + fprintf (dump_file, "Process outgoing edges of entry point\n"); FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs) { @@ -355,25 +361,100 @@ move_or_delete_vzeroupper (void) BLOCK_INFO (e->dest)->processed = true; } - /* Process all basic blocks. */ - count = 0; - do + /* Compute reverse completion order of depth first search of the CFG + so that the data-flow runs faster. */ + rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); + bb_order = XNEWVEC (int, last_basic_block); + pre_and_rev_post_order_compute (NULL, rc_order, false); + for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++) + bb_order[rc_order[i]] = i; + free (rc_order); + + worklist = fibheap_new (); + pending = fibheap_new (); + visited = sbitmap_alloc (last_basic_block); + in_worklist = sbitmap_alloc (last_basic_block); + in_pending = sbitmap_alloc (last_basic_block); + sbitmap_zero (in_worklist); + + /* Don't check outgoing edges of entry point. */ + sbitmap_ones (in_pending); + FOR_EACH_BB (bb) + if (BLOCK_INFO (bb)->processed) + RESET_BIT (in_pending, bb->index); + else + fibheap_insert (pending, bb_order[bb->index], bb); + + if (dump_file) + fprintf (dump_file, "Check remaining basic blocks\n"); + + while (!fibheap_empty (pending)) { - if (dump_file) - fprintf (dump_file, "Process all basic blocks: trip %d\n", - count); + fibheap_swap = pending; + pending = worklist; + worklist = fibheap_swap; + sbitmap_swap = in_pending; + in_pending = in_worklist; + in_worklist = sbitmap_swap; + + sbitmap_zero (visited); + cfun->machine->rescan_vzeroupper_p = 0; - FOR_EACH_BB (bb) - move_or_delete_vzeroupper_1 (bb, false); + + while (!fibheap_empty (worklist)) + { + bb = (basic_block) fibheap_extract_min (worklist); + RESET_BIT (in_worklist, bb->index); + gcc_assert (!TEST_BIT (visited, bb->index)); + if (!TEST_BIT (visited, bb->index)) + { + edge_iterator ei; + + SET_BIT (visited, bb->index); + + move_or_delete_vzeroupper_1 (bb, false); + + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (e->dest == EXIT_BLOCK_PTR + || BLOCK_INFO (e->dest)->processed) + continue; + + if (TEST_BIT (visited, e->dest->index)) + { + if (!TEST_BIT (in_pending, e->dest->index)) + { + /* Send E->DEST to next round. */ + SET_BIT (in_pending, e->dest->index); + fibheap_insert (pending, + bb_order[e->dest->index], + e->dest); + } + } + else if (!TEST_BIT (in_worklist, e->dest->index)) + { + /* Add E->DEST to current round. */ + SET_BIT (in_worklist, e->dest->index); + fibheap_insert (worklist, bb_order[e->dest->index], + e->dest); + } + } + } + } + + if (!cfun->machine->rescan_vzeroupper_p) + break; } - while (cfun->machine->rescan_vzeroupper_p && count++ < 20); - /* FIXME: Is 20 big enough? */ - if (count >= 20) - gcc_unreachable (); + free (bb_order); + fibheap_delete (worklist); + fibheap_delete (pending); + sbitmap_free (visited); + sbitmap_free (in_worklist); + sbitmap_free (in_pending); if (dump_file) - fprintf (dump_file, "Process all basic blocks\n"); + fprintf (dump_file, "Process remaining basic blocks\n"); FOR_EACH_BB (bb) move_or_delete_vzeroupper_1 (bb, true); diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 index 6c801a5..1c658a1 100644 --- a/gcc/config/i386/t-i386 +++ b/gcc/config/i386/t-i386 @@ -23,7 +23,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \ $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \ $(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \ - i386-builtin-types.inc debug.h dwarf2out.h + i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H) i386-c.o: $(srcdir)/config/i386/i386-c.c \ $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \