2011-01-15 H.J. Lu PR target/46519 * config/i386/i386.c: Include sbitmap.h and fibheap.h. (block_info): Add scanned and prev. (move_or_delete_vzeroupper_2): Return if the basic block has been scanned and the upper 128bit state is unchanged from the last scan. (move_or_delete_vzeroupper_1): Return true if the exit state is changed. (move_or_delete_vzeroupper): Visit basic blocks using the work-list based algorithm based on vt_find_locations in var-tracking.c. * config/i386/t-i386: Also depend on sbitmap.h and $(FIBHEAP_H). diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a26314b..450cddf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -56,6 +56,8 @@ along with GCC; see the file COPYING3. If not see #include "debug.h" #include "dwarf2out.h" #include "sched-int.h" +#include "sbitmap.h" +#include "fibheap.h" enum upper_128bits_state { @@ -73,6 +75,10 @@ typedef struct block_info_def bool unchanged; /* TRUE if block has been processed. */ bool processed; + /* TRUE if block has been scanned. */ + bool scanned; + /* Previous state of the upper 128bits of AVX registers at entry. */ + enum upper_128bits_state prev; } *block_info; #define BLOCK_INFO(B) ((block_info) (B)->aux) @@ -135,6 +141,16 @@ move_or_delete_vzeroupper_2 (basic_block bb, return; } + if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state) + { + if (dump_file) + fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n", + bb->index, BLOCK_INFO (bb)->state); + return; + } + + BLOCK_INFO (bb)->prev = state; + if (dump_file) fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n", bb->index, state); @@ -264,6 +280,7 @@ move_or_delete_vzeroupper_2 (basic_block bb, BLOCK_INFO (bb)->state = state; BLOCK_INFO (bb)->unchanged = unchanged; + BLOCK_INFO (bb)->scanned = true; if (dump_file) fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n", @@ -273,9 +290,10 @@ move_or_delete_vzeroupper_2 (basic_block bb, /* Helper function for move_or_delete_vzeroupper. Process vzeroupper in BLOCK and check its predecessor blocks. Treat UNKNOWN state - as USED if UNKNOWN_IS_UNUSED is true. */ + as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit + state is changed. */ -static void +static bool move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused) { edge e; @@ -288,7 +306,7 @@ move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused) block->index, BLOCK_INFO (block)->processed); if (BLOCK_INFO (block)->processed) - return; + return false; state = unused; @@ -324,8 +342,14 @@ done: /* Need to rescan if the upper 128bits of AVX registers are changed to USED at exit. */ - if (new_state != old_state && new_state == used) - cfun->machine->rescan_vzeroupper_p = 1; + if (new_state != old_state) + { + if (new_state == used) + cfun->machine->rescan_vzeroupper_p = 1; + return true; + } + else + return false; } /* Go through the instruction stream looking for vzeroupper. Delete @@ -338,14 +362,18 @@ move_or_delete_vzeroupper (void) edge e; edge_iterator ei; basic_block bb; - unsigned int count; + fibheap_t worklist, pending, fibheap_swap; + sbitmap visited, in_worklist, in_pending, sbitmap_swap; + int *bb_order; + int *rc_order; + int i; /* Set up block info for each basic block. */ alloc_aux_for_blocks (sizeof (struct block_info_def)); - /* Process successor blocks of all entry points. */ + /* Process outgoing edges of entry point. */ if (dump_file) - fprintf (dump_file, "Process all entry points\n"); + fprintf (dump_file, "Process outgoing edges of entry point\n"); FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs) { @@ -355,25 +383,102 @@ move_or_delete_vzeroupper (void) BLOCK_INFO (e->dest)->processed = true; } - /* Process all basic blocks. */ - count = 0; - do + /* Compute reverse completion order of depth first search of the CFG + so that the data-flow runs faster. */ + rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); + bb_order = XNEWVEC (int, last_basic_block); + pre_and_rev_post_order_compute (NULL, rc_order, false); + for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++) + bb_order[rc_order[i]] = i; + free (rc_order); + + worklist = fibheap_new (); + pending = fibheap_new (); + visited = sbitmap_alloc (last_basic_block); + in_worklist = sbitmap_alloc (last_basic_block); + in_pending = sbitmap_alloc (last_basic_block); + sbitmap_zero (in_worklist); + + /* Don't check outgoing edges of entry point. */ + sbitmap_ones (in_pending); + FOR_EACH_BB (bb) + if (BLOCK_INFO (bb)->processed) + RESET_BIT (in_pending, bb->index); + else + { + move_or_delete_vzeroupper_1 (bb, false); + fibheap_insert (pending, bb_order[bb->index], bb); + } + + if (dump_file) + fprintf (dump_file, "Check remaining basic blocks\n"); + + while (!fibheap_empty (pending)) { - if (dump_file) - fprintf (dump_file, "Process all basic blocks: trip %d\n", - count); + fibheap_swap = pending; + pending = worklist; + worklist = fibheap_swap; + sbitmap_swap = in_pending; + in_pending = in_worklist; + in_worklist = sbitmap_swap; + + sbitmap_zero (visited); + cfun->machine->rescan_vzeroupper_p = 0; - FOR_EACH_BB (bb) - move_or_delete_vzeroupper_1 (bb, false); + + while (!fibheap_empty (worklist)) + { + bb = (basic_block) fibheap_extract_min (worklist); + RESET_BIT (in_worklist, bb->index); + gcc_assert (!TEST_BIT (visited, bb->index)); + if (!TEST_BIT (visited, bb->index)) + { + edge_iterator ei; + + SET_BIT (visited, bb->index); + + if (move_or_delete_vzeroupper_1 (bb, false)) + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (e->dest == EXIT_BLOCK_PTR + || BLOCK_INFO (e->dest)->processed) + continue; + + if (TEST_BIT (visited, e->dest->index)) + { + if (!TEST_BIT (in_pending, e->dest->index)) + { + /* Send E->DEST to next round. */ + SET_BIT (in_pending, e->dest->index); + fibheap_insert (pending, + bb_order[e->dest->index], + e->dest); + } + } + else if (!TEST_BIT (in_worklist, e->dest->index)) + { + /* Add E->DEST to current round. */ + SET_BIT (in_worklist, e->dest->index); + fibheap_insert (worklist, bb_order[e->dest->index], + e->dest); + } + } + } + } + + if (!cfun->machine->rescan_vzeroupper_p) + break; } - while (cfun->machine->rescan_vzeroupper_p && count++ < 20); - /* FIXME: Is 20 big enough? */ - if (count >= 20) - gcc_unreachable (); + free (bb_order); + fibheap_delete (worklist); + fibheap_delete (pending); + sbitmap_free (visited); + sbitmap_free (in_worklist); + sbitmap_free (in_pending); if (dump_file) - fprintf (dump_file, "Process all basic blocks\n"); + fprintf (dump_file, "Process remaining basic blocks\n"); FOR_EACH_BB (bb) move_or_delete_vzeroupper_1 (bb, true); diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 index 6c801a5..1c658a1 100644 --- a/gcc/config/i386/t-i386 +++ b/gcc/config/i386/t-i386 @@ -23,7 +23,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \ $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \ $(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \ - i386-builtin-types.inc debug.h dwarf2out.h + i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H) i386-c.o: $(srcdir)/config/i386/i386-c.c \ $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ diff --git a/gcc/testsuite/ChangeLog.vzero b/gcc/testsuite/ChangeLog.vzero new file mode 100644 index 0000000..fedac70 --- /dev/null +++ b/gcc/testsuite/ChangeLog.vzero @@ -0,0 +1,115 @@ +2010-12-17 H.J. Lu + + * gfortran.dg/pr46519-2.f90: New. + +2010-11-24 H.J. Lu + + * gfortran.dg/pr46519-1.f: Replace -mtune=generic with + -mvzeroupper. + +2010-11-24 H.J. Lu + + * gfortran.dg/pr46519-1.f: New. + +2010-11-20 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2. + * gcc.target/i386/avx-vzeroupper-15.c: Likewise. + * gcc.target/i386/avx-vzeroupper-16.c: Likewise. + * gcc.target/i386/avx-vzeroupper-17.c: Likewise. + + * gcc.target/i386/avx-vzeroupper-25.c: New. + * gcc.target/i386/avx-vzeroupper-26.c: Likewise. + +2010-11-18 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-24.c: New. + +2010-11-18 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-21.c: New. + * gcc.target/i386/avx-vzeroupper-22.c: Likewise. + * gcc.target/i386/avx-vzeroupper-23.c: Likewise. + +2010-11-17 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper. + * gcc.target/i386/avx-vzeroupper-11.c: Likewise. + +2010-11-16 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-20.c: New. + +2010-11-04 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-19.c: New. + +2010-11-03 H.J. Lu + + PR target/46295 + * gcc.target/i386/pr46295.c: New. + +2010-11-03 H.J. Lu + + * gcc.target/i386/pr46285.c: Remove -dp. + +2010-11-02 H.J. Lu + + PR target/46285 + * gcc.target/i386/pr46285.c. + +2010-11-02 H.J. Lu + + PR target/46253 + * gcc.target/i386/pr46253.c: New. + +2010-11-02 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-16.c: New. + * gcc.target/i386/avx-vzeroupper-17.c: Likewise. + * gcc.target/i386/avx-vzeroupper-18.c: Likewise. + +2010-11-02 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-15.c: New. + +2010-10-26 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-4.c: Don't scan + avx_vzeroupper_nop. Scan avx_vzeroupper instead of + *avx_vzeroupper. + * gcc.target/i386/avx-vzeroupper-10.c: Likewise. + * gcc.target/i386/avx-vzeroupper-11.c: Likewise. + * gcc.target/i386/avx-vzeroupper-12.c: Likewise. + * gcc.target/i386/avx-vzeroupper-13.c: Likewise. + * gcc.target/i386/avx-vzeroupper-14.c: Likewise. + +2010-10-19 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-1.c: Add -mtune=generic. + * gcc.target/i386/avx-vzeroupper-1.c: Likewise. + + * gcc.target/i386/avx-vzeroupper-13.c: New. + * gcc.target/i386/avx-vzeroupper-14.c: Likewise. + +2010-10-12 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-9.c: New. + * gcc.target/i386/avx-vzeroupper-10.c: Likewise. + * gcc.target/i386/avx-vzeroupper-11.c: Likewise. + * gcc.target/i386/avx-vzeroupper-12.c: Likewise. + +2010-10-12 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-5.c: New. + * gcc.target/i386/avx-vzeroupper-6.c: Likewise. + * gcc.target/i386/avx-vzeroupper-7.c: Likewise. + * gcc.target/i386/avx-vzeroupper-8.c: Likewise. + +2010-10-12 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-4.c: New. + +2010-06-15 H.J. Lu + + * gcc.target/i386/avx-vzeroupper-3.c: New.