From: Richard Biener
To: gcc-patches@gcc.gnu.org
Date: Tue, 12 Mar 2013 15:25:00 -0000
Subject: [PATCH][6/n] tree LIM TLC

(Un-?)surprisingly, the most effective compile-time reduction for the
testcase in PR39326 is to employ ao_ref caching for alias-oracle
queries and caching of expanded affine combinations for the affine
disambiguations.  This is what reduces compile time to a manageable
amount for me in the first place (so I'm sending it "late" in the
series).  A small stand-alone sketch of the caching pattern follows
the patch.

Bootstrap and regtest scheduled on x86_64-unknown-linux-gnu, queued
for 4.9.

Richard.

2013-03-12  Richard Biener

	PR tree-optimization/39326
	* tree-ssa-loop-im.c (struct mem_ref): Replace mem member with
	an ao_ref typed one.  Add affine-combination cache members.
	(MEM_ANALYZABLE): Adjust.
	(memref_eq): Likewise.
	(mem_ref_alloc): Likewise.
	(gather_mem_refs_stmt): Likewise.
	(execute_sm_if_changed_flag_set): Likewise.
	(execute_sm): Likewise.
	(ref_always_accessed_p): Likewise.
	(refs_independent_p): Likewise.
	(can_sm_ref_p): Likewise.
	(mem_refs_may_alias_p): Use ao_ref members to query the oracle.
	Cache expanded affine combinations.

Index: trunk/gcc/tree-ssa-loop-im.c
===================================================================
*** trunk.orig/gcc/tree-ssa-loop-im.c	2013-03-12 15:11:12.000000000 +0100
--- trunk/gcc/tree-ssa-loop-im.c	2013-03-12 16:20:49.115169595 +0100
*************** typedef struct mem_ref_locs
*** 117,126 ****
  typedef struct mem_ref
  {
-   tree mem;			/* The memory itself.  */
    unsigned id;			/* ID assigned to the memory reference
  				   (its index in memory_accesses.refs_list)  */
    hashval_t hash;		/* Its hash value.  */
    bitmap stored;		/* The set of loops in that this memory
  				   location is stored to.  */
    vec<mem_ref_locs_p> accesses_in_loop;
--- 117,130 ----
  typedef struct mem_ref
  {
    unsigned id;			/* ID assigned to the memory reference
  				   (its index in memory_accesses.refs_list)  */
    hashval_t hash;		/* Its hash value.  */
+ 
+   /* The memory access itself and associated caching of alias-oracle
+      query meta-data.  */
+   ao_ref mem;			/* The ao_ref of this memory access.  */
+ 
    bitmap stored;		/* The set of loops in that this memory
  				   location is stored to.  */
    vec<mem_ref_locs_p> accesses_in_loop;
*************** typedef struct mem_ref
*** 142,147 ****
--- 146,155 ----
    bitmap indep_ref;		/* The set of memory references on that
  				   this reference is independent.  */
    bitmap dep_ref;		/* The complement of INDEP_REF.  */
+ 
+   /* The expanded affine combination of this memory access.  */
+   aff_tree aff_off;
+   double_int aff_size;
  } *mem_ref_p;
*************** static bool ref_indep_loop_p (struct loo
*** 186,192 ****
  #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
  
  /* Whether the reference was analyzable.  */
! #define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)
  
  static struct lim_aux_data *
  init_lim_data (gimple stmt)
--- 194,200 ----
  #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
  
  /* Whether the reference was analyzable.  */
! #define MEM_ANALYZABLE(REF) ((REF)->mem.ref != error_mark_node)
  
  static struct lim_aux_data *
  init_lim_data (gimple stmt)
*************** memref_eq (const void *obj1, const void
*** 1435,1441 ****
  {
    const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
  
!   return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
  }
  
  /* Releases list of memory reference locations ACCS.  */
--- 1443,1449 ----
  {
    const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
  
!   return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
  }
  
  /* Releases list of memory reference locations ACCS.  */
*************** static mem_ref_p
*** 1477,1483 ****
  mem_ref_alloc (tree mem, unsigned hash, unsigned id)
  {
    mem_ref_p ref = XNEW (struct mem_ref);
!   ref->mem = mem;
    ref->id = id;
    ref->hash = hash;
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
--- 1485,1491 ----
  mem_ref_alloc (tree mem, unsigned hash, unsigned id)
  {
    mem_ref_p ref = XNEW (struct mem_ref);
!   ao_ref_init (&ref->mem, mem);
    ref->id = id;
    ref->hash = hash;
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
*************** mem_ref_alloc (tree mem, unsigned hash,
*** 1487,1492 ****
--- 1495,1502 ----
    ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
    ref->accesses_in_loop.create (0);
  
+   ref->aff_off.type = NULL_TREE;
+ 
    return ref;
  }
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1586,1592 ****
        if (dump_file && (dump_flags & TDF_DETAILS))
  	{
  	  fprintf (dump_file, "Memory reference %u: ", id);
! 	  print_generic_expr (dump_file, ref->mem, TDF_SLIM);
  	  fprintf (dump_file, "\n");
  	}
      }
--- 1596,1602 ----
        if (dump_file && (dump_flags & TDF_DETAILS))
  	{
  	  fprintf (dump_file, "Memory reference %u: ", id);
! 	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
  	  fprintf (dump_file, "\n");
  	}
      }
*************** analyze_memory_references (struct loop *
*** 1638,1653 ****
     tree_to_aff_combination_expand.  */
  
  static bool
! mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
  {
    /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
       object and their offset differ in such a way that the locations cannot
       overlap, then they cannot alias.  */
-   double_int size1, size2;
-   aff_tree off1, off2;
  
    /* Perform basic offset and type-based disambiguation.  */
!   if (!refs_may_alias_p (mem1, mem2))
      return false;
  
    /* The expansion of addresses may be a bit expensive, thus we only do
--- 1648,1661 ----
     tree_to_aff_combination_expand.  */
  
  static bool
! mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2)
  {
    /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
       object and their offset differ in such a way that the locations cannot
       overlap, then they cannot alias.  */
  
    /* Perform basic offset and type-based disambiguation.  */
!   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
      return false;
  
    /* The expansion of addresses may be a bit expensive, thus we only do
*************** mem_refs_may_alias_p (tree mem1, tree me
*** 1655,1668 ****
    if (optimize < 2)
      return true;
  
!   get_inner_reference_aff (mem1, &off1, &size1);
!   get_inner_reference_aff (mem2, &off2, &size2);
!   aff_combination_expand (&off1, ttae_cache);
!   aff_combination_expand (&off2, ttae_cache);
!   aff_combination_scale (&off1, double_int_minus_one);
!   aff_combination_add (&off2, &off1);
  
!   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
      return false;
  
    return true;
--- 1663,1686 ----
    if (optimize < 2)
      return true;
  
!   if (mem1->aff_off.type == NULL_TREE)
!     {
!       get_inner_reference_aff (mem1->mem.ref, &mem1->aff_off, &mem1->aff_size);
!       aff_combination_expand (&mem1->aff_off, &memory_accesses.ttae_cache);
!       gcc_assert (mem1->aff_off.type != NULL_TREE);
!     }
!   if (mem2->aff_off.type == NULL_TREE)
!     {
!       get_inner_reference_aff (mem2->mem.ref, &mem2->aff_off, &mem2->aff_size);
!       aff_combination_expand (&mem2->aff_off, &memory_accesses.ttae_cache);
!       gcc_assert (mem2->aff_off.type != NULL_TREE);
!     }
! 
!   aff_tree tem = mem1->aff_off;
!   aff_combination_scale (&tem, double_int_minus_one);
!   aff_combination_add (&tem, &mem2->aff_off);
!   if (aff_comb_cannot_overlap_p (&tem, mem1->aff_size, mem2->aff_size))
      return false;
  
    return true;
*************** execute_sm_if_changed_flag_set (struct l
*** 1987,1993 ****
    mem_ref_loc_p loc;
    tree flag;
    vec<mem_ref_loc_p> locs = vNULL;
!   char *str = get_lsm_tmp_name (ref->mem, ~0);
  
    lsm_tmp_name_add ("_flag");
    flag = create_tmp_reg (boolean_type_node, str);
--- 2005,2011 ----
    mem_ref_loc_p loc;
    tree flag;
    vec<mem_ref_loc_p> locs = vNULL;
!   char *str = get_lsm_tmp_name (ref->mem.ref, ~0);
  
    lsm_tmp_name_add ("_flag");
    flag = create_tmp_reg (boolean_type_node, str);
*************** execute_sm (struct loop *loop, vec<edge
*** 2029,2044 ****
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Executing store motion of ");
!       print_generic_expr (dump_file, ref->mem, 0);
        fprintf (dump_file, " from loop %d\n", loop->num);
      }
  
!   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
! 			    get_lsm_tmp_name (ref->mem, ~0));
  
    fmt_data.loop = loop;
    fmt_data.orig_loop = loop;
!   for_each_index (&ref->mem, force_move_till, &fmt_data);
  
    if (block_in_transaction (loop_preheader_edge (loop)->src)
        || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
--- 2047,2062 ----
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Executing store motion of ");
!       print_generic_expr (dump_file, ref->mem.ref, 0);
        fprintf (dump_file, " from loop %d\n", loop->num);
      }
  
!   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
! 			    get_lsm_tmp_name (ref->mem.ref, ~0));
  
    fmt_data.loop = loop;
    fmt_data.orig_loop = loop;
!   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
  
    if (block_in_transaction (loop_preheader_edge (loop)->src)
        || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
*************** execute_sm (struct loop *loop, vec<edge
*** 2056,2062 ****
    /* FIXME/TODO: For the multi-threaded variant, we could avoid this
       load altogether, since the store is predicated by a flag.  We
       could, do the load only if it was originally in the loop.  */
!   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
    lim_data = init_lim_data (load);
    lim_data->max_loop = loop;
    lim_data->tgt_loop = loop;
--- 2074,2080 ----
    /* FIXME/TODO: For the multi-threaded variant, we could avoid this
       load altogether, since the store is predicated by a flag.  We
       could, do the load only if it was originally in the loop.  */
!   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
    lim_data = init_lim_data (load);
    lim_data->max_loop = loop;
    lim_data->tgt_loop = loop;
*************** execute_sm (struct loop *loop, vec<edge
*** 2076,2086 ****
    if (!multi_threaded_model_p)
      {
        gimple store;
!       store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
        gsi_insert_on_edge (ex, store);
      }
    else
!     execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
  }
  
  /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
--- 2094,2104 ----
    if (!multi_threaded_model_p)
      {
        gimple store;
!       store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
        gsi_insert_on_edge (ex, store);
      }
    else
!     execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
  }
  
  /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
*************** ref_always_accessed_p (struct loop *loop
*** 2114,2120 ****
    struct loop *must_exec;
    tree base;
  
!   base = get_base_address (ref->mem);
    if (INDIRECT_REF_P (base)
        || TREE_CODE (base) == MEM_REF)
      base = TREE_OPERAND (base, 0);
--- 2132,2138 ----
    struct loop *must_exec;
    tree base;
  
!   base = ao_ref_base (&ref->mem);
    if (INDIRECT_REF_P (base)
        || TREE_CODE (base) == MEM_REF)
      base = TREE_OPERAND (base, 0);
*************** refs_independent_p (mem_ref_p ref1, mem_
*** 2187,2194 ****
      fprintf (dump_file, "Querying dependency of refs %u and %u: ",
  	     ref1->id, ref2->id);
  
!   if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
! 			    &memory_accesses.ttae_cache))
      {
        bitmap_set_bit (ref1->dep_ref, ref2->id);
        if (dump_file && (dump_flags & TDF_DETAILS))
--- 2205,2211 ----
      fprintf (dump_file, "Querying dependency of refs %u and %u: ",
  	     ref1->id, ref2->id);
  
!   if (mem_refs_may_alias_p (ref1, ref2))
      {
        bitmap_set_bit (ref1->dep_ref, ref2->id);
        if (dump_file && (dump_flags & TDF_DETAILS))
*************** can_sm_ref_p (struct loop *loop, mem_ref
*** 2284,2304 ****
      return false;
  
    /* It should be movable.  */
!   if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
!       || TREE_THIS_VOLATILE (ref->mem)
!       || !for_each_index (&ref->mem, may_move_till, loop))
      return false;
  
    /* If it can throw fail, we do not properly update EH info.  */
!   if (tree_could_throw_p (ref->mem))
      return false;
  
    /* If it can trap, it must be always executed in LOOP.
       Readonly memory locations may trap when storing to them, but
       tree_could_trap_p is a predicate for rvalues, so check that
       explicitly.  */
!   base = get_base_address (ref->mem);
!   if ((tree_could_trap_p (ref->mem)
        || (DECL_P (base) && TREE_READONLY (base)))
        && !ref_always_accessed_p (loop, ref, true))
      return false;
--- 2301,2321 ----
      return false;
  
    /* It should be movable.  */
!   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
!       || TREE_THIS_VOLATILE (ref->mem.ref)
!       || !for_each_index (&ref->mem.ref, may_move_till, loop))
      return false;
  
    /* If it can throw fail, we do not properly update EH info.  */
!   if (tree_could_throw_p (ref->mem.ref))
      return false;
  
    /* If it can trap, it must be always executed in LOOP.
       Readonly memory locations may trap when storing to them, but
       tree_could_trap_p is a predicate for rvalues, so check that
       explicitly.  */
!   base = ao_ref_base (&ref->mem);
!   if ((tree_could_trap_p (ref->mem.ref)
        || (DECL_P (base) && TREE_READONLY (base)))
        && !ref_always_accessed_p (loop, ref, true))
      return false;
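
For the record, the sketch promised above.  Both caches follow the
same lazy-memoization shape: refs_independent_p issues pairwise
queries, so without the cache a quadratic number of
mem_refs_may_alias_p calls would re-run get_inner_reference_aff and
aff_combination_expand on the same trees over and over; caching the
expansion in the mem_ref makes each reference pay that cost once.
The stand-alone program below illustrates only that shape -- all
types in it are made-up stand-ins, not GCC internals; the
aff_off.type == NULL_TREE test in the patch plays the role of the
'valid' flag here.

/* Toy illustration of the lazy caching pattern; made-up stand-in
   types, not GCC code.  */

#include <stdio.h>
#include <stdbool.h>

struct expansion
{
  int valid;	/* Plays the role of aff_off.type != NULL_TREE.  */
  long off;	/* Stand-in for the expanded affine offset.  */
};

struct memref
{
  long base, off, size;		/* Stand-in for the ao_ref data.  */
  struct expansion aff;		/* The per-reference cache.  */
};

static long n_expansions;	/* Counts the "expensive" expansions.  */

/* Expand REF once and cache the result, the way the patched
   mem_refs_may_alias_p caches get_inner_reference_aff /
   aff_combination_expand results.  */

static const struct expansion *
get_expansion (struct memref *ref)
{
  if (!ref->aff.valid)
    {
      n_expansions++;		/* The expensive work happens only here.  */
      ref->aff.off = ref->base + ref->off;
      ref->aff.valid = 1;
    }
  return &ref->aff;
}

/* Return true if A and B may overlap, using the cached expansions.
   Disjoint [off, off+size) intervals cannot alias.  */

static bool
may_alias (struct memref *a, struct memref *b)
{
  const struct expansion *ea = get_expansion (a);
  const struct expansion *eb = get_expansion (b);
  return !(ea->off + a->size <= eb->off || eb->off + b->size <= ea->off);
}

int
main (void)
{
  struct memref a = { 100, 0, 4, { 0, 0 } };
  struct memref b = { 100, 8, 4, { 0, 0 } };

  /* Many pairwise queries, as in refs_independent_p, but only one
     expansion per reference instead of two per query.  */
  for (int i = 0; i < 1000; i++)
    may_alias (&a, &b);
  printf ("expansions: %ld for 1000 queries\n", n_expansions);
  return 0;
}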