From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 119906 invoked by alias); 13 Oct 2015 08:38:24 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 118677 invoked by uid 89); 13 Oct 2015 08:38:23 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.6 required=5.0 tests=AWL,BAYES_00,KAM_ASCII_DIVIDERS,SPF_PASS,T_RP_MATCHES_RCVD autolearn=no version=3.3.2 X-HELO: mx2.suse.de Received: from mx2.suse.de (HELO mx2.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (CAMELLIA256-SHA encrypted) ESMTPS; Tue, 13 Oct 2015 08:38:21 +0000 Received: from relay1.suse.de (charybdis-ext.suse.de [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 95750ABF7 for ; Tue, 13 Oct 2015 08:38:17 +0000 (UTC) Date: Tue, 13 Oct 2015 08:38:00 -0000 From: Richard Biener To: gcc-patches@gcc.gnu.org Subject: [PATCH] More vectorizer TLC Message-ID: User-Agent: Alpine 2.11 (LSU 23 2013-08-11) MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII X-SW-Source: 2015-10/txt/msg01197.txt.bz2 Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-10-13 Richard Biener * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Allocate the data dependence vector. (vect_peeling_hash_insert): Get the peeling hash table as argument. (vect_peeling_hash_choose_best_peeling): Likewise. (vect_enhance_data_refs_alignment): Adjust. (struct _vect_peel_info, struct _vect_peel_extended_info, struct peel_info_hasher): Move from ... * tree-vectorizer.h: ... here. (LOOP_VINFO_COST_MODEL_MIN_ITERS): Remove. (LOOP_VINFO_PEELING_HTAB): Likewise. (struct _loop_vec_info): Remove min_profitable_iters and peeling_htab members. * tree-vect-loop.c (new_loop_vec_info): Do not allocate vectors here. (destroy_loop_vec_info): Adjust. 
(vect_analyze_loop_2): Do not set LOOP_VINFO_COST_MODEL_MIN_ITERS. (vect_estimate_min_profitable_iters): Use LOOP_VINFO_COMP_ALIAS_DDRS to estimate alias versioning cost. * tree-vect-slp.c (vect_analyze_slp_cost): Dump header. Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c (revision 228709) --- gcc/tree-vect-data-refs.c (working copy) *************** vect_analyze_data_ref_dependences (loop_ *** 468,473 **** --- 468,476 ---- dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_data_ref_dependences ===\n"); + LOOP_VINFO_DDRS (loop_vinfo) + .create (LOOP_VINFO_DATAREFS (loop_vinfo).length () + * LOOP_VINFO_DATAREFS (loop_vinfo).length ()); LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true; if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo), &LOOP_VINFO_DDRS (loop_vinfo), *************** vect_get_data_access_cost (struct data_r *** 1039,1048 **** } /* Insert DR into peeling hash table with NPEEL as key. */ static void ! vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr, int npeel) { struct _vect_peel_info elem, *slot; --- 1042,1089 ---- } + typedef struct _vect_peel_info + { + int npeel; + struct data_reference *dr; + unsigned int count; + } *vect_peel_info; + + typedef struct _vect_peel_extended_info + { + struct _vect_peel_info peel_info; + unsigned int inside_cost; + unsigned int outside_cost; + stmt_vector_for_cost body_cost_vec; + } *vect_peel_extended_info; + + + /* Peeling hashtable helpers. 
*/ + + struct peel_info_hasher : free_ptr_hash <_vect_peel_info> + { + static inline hashval_t hash (const _vect_peel_info *); + static inline bool equal (const _vect_peel_info *, const _vect_peel_info *); + }; + + inline hashval_t + peel_info_hasher::hash (const _vect_peel_info *peel_info) + { + return (hashval_t) peel_info->npeel; + } + + inline bool + peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b) + { + return (a->npeel == b->npeel); + } + + /* Insert DR into peeling hash table with NPEEL as key. */ static void ! vect_peeling_hash_insert (hash_table *peeling_htab, ! loop_vec_info loop_vinfo, struct data_reference *dr, int npeel) { struct _vect_peel_info elem, *slot; *************** vect_peeling_hash_insert (loop_vec_info *** 1050,1056 **** bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); elem.npeel = npeel; ! slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find (&elem); if (slot) slot->count++; else --- 1091,1097 ---- bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); elem.npeel = npeel; ! slot = peeling_htab->find (&elem); if (slot) slot->count++; else *************** vect_peeling_hash_insert (loop_vec_info *** 1059,1066 **** slot->npeel = npeel; slot->dr = dr; slot->count = 1; ! new_slot ! = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find_slot (slot, INSERT); *new_slot = slot; } --- 1100,1106 ---- slot->npeel = npeel; slot->dr = dr; slot->count = 1; ! new_slot = peeling_htab->find_slot (slot, INSERT); *new_slot = slot; } *************** vect_peeling_hash_get_lowest_cost (_vect *** 1164,1170 **** option that aligns as many accesses as possible. */ static struct data_reference * ! vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo, unsigned int *npeel, stmt_vector_for_cost *body_cost_vec) { --- 1204,1211 ---- option that aligns as many accesses as possible. */ static struct data_reference * ! vect_peeling_hash_choose_best_peeling (hash_table *peeling_htab, ! 
loop_vec_info loop_vinfo, unsigned int *npeel, stmt_vector_for_cost *body_cost_vec) { *************** vect_peeling_hash_choose_best_peeling (l *** 1177,1192 **** { res.inside_cost = INT_MAX; res.outside_cost = INT_MAX; ! LOOP_VINFO_PEELING_HTAB (loop_vinfo) ! ->traverse <_vect_peel_extended_info *, ! vect_peeling_hash_get_lowest_cost> (&res); } else { res.peel_info.count = 0; ! LOOP_VINFO_PEELING_HTAB (loop_vinfo) ! ->traverse <_vect_peel_extended_info *, ! vect_peeling_hash_get_most_frequent> (&res); } *npeel = res.peel_info.npeel; --- 1218,1231 ---- { res.inside_cost = INT_MAX; res.outside_cost = INT_MAX; ! peeling_htab->traverse <_vect_peel_extended_info *, ! vect_peeling_hash_get_lowest_cost> (&res); } else { res.peel_info.count = 0; ! peeling_htab->traverse <_vect_peel_extended_info *, ! vect_peeling_hash_get_most_frequent> (&res); } *npeel = res.peel_info.npeel; *************** vect_enhance_data_refs_alignment (loop_v *** 1307,1312 **** --- 1346,1352 ---- tree vectype; unsigned int nelements, mis, same_align_drs_max = 0; stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost (); + hash_table peeling_htab (1); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, *************** vect_enhance_data_refs_alignment (loop_v *** 1379,1388 **** size_zero_node) < 0; /* Save info about DR in the hash table. */ - if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo)) - LOOP_VINFO_PEELING_HTAB (loop_vinfo) - = new hash_table (1); - vectype = STMT_VINFO_VECTYPE (stmt_info); nelements = TYPE_VECTOR_SUBPARTS (vectype); mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE ( --- 1419,1424 ---- *************** vect_enhance_data_refs_alignment (loop_v *** 1424,1430 **** for (j = 0; j < possible_npeel_number; j++) { ! vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp); npeel_tmp += nelements; } --- 1460,1467 ---- for (j = 0; j < possible_npeel_number; j++) { ! vect_peeling_hash_insert (&peeling_htab, loop_vinfo, ! 
dr, npeel_tmp); npeel_tmp += nelements; } *************** vect_enhance_data_refs_alignment (loop_v *** 1590,1596 **** gcc_assert (!all_misalignments_unknown); /* Choose the best peeling from the hash table. */ ! dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel, &body_cost_vec); if (!dr0 || !npeel) do_peeling = false; --- 1627,1634 ---- gcc_assert (!all_misalignments_unknown); /* Choose the best peeling from the hash table. */ ! dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab, ! loop_vinfo, &npeel, &body_cost_vec); if (!dr0 || !npeel) do_peeling = false; Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c (revision 228709) --- gcc/tree-vect-loop.c (working copy) *************** new_loop_vec_info (struct loop *loop) *** 937,959 **** LOOP_VINFO_NITERSM1 (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; - LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; ! LOOP_VINFO_LOOP_NEST (res).create (3); ! LOOP_VINFO_DATAREFS (res).create (10); ! LOOP_VINFO_DDRS (res).create (10 * 10); LOOP_VINFO_UNALIGNED_DR (res) = NULL; ! LOOP_VINFO_MAY_MISALIGN_STMTS (res).create ( ! PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS)); ! LOOP_VINFO_MAY_ALIAS_DDRS (res).create ( ! PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)); ! LOOP_VINFO_GROUPED_STORES (res).create (10); ! LOOP_VINFO_REDUCTIONS (res).create (10); ! LOOP_VINFO_REDUCTION_CHAINS (res).create (10); ! 
LOOP_VINFO_SLP_INSTANCES (res).create (10); LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop); LOOP_VINFO_PEELING_FOR_GAPS (res) = false; --- 937,956 ---- LOOP_VINFO_NITERSM1 (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; ! LOOP_VINFO_LOOP_NEST (res) = vNULL; ! LOOP_VINFO_DATAREFS (res) = vNULL; ! LOOP_VINFO_DDRS (res) = vNULL; LOOP_VINFO_UNALIGNED_DR (res) = NULL; ! LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL; ! LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL; ! LOOP_VINFO_GROUPED_STORES (res) = vNULL; ! LOOP_VINFO_REDUCTIONS (res) = vNULL; ! LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL; ! LOOP_VINFO_SLP_INSTANCES (res) = vNULL; LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop); LOOP_VINFO_PEELING_FOR_GAPS (res) = false; *************** destroy_loop_vec_info (loop_vec_info loo *** 1036,1044 **** LOOP_VINFO_REDUCTIONS (loop_vinfo).release (); LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release (); - delete LOOP_VINFO_PEELING_HTAB (loop_vinfo); - LOOP_VINFO_PEELING_HTAB (loop_vinfo) = NULL; - destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); loop_vinfo->scalar_cost_vec.release (); --- 1033,1038 ---- *************** vect_analyze_loop_2 (loop_vec_info loop_ *** 1786,1792 **** int min_profitable_estimate, min_profitable_iters; vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, &min_profitable_estimate); - LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; if (min_profitable_iters < 0) { --- 1780,1785 ---- *************** vect_estimate_min_profitable_iters (loop *** 2810,2816 **** if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) { /* FIXME: Make cost depend on complexity of individual check. */ ! 
unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length (); (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0, vect_prologue); dump_printf (MSG_NOTE, --- 2803,2809 ---- if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) { /* FIXME: Make cost depend on complexity of individual check. */ ! unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0, vect_prologue); dump_printf (MSG_NOTE, Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 228709) --- gcc/tree-vect-slp.c (working copy) *************** vect_analyze_slp_cost (slp_instance inst *** 1569,1574 **** --- 1569,1578 ---- stmt_info_for_cost *si; unsigned i; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vect_analyze_slp_cost ===\n"); + /* Calculate the number of vector stmts to create based on the unrolling factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is GROUP_SIZE / NUNITS otherwise. */ Index: gcc/tree-vectorizer.h =================================================================== *** gcc/tree-vectorizer.h (revision 228709) --- gcc/tree-vectorizer.h (working copy) *************** struct dr_with_seg_len_pair_t *** 194,234 **** }; - typedef struct _vect_peel_info - { - int npeel; - struct data_reference *dr; - unsigned int count; - } *vect_peel_info; - - typedef struct _vect_peel_extended_info - { - struct _vect_peel_info peel_info; - unsigned int inside_cost; - unsigned int outside_cost; - stmt_vector_for_cost body_cost_vec; - } *vect_peel_extended_info; - - - /* Peeling hashtable helpers. 
*/ - - struct peel_info_hasher : free_ptr_hash <_vect_peel_info> - { - static inline hashval_t hash (const _vect_peel_info *); - static inline bool equal (const _vect_peel_info *, const _vect_peel_info *); - }; - - inline hashval_t - peel_info_hasher::hash (const _vect_peel_info *peel_info) - { - return (hashval_t) peel_info->npeel; - } - - inline bool - peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b) - { - return (a->npeel == b->npeel); - } /* Vectorizer state common between loop and basic-block vectorization. */ struct vec_info { --- 194,199 ---- *************** typedef struct _loop_vec_info : public v *** 289,301 **** /* Number of iterations of the original loop. */ tree num_iters_unchanged; - /* Minimum number of iterations below which vectorization is expected to - not be profitable (as estimated by the cost model). - -1 indicates that vectorization will not be profitable. - FORNOW: This field is an int. Will be a tree in the future, to represent - values unknown at compile time. */ - int min_profitable_iters; - /* Threshold of number of iterations below which vectorzation will not be performed. It is calculated from MIN_PROFITABLE_ITERS and PARAM_MIN_VECT_LOOP_BOUND. */ --- 254,259 ---- *************** typedef struct _loop_vec_info : public v *** 349,357 **** stmt in the chain. */ vec reduction_chains; - /* Hash table used to choose the best peeling option. */ - hash_table *peeling_htab; - /* Cost vector for a single scalar iteration. */ vec scalar_cost_vec; --- 307,312 ---- *************** typedef struct _loop_vec_info : public v *** 407,413 **** prologue peeling retain total unchanged scalar loop iterations for cost model. 
*/ #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged - #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor --- 362,367 ---- *************** typedef struct _loop_vec_info : public v *** 426,432 **** #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains - #define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped --- 380,385 ----