From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 24323 invoked by alias); 9 Nov 2015 12:55:59 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 24313 invoked by uid 89); 9 Nov 2015 12:55:59 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.8 required=5.0 tests=AWL,BAYES_00,KAM_ASCII_DIVIDERS,RP_MATCHES_RCVD,SPF_PASS autolearn=no version=3.3.2 X-HELO: mx2.suse.de Received: from mx2.suse.de (HELO mx2.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (CAMELLIA256-SHA encrypted) ESMTPS; Mon, 09 Nov 2015 12:55:57 +0000 Received: from relay1.suse.de (charybdis-ext.suse.de [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 8ABEAABFA for ; Mon, 9 Nov 2015 12:55:34 +0000 (UTC) Date: Mon, 09 Nov 2015 12:55:00 -0000 From: Richard Biener To: gcc-patches@gcc.gnu.org Subject: [PATCH] Improve BB vectorization dependence analysis Message-ID: User-Agent: Alpine 2.11 (LSU 23 2013-08-11) MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII X-SW-Source: 2015-11/txt/msg00950.txt.bz2 Currently BB vectorization computes all dependences inside a BB region and gives up on vectorizing the whole region if it cannot handle some of them. This is obviously not needed - BB vectorization can restrict the dependence tests to those that are needed to apply the load/store motion effectively performed by the vectorization (sinking all participating loads/stores to the place of the last one). With the analysis restructured that way it is also easy to not give up completely but only on the SLP instances we cannot vectorize (this gives a slight bump in my SPEC CPU 2006 testing to 756 vectorized basic block regions). But first and foremost this patch is meant to reduce the dependence analysis cost and somewhat mitigate the compile-time effects of the first patch. 
For fixing PR56118 only a cost model issue remains. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2015-11-09 Richard Biener PR tree-optimization/56118 * tree-vectorizer.h (vect_find_last_scalar_stmt_in_slp): Declare. * tree-vect-slp.c (vect_find_last_scalar_stmt_in_slp): Export. * tree-vect-data-refs.c (vect_slp_analyze_node_dependences): New function. (vect_slp_analyze_data_ref_dependences): Instead of computing all dependences of the region DRs just analyze the code motions SLP vectorization will perform. Remove SLP instances that cannot have their store/load motions applied. (vect_analyze_data_refs): Allow DRs without a vectype in BB vectorization. * gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c: Adjust. Index: gcc/tree-vectorizer.h =================================================================== *** gcc/tree-vectorizer.h.orig 2015-11-09 11:01:55.688175321 +0100 --- gcc/tree-vectorizer.h 2015-11-09 11:02:18.987432840 +0100 *************** extern void vect_detect_hybrid_slp (loop *** 1075,1080 **** --- 1075,1081 ---- extern void vect_get_slp_defs (vec , slp_tree, vec > *, int); extern bool vect_slp_bb (basic_block); + extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree); /* In tree-vect-patterns.c. */ /* Pattern recognition functions. Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c.orig 2015-11-09 10:22:33.140125722 +0100 --- gcc/tree-vect-data-refs.c 2015-11-09 11:33:05.503874719 +0100 *************** vect_slp_analyze_data_ref_dependence (st *** 581,586 **** --- 581,629 ---- } + /* Analyze dependences involved in the transform of SLP NODE. */ + + static bool + vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node) + { + /* This walks over all stmts involved in the SLP load/store done + in NODE verifying we can sink them up to the last stmt in the + group. 
*/ + gimple *last_access = vect_find_last_scalar_stmt_in_slp (node); + for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k) + { + gimple *access = SLP_TREE_SCALAR_STMTS (node)[k]; + if (access == last_access) + continue; + stmt_vec_info access_stmt_info = vinfo_for_stmt (access); + gimple_stmt_iterator gsi = gsi_for_stmt (access); + gsi_next (&gsi); + for (; gsi_stmt (gsi) != last_access; gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + if (!STMT_VINFO_DATA_REF (stmt_info) + || (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)) + && DR_IS_READ (STMT_VINFO_DATA_REF (access_stmt_info)))) + continue; + + ddr_p ddr = initialize_data_dependence_relation + (STMT_VINFO_DATA_REF (access_stmt_info), + STMT_VINFO_DATA_REF (stmt_info), vNULL); + if (vect_slp_analyze_data_ref_dependence (ddr)) + { + /* ??? If the dependence analysis failed we can resort to the + alias oracle which can handle more kinds of stmts. */ + free_dependence_relation (ddr); + return false; + } + free_dependence_relation (ddr); + } + } + return true; + } + + /* Function vect_analyze_data_ref_dependences. Examine all the data references in the basic-block, and make sure there *************** vect_slp_analyze_data_ref_dependence (st *** 590,610 **** bool vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo) { - struct data_dependence_relation *ddr; - unsigned int i; - if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_data_ref_dependences ===\n"); ! if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo), ! &BB_VINFO_DDRS (bb_vinfo), ! vNULL, true)) ! return false; ! FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr) ! if (vect_slp_analyze_data_ref_dependence (ddr)) ! 
return false; return true; } --- 633,677 ---- bool vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_data_ref_dependences ===\n"); ! slp_instance instance; ! slp_tree load; ! unsigned int i, j; ! for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); ) ! { ! bool remove = false; ! /* Verify we can sink loads to the vectorized stmt insert location. */ ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, load) ! if (! vect_slp_analyze_node_dependences (instance, load)) ! { ! remove = true; ! break; ! } ! /* Verify we can sink stores to the vectorized stmt insert location. */ ! slp_tree store = SLP_INSTANCE_TREE (instance); ! if (!remove ! && STMT_VINFO_DATA_REF ! (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])) ! && ! vect_slp_analyze_node_dependences (instance, store)) ! remove = true; ! if (remove) ! { ! dump_printf_loc (MSG_NOTE, vect_location, ! "removing SLP instance operations starting from: "); ! dump_gimple_stmt (MSG_NOTE, TDF_SLIM, ! SLP_TREE_SCALAR_STMTS ! (SLP_INSTANCE_TREE (instance))[0], 0); ! vect_free_slp_instance (instance); ! BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i); ! } ! i++; ! } ! if (!BB_VINFO_SLP_INSTANCES (bb_vinfo).length ()) ! return false; return true; } *************** again: *** 3715,3721 **** } if (is_a (vinfo)) ! break; if (gatherscatter != SG_NONE || simd_lane_access) { --- 3782,3793 ---- } if (is_a (vinfo)) ! { ! /* No vector type is fine, the ref can still participate ! in dependence analysis, we just can't vectorize it. */ ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! continue; ! 
} if (gatherscatter != SG_NONE || simd_lane_access) { Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c.orig 2015-11-09 11:01:55.720175675 +0100 --- gcc/tree-vect-slp.c 2015-11-09 11:02:19.004433028 +0100 *************** vect_supported_load_permutation_p (slp_i *** 1426,1432 **** /* Find the last store in SLP INSTANCE. */ ! static gimple * vect_find_last_scalar_stmt_in_slp (slp_tree node) { gimple *last = NULL, *stmt; --- 1426,1432 ---- /* Find the last store in SLP INSTANCE. */ ! gimple * vect_find_last_scalar_stmt_in_slp (slp_tree node) { gimple *last = NULL, *stmt; Index: gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c =================================================================== *** gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c.orig 2015-11-06 12:11:17.347076131 +0100 --- gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c 2015-11-09 11:02:19.061433658 +0100 *************** A sum(A a,A b) *** 13,16 **** return a; } ! /* { dg-final { scan-tree-dump-times "not vectorized: more than one data ref in stmt" 0 "slp2" } } */ --- 13,16 ---- return a; } ! /* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */