From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 64870 invoked by alias); 16 Apr 2019 12:05:13 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 64861 invoked by uid 89); 16 Apr 2019 12:05:13 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-13.7 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_2,GIT_PATCH_3,KAM_NUMSUBJECT,RCVD_IN_DNSWL_NONE,SPF_PASS autolearn=ham version=3.3.1 spammy=indicating, Updates, 10217, cuid X-HELO: mail.ispras.ru Received: from mail.ispras.ru (HELO mail.ispras.ru) (83.149.199.45) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 16 Apr 2019 12:05:11 +0000 Received: from mail.ispras.ru (localhost [127.0.0.1]) by mail.ispras.ru (Postfix) with ESMTPSA id 8E0CE540094 for ; Tue, 16 Apr 2019 15:05:08 +0300 (MSK) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit Date: Tue, 16 Apr 2019 12:08:00 -0000 From: Roman Zhuykov To: gcc-patches@gcc.gnu.org Subject: [3/4][PATCH] Fix PR90001 In-Reply-To: References: Message-ID: <06c5e43724c4964b455a41c32626115c@ispras.ru> X-Sender: zhroma@ispras.ru User-Agent: Roundcube Webmail/1.1.2 X-SW-Source: 2019-04/txt/msg00632.txt.bz2 The current algorithm, which finds recurrence_length for all DDG strongly connected components, works in roughly O(N^6) time, where N is the number of nodes in the DDG. The time is so bad mostly for graphs with lots of edges, i.e. almost N^2 edges. My proposed algorithm works in O(N^3). The algorithm that finds the SCCs themselves is also not optimal (maybe up to O(N^4)), but it is left untouched here. In some situations, when the number of edges is smaller (e.g. equal to N), the new algorithm can unfortunately be slower than the old one. But I think it's better here to add some bail-out when we get more than 1000 nodes, for example. 
Before creating this patch, I tested a special version of it, where both approaches were in action and asserts were inserted to check that the algorithms' results (longest_simple_path values) are absolutely the same. I can publish this special version if needed. I’ve described patch testing in the cover letter. Ok for trunk? gcc/ChangeLog: 2019-04-08 Roman Zhuykov PR rtl-optimization/90001 * ddg.c (create_ddg): Init max_dist array for each node. (free_ddg): Free max_dist array. (create_ddg_edge): Use bool field instead of aux union. (set_recurrence_length): Remove graph argument, use prepared max_dist information instead of calling longest_simple_path. (create_scc): Add id argument, fill node's aux.count with SCC id, and move set_recurrence_length call to... (create_ddg_all_sccs): ...here, after filling all max_dist arrays using Floyd–Warshall-like algorithm. (update_dist_to_successors): Remove the whole function. (longest_simple_path): Likewise. * ddg.h (struct ddg_node): Add max_dist pointer. (struct ddg_edge): Use bool field instead of unused aux union. diff --git a/gcc/ddg.c b/gcc/ddg.c --- a/gcc/ddg.c +++ b/gcc/ddg.c @@ -32,9 +32,6 @@ along with GCC; see the file COPYING3. If not see #ifdef INSN_SCHEDULING -/* A flag indicating that a ddg edge belongs to an SCC or not. */ -enum edge_flag {NOT_IN_SCC = 0, IN_SCC}; - /* Forward declarations. */ static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr); static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr); @@ -564,7 +561,7 @@ create_ddg (basic_block bb, int closing_branch_deps) { ddg_ptr g; rtx_insn *insn, *first_note; - int i; + int i, j; int num_nodes = 0; g = (ddg_ptr) xcalloc (1, sizeof (struct ddg)); @@ -632,6 +629,12 @@ create_ddg (basic_block bb, int closing_branch_deps) g->nodes[i].predecessors = sbitmap_alloc (num_nodes); bitmap_clear (g->nodes[i].predecessors); g->nodes[i].first_note = (first_note ? 
first_note : insn); + + g->nodes[i].aux.count = -1; + g->nodes[i].max_dist = XCNEWVEC (int, num_nodes); + for (j = 0; j < num_nodes; j++) + g->nodes[i].max_dist[j] = -1; + g->nodes[i++].insn = insn; first_note = NULL; } @@ -668,6 +671,7 @@ free_ddg (ddg_ptr g) } sbitmap_free (g->nodes[i].successors); sbitmap_free (g->nodes[i].predecessors); + free (g->nodes[i].max_dist); } if (g->num_backarcs > 0) free (g->backarcs); @@ -792,7 +796,7 @@ create_ddg_edge (ddg_node_ptr src, ddg_node_ptr dest, e->latency = l; e->distance = d; e->next_in = e->next_out = NULL; - e->aux.info = 0; + e->in_scc = false; return e; } @@ -820,7 +824,7 @@ add_edge_to_ddg (ddg_ptr g ATTRIBUTE_UNUSED, ddg_edge_ptr e) for now that cycles in the data dependence graph contain a single backarc. This simplifies the algorithm, and can be generalized later. */ static void -set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g) +set_recurrence_length (ddg_scc_ptr scc) { int j; int result = -1; @@ -828,17 +832,14 @@ set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g) for (j = 0; j < scc->num_backarcs; j++) { ddg_edge_ptr backarc = scc->backarcs[j]; - int length; int distance = backarc->distance; ddg_node_ptr src = backarc->dest; ddg_node_ptr dest = backarc->src; + int length = src->max_dist[dest->cuid]; + + if (length < 0) + continue; - length = longest_simple_path (g, src->cuid, dest->cuid, scc->nodes); - if (length < 0 ) - { - /* fprintf (stderr, "Backarc not on simple cycle in SCC.\n"); */ - continue; - } length += backarc->latency; result = MAX (result, (length / distance)); } @@ -846,9 +847,9 @@ set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g) } /* Create a new SCC given the set of its nodes. Compute its recurrence_length - and mark edges that belong to this scc as IN_SCC. */ + and mark edges that belong to this scc. 
*/ static ddg_scc_ptr -create_scc (ddg_ptr g, sbitmap nodes) +create_scc (ddg_ptr g, sbitmap nodes, int id) { ddg_scc_ptr scc; unsigned int u = 0; @@ -866,16 +867,18 @@ create_scc (ddg_ptr g, sbitmap nodes) ddg_edge_ptr e; ddg_node_ptr n = &g->nodes[u]; + gcc_assert (n->aux.count == -1); + n->aux.count = id; + for (e = n->out; e; e = e->next_out) if (bitmap_bit_p (nodes, e->dest->cuid)) { - e->aux.count = IN_SCC; + e->in_scc = true; if (e->distance > 0) add_backarc_to_scc (scc, e); } } - set_recurrence_length (scc, g); return scc; } @@ -1018,7 +1021,7 @@ check_sccs (ddg_all_sccs_ptr sccs, int num_nodes) ddg_all_sccs_ptr create_ddg_all_sccs (ddg_ptr g) { - int i; + int i, j, k, scc, way; int num_nodes = g->num_nodes; auto_sbitmap from (num_nodes); auto_sbitmap to (num_nodes); @@ -1038,7 +1041,7 @@ create_ddg_all_sccs (ddg_ptr g) ddg_node_ptr dest = backarc->dest; /* If the backarc already belongs to an SCC, continue. */ - if (backarc->aux.count == IN_SCC) + if (backarc->in_scc) continue; bitmap_clear (scc_nodes); @@ -1049,10 +1052,52 @@ create_ddg_all_sccs (ddg_ptr g) if (find_nodes_on_paths (scc_nodes, g, from, to)) { - scc = create_scc (g, scc_nodes); + scc = create_scc (g, scc_nodes, sccs->num_sccs); add_scc_to_ddg (sccs, scc); } } + + /* Init max_dist arrays for Floyd–Warshall-like + longest path calculation algorithm. */ + for (k = 0; k < num_nodes; k++) + { + ddg_edge_ptr e; + ddg_node_ptr n = &g->nodes[k]; + + if (n->aux.count == -1) + continue; + + n->max_dist[k] = 0; + for (e = n->out; e; e = e->next_out) + if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == n->aux.count) + n->max_dist[e->dest->cuid] = e->latency; + } + + /* Run main Floyd-Warshall loop. We use only non-backarc edges + inside each scc. 
*/ + for (k = 0; k < num_nodes; k++) + { + scc = g->nodes[k].aux.count; + if (scc != -1) + { + for (i = 0; i < num_nodes; i++) + if (g->nodes[i].aux.count == scc) + for (j = 0; j < num_nodes; j++) + if (g->nodes[j].aux.count == scc + && g->nodes[i].max_dist[k] >= 0 + && g->nodes[k].max_dist[j] >= 0) + { + way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j]; + if (g->nodes[i].max_dist[j] < way) + g->nodes[i].max_dist[j] = way; + } + } + } + + /* Calculate recurrence_length using max_dist info. */ + for (i = 0; i < sccs->num_sccs; i++) + set_recurrence_length (sccs->sccs[i]); + order_sccs (sccs); if (flag_checking) @@ -1155,72 +1200,4 @@ find_nodes_on_paths (sbitmap result, ddg_ptr g, sbitmap from, sbitmap to) return bitmap_and (result, reachable_from, reach_to); } - -/* Updates the counts of U_NODE's successors (that belong to NODES) to be - at-least as large as the count of U_NODE plus the latency between them. - Sets a bit in TMP for each successor whose count was changed (increased). - Returns nonzero if any count was changed. */ -static int -update_dist_to_successors (ddg_node_ptr u_node, sbitmap nodes, sbitmap tmp) -{ - ddg_edge_ptr e; - int result = 0; - - for (e = u_node->out; e; e = e->next_out) - { - ddg_node_ptr v_node = e->dest; - int v = v_node->cuid; - - if (bitmap_bit_p (nodes, v) - && (e->distance == 0) - && (v_node->aux.count < u_node->aux.count + e->latency)) - { - v_node->aux.count = u_node->aux.count + e->latency; - bitmap_set_bit (tmp, v); - result = 1; - } - } - return result; -} - - -/* Find the length of a longest path from SRC to DEST in G, - going only through NODES, and disregarding backarcs. 
*/ -int -longest_simple_path (struct ddg * g, int src, int dest, sbitmap nodes) -{ - int i; - unsigned int u = 0; - int change = 1; - int num_nodes = g->num_nodes; - auto_sbitmap workset (num_nodes); - auto_sbitmap tmp (num_nodes); - for (i = 0; i < g->num_nodes; i++) - g->nodes[i].aux.count = -1; - g->nodes[src].aux.count = 0; - - bitmap_clear (tmp); - bitmap_set_bit (tmp, src); - - while (change) - { - sbitmap_iterator sbi; - - change = 0; - bitmap_copy (workset, tmp); - bitmap_clear (tmp); - EXECUTE_IF_SET_IN_BITMAP (workset, 0, u, sbi) - { - ddg_node_ptr u_node = &g->nodes[u]; - - change |= update_dist_to_successors (u_node, nodes, tmp); - } - } - return g->nodes[dest].aux.count; -} - #endif /* INSN_SCHEDULING */ diff --git a/gcc/ddg.h b/gcc/ddg.h --- a/gcc/ddg.h +++ b/gcc/ddg.h @@ -64,6 +64,10 @@ struct ddg_node sbitmap successors; sbitmap predecessors; + /* Temporary array used for Floyd-Warshall algorithm to find + scc recurrence length. */ + int *max_dist; + /* For general use by algorithms manipulating the ddg. */ union { int count; @@ -95,11 +99,8 @@ struct ddg_edge ddg_edge_ptr next_in; ddg_edge_ptr next_out; - /* For general use by algorithms manipulating the ddg. */ - union { - int count; - void *info; - } aux; + /* Is true when edge is already in scc. */ + bool in_scc; }; /* This structure holds the Data Dependence Graph for a basic block. */ @@ -178,7 +179,6 @@ ddg_all_sccs_ptr create_ddg_all_sccs (ddg_ptr); void free_ddg_all_sccs (ddg_all_sccs_ptr); int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to); -int longest_simple_path (ddg_ptr, int from, int to, sbitmap via); bool autoinc_var_is_used_p (rtx_insn *, rtx_insn *);