public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] outerloop parallelization support
@ 2009-10-22 13:00 Razya Ladelsky
  2009-10-22 16:35 ` Sebastian Pop
  2009-10-23  6:25 ` Eric Botcazou
  0 siblings, 2 replies; 4+ messages in thread
From: Razya Ladelsky @ 2009-10-22 13:00 UTC (permalink / raw)
  To: gcc-patches, Richard Guenther; +Cc: Zdenek Dvorak, Alon Dayan, gcc-graphite

[-- Attachment #1: Type: text/plain, Size: 400 bytes --]

Hi,
Following this thread 
http://gcc.gnu.org/ml/gcc-patches/2009-10/msg00686.html
I made some changes to the patch as requested, and tested bootstrap,
the testsuite, and SPEC with autopar enabled (on PowerPC Linux).
The patch does not introduce any new regressions.
 If there are no other objections, I'll commit it later today.
I also added more testcases that include reductions.



Thanks,
Razya




[-- Attachment #2: outer-4.c --]
[-- Type: application/octet-stream, Size: 888 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

void abort (void);

int g_sum=0;
int x[500][500];

/* Sum all N*N elements of the global matrix x into g_sum.
   noinline keeps the loop nest intact for the parloops pass.  */
__attribute__((noinline))
void parloop (int N)
{
  int i, j;
  int sum;

  /* Double reduction is currently not supported, so the outer loop is
     not parallelized.  The inner reduction is detected, so the inner
     loop is parallelized.  */
  sum = 0;
  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      sum += x[i][j];

  g_sum = sum;
}

/* Driver: run the double reduction over the whole 500x500 matrix.  */
int main(void)
{
  parloop(500);

  return 0;
}


/* Check that the outer loop is NOT parallelized (double reduction is
   unsupported), while the inner loop is.  */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" }
 * } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #3: outer-5.c --]
[-- Type: application/octet-stream, Size: 1078 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

void abort (void);

int x[500][500];
int y[500];
int g_sum=0;

/* Set one matrix element.  noinline so the initialization loops in
   main remain opaque calls and cannot be folded away.  */
__attribute__((noinline))
void init (int i, int j)
{
  x[i][j]=1;
}

/* Compute each row sum into y[] and leave the last row sum in g_sum.
   noinline keeps the loop nest intact for the parloops pass.  */
__attribute__((noinline))
void parloop (int N)
{
  int i, j;
  int sum;

  /* sum is live across outer-loop iterations (an inner cycle), which
     is currently not supported, so the outer loop is not parallelized.
     The inner reduction is detected, so the inner loop is
     parallelized.  */
  for (i = 0; i < N; i++)
  {
    sum = 0;
    for (j = 0; j < N; j++)
      sum += x[i][j];
    y[i]=sum;
  }
  g_sum = sum;
}

/* Driver: initialize the matrix through the noinline helper, then run
   the row-sum loop nest.  */
int main(void)
{
  int i,j;
  for (i = 0; i < 500; i++) 
    for (j = 0; j < 500; j++)
      init(i, j);
  
  parloop(500);

  return 0;
}


/* Check that the outer loop is NOT parallelized (inner cycle is
   unsupported), while the inner loop is.  */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" }
 * } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #4: outer-6.c --]
[-- Type: application/octet-stream, Size: 983 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

void abort (void);

int x[500][500];
int y[500];
int g_sum=0;

/* Set one matrix element.  noinline so the initialization loops in
   main remain opaque calls and cannot be folded away.  */
__attribute__((noinline))
void init (int i, int j)
{
  x[i][j]=1;
}

/* Copy each row's last element into y[] and accumulate y[i] into
   g_sum.  noinline keeps the loop nest intact for the parloops
   pass.  */
__attribute__((noinline))
void parloop (int N)
{
  int i, j;
  int sum;

  /* sum is a reduction carried by the outer loop only, so the outer
     loop is parallelized (and the inner loop is not).  */
  sum=0;
  for (i = 0; i < N; i++)
  {
    for (j = 0; j < N; j++)
      y[i]=x[i][j];
    sum += y[i];
  }
  g_sum = sum;
}

/* Driver: initialize the matrix through the noinline helper, then run
   the outer-loop reduction.  */
int main(void)
{
  int i,j;
  for (i = 0; i < 500; i++) 
    for (j = 0; j < 500; j++)
      init(i, j);
  
  parloop(500);

  return 0;
}


/* Check that outer loop is parallelized.  */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" }
 * } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #5: outer-1.c --]
[-- Type: application/octet-stream, Size: 745 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

void abort (void);

/* Fill and then verify an N x N matrix; both perfectly nested loop
   nests are candidates for outer-loop parallelization.  */
void parloop (int N)
{
  int i, j;
  /* NOTE(review): 10000*10000 ints is ~400 MB of automatic storage;
     acceptable only because this is a dg-do compile test and parloop
     is never executed.  */
  int x[10000][10000];

  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      x[i][j] = i + j + 3;

  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      if (x[i][j] != i + j + 3)
	abort ();
}

/* Driver; never actually run (the test is compile-only).  */
int main(void)
{
  parloop(10000);

  return 0;
}


/* Check that outer loop is parallelized.  */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #6: outer-2.c --]
[-- Type: application/octet-stream, Size: 766 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

void abort (void);

/* Fill and then verify a 3-D array with triple loop nests; the
   outermost loop is a candidate for parallelization.  */
void parloop (int N)
{
  int i, j,ii;
  /* NOTE(review): x[i][j][ii] is indexed with i, j, ii all in
     [0, N) = [0, 400), but the middle dimension is only 10, so j
     overruns it.  Latent UB if ever executed; harmless here only
     because the test is dg-do compile.  Verify the intended
     dimensions.  */
  int x[400][10][400];

for (ii = 0; ii < N; ii++)
  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      x[i][j][ii] = ii+i + j + 3;

for (ii = 0; ii < N; ii++)
  for (i = 0; i < N;i++)
    for (j = 0; j < N; j++)
      if (x[i][j][ii] != ii+i + j + 3)
	abort ();
}

/* Driver; never actually run (the test is compile-only).  */
int main(void)
{
  parloop(400);

  return 0;
}

/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #7: outer-3.c --]
[-- Type: application/octet-stream, Size: 739 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

void abort (void);

/* Fill and then verify the lower triangle of an N x N matrix (the
   inner bound depends on the outer index); the outer loop is a
   candidate for parallelization.  */
void parloop (int N)
{
  int i, j;
  int x[500][500];

  for (i = 0; i < N; i++)
    for (j = 0; j < i; j++)
      x[i][j] = i + j + 3;

  for (i = 0; i < N; i++)
    for (j = 0; j < i; j++)
      if (x[i][j] != i + j + 3)
	abort ();
}

/* Driver: run the triangular fill/verify nests.  */
int main(void)
{
  parloop(500);

  return 0;
}


/* Check that outer loop is parallelized.  */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #8: ChangeLog.txt --]
[-- Type: text/plain, Size: 1333 bytes --]

2009-10-22  Razya Ladelsky  <razya@il.ibm.com>

       	* cfgloopmanip.c  (duplicate_subloops): Export.
    	* tree-parloops.c (loop_parallel_p): Dump if loop is innermost.
	(transform_to_exit_first_loop): Duplicate bbs starting from 
	header up to loop->latch instead of exit->src.
	Initialize control variable to the correct number of iterations.
	(gather_scalar_reductions): Do not register double reductions.
	(parallelize_loops): Dump which loop is tested.
	Indicate whether the parallelized loop is inner or not.
	Remove the innermost-loop requirement.
	* cfgloop.h (duplicate_subloops): Export.
	* tree-cfg.c (add_phi_args_after_redirect): New function.
	(gimple_duplicate_sese_tail): Remove the no-subloops constraint.
	Call duplicate_subloops.
	Update number of iterations at the exit condition.
	Don't redirect nexits always to the loop exit.
	Redirect copied edges from latch to the loop exit.
	* testsuite/libgomp.graphite/force-parallel-2.c: Adjust scan.
	* testsuite/gcc.dg/autopar/outer-1.c: New testcase.
	* testsuite/gcc.dg/autopar/outer-2.c: New testcase.
	* testsuite/gcc.dg/autopar/outer-3.c: New testcase.
	* testsuite/gcc.dg/autopar/outer-4.c: New testcase.
	* testsuite/gcc.dg/autopar/outer-5.c: New testcase.
	* testsuite/gcc.dg/autopar/outer-6.c: New testcase.

[-- Attachment #9: autopar_outer.txt --]
[-- Type: text/plain, Size: 14186 bytes --]

Index: libgomp/testsuite/libgomp.graphite/force-parallel-2.c
===================================================================
--- libgomp/testsuite/libgomp.graphite/force-parallel-2.c	(revision 153056)
+++ libgomp/testsuite/libgomp.graphite/force-parallel-2.c	(working copy)
@@ -23,7 +23,7 @@ int main(void)
 }
 
 /* Check that parallel code generation part make the right answer.  */
-/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 2 "graphite" } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
 /* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
 /* { dg-final { cleanup-tree-dump "parloops" } } */
Index: gcc/cfgloopmanip.c
===================================================================
--- gcc/cfgloopmanip.c	(revision 153056)
+++ gcc/cfgloopmanip.c	(working copy)
@@ -32,7 +32,6 @@ along with GCC; see the file COPYING3.  
 #include "output.h"
 #include "tree-flow.h"
 
-static void duplicate_subloops (struct loop *, struct loop *);
 static void copy_loops_to (struct loop **, int,
 			   struct loop *);
 static void loop_redirect_edge (edge, basic_block);
@@ -886,7 +885,7 @@ duplicate_loop (struct loop *loop, struc
 
 /* Copies structure of subloops of LOOP into TARGET loop, placing
    newly created loops into loop tree.  */
-static void
+void
 duplicate_subloops (struct loop *loop, struct loop *target)
 {
   struct loop *aloop, *cloop;
Index: gcc/tree-parloops.c
===================================================================
--- gcc/tree-parloops.c	(revision 153056)
+++ gcc/tree-parloops.c	(working copy)
@@ -255,7 +255,13 @@ loop_parallel_p (struct loop *loop)
   bool ret = false;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
+  {
+    fprintf (dump_file, "Considering loop %d\n", loop->num);
+    if (!loop->inner)
+      fprintf (dump_file, "loop is innermost\n");
+    else 
+      fprintf (dump_file, "loop NOT innermost\n");
+   }
 
   /* Check for problems with dependences.  If the loop can be reversed,
      the iterations are independent.  */
@@ -1289,8 +1295,9 @@ transform_to_exit_first_loop (struct loo
   bool ok;
   edge exit = single_dom_exit (loop), hpred;
   tree control, control_name, res, t;
-  gimple phi, nphi, cond_stmt, stmt;
+  gimple phi, nphi, cond_stmt, stmt, cond_nit;
   gimple_stmt_iterator gsi;
+  tree nit_1;
 
   split_block_after_labels (loop->header);
   orig_header = single_succ (loop->header);
@@ -1308,7 +1315,6 @@ transform_to_exit_first_loop (struct loo
       res = PHI_RESULT (phi);
       t = make_ssa_name (SSA_NAME_VAR (res), phi);
       SET_PHI_RESULT (phi, t);
-
       nphi = create_phi_node (res, orig_header);
       SSA_NAME_DEF_STMT (res) = nphi;
       add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
@@ -1320,10 +1326,11 @@ transform_to_exit_first_loop (struct loo
 	  control = t;
 	}
     }
-
   bbs = get_loop_body_in_dom_order (loop);
-  for (n = 0; bbs[n] != exit->src; n++)
+
+  for (n = 0; bbs[n] != loop->latch; n++)
     continue;
+  n--;
   nbbs = XNEWVEC (basic_block, n);
   ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
 				   bbs + 1, n, nbbs);
@@ -1358,7 +1365,6 @@ transform_to_exit_first_loop (struct loo
 	  struct reduction_info *red;
 
 	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
-
 	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
 	  if (red)
 	    {
@@ -1374,12 +1380,15 @@ transform_to_exit_first_loop (struct loo
     }
   gcc_assert (control_name != NULL_TREE);
 
-  /* Initialize the control variable to NIT.  */
+  /* Initialize the control variable to number of iterations 
+     according to the rhs of the exit condition.  */
   gsi = gsi_after_labels (ex_bb);
-  nit = force_gimple_operand_gsi (&gsi,
-				  fold_convert (TREE_TYPE (control_name), nit),
+  cond_nit = last_stmt (exit->src); 
+  nit_1 =  gimple_cond_rhs (cond_nit);
+  nit_1 = force_gimple_operand_gsi (&gsi,
+				  fold_convert (TREE_TYPE (control_name), nit_1),
 				  false, NULL_TREE, false, GSI_SAME_STMT);
-  stmt = gimple_build_assign (control_name, nit);
+  stmt = gimple_build_assign (control_name, nit_1);
   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
   SSA_NAME_DEF_STMT (control_name) = stmt;
 }
@@ -1740,7 +1749,7 @@ gather_scalar_reductions (loop_p loop, h
 	&& simple_loop_info)
 	{
            gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
-	   if (reduc_stmt)
+	   if (reduc_stmt && !double_reduc)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
     }
@@ -1890,15 +1899,32 @@ parallelize_loops (void)
   FOR_EACH_LOOP (li, loop, 0)
     {
       htab_empty (reduction_list);
-
-      /* If we use autopar in graphite pass, we use it's marked dependency
+      if (dump_file && (dump_flags & TDF_DETAILS))
+      {
+        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
+	if (loop->inner)
+	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
+	else
+	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
+      }
+      
+      /* If we use autopar in graphite pass, we use its marked dependency
       checking results.  */
       if (flag_loop_parallelize_all && !loop->can_be_parallel)
+      {
+        if (dump_file && (dump_flags & TDF_DETAILS))
+	   fprintf (dump_file, "loop is not parallel according to graphite\n");
 	continue;
+      }
 
-      /* FIXME: Only consider innermost loops with just one exit.  */
-      if (loop->inner || !single_dom_exit (loop))
+      if (!single_dom_exit (loop))
+      {
+       
+        if (dump_file && (dump_flags & TDF_DETAILS))
+	  fprintf (dump_file, "loop is !single_dom_exit\n");
+		
 	continue;
+      }
 
       if (/* And of course, the loop must be parallelizable.  */
 	  !can_duplicate_loop_p (loop)
@@ -1915,7 +1941,7 @@ parallelize_loops (void)
 	      /* Do not bother with loops in cold areas.  */
 	      || optimize_loop_nest_for_size_p (loop)))
 	continue;
-
+ 
       if (!try_get_loop_niter (loop, &niter_desc))
 	continue;
 
@@ -1926,6 +1952,14 @@ parallelize_loops (void)
 	continue;
 
       changed = true;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+      {
+        fprintf (dump_file, "parallelizing ");
+	if (loop->inner)
+	  fprintf (dump_file, "outer loop\n");
+	else
+	  fprintf (dump_file, "inner loop\n");
+      } 
       gen_parallel_loop (loop, reduction_list, 
 			 n_threads, &niter_desc);
       verify_flow_info ();
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h	(revision 153056)
+++ gcc/cfgloop.h	(working copy)
@@ -288,6 +288,7 @@ extern edge create_empty_if_region_on_ed
 extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree,
 					       tree *, tree *, struct loop *);
 extern struct loop * duplicate_loop (struct loop *, struct loop *);
+extern void duplicate_subloops (struct loop *, struct loop *);
 extern bool duplicate_loop_to_header_edge (struct loop *, edge, 
 					   unsigned, sbitmap, edge,
  					   VEC (edge, heap) **, int);
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 153056)
+++ gcc/tree-cfg.c	(working copy)
@@ -4850,6 +4850,31 @@ gimple_duplicate_bb (basic_block bb)
   return new_bb;
 }
 
+/* Add phi arguments to the phi nodes in E_COPY->dest according to 
+   the phi arguments coming from the equivalent edge at
+   the phi nodes of DEST.  */
+
+static void
+add_phi_args_after_redirect (edge e_copy, edge orig_e)
+{
+   gimple_stmt_iterator psi, psi_copy;
+   gimple phi, phi_copy;
+   tree def; 
+   
+    for (psi = gsi_start_phis (orig_e->dest),
+       psi_copy = gsi_start_phis (e_copy->dest);
+       !gsi_end_p (psi);
+       gsi_next (&psi), gsi_next (&psi_copy))
+    {
+
+      phi = gsi_stmt (psi);
+      phi_copy = gsi_stmt (psi_copy);
+      def = PHI_ARG_DEF_FROM_EDGE (phi, orig_e);
+      add_phi_arg (phi_copy, def, e_copy,
+                   gimple_phi_arg_location_from_edge (phi, orig_e));
+    }
+}
+
 /* Adds phi node arguments for edge E_COPY after basic block duplication.  */
 
 static void
@@ -5131,9 +5156,14 @@ gimple_duplicate_sese_tail (edge entry A
   int total_freq = 0, exit_freq = 0;
   gcov_type total_count = 0, exit_count = 0;
   edge exits[2], nexits[2], e;
-  gimple_stmt_iterator gsi;
+  gimple_stmt_iterator gsi,gsi1;
   gimple cond_stmt;
-  edge sorig, snew;
+  edge sorig, snew, orig_e;
+  basic_block exit_bb;
+  edge_iterator ei;
+  VEC (edge, heap) *redirect_edges;
+  basic_block iters_bb, orig_src;
+  tree new_rhs;
 
   gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
   exits[0] = exit;
@@ -5149,17 +5179,13 @@ gimple_duplicate_sese_tail (edge entry A
      it will work, but the resulting code will not be correct.  */
   for (i = 0; i < n_region; i++)
     {
-      /* We do not handle subloops, i.e. all the blocks must belong to the
-	 same loop.  */
-      if (region[i]->loop_father != orig_loop)
-	return false;
-
       if (region[i] == orig_loop->latch)
 	return false;
     }
 
   initialize_original_copy_tables ();
   set_loop_copy (orig_loop, loop);
+  duplicate_subloops (orig_loop, loop);
 
   if (!region_copy)
     {
@@ -5225,8 +5251,36 @@ gimple_duplicate_sese_tail (edge entry A
   cond_stmt = last_stmt (exit->src);
   gcc_assert (gimple_code (cond_stmt) == GIMPLE_COND);
   cond_stmt = gimple_copy (cond_stmt);
+ 
+ /* If the block consisting of the exit condition has the latch as
+    successor, then the body of the loop is executed before
+    the exit condition is tested.  In such a case, moving the
+    condition to the entry causes the loop to iterate one less
+    time (which is the wanted outcome, since we peel out the
+    last iteration).  If the body is executed after the
+    condition, moving the condition to the entry requires
+    decrementing one iteration.  */
+  if (exits[1]->dest == orig_loop->latch)
+    new_rhs = gimple_cond_rhs (cond_stmt);
+  else
+  {
+    new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
+			   gimple_cond_rhs (cond_stmt), 
+			   build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
+
+    if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
+      {
+	iters_bb = gimple_bb (SSA_NAME_DEF_STMT (gimple_cond_rhs (cond_stmt)));
+	for (gsi1 = gsi_start_bb (iters_bb); !gsi_end_p (gsi1); gsi_next (&gsi1))
+	  if (gsi_stmt (gsi1)==SSA_NAME_DEF_STMT (gimple_cond_rhs (cond_stmt)))
+	    break;
+		 
+	new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
+					    NULL_TREE,false,GSI_CONTINUE_LINKING);
+      }
+  }   
+  gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs)); 
   gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
-  gimple_cond_set_rhs (cond_stmt, unshare_expr (gimple_cond_rhs (cond_stmt)));
   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 
   sorig = single_succ_edge (switch_bb);
@@ -5238,25 +5292,87 @@ gimple_duplicate_sese_tail (edge entry A
 
   /* Add the PHI node arguments.  */
   add_phi_args_after_copy (region_copy, n_region, snew);
-
+  
   /* Get rid of now superfluous conditions and associated edges (and phi node
      arguments).  */
+  exit_bb = exit->dest;
+ 
   e = redirect_edge_and_branch (exits[0], exits[1]->dest);
   PENDING_STMT (e) = NULL;
-  e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
-  PENDING_STMT (e) = NULL;
-
+ 
+  /* If the block consisting of the exit condition has the latch as 
+     successor, then the body of the loop is executed before 
+     the exit condition is tested.
+     
+     { body  }
+     { cond  } (exit[0])  -> { latch }
+        |      
+	V (exit[1])
+	 
+     { exit_bb }
+     
+     
+     In such case, the equivalent copied edge nexits[1]
+     (for the peeled iteration) needs to be redirected to exit_bb.
+     
+     Otherwise, 
+     
+     { cond  } (exit[0])  -> { body }
+        |
+	V (exit[1])
+     
+     { exit_bb }
+    
+     
+     exit[0] is pointing to the body of the loop,
+     and the equivalent nexits[0] needs to be redirected to 
+     the copied body (of the peeled iteration).  */ 
+    
+  if (exits[1]->dest == orig_loop->latch)
+    e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
+  else
+    e = redirect_edge_and_branch (nexits[0], nexits[1]->dest);
+  PENDING_STMT (e) = NULL; 
+  
+  redirect_edges = VEC_alloc (edge, heap, 10);
+  
+  for (i = 0; i < n_region; i++)
+    region_copy[i]->flags |= BB_DUPLICATED;
+  
+  /* Iterate all incoming edges to latch.  All those coming from 
+     copied bbs will be redirected to exit_bb.  */
+  FOR_EACH_EDGE (e, ei, orig_loop->latch->preds)
+    {
+      if (e->src->flags & BB_DUPLICATED)
+        VEC_safe_push (edge, heap, redirect_edges, e);
+    }
+  
+  for (i = 0; i < n_region; i++)
+    region_copy[i]->flags &= ~BB_DUPLICATED;
+  
+  for (i = 0; VEC_iterate (edge, redirect_edges, i, e); ++i)
+    {
+      e = redirect_edge_and_branch (e, exit_bb);
+      PENDING_STMT (e) = NULL;
+      orig_src = get_bb_original (e->src);
+      orig_e = find_edge (orig_src, orig_loop->latch);
+      add_phi_args_after_redirect (e, orig_e);
+    }
+  
+  VEC_free (edge, heap, redirect_edges);
+  
+  
   /* Anything that is outside of the region, but was dominated by something
      inside needs to update dominance info.  */
   iterate_fix_dominators (CDI_DOMINATORS, doms, false);
   VEC_free (basic_block, heap, doms);
-
+  
   /* Update the SSA web.  */
   update_ssa (TODO_update_ssa);
-
+  
   if (free_region_copy)
     free (region_copy);
-
+  
   free_original_copy_tables ();
   return true;
 }
=

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-10-23  5:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-22 13:00 [PATCH] outerloop parallelization support Razya Ladelsky
2009-10-22 16:35 ` Sebastian Pop
2009-10-22 17:21   ` Razya Ladelsky
2009-10-23  6:25 ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).