public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch] Fix PR tree-optimization/49960 ,Fix self data dependence
@ 2011-10-17  7:21 Razya Ladelsky
  2011-10-17  8:53 ` Richard Guenther
  0 siblings, 1 reply; 13+ messages in thread
From: Razya Ladelsky @ 2011-10-17  7:21 UTC (permalink / raw)
  To: gcc-patches, Sebastian Pop

[-- Attachment #1: Type: text/plain, Size: 1423 bytes --]

This patch fixes the failures described in 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49960
It also fixes bzips when run with autopar enabled.

In both cases the self dependences are not handled correctly.
In the first case, a non-affine access is analyzed;
in the second, the distance vector is not calculated correctly (the 
distance vector considered for self dependences is always (0,0,...)).

As a result, the loops get wrongly parallelized.

The patch avoids the special handling of self dependences, and analyzes 
all dependences in the same way. Specific adjustments and support for 
the self dependence cases were made.

Bootstrap and testsuite pass successfully for ppc64-redhat-linux.

OK for trunk?
Thank you,
Razya


ChangeLog:

        PR tree-optimization/49960
        * tree-data-ref.c (compute_self_dependence): Remove.
             (initialize_data_dependence_relation): Add initializations. 
Remove compute_self_dependence.
             (add_other_self_distances): Add support for two dimensions if 
the second is zero.
             (compute_affine_dependence): Remove the !DDR_SELF_REFERENCE 
condition.
             (compute_all_dependences): Remove call to 
compute_self_dependence. Add call to compute_affine_dependence

testsuite/ChangeLog:

        PR tree-optimization/49960
        * gcc.dg/autopar/pr49660.c: New test.
           * gcc.dg/autopar/pr49660-1.c: New test.













 


[-- Attachment #2: pr49960_fix.txt --]
[-- Type: text/plain, Size: 4300 bytes --]

Index: tree-data-ref.c
===================================================================
--- tree-data-ref.c	(revision 179799)
+++ tree-data-ref.c	(working copy)
@@ -1346,7 +1346,6 @@ dr_may_alias_p (const struct data_reference *a, co
   return refs_may_alias_p (addr_a, addr_b);
 }
 
-static void compute_self_dependence (struct data_dependence_relation *);
 
 /* Initialize a data dependence relation between data accesses A and
    B.  NB_LOOPS is the number of loops surrounding the references: the
@@ -1386,13 +1385,30 @@ initialize_data_dependence_relation (struct data_r
      the data dependence tests, just initialize the ddr and return.  */
   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
     {
+      if (loop_nest
+	  && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
+						  DR_BASE_OBJECT (a)))
+	{
+	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+	  return res;
+	}
       DDR_AFFINE_P (res) = true;
       DDR_ARE_DEPENDENT (res) = NULL_TREE;
       DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
       DDR_LOOP_NEST (res) = loop_nest;
       DDR_INNER_LOOP (res) = 0;
       DDR_SELF_REFERENCE (res) = true;
-      compute_self_dependence (res);
+      for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
+	{
+	  struct subscript *subscript;
+	  
+	  subscript = XNEW (struct subscript);
+	  SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
+	  SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
+	  SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
+	  SUB_DISTANCE (subscript) = chrec_dont_know;
+	  VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
+	}
       return res;
     }
 
@@ -3119,8 +3135,11 @@ add_other_self_distances (struct data_dependence_r
 	    {
 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
 		{
-		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
-		  return;
+		  if (DDR_NUM_SUBSCRIPTS (ddr) != 2 || !integer_zerop (DR_ACCESS_FN (DDR_A (ddr), 1)))
+		    {
+		      DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
+		      return;
+		    }
 		}
 
 	      access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
@@ -4037,8 +4056,7 @@ compute_affine_dependence (struct data_dependence_
     }
 
   /* Analyze only when the dependence relation is not yet known.  */
-  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
-      && !DDR_SELF_REFERENCE (ddr))
+  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
     {
       dependence_stats.num_dependence_tests++;
 
@@ -4113,39 +4131,6 @@ compute_affine_dependence (struct data_dependence_
     fprintf (dump_file, ")\n");
 }
 
-/* This computes the dependence relation for the same data
-   reference into DDR.  */
-
-static void
-compute_self_dependence (struct data_dependence_relation *ddr)
-{
-  unsigned int i;
-  struct subscript *subscript;
-
-  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
-    return;
-
-  for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
-       i++)
-    {
-      if (SUB_CONFLICTS_IN_A (subscript))
-	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
-      if (SUB_CONFLICTS_IN_B (subscript))
-	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
-
-      /* The accessed index overlaps for each iteration.  */
-      SUB_CONFLICTS_IN_A (subscript)
-	= conflict_fn (1, affine_fn_cst (integer_zero_node));
-      SUB_CONFLICTS_IN_B (subscript)
-	= conflict_fn (1, affine_fn_cst (integer_zero_node));
-      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
-    }
-
-  /* The distance vector is the zero vector.  */
-  save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
-  save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
-}
-
 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
    the data references in DATAREFS, in the LOOP_NEST.  When
    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
@@ -4176,7 +4161,8 @@ compute_all_dependences (VEC (data_reference_p, he
       {
 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
 	VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
-	compute_self_dependence (ddr);
+	if (loop_nest)
+	  compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
       }
 }
 
=

[-- Attachment #3: pr49660-1.c --]
[-- Type: application/octet-stream, Size: 984 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

#include <stdlib.h>
#include <stdio.h>

/* Test driver for the modulo-indexed store case: fills a malloc'ed
   buffer from a doubly nested loop, then prints tmps[1] .. tmps[7].
   Always returns 0.  */
int main() 
{
  unsigned int x, y, idx, H = 1024, W = 1024;
  
  /* NOTE(review): the malloc result is not checked for NULL.  */
  int * tmps = (int *)malloc(H*W*sizeof(int));
  
  /* This loop nest must not be parallelized: output dependences exist
     between writes to 'tmps', because the subscript is wrapped modulo
     4096 and so different iterations store to the same element.
     For example, idx = 4097 (x = 4, y = 1) and idx = 8193 (x = 8, y = 1)
     both write tmps[1].  */
  
  for(x = 1; x < H; x++) 
    {
      for(y = 1; y < W; y++) 
	{
	  idx = x*W+y;
	  /* Because of the modulo, the subscript is not an affine
	     function of x and y.  */
	  tmps[idx % 4096] = idx;	  
	}
    }
  
  /* NOTE(review): x is unsigned but is printed with %d; the values
     here are small (1..7).  */
  for(x = 1; x < 8; x++)
    printf("tmps[%d]=%d\n", x, tmps[x]);
  
  return 0;
}
/* Check that no loop gets parallelized.  */

/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 0 "optimized" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

[-- Attachment #4: pr49960.c --]
[-- Type: application/octet-stream, Size: 1276 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */

#include <stdio.h>
#define MB 100
#define NA 450
#define MA 400

int T[MA][MB],A[MA][NA],B[MB][NA];
/* Accumulate into the global array T: for every k in [3, NA_1),
   i in [3, MA_1) and j in [3, MB_1), add 2 + A[i][k] * B[j][k] to
   T[i][j].  MA_1, NA_1 and MB_1 are the exclusive upper bounds of the
   i, k and j loops respectively.  */
void MRTRBR(int MA_1, int NA_1, int MB_1)
{
  int i,j, t,k;

  /* The outermost loop is not parallel because, for different values
     of k, there is a write-write dependence on T[i][j]: the subscripts
     of T do not involve k, so every k iteration updates the same
     elements.  */
  
  /* The two inner loops don't get parallelized due to their low number
     of iterations.  */

  for (k = 3; k < NA_1; k++)
    for (i = 3; i < MA_1; i++)
      for (j = 3; j < MB_1; j++)
	{
	  /* Read-modify-write of T[i][j] through the temporary t.  */
	  t = T[i][j];
	  T[i][j] = t+2+A[i][k]*B[j][k];
	}
}
/* Test driver: initializes T, runs MRTRBR over the full array bounds,
   and prints the last ten columns of the last row of T.
   NOTE(review): 'void main' is not a standard signature for main.  */
void main ()
{
  int j,i;
  
  /* Seed T[i][j] with max (i, j) for i in [3, MA), j in [3, MB).  */
  for (i = 3; i < MA; i++)
    for (j = 3; j < MB; j++)
      T[i][j] = (i>j?i:j);
  
  MRTRBR (MA,NA,MB);
  
  /* The outer loop runs exactly once (i = MA-1); j covers MB-10 .. MB-1.  */
  for (i = MA-1; i < MA; i++)
    for (j = MB-10; j < MB; j++)
      printf ("i %d j %d T[i][j] = %d\n",i,j,T[i][j]);
}


/* Check that the outermost loop doesn't get parallelized (thus no loop gets parallelized).  */

/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 0 "optimized" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2011-11-24 12:33 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-17  7:21 [patch] Fix PR tree-optimization/49960 ,Fix self data dependence Razya Ladelsky
2011-10-17  8:53 ` Richard Guenther
     [not found]   ` <OF746BCB18.CF82809F-ONC225792E.0051CE3F-C225792E.00564975@il.ibm.com>
     [not found]     ` <CAFiYyc2ykFPCW8A8vW=f5UbNa7zFRQObwL13D9ioXjCd_em9pQ@mail.gmail.com>
2011-10-21  9:26       ` Fwd: " Richard Guenther
2011-11-15 15:13         ` [PATCH, take 2] " Razya Ladelsky
2011-11-15 18:54           ` Richard Guenther
2011-11-21 13:32           ` Jakub Jelinek
2011-11-21 14:24             ` Razya Ladelsky
2011-11-21 14:54               ` Jakub Jelinek
2011-11-21 15:43                 ` Razya Ladelsky
2011-11-21 16:13                   ` Jakub Jelinek
2011-11-21 17:25                     ` Razya Ladelsky
2011-11-21 18:26                       ` Jakub Jelinek
2011-11-24 15:48                         ` Razya Ladelsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).