public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH GCC][09/13]Simply cost model merges partitions with the same references
@ 2017-06-12 17:03 Bin Cheng
  2017-06-14 13:54 ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Bin Cheng @ 2017-06-12 17:03 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 696 bytes --]

Hi,
Current primitive cost model merges partitions with data references sharing the same
base address.  I believe it's designed to maximize data reuse in distribution, but
that should be done by dedicated data reusing algorithm.  At this stage of merging,
we should be conservative and only merge partitions with the same references.
Bootstrap and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin
2017-06-07  Bin Cheng  <bin.cheng@arm.com>

	* tree-loop-distribution.c (ref_base_address): Delete.
	(similar_memory_accesses): Rename ...
	(share_memory_accesses): ... to this.  Check if partitions access
	the same memory reference.
	(distribute_loop): Call share_memory_accesses.

[-- Attachment #2: 0009-share-memory-access-20170607.txt --]
[-- Type: text/plain, Size: 5624 bytes --]

From ce94bbb382eacb8d170a8349415b7d2c88528d74 Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Fri, 9 Jun 2017 12:41:36 +0100
Subject: [PATCH 09/14] share-memory-access-20170607.txt

---
 gcc/tree-loop-distribution.c | 126 ++++++++++++++++++++++++++++++-------------
 1 file changed, 88 insertions(+), 38 deletions(-)

diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 9a0e101..90dc8ea 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1276,30 +1276,16 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition)
     }
 }
 
-/* For a data reference REF, return the declaration of its base
-   address or NULL_TREE if the base is not determined.  */
-
-static tree
-ref_base_address (data_reference_p dr)
-{
-  tree base_address = DR_BASE_ADDRESS (dr);
-  if (base_address
-      && TREE_CODE (base_address) == ADDR_EXPR)
-    return TREE_OPERAND (base_address, 0);
-
-  return base_address;
-}
-
-/* Returns true when PARTITION1 and PARTITION2 have similar memory
-   accesses in RDG.  */
+/* Returns true when PARTITION1 and PARTITION2 access the same memory
+   object in RDG.  */
 
 static bool
-similar_memory_accesses (struct graph *rdg, partition *partition1,
-			 partition *partition2)
+share_memory_accesses (struct graph *rdg,
+		       partition *partition1, partition *partition2)
 {
-  unsigned i, j, k, l;
+  unsigned i, j;
   bitmap_iterator bi, bj;
-  data_reference_p ref1, ref2;
+  data_reference_p dr1, dr2;
 
   /* First check whether in the intersection of the two partitions are
      any loads or stores.  Common loads are the situation that happens
@@ -1309,24 +1295,88 @@ similar_memory_accesses (struct graph *rdg, partition *partition1,
 	|| RDG_MEM_READS_STMT (rdg, i))
       return true;
 
-  /* Then check all data-references against each other.  */
-  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
-    if (RDG_MEM_WRITE_STMT (rdg, i)
-	|| RDG_MEM_READS_STMT (rdg, i))
-      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
-	if (RDG_MEM_WRITE_STMT (rdg, j)
-	    || RDG_MEM_READS_STMT (rdg, j))
-	  {
-	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
-	      {
-		tree base1 = ref_base_address (ref1);
-		if (base1)
-		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
-		    if (base1 == ref_base_address (ref2))
-		      return true;
-	      }
-	  }
+  /* Then check whether the two partitions access the same memory object.  */
+  EXECUTE_IF_SET_IN_BITMAP (partition1->reads, 0, i, bi)
+    {
+      gcc_assert (i < datarefs_vec->length ());
+      dr1 = (*datarefs_vec)[i];
+
+      if (!DR_BASE_ADDRESS (dr1)
+	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
+	continue;
+
+      EXECUTE_IF_SET_IN_BITMAP (partition2->reads, 0, j, bj)
+	{
+	  gcc_assert (j < datarefs_vec->length ());
+	  dr2 = (*datarefs_vec)[j];
+
+	  if (!DR_BASE_ADDRESS (dr2)
+	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
+	    continue;
 
+	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+	    return true;
+	}
+      EXECUTE_IF_SET_IN_BITMAP (partition2->writes, 0, j, bj)
+	{
+	  gcc_assert (j < datarefs_vec->length ());
+	  dr2 = (*datarefs_vec)[j];
+
+	  if (!DR_BASE_ADDRESS (dr2)
+	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
+	    continue;
+
+	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+	    return true;
+	}
+    }
+
+  EXECUTE_IF_SET_IN_BITMAP (partition1->writes, 0, i, bi)
+    {
+      gcc_assert (i < datarefs_vec->length ());
+      dr1 = (*datarefs_vec)[i];
+
+      if (!DR_BASE_ADDRESS (dr1)
+	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
+	continue;
+
+      EXECUTE_IF_SET_IN_BITMAP (partition2->reads, 0, j, bj)
+	{
+	  gcc_assert (j < datarefs_vec->length ());
+	  dr2 = (*datarefs_vec)[j];
+
+	  if (!DR_BASE_ADDRESS (dr2)
+	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
+	    continue;
+
+	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+	    return true;
+	}
+      EXECUTE_IF_SET_IN_BITMAP (partition2->writes, 0, j, bj)
+	{
+	  gcc_assert (j < datarefs_vec->length ());
+	  dr2 = (*datarefs_vec)[j];
+
+	  if (!DR_BASE_ADDRESS (dr2)
+	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
+	    continue;
+
+	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+	    return true;
+	}
+    }
   return false;
 }
 
@@ -1666,7 +1716,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
       for (int j = i + 1;
 	   partitions.iterate (j, &partition); ++j)
 	{
-	  if (similar_memory_accesses (rdg, into, partition))
+	  if (share_memory_accesses (rdg, into, partition))
 	    {
 	      partition_merge_into (into, partition, FUSE_SHARE_REF);
 	      partitions.unordered_remove (j);
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-12 17:03 [PATCH GCC][09/13]Simply cost model merges partitions with the same references Bin Cheng
@ 2017-06-14 13:54 ` Richard Biener
  2017-06-14 14:12   ` Bin.Cheng
  2017-06-19 13:40   ` Bin.Cheng
  0 siblings, 2 replies; 8+ messages in thread
From: Richard Biener @ 2017-06-14 13:54 UTC (permalink / raw)
  To: Bin Cheng; +Cc: gcc-patches, nd

On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,
> Current primitive cost model merges partitions with data references sharing the same
> base address.  I believe it's designed to maximize data reuse in distribution, but
> that should be done by dedicated data reusing algorithm.  At this stage of merging,
> we should be conservative and only merge partitions with the same references.
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

Well, I'd say "conservative" is merging more, not less.  For example
splitting a[i+1] from a[i]
would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
merging.  Maybe
DR_INIT within a cacheline or so.

How many extra distributions in say SPEC do you get from this change alone?

It shows also that having partition->reads_and_writes would be nice
...  the code duplication
really looks awkward.  Eventually a EXECUTE_IF_IOR_IN_BITMAP would help as well.

Well.  Can you at least factor out the core DR comparison into a function?

Otherwise ok.

Thanks,
Richard.

> Thanks,
> bin
> 2017-06-07  Bin Cheng  <bin.cheng@arm.com>
>
>         * tree-loop-distribution.c (ref_base_address): Delete.
>         (similar_memory_accesses): Rename ...
>         (share_memory_accesses): ... to this.  Check if partitions access
>         the same memory reference.
>         (distribute_loop): Call share_memory_accesses.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-14 13:54 ` Richard Biener
@ 2017-06-14 14:12   ` Bin.Cheng
  2017-06-19 13:40   ` Bin.Cheng
  1 sibling, 0 replies; 8+ messages in thread
From: Bin.Cheng @ 2017-06-14 14:12 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Wed, Jun 14, 2017 at 2:54 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>> Hi,
>> Current primitive cost model merges partitions with data references sharing the same
>> base address.  I believe it's designed to maximize data reuse in distribution, but
>> that should be done by dedicated data reusing algorithm.  At this stage of merging,
>> we should be conservative and only merge partitions with the same references.
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> Well, I'd say "conservative" is merging more, not less.  For example
> splitting a[i+1] from a[i]
> would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
Yes, cost model is missing here.  Given below loop:
{
  a[i] = x;
  a[i+1] = y;
}
It can't be vectorized of this form.  Simply merging with-cache-line
DR_INIT would partially corrupt distribution in hmmer, which makes the
whole work not that useful.  We need to estimate benefit of vectorizer
and cost of data locality somehow.  One idea is to pass cost (for
runtime alias check and data locality) in IFN_LOOP_DIST_ALIAS and let
vectorizer check the cost in unified way.
As for a[i]/a[i+1] case, it would be great if we can reorder
statements in fusion.  The net effect would be switching the two data
references.

> merging.  Maybe
> DR_INIT within a cacheline or so.
>
> How many extra distributions in say SPEC do you get from this change alone?
I will try to collect data.

>
> It shows also that having partition->reads_and_writes would be nice
> ...  the code duplication
> really looks awkward.  Eventually a EXECUTE_IF_IOR_IN_BITMAP would help as well.
>
> Well.  Can you at least factor out the core DR comparison into a function?
Will try to make simplification.

Thanks,
bin
>
> Otherwise ok.
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>> 2017-06-07  Bin Cheng  <bin.cheng@arm.com>
>>
>>         * tree-loop-distribution.c (ref_base_address): Delete.
>>         (similar_memory_accesses): Rename ...
>>         (share_memory_accesses): ... to this.  Check if partitions access
>>         the same memory reference.
>>         (distribute_loop): Call share_memory_accesses.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-14 13:54 ` Richard Biener
  2017-06-14 14:12   ` Bin.Cheng
@ 2017-06-19 13:40   ` Bin.Cheng
  2017-06-19 15:20     ` Richard Biener
  1 sibling, 1 reply; 8+ messages in thread
From: Bin.Cheng @ 2017-06-19 13:40 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1545 bytes --]

On Wed, Jun 14, 2017 at 2:54 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>> Hi,
>> Current primitive cost model merges partitions with data references sharing the same
>> base address.  I believe it's designed to maximize data reuse in distribution, but
>> that should be done by dedicated data reusing algorithm.  At this stage of merging,
>> we should be conservative and only merge partitions with the same references.
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> Well, I'd say "conservative" is merging more, not less.  For example
> splitting a[i+1] from a[i]
> would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
> merging.  Maybe
> DR_INIT within a cacheline or so.
>
> How many extra distributions in say SPEC do you get from this change alone?
Hi,
I collected data for spec2006 only with/without this patch.  I am a
bit surprised that it doesn't change the number of distributed loops.
>
> It shows also that having partition->reads_and_writes would be nice
> ...  the code duplication
Yeah, I merged read/write data references in previous patch, now this
duplication is gone.  Update patch attached.  Is it OK?

Thanks.
bin
2017-06-07  Bin Cheng  <bin.cheng@arm.com>

    * tree-loop-distribution.c (ref_base_address): Delete.
    (similar_memory_accesses): Rename ...
    (share_memory_accesses): ... to this.  Check if partitions access
    the same memory reference.
    (distribute_loop): Call share_memory_accesses.

[-- Attachment #2: 0008-share-memory-access-20170608.txt.patch --]
[-- Type: text/x-patch, Size: 3723 bytes --]

From 98af3ab3b309ac7e7b2fb3c6b55eb19a9004225c Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Fri, 9 Jun 2017 12:41:36 +0100
Subject: [PATCH 08/13] share-memory-access-20170608.txt

---
 gcc/tree-loop-distribution.c | 71 ++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 03bb735..2e5a828 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1268,30 +1268,16 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition)
     }
 }
 
-/* For a data reference REF, return the declaration of its base
-   address or NULL_TREE if the base is not determined.  */
-
-static tree
-ref_base_address (data_reference_p dr)
-{
-  tree base_address = DR_BASE_ADDRESS (dr);
-  if (base_address
-      && TREE_CODE (base_address) == ADDR_EXPR)
-    return TREE_OPERAND (base_address, 0);
-
-  return base_address;
-}
-
-/* Returns true when PARTITION1 and PARTITION2 have similar memory
-   accesses in RDG.  */
+/* Returns true when PARTITION1 and PARTITION2 access the same memory
+   object in RDG.  */
 
 static bool
-similar_memory_accesses (struct graph *rdg, partition *partition1,
-			 partition *partition2)
+share_memory_accesses (struct graph *rdg,
+		       partition *partition1, partition *partition2)
 {
-  unsigned i, j, k, l;
+  unsigned i, j;
   bitmap_iterator bi, bj;
-  data_reference_p ref1, ref2;
+  data_reference_p dr1, dr2;
 
   /* First check whether in the intersection of the two partitions are
      any loads or stores.  Common loads are the situation that happens
@@ -1301,23 +1287,32 @@ similar_memory_accesses (struct graph *rdg, partition *partition1,
 	|| RDG_MEM_READS_STMT (rdg, i))
       return true;
 
-  /* Then check all data-references against each other.  */
-  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
-    if (RDG_MEM_WRITE_STMT (rdg, i)
-	|| RDG_MEM_READS_STMT (rdg, i))
-      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
-	if (RDG_MEM_WRITE_STMT (rdg, j)
-	    || RDG_MEM_READS_STMT (rdg, j))
-	  {
-	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
-	      {
-		tree base1 = ref_base_address (ref1);
-		if (base1)
-		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
-		    if (base1 == ref_base_address (ref2))
-		      return true;
-	      }
-	  }
+  /* Then check whether the two partitions access the same memory object.  */
+  EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
+    {
+      gcc_assert (i < datarefs_vec.length ());
+      dr1 = datarefs_vec[i];
+
+      if (!DR_BASE_ADDRESS (dr1)
+	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
+	continue;
+
+      EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
+	{
+	  gcc_assert (j < datarefs_vec.length ());
+	  dr2 = datarefs_vec[j];
+
+	  if (!DR_BASE_ADDRESS (dr2)
+	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
+	    continue;
+
+	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+	    return true;
+	}
+    }
 
   return false;
 }
@@ -1654,7 +1649,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
       for (int j = i + 1;
 	   partitions.iterate (j, &partition); ++j)
 	{
-	  if (similar_memory_accesses (rdg, into, partition))
+	  if (share_memory_accesses (rdg, into, partition))
 	    {
 	      partition_merge_into (into, partition, FUSE_SHARE_REF);
 	      partitions.unordered_remove (j);
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-19 13:40   ` Bin.Cheng
@ 2017-06-19 15:20     ` Richard Biener
  2017-06-23 10:19       ` Bin.Cheng
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2017-06-19 15:20 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: gcc-patches

On Mon, Jun 19, 2017 at 3:40 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
> On Wed, Jun 14, 2017 at 2:54 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>>> Hi,
>>> Current primitive cost model merges partitions with data references sharing the same
>>> base address.  I believe it's designed to maximize data reuse in distribution, but
>>> that should be done by dedicated data reusing algorithm.  At this stage of merging,
>>> we should be conservative and only merge partitions with the same references.
>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>
>> Well, I'd say "conservative" is merging more, not less.  For example
>> splitting a[i+1] from a[i]
>> would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
>> merging.  Maybe
>> DR_INIT within a cacheline or so.
>>
>> How many extra distributions in say SPEC do you get from this change alone?
> Hi,
> I collected data for spec2006 only with/without this patch.  I am a
> bit surprised that it doesn't change the number of distributed loops.
>>
>> It shows also that having partition->reads_and_writes would be nice
>> ...  the code duplication
> Yeah, I merged read/write data references in previous patch, now this
> duplication is gone.  Update patch attached.  Is it OK?

+      gcc_assert (i < datarefs_vec.length ());
+      dr1 = datarefs_vec[i];

these asserts are superfluous -- vec::operator[] does them as well.

Ok if you remove them.

Richard.

> Thanks.
> bin
> 2017-06-07  Bin Cheng  <bin.cheng@arm.com>
>
>     * tree-loop-distribution.c (ref_base_address): Delete.
>     (similar_memory_accesses): Rename ...
>     (share_memory_accesses): ... to this.  Check if partitions access
>     the same memory reference.
>     (distribute_loop): Call share_memory_accesses.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-19 15:20     ` Richard Biener
@ 2017-06-23 10:19       ` Bin.Cheng
  2017-06-23 10:48         ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Bin.Cheng @ 2017-06-23 10:19 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2900 bytes --]

On Mon, Jun 19, 2017 at 4:20 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, Jun 19, 2017 at 3:40 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>> On Wed, Jun 14, 2017 at 2:54 PM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>>>> Hi,
>>>> Current primitive cost model merges partitions with data references sharing the same
>>>> base address.  I believe it's designed to maximize data reuse in distribution, but
>>>> that should be done by dedicated data reusing algorithm.  At this stage of merging,
>>>> we should be conservative and only merge partitions with the same references.
>>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>
>>> Well, I'd say "conservative" is merging more, not less.  For example
>>> splitting a[i+1] from a[i]
>>> would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
>>> merging.  Maybe
>>> DR_INIT within a cacheline or so.
>>>
>>> How many extra distributions in say SPEC do you get from this change alone?
>> Hi,
>> I collected data for spec2006 only with/without this patch.  I am a
>> bit surprised that it doesn't change the number of distributed loops.
>>>
>>> It shows also that having partition->reads_and_writes would be nice
>>> ...  the code duplication
>> Yeah, I merged read/write data references in previous patch, now this
>> duplication is gone.  Update patch attached.  Is it OK?
>
> +      gcc_assert (i < datarefs_vec.length ());
> +      dr1 = datarefs_vec[i];
>
> these asserts are superfluous -- vec::operator[] does them as well.
>
> Ok if you remove them.
Done.
I realized I made mistakes when measuring the impact of this patch.
This patch only apparently causes failure of
gcc.dg/tree-ssa/ldist-6.c, so here is the updated patch.  I also
collected the number of distributed loops in spec2k6 as below:
     trunk:  5882
     only this patch: 7130
     whole patch series: 5237
So the conclusion is, this patch does aggressive distribution like
ldist-6.c, which means worse data-locality.  The following patch does
more fusion which mitigates impact of this patch and results in
conservative distribution overall.   But as we lack of data locality
cost model, ldist-6.c remains failed even after applying whole patch
series.  Hmm, a cache-sensitive cost model is need for several passes
now, distribution, prefetch and (possible) interchange.
Richard, do you have second comment based on the new data?

Thanks,
bin
2017-06-20  Bin Cheng  <bin.cheng@arm.com>

    * tree-loop-distribution.c (ref_base_address): Delete.
    (similar_memory_accesses): Rename ...
    (share_memory_accesses): ... to this.  Check if partitions access
    the same memory reference.
    (distribute_loop): Call share_memory_accesses.

gcc/testsuite/ChangeLog
2017-06-20  Bin Cheng  <bin.cheng@arm.com>

    * gcc.dg/tree-ssa/ldist-6.c: XFAIL.

[-- Attachment #2: 0008-share-memory-access-20170608.txt.patch --]
[-- Type: text/x-patch, Size: 4185 bytes --]

From a002d0a88ab9e981d9c57bd8f1203072290623ad Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Fri, 9 Jun 2017 12:41:36 +0100
Subject: [PATCH 08/13] share-memory-access-20170608.txt

---
 gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c |  2 +-
 gcc/tree-loop-distribution.c            | 69 +++++++++++++++------------------
 2 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
index 8eb1c62..e0a68d8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
@@ -34,4 +34,4 @@ int loop1 (int k)
   return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
 }
 
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" { xfail *-*-* } } } */
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index eafd119..119863f 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1268,30 +1268,16 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition)
     }
 }
 
-/* For a data reference REF, return the declaration of its base
-   address or NULL_TREE if the base is not determined.  */
-
-static tree
-ref_base_address (data_reference_p dr)
-{
-  tree base_address = DR_BASE_ADDRESS (dr);
-  if (base_address
-      && TREE_CODE (base_address) == ADDR_EXPR)
-    return TREE_OPERAND (base_address, 0);
-
-  return base_address;
-}
-
-/* Returns true when PARTITION1 and PARTITION2 have similar memory
-   accesses in RDG.  */
+/* Returns true when PARTITION1 and PARTITION2 access the same memory
+   object in RDG.  */
 
 static bool
-similar_memory_accesses (struct graph *rdg, partition *partition1,
-			 partition *partition2)
+share_memory_accesses (struct graph *rdg,
+		       partition *partition1, partition *partition2)
 {
-  unsigned i, j, k, l;
+  unsigned i, j;
   bitmap_iterator bi, bj;
-  data_reference_p ref1, ref2;
+  data_reference_p dr1, dr2;
 
   /* First check whether in the intersection of the two partitions are
      any loads or stores.  Common loads are the situation that happens
@@ -1301,23 +1287,30 @@ similar_memory_accesses (struct graph *rdg, partition *partition1,
 	|| RDG_MEM_READS_STMT (rdg, i))
       return true;
 
-  /* Then check all data-references against each other.  */
-  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
-    if (RDG_MEM_WRITE_STMT (rdg, i)
-	|| RDG_MEM_READS_STMT (rdg, i))
-      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
-	if (RDG_MEM_WRITE_STMT (rdg, j)
-	    || RDG_MEM_READS_STMT (rdg, j))
-	  {
-	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
-	      {
-		tree base1 = ref_base_address (ref1);
-		if (base1)
-		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
-		    if (base1 == ref_base_address (ref2))
-		      return true;
-	      }
-	  }
+  /* Then check whether the two partitions access the same memory object.  */
+  EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
+    {
+      dr1 = datarefs_vec[i];
+
+      if (!DR_BASE_ADDRESS (dr1)
+	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
+	continue;
+
+      EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
+	{
+	  dr2 = datarefs_vec[j];
+
+	  if (!DR_BASE_ADDRESS (dr2)
+	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
+	    continue;
+
+	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+	    return true;
+	}
+    }
 
   return false;
 }
@@ -1654,7 +1647,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
       for (int j = i + 1;
 	   partitions.iterate (j, &partition); ++j)
 	{
-	  if (similar_memory_accesses (rdg, into, partition))
+	  if (share_memory_accesses (rdg, into, partition))
 	    {
 	      partition_merge_into (into, partition, FUSE_SHARE_REF);
 	      partitions.unordered_remove (j);
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-23 10:19       ` Bin.Cheng
@ 2017-06-23 10:48         ` Richard Biener
  2017-06-23 10:51           ` Bin.Cheng
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2017-06-23 10:48 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: gcc-patches

On Fri, Jun 23, 2017 at 12:19 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
> On Mon, Jun 19, 2017 at 4:20 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Mon, Jun 19, 2017 at 3:40 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>>> On Wed, Jun 14, 2017 at 2:54 PM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>>>>> Hi,
>>>>> Current primitive cost model merges partitions with data references sharing the same
>>>>> base address.  I believe it's designed to maximize data reuse in distribution, but
>>>>> that should be done by dedicated data reusing algorithm.  At this stage of merging,
>>>>> we should be conservative and only merge partitions with the same references.
>>>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>>
>>>> Well, I'd say "conservative" is merging more, not less.  For example
>>>> splitting a[i+1] from a[i]
>>>> would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
>>>> merging.  Maybe
>>>> DR_INIT within a cacheline or so.
>>>>
>>>> How many extra distributions in say SPEC do you get from this change alone?
>>> Hi,
>>> I collected data for spec2006 only with/without this patch.  I am a
>>> bit surprised that it doesn't change the number of distributed loops.
>>>>
>>>> It shows also that having partition->reads_and_writes would be nice
>>>> ...  the code duplication
>>> Yeah, I merged read/write data references in previous patch, now this
>>> duplication is gone.  Update patch attached.  Is it OK?
>>
>> +      gcc_assert (i < datarefs_vec.length ());
>> +      dr1 = datarefs_vec[i];
>>
>> these asserts are superfluous -- vec::operator[] does them as well.
>>
>> Ok if you remove them.
> Done.
> I realized I made mistakes when measuring the impact of this patch.
> This patch only apparently causes failure of
> gcc.dg/tree-ssa/ldist-6.c, so here is the updated patch.  I also
> collected the number of distributed loops in spec2k6 as below:
>      trunk:  5882
>      only this patch: 7130
>      whole patch series: 5237
> So the conclusion is, this patch does aggressive distribution like
> ldist-6.c, which means worse data-locality.  The following patch does
> more fusion which mitigates impact of this patch and results in
> conservative distribution overall.

What changed in the patch?  Did you attach the correct one?

I'm not sure ldist-6.c is a "valid" testcase but I didn't try to see
where it was reduced from.

>   But as we lack of data locality
> cost model, ldist-6.c remains failed even after applying whole patch
> series.  Hmm, a cache-sensitive cost model is need for several passes
> now, distribution, prefetch and (possible) interchange.
> Richard, do you have second comment based on the new data?

I expected the "only this patch" result somewhat, as said, I'd have
allowed "related" references to fuse by not requiring equal
DR_INIT for example.

I suggest to go forward with it in its current form.  We can tweak the
cost model later.

Thanks,
Richard.

> Thanks,
> bin
> 2017-06-20  Bin Cheng  <bin.cheng@arm.com>
>
>     * tree-loop-distribution.c (ref_base_address): Delete.
>     (similar_memory_accesses): Rename ...
>     (share_memory_accesses): ... to this.  Check if partitions access
>     the same memory reference.
>     (distribute_loop): Call share_memory_accesses.
>
> gcc/testsuite/ChangeLog
> 2017-06-20  Bin Cheng  <bin.cheng@arm.com>
>
>     * gcc.dg/tree-ssa/ldist-6.c: XFAIL.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH GCC][09/13]Simply cost model merges partitions with the same references
  2017-06-23 10:48         ` Richard Biener
@ 2017-06-23 10:51           ` Bin.Cheng
  0 siblings, 0 replies; 8+ messages in thread
From: Bin.Cheng @ 2017-06-23 10:51 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Fri, Jun 23, 2017 at 11:48 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Fri, Jun 23, 2017 at 12:19 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>> On Mon, Jun 19, 2017 at 4:20 PM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Mon, Jun 19, 2017 at 3:40 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>>>> On Wed, Jun 14, 2017 at 2:54 PM, Richard Biener
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>>>>>> Hi,
>>>>>> Current primitive cost model merges partitions with data references sharing the same
>>>>>> base address.  I believe it's designed to maximize data reuse in distribution, but
>>>>>> that should be done by dedicated data reusing algorithm.  At this stage of merging,
>>>>>> we should be conservative and only merge partitions with the same references.
>>>>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>>>
>>>>> Well, I'd say "conservative" is merging more, not less.  For example
>>>>> splitting a[i+1] from a[i]
>>>>> would be bad(?), so I'd see to allow unequal DR_INIT as "equal" for
>>>>> merging.  Maybe
>>>>> DR_INIT within a cacheline or so.
>>>>>
>>>>> How many extra distributions in say SPEC do you get from this change alone?
>>>> Hi,
>>>> I collected data for spec2006 only with/without this patch.  I am a
>>>> bit surprised that it doesn't change the number of distributed loops.
>>>>>
>>>>> It shows also that having partition->reads_and_writes would be nice
>>>>> ...  the code duplication
>>>> Yeah, I merged read/write data references in previous patch, now this
>>>> duplication is gone.  Update patch attached.  Is it OK?
>>>
>>> +      gcc_assert (i < datarefs_vec.length ());
>>> +      dr1 = datarefs_vec[i];
>>>
>>> these asserts are superfluous -- vec::operator[] does them as well.
>>>
>>> Ok if you remove them.
>> Done.
>> I realized I made mistakes when measuring the impact of this patch.
>> This patch only apparently causes failure of
>> gcc.dg/tree-ssa/ldist-6.c, so here is the updated patch.  I also
>> collected the number of distributed loops in spec2k6 as below:
>>      trunk:  5882
>>      only this patch: 7130
>>      whole patch series: 5237
>> So the conclusion is, this patch does aggressive distribution like
>> ldist-6.c, which means worse data-locality.  The following patch does
>> more fusion which mitigates impact of this patch and results in
>> conservative distribution overall.
>
> What changed in the patch?  Did you attach the correct one?
No code changed in this one.  I just added test case change which
can't be resolved by following patches.  ldist-6.c slipped away
because of a bug in patch:

[11/13]Annotate partition by its parallelism execution type

>
> I'm not sure ldist-6.c is a "valid" testcase but I didn't try to see
> where it was reduced from.
>
>>   But as we lack of data locality
>> cost model, ldist-6.c remains failed even after applying whole patch
>> series.  Hmm, a cache-sensitive cost model is need for several passes
>> now, distribution, prefetch and (possible) interchange.
>> Richard, do you have second comment based on the new data?
>
> I expected the "only this patch" result somewhat, as said, I'd have
> allowed "related" references to fuse by not requiring equal
> DR_INIT for example.
>
> I suggest to go forward with it in its current form.  We can tweak the
> cost model later.
Yeah.
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>> 2017-06-20  Bin Cheng  <bin.cheng@arm.com>
>>
>>     * tree-loop-distribution.c (ref_base_address): Delete.
>>     (similar_memory_accesses): Rename ...
>>     (share_memory_accesses): ... to this.  Check if partitions access
>>     the same memory reference.
>>     (distribute_loop): Call share_memory_accesses.
>>
>> gcc/testsuite/ChangeLog
>> 2017-06-20  Bin Cheng  <bin.cheng@arm.com>
>>
>>     * gcc.dg/tree-ssa/ldist-6.c: XFAIL.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-06-23 10:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-12 17:03 [PATCH GCC][09/13]Simply cost model merges partitions with the same references Bin Cheng
2017-06-14 13:54 ` Richard Biener
2017-06-14 14:12   ` Bin.Cheng
2017-06-19 13:40   ` Bin.Cheng
2017-06-19 15:20     ` Richard Biener
2017-06-23 10:19       ` Bin.Cheng
2017-06-23 10:48         ` Richard Biener
2017-06-23 10:51           ` Bin.Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).