public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)
@ 2021-02-23 19:17 Tamar Christina
  2021-02-24  8:52 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2021-02-23 19:17 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther

[-- Attachment #1: Type: text/plain, Size: 4276 bytes --]

Hi Richi,

The attached testcase shows a bug where two nodes end up with the same pointer.
During the loop that analyzes all the instances
in optimize_load_redistribution_1 we do

      if (value)
        {
          SLP_TREE_REF_COUNT (value)++;
          SLP_TREE_CHILDREN (root)[i] = value;
          vect_free_slp_tree (node);
        }

when doing a replacement.  When this is done and the refcount for the node
reaches 0, the node is removed, which allows the libc to return the pointer
again in the next call to new — which it does.

First instance

note:   node 0x5325f48 (max_nunits=1, refcnt=2)
note:   op: VEC_PERM_EXPR
note:           { }
note:           lane permutation { 0[0] 1[1] 0[2] 1[3] }
note:           children 0x5325db0 0x5325200

Second instance

note:   node 0x5325f48 (max_nunits=1, refcnt=1)
note:   op: VEC_PERM_EXPR
note:           { }
note:           lane permutation { 0[0] 1[1] }
note:           children 0x53255b8 0x5325530

This will end up with the illegal construction of

note:   node 0x53258e8 (max_nunits=2, refcnt=2)
note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
note:           stmt 0 _16 = _14 - _15;
note:           stmt 1 _23 = _17 + _22;
note:           children 0x53257d8 0x5325d28
note:   node 0x53257d8 (max_nunits=2, refcnt=3)
note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
note:           load permutation { 0 1 }
note:   node 0x5325d28 (max_nunits=2, refcnt=8)
note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
note:           stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
note:           stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
note:           load permutation { 0 1 0 1 }

To prevent this my initial thought was to add the temporary VEC_PERM_EXPR nodes
to the bst_map cache and increase their refcnt one more.  However since bst_map
is gated on scalar statements and these nodes have none we can't do that.

Instead I realized that load_map is really only a visited list at the top level.
So instead of returning the reference, we should return NULL.

What this means is that it behaves as if no replacement was found at that
level.  This is fine since these VEC_PERM_EXPR are single use.  So while any
other node is an indication to use the cache, VEC_PERM_EXPR are an indication
to avoid it.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/99220
	* tree-vect-slp.c (optimize_load_redistribution_1): Don't use
	VEC_PERM_EXPR in cache.

gcc/testsuite/ChangeLog:

	PR tree-optimization/99220
	* g++.dg/vect/pr99220.cc: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
new file mode 100755
index 0000000000000000000000000000000000000000..ff3058832b742414202a8ada0a9dafc72c9a54aa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } } } */
+
+class a {
+  float b;
+  float c;
+
+public:
+  a(float d, float e) : b(d), c(e) {}
+  a operator+(a d) { return a(b + d.b, c + d.c); }
+  a operator-(a d) { return a(b - d.b, c - d.c); }
+  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
+};
+long f;
+a *g;
+class {
+  a *h;
+  long i;
+  a *j;
+
+public:
+  void k() {
+    a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
+    g[i] = m + n;
+    g[i + 1] = m - n;
+    j[f] = o;
+  }
+} p;
+main() { p.k(); }
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
 				slp_tree root)
 {
   if (slp_tree *leader = load_map->get (root))
-    return *leader;
+    {
+      if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
+	return NULL;
+      else
+	return *leader;
+    }
 
   load_map->put (root, NULL);
 


-- 

[-- Attachment #2: rb14228.patch --]
[-- Type: text/x-diff, Size: 1403 bytes --]

diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
new file mode 100755
index 0000000000000000000000000000000000000000..ff3058832b742414202a8ada0a9dafc72c9a54aa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } } } */
+
+class a {
+  float b;
+  float c;
+
+public:
+  a(float d, float e) : b(d), c(e) {}
+  a operator+(a d) { return a(b + d.b, c + d.c); }
+  a operator-(a d) { return a(b - d.b, c - d.c); }
+  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
+};
+long f;
+a *g;
+class {
+  a *h;
+  long i;
+  a *j;
+
+public:
+  void k() {
+    a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
+    g[i] = m + n;
+    g[i + 1] = m - n;
+    j[f] = o;
+  }
+} p;
+main() { p.k(); }
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
 				slp_tree root)
 {
   if (slp_tree *leader = load_map->get (root))
-    return *leader;
+    {
+      if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
+	return NULL;
+      else
+	return *leader;
+    }
 
   load_map->put (root, NULL);
 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)
  2021-02-23 19:17 [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220) Tamar Christina
@ 2021-02-24  8:52 ` Richard Biener
  2021-02-24 14:08   ` Tamar Christina
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2021-02-24  8:52 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, nd

On Tue, 23 Feb 2021, Tamar Christina wrote:

> Hi Richi,
> 
> The attached testcase shows a bug where two nodes end up with the same pointer.
> During the loop that analyzes all the instances
> in optimize_load_redistribution_1 we do
> 
>       if (value)
>         {
>           SLP_TREE_REF_COUNT (value)++;
>           SLP_TREE_CHILDREN (root)[i] = value;
>           vect_free_slp_tree (node);
>         }
> 
> when doing a replacement.  When this is done and the refcount for the node
> reaches 0, the node is removed, which allows the libc to return the pointer
> again in the next call to new, which it does..
> 
> First instance
> 
> note:   node 0x5325f48 (max_nunits=1, refcnt=2)
> note:   op: VEC_PERM_EXPR
> note:           { }
> note:           lane permutation { 0[0] 1[1] 0[2] 1[3] }
> note:           children 0x5325db0 0x5325200
> 
> Second instance
> 
> note:   node 0x5325f48 (max_nunits=1, refcnt=1)
> note:   op: VEC_PERM_EXPR
> note:           { }
> note:           lane permutation { 0[0] 1[1] }
> note:           children 0x53255b8 0x5325530
> 
> This will end up with the illegal construction of
> 
> note:   node 0x53258e8 (max_nunits=2, refcnt=2)
> note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
> note:           stmt 0 _16 = _14 - _15;
> note:           stmt 1 _23 = _17 + _22;
> note:           children 0x53257d8 0x5325d28
> note:   node 0x53257d8 (max_nunits=2, refcnt=3)
> note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> note:           load permutation { 0 1 }
> note:   node 0x5325d28 (max_nunits=2, refcnt=8)
> note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> note:           stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
> note:           stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
> note:           load permutation { 0 1 0 1 }
> 
> To prevent this my initial thought was to add the temporary VEC_PERM_EXPR nodes
> to the bst_map cache and increase their refcnt one more.  However since bst_map
> is gated on scalar statements and these nodes have none we can't do that.
> 
> Instead I realized that load_map is really only a visited list at the top level.
> So instead of returning the reference, we should return NULL.
> 
> What this means is that it will no replacement was found at that level.  This is
> fine since these VEC_PERM_EXPR are single use.  So while the any other node is
> an indication to use the cache, VEC_PERM_EXPR are an indication to avoid it.

I don't understand really.  Waiting for the other patch to be pushed so
I can eventually have a look, but see below.

> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/99220
> 	* tree-vect-slp.c (optimize_load_redistribution_1): Don't use
> 	VEC_PERM_EXPR in cache.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/99220
> 	* g++.dg/vect/pr99220.cc: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
> new file mode 100755
> index 0000000000000000000000000000000000000000..ff3058832b742414202a8ada0a9dafc72c9a54aa
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } } } */
> +
> +class a {
> +  float b;
> +  float c;
> +
> +public:
> +  a(float d, float e) : b(d), c(e) {}
> +  a operator+(a d) { return a(b + d.b, c + d.c); }
> +  a operator-(a d) { return a(b - d.b, c - d.c); }
> +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> +};
> +long f;
> +a *g;
> +class {
> +  a *h;
> +  long i;
> +  a *j;
> +
> +public:
> +  void k() {
> +    a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
> +    g[i] = m + n;
> +    g[i + 1] = m - n;
> +    j[f] = o;
> +  }
> +} p;
> +main() { p.k(); }
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
>  				slp_tree root)
>  {
>    if (slp_tree *leader = load_map->get (root))
> -    return *leader;
> +    {
> +      if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
> +	return NULL;

But this will then only optimize the first occurance.  Wouldn't it be
better to increase the refcount at

      load_map->put (root, node);

and walk load_map at the end, releasing refs owned by it like we do
for bst_map?

> +      else
> +	return *leader;
> +    }
>  
>    load_map->put (root, NULL);
>  
> 
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)
  2021-02-24  8:52 ` Richard Biener
@ 2021-02-24 14:08   ` Tamar Christina
  2021-02-24 15:12     ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2021-02-24 14:08 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, nd

[-- Attachment #1: Type: text/plain, Size: 5872 bytes --]

Hi Richi,

This is an updated patch with your suggestion.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/99220
	* tree-vect-slp.c (optimize_load_redistribution_1): Remove
	node from cache when it's about to be deleted.

gcc/testsuite/ChangeLog:

	PR tree-optimization/99220
	* g++.dg/vect/pr99220.cc: New test.

The 02/24/2021 08:52, Richard Biener wrote:
> On Tue, 23 Feb 2021, Tamar Christina wrote:
> 
> > Hi Richi,
> > 
> > The attached testcase shows a bug where two nodes end up with the same pointer.
> > During the loop that analyzes all the instances
> > in optimize_load_redistribution_1 we do
> > 
> >       if (value)
> >         {
> >           SLP_TREE_REF_COUNT (value)++;
> >           SLP_TREE_CHILDREN (root)[i] = value;
> >           vect_free_slp_tree (node);
> >         }
> > 
> > when doing a replacement.  When this is done and the refcount for the node
> > reaches 0, the node is removed, which allows the libc to return the pointer
> > again in the next call to new, which it does..
> > 
> > First instance
> > 
> > note:   node 0x5325f48 (max_nunits=1, refcnt=2)
> > note:   op: VEC_PERM_EXPR
> > note:           { }
> > note:           lane permutation { 0[0] 1[1] 0[2] 1[3] }
> > note:           children 0x5325db0 0x5325200
> > 
> > Second instance
> > 
> > note:   node 0x5325f48 (max_nunits=1, refcnt=1)
> > note:   op: VEC_PERM_EXPR
> > note:           { }
> > note:           lane permutation { 0[0] 1[1] }
> > note:           children 0x53255b8 0x5325530
> > 
> > This will end up with the illegal construction of
> > 
> > note:   node 0x53258e8 (max_nunits=2, refcnt=2)
> > note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
> > note:           stmt 0 _16 = _14 - _15;
> > note:           stmt 1 _23 = _17 + _22;
> > note:           children 0x53257d8 0x5325d28
> > note:   node 0x53257d8 (max_nunits=2, refcnt=3)
> > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > note:           load permutation { 0 1 }
> > note:   node 0x5325d28 (max_nunits=2, refcnt=8)
> > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > note:           stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
> > note:           stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
> > note:           load permutation { 0 1 0 1 }
> > 
> > To prevent this my initial thought was to add the temporary VEC_PERM_EXPR nodes
> > to the bst_map cache and increase their refcnt one more.  However since bst_map
> > is gated on scalar statements and these nodes have none we can't do that.
> > 
> > Instead I realized that load_map is really only a visited list at the top level.
> > So instead of returning the reference, we should return NULL.
> > 
> > What this means is that it will no replacement was found at that level.  This is
> > fine since these VEC_PERM_EXPR are single use.  So while the any other node is
> > an indication to use the cache, VEC_PERM_EXPR are an indication to avoid it.
> 
> I don't understand really.  Waiting for the other patch to be pushed so
> I can eventually have a look, but see below.
> 
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > 
> > Ok for master?
> > 
> > Thanks,
> > Tamar
> > 
> > gcc/ChangeLog:
> > 
> > 	PR tree-optimization/99220
> > 	* tree-vect-slp.c (optimize_load_redistribution_1): Don't use
> > 	VEC_PERM_EXPR in cache.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 	PR tree-optimization/99220
> > 	* g++.dg/vect/pr99220.cc: New test.
> > 
> > --- inline copy of patch -- 
> > diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > new file mode 100755
> > index 0000000000000000000000000000000000000000..ff3058832b742414202a8ada0a9dafc72c9a54aa
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > @@ -0,0 +1,29 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } } } */
> > +
> > +class a {
> > +  float b;
> > +  float c;
> > +
> > +public:
> > +  a(float d, float e) : b(d), c(e) {}
> > +  a operator+(a d) { return a(b + d.b, c + d.c); }
> > +  a operator-(a d) { return a(b - d.b, c - d.c); }
> > +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> > +};
> > +long f;
> > +a *g;
> > +class {
> > +  a *h;
> > +  long i;
> > +  a *j;
> > +
> > +public:
> > +  void k() {
> > +    a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
> > +    g[i] = m + n;
> > +    g[i + 1] = m - n;
> > +    j[f] = o;
> > +  }
> > +} p;
> > +main() { p.k(); }
> > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> > index 605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c 100644
> > --- a/gcc/tree-vect-slp.c
> > +++ b/gcc/tree-vect-slp.c
> > @@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
> >  				slp_tree root)
> >  {
> >    if (slp_tree *leader = load_map->get (root))
> > -    return *leader;
> > +    {
> > +      if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
> > +	return NULL;
> 
> But this will then only optimize the first occurance.  Wouldn't it be
> better to increase the refcount at
> 
>       load_map->put (root, node);
> 
> and walk load_map at the end, releasing refs owned by it like we do
> for bst_map?
> 
> > +      else
> > +	return *leader;
> > +    }
> >  
> >    load_map->put (root, NULL);
> >  
> > 
> > 
> > 
> 
> -- 
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

-- 

[-- Attachment #2: rb14228.patch --]
[-- Type: text/x-diff, Size: 1908 bytes --]

diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
new file mode 100755
index 0000000000000000000000000000000000000000..b41b2d4f0ee401732073321645f3f1f958da842d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
@@ -0,0 +1,29 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a" } */
+
+class a {
+  float b;
+  float c;
+
+public:
+  a(float d, float e) : b(d), c(e) {}
+  a operator+(a d) { return a(b + d.b, c + d.c); }
+  a operator-(a d) { return a(b - d.b, c - d.c); }
+  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
+};
+long f;
+a *g;
+class {
+  a *h;
+  long i;
+  a *j;
+
+public:
+  void k() {
+    a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
+    g[i] = m + n;
+    g[i + 1] = m - n;
+    j[f] = o;
+  }
+} p;
+main() { p.k(); }
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 605873714a5cafaaf822f61f1f769f96b3876694..09a0f42410e3542c7823d70757550c22b8ef676f 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2351,6 +2351,12 @@ next:
 	{
 	  SLP_TREE_REF_COUNT (value)++;
 	  SLP_TREE_CHILDREN (root)[i] = value;
+	  /* ???  We know the original leafs of the replaced nodes will
+	    be referenced by bst_map, only the permutes created by
+	  pattern matching are not.  */
+	  if (SLP_TREE_REF_COUNT (node) == 1)
+	    load_map->remove (node);
+
 	  vect_free_slp_tree (node);
 	}
     }
@@ -2383,6 +2389,12 @@ optimize_load_redistribution (scalar_stmts_to_slp_tree_map_t *bst_map,
 	{
 	  SLP_TREE_REF_COUNT (value)++;
 	  SLP_TREE_CHILDREN (root)[i] = value;
+	  /* ???  We know the original leafs of the replaced nodes will
+	     be referenced by bst_map, only the permutes created by
+	     pattern matching are not.  */
+	  if (SLP_TREE_REF_COUNT (node) == 1)
+	    load_map->remove (node);
+
 	  vect_free_slp_tree (node);
 	}
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)
  2021-02-24 14:08   ` Tamar Christina
@ 2021-02-24 15:12     ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2021-02-24 15:12 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, nd

On Wed, 24 Feb 2021, Tamar Christina wrote:

> Hi Richi,
> 
> This is an updated patch with your suggestion.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/99220
> 	* tree-vect-slp.c (optimize_load_redistribution_1): Remove
> 	node from cache when it's about to be deleted.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/99220
> 	* g++.dg/vect/pr99220.cc: New test.
> 
> The 02/24/2021 08:52, Richard Biener wrote:
> > On Tue, 23 Feb 2021, Tamar Christina wrote:
> > 
> > > Hi Richi,
> > > 
> > > The attached testcase shows a bug where two nodes end up with the same pointer.
> > > During the loop that analyzes all the instances
> > > in optimize_load_redistribution_1 we do
> > > 
> > >       if (value)
> > >         {
> > >           SLP_TREE_REF_COUNT (value)++;
> > >           SLP_TREE_CHILDREN (root)[i] = value;
> > >           vect_free_slp_tree (node);
> > >         }
> > > 
> > > when doing a replacement.  When this is done and the refcount for the node
> > > reaches 0, the node is removed, which allows the libc to return the pointer
> > > again in the next call to new, which it does..
> > > 
> > > First instance
> > > 
> > > note:   node 0x5325f48 (max_nunits=1, refcnt=2)
> > > note:   op: VEC_PERM_EXPR
> > > note:           { }
> > > note:           lane permutation { 0[0] 1[1] 0[2] 1[3] }
> > > note:           children 0x5325db0 0x5325200
> > > 
> > > Second instance
> > > 
> > > note:   node 0x5325f48 (max_nunits=1, refcnt=1)
> > > note:   op: VEC_PERM_EXPR
> > > note:           { }
> > > note:           lane permutation { 0[0] 1[1] }
> > > note:           children 0x53255b8 0x5325530
> > > 
> > > This will end up with the illegal construction of
> > > 
> > > note:   node 0x53258e8 (max_nunits=2, refcnt=2)
> > > note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
> > > note:           stmt 0 _16 = _14 - _15;
> > > note:           stmt 1 _23 = _17 + _22;
> > > note:           children 0x53257d8 0x5325d28
> > > note:   node 0x53257d8 (max_nunits=2, refcnt=3)
> > > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > > note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > > note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > > note:           load permutation { 0 1 }
> > > note:   node 0x5325d28 (max_nunits=2, refcnt=8)
> > > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > > note:           stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > > note:           stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > > note:           stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
> > > note:           stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
> > > note:           load permutation { 0 1 0 1 }
> > > 
> > > To prevent this my initial thought was to add the temporary VEC_PERM_EXPR nodes
> > > to the bst_map cache and increase their refcnt one more.  However since bst_map
> > > is gated on scalar statements and these nodes have none we can't do that.
> > > 
> > > Instead I realized that load_map is really only a visited list at the top level.
> > > So instead of returning the reference, we should return NULL.
> > > 
> > > What this means is that it will no replacement was found at that level.  This is
> > > fine since these VEC_PERM_EXPR are single use.  So while the any other node is
> > > an indication to use the cache, VEC_PERM_EXPR are an indication to avoid it.
> > 
> > I don't understand really.  Waiting for the other patch to be pushed so
> > I can eventually have a look, but see below.
> > 
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > > 
> > > Ok for master?
> > > 
> > > Thanks,
> > > Tamar
> > > 
> > > gcc/ChangeLog:
> > > 
> > > 	PR tree-optimization/99220
> > > 	* tree-vect-slp.c (optimize_load_redistribution_1): Don't use
> > > 	VEC_PERM_EXPR in cache.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > > 	PR tree-optimization/99220
> > > 	* g++.dg/vect/pr99220.cc: New test.
> > > 
> > > --- inline copy of patch -- 
> > > diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > > new file mode 100755
> > > index 0000000000000000000000000000000000000000..ff3058832b742414202a8ada0a9dafc72c9a54aa
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > > @@ -0,0 +1,29 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } } } */
> > > +
> > > +class a {
> > > +  float b;
> > > +  float c;
> > > +
> > > +public:
> > > +  a(float d, float e) : b(d), c(e) {}
> > > +  a operator+(a d) { return a(b + d.b, c + d.c); }
> > > +  a operator-(a d) { return a(b - d.b, c - d.c); }
> > > +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> > > +};
> > > +long f;
> > > +a *g;
> > > +class {
> > > +  a *h;
> > > +  long i;
> > > +  a *j;
> > > +
> > > +public:
> > > +  void k() {
> > > +    a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
> > > +    g[i] = m + n;
> > > +    g[i + 1] = m - n;
> > > +    j[f] = o;
> > > +  }
> > > +} p;
> > > +main() { p.k(); }
> > > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> > > index 605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c 100644
> > > --- a/gcc/tree-vect-slp.c
> > > +++ b/gcc/tree-vect-slp.c
> > > @@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
> > >  				slp_tree root)
> > >  {
> > >    if (slp_tree *leader = load_map->get (root))
> > > -    return *leader;
> > > +    {
> > > +      if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
> > > +	return NULL;
> > 
> > But this will then only optimize the first occurance.  Wouldn't it be
> > better to increase the refcount at
> > 
> >       load_map->put (root, node);
> > 
> > and walk load_map at the end, releasing refs owned by it like we do
> > for bst_map?
> > 
> > > +      else
> > > +	return *leader;
> > > +    }
> > >  
> > >    load_map->put (root, NULL);
> > >  
> > > 
> > > 
> > > 
> > 
> > -- 
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> > Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-02-24 15:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-23 19:17 [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220) Tamar Christina
2021-02-24  8:52 ` Richard Biener
2021-02-24 14:08   ` Tamar Christina
2021-02-24 15:12     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).