public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] tree-ssa-math-opts: Fix up match_uaddc_usubc [PR111845]
@ 2023-10-18 10:03 Jakub Jelinek
  2023-10-18 10:36 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2023-10-18 10:03 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

GCC ICEs on the first testcase.  Successful match_uaddc_usubc ends up with
some dead stmts which DCE will remove (hopefully) later all.
The ICE is because one of the dead stmts refers to a freed SSA_NAME.
The code already gsi_removes a couple of stmts in the
  /* Remove some statements which can't be kept in the IL because they
     use SSA_NAME whose setter is going to be removed too.  */
section for the same reason (the reason for the freed SSA_NAMEs is that
we don't really have a replacement for those cases - all we have after
a match is combined overflow from the addition/subtraction of 2 operands + a
[0, 1] carry in, but not the individual overflows from the former 2
additions), but for the last (most significant) limb case, where we try
to match x = op1 + op2 + carry1 + carry2; or
x = op1 - op2 - carry1 - carry2; we just gsi_replace the final stmt, but
left around the 2 temporary stmts as dead; if we were unlucky enough that
those referenced the carry flag that went away, it ICEs.

So, the following patch remembers those temporary statements (rather than
trying to rediscover them more expensively) and removes them before the
final one is replaced.

While working on it, I've noticed we didn't support all the reassociated
possibilities of writing the addition of 4 operands or subtracting 3
operands from one, we supported e.g.
x = ((op1 + op2) + op3) + op4;
x = op1 + ((op2 + op3) + op4);
but not
x = (op1 + (op2 + op3)) + op4;
x = op1 + (op2 + (op3 + op4));
Fixed by the change to inspect also rhs[2] when rhs[1] didn't yield what
we were searching for (if non-NULL) - rhs[0] is inspected in the first
loop and has different handling for the MINUS_EXPR case.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-10-18  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/111845
	* tree-ssa-math-opts.cc (match_uaddc_usubc): Remember temporary
	statements for the 4 operand addition or subtraction of 3 operands
	from 1 operand cases and remove them when successful.  Look for
	nested additions even from rhs[2], not just rhs[1].

	* gcc.dg/pr111845.c: New test.
	* gcc.target/i386/pr111845.c: New test.

--- gcc/tree-ssa-math-opts.cc.jj	2023-09-18 10:37:56.180963000 +0200
+++ gcc/tree-ssa-math-opts.cc	2023-10-17 14:46:39.430139602 +0200
@@ -4581,6 +4581,7 @@ match_uaddc_usubc (gimple_stmt_iterator
   if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
     return false;
 
+  auto_vec<gimple *, 2> temp_stmts;
   if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
     {
       /* If overflow flag is ignored on the MSB limb, we can end up with
@@ -4615,26 +4616,29 @@ match_uaddc_usubc (gimple_stmt_iterator
 	      rhs[0] = gimple_assign_rhs1 (g);
 	      tree &r = rhs[2] ? rhs[3] : rhs[2];
 	      r = r2;
+	      temp_stmts.quick_push (g);
 	    }
 	  else
 	    break;
 	}
-      while (TREE_CODE (rhs[1]) == SSA_NAME && !rhs[3])
-	{
-	  gimple *g = SSA_NAME_DEF_STMT (rhs[1]);
-	  if (has_single_use (rhs[1])
-	      && is_gimple_assign (g)
-	      && gimple_assign_rhs_code (g) == PLUS_EXPR)
-	    {
-	      rhs[1] = gimple_assign_rhs1 (g);
-	      if (rhs[2])
-		rhs[3] = gimple_assign_rhs2 (g);
-	      else
-		rhs[2] = gimple_assign_rhs2 (g);
-	    }
-	  else
-	    break;
-	}
+      for (int i = 1; i <= 2; ++i)
+	while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
+	  {
+	    gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
+	    if (has_single_use (rhs[i])
+		&& is_gimple_assign (g)
+		&& gimple_assign_rhs_code (g) == PLUS_EXPR)
+	      {
+		rhs[i] = gimple_assign_rhs1 (g);
+		if (rhs[2])
+		  rhs[3] = gimple_assign_rhs2 (g);
+		else
+		  rhs[2] = gimple_assign_rhs2 (g);
+		temp_stmts.quick_push (g);
+	      }
+	    else
+	      break;
+	  }
       /* If there are just 3 addends or one minuend and two subtrahends,
 	 check for UADDC or USUBC being pattern recognized earlier.
 	 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
@@ -5039,7 +5043,17 @@ match_uaddc_usubc (gimple_stmt_iterator
   g = gimple_build_assign (ilhs, IMAGPART_EXPR,
 			   build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
   if (rhs[2])
-    gsi_insert_before (gsi, g, GSI_SAME_STMT);
+    {
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      /* Remove some further statements which can't be kept in the IL because
+	 they can use SSA_NAMEs whose setter is going to be removed too.  */
+      while (temp_stmts.length ())
+	{
+	  g = temp_stmts.pop ();
+	  gsi2 = gsi_for_stmt (g);
+	  gsi_remove (&gsi2, true);
+	}
+    }
   else
     gsi_replace (gsi, g, true);
   /* Remove some statements which can't be kept in the IL because they
--- gcc/testsuite/gcc.dg/pr111845.c.jj	2023-10-17 16:08:48.561492321 +0200
+++ gcc/testsuite/gcc.dg/pr111845.c	2023-10-17 15:04:21.199150290 +0200
@@ -0,0 +1,16 @@
+/* PR tree-optimization/111845 */
+/* { dg-do compile } */
+/* { dg-options "-O2 --param tree-reassoc-width=2" } */
+
+int a, b;
+unsigned int c, d, e;
+
+void
+foo (int x)
+{
+  b += d;
+  c += b < d;
+  b += e = a < x;
+  c += b;
+  c += b < e;
+}
--- gcc/testsuite/gcc.target/i386/pr111845.c.jj	2023-10-17 16:11:44.233010594 +0200
+++ gcc/testsuite/gcc.target/i386/pr111845.c	2023-10-17 16:11:27.944240706 +0200
@@ -0,0 +1,47 @@
+/* PR tree-optimization/111845 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -masm=att" } */
+/* { dg-final { scan-assembler-times "\tadcq\t" 8 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "\tadcl\t" 8 { target ia32 } } } */
+
+unsigned long l, m;
+
+__attribute__((noipa)) void
+foo (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
+{
+  unsigned long c, d;
+  unsigned long e = __builtin_add_overflow (x, y, &c);
+  unsigned long f = __builtin_add_overflow (c, a < b, &d);
+  m = ((h + i) + e) + f;
+  l = d;
+}
+
+__attribute__((noipa)) void
+bar (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
+{
+  unsigned long c, d;
+  unsigned long e = __builtin_add_overflow (x, y, &c);
+  unsigned long f = __builtin_add_overflow (c, a < b, &d);
+  m = (h + (i + e)) + f;
+  l = d;
+}
+
+__attribute__((noipa)) void
+baz (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
+{
+  unsigned long c, d;
+  unsigned long e = __builtin_add_overflow (x, y, &c);
+  unsigned long f = __builtin_add_overflow (c, a < b, &d);
+  m = h + (i + (e + f));
+  l = d;
+}
+
+__attribute__((noipa)) void
+qux (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
+{
+  unsigned long c, d;
+  unsigned long e = __builtin_add_overflow (x, y, &c);
+  unsigned long f = __builtin_add_overflow (c, a < b, &d);
+  m = h + ((i + e) + f);
+  l = d;
+}

	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] tree-ssa-math-opts: Fix up match_uaddc_usubc [PR111845]
  2023-10-18 10:03 [PATCH] tree-ssa-math-opts: Fix up match_uaddc_usubc [PR111845] Jakub Jelinek
@ 2023-10-18 10:36 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2023-10-18 10:36 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Wed, 18 Oct 2023, Jakub Jelinek wrote:

> Hi!
> 
> GCC ICEs on the first testcase.  Successful match_uaddc_usubc ends up with
> some dead stmts which DCE will remove (hopefully) later all.
> The ICE is because one of the dead stmts refers to a freed SSA_NAME.
> The code already gsi_removes a couple of stmts in the
>   /* Remove some statements which can't be kept in the IL because they
>      use SSA_NAME whose setter is going to be removed too.  */
> section for the same reason (the reason for the freed SSA_NAMEs is that
> we don't really have a replacement for those cases - all we have after
> a match is combined overflow from the addition/subtraction of 2 operands + a
> [0, 1] carry in, but not the individual overflows from the former 2
> additions), but for the last (most significant) limb case, where we try
> to match x = op1 + op2 + carry1 + carry2; or
> x = op1 - op2 - carry1 - carry2; we just gsi_replace the final stmt, but
> left around the 2 temporary stmts as dead; if we were unlucky enough that
> those referenced the carry flag that went away, it ICEs.
> 
> So, the following patch remembers those temporary statements (rather than
> trying to rediscover them more expensively) and removes them before the
> final one is replaced.
> 
> While working on it, I've noticed we didn't support all the reassociated
> possibilities of writing the addition of 4 operands or subtracting 3
> operands from one, we supported e.g.
> x = ((op1 + op2) + op3) + op4;
> x = op1 + ((op2 + op3) + op4);
> but not
> x = (op1 + (op2 + op3)) + op4;
> x = op1 + (op2 + (op3 + op4));
> Fixed by the change to inspect also rhs[2] when rhs[1] didn't yield what
> we were searching for (if non-NULL) - rhs[0] is inspected in the first
> loop and has different handling for the MINUS_EXPR case.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2023-10-18  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/111845
> 	* tree-ssa-math-opts.cc (match_uaddc_usubc): Remember temporary
> 	statements for the 4 operand addition or subtraction of 3 operands
> 	from 1 operand cases and remove them when successful.  Look for
> 	nested additions even from rhs[2], not just rhs[1].
> 
> 	* gcc.dg/pr111845.c: New test.
> 	* gcc.target/i386/pr111845.c: New test.
> 
> --- gcc/tree-ssa-math-opts.cc.jj	2023-09-18 10:37:56.180963000 +0200
> +++ gcc/tree-ssa-math-opts.cc	2023-10-17 14:46:39.430139602 +0200
> @@ -4581,6 +4581,7 @@ match_uaddc_usubc (gimple_stmt_iterator
>    if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
>      return false;
>  
> +  auto_vec<gimple *, 2> temp_stmts;
>    if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
>      {
>        /* If overflow flag is ignored on the MSB limb, we can end up with
> @@ -4615,26 +4616,29 @@ match_uaddc_usubc (gimple_stmt_iterator
>  	      rhs[0] = gimple_assign_rhs1 (g);
>  	      tree &r = rhs[2] ? rhs[3] : rhs[2];
>  	      r = r2;
> +	      temp_stmts.quick_push (g);
>  	    }
>  	  else
>  	    break;
>  	}
> -      while (TREE_CODE (rhs[1]) == SSA_NAME && !rhs[3])
> -	{
> -	  gimple *g = SSA_NAME_DEF_STMT (rhs[1]);
> -	  if (has_single_use (rhs[1])
> -	      && is_gimple_assign (g)
> -	      && gimple_assign_rhs_code (g) == PLUS_EXPR)
> -	    {
> -	      rhs[1] = gimple_assign_rhs1 (g);
> -	      if (rhs[2])
> -		rhs[3] = gimple_assign_rhs2 (g);
> -	      else
> -		rhs[2] = gimple_assign_rhs2 (g);
> -	    }
> -	  else
> -	    break;
> -	}
> +      for (int i = 1; i <= 2; ++i)
> +	while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
> +	  {
> +	    gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
> +	    if (has_single_use (rhs[i])
> +		&& is_gimple_assign (g)
> +		&& gimple_assign_rhs_code (g) == PLUS_EXPR)
> +	      {
> +		rhs[i] = gimple_assign_rhs1 (g);
> +		if (rhs[2])
> +		  rhs[3] = gimple_assign_rhs2 (g);
> +		else
> +		  rhs[2] = gimple_assign_rhs2 (g);
> +		temp_stmts.quick_push (g);
> +	      }
> +	    else
> +	      break;
> +	  }
>        /* If there are just 3 addends or one minuend and two subtrahends,
>  	 check for UADDC or USUBC being pattern recognized earlier.
>  	 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
> @@ -5039,7 +5043,17 @@ match_uaddc_usubc (gimple_stmt_iterator
>    g = gimple_build_assign (ilhs, IMAGPART_EXPR,
>  			   build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
>    if (rhs[2])
> -    gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +    {
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      /* Remove some further statements which can't be kept in the IL because
> +	 they can use SSA_NAMEs whose setter is going to be removed too.  */
> +      while (temp_stmts.length ())
> +	{
> +	  g = temp_stmts.pop ();
> +	  gsi2 = gsi_for_stmt (g);
> +	  gsi_remove (&gsi2, true);
> +	}
> +    }
>    else
>      gsi_replace (gsi, g, true);
>    /* Remove some statements which can't be kept in the IL because they
> --- gcc/testsuite/gcc.dg/pr111845.c.jj	2023-10-17 16:08:48.561492321 +0200
> +++ gcc/testsuite/gcc.dg/pr111845.c	2023-10-17 15:04:21.199150290 +0200
> @@ -0,0 +1,16 @@
> +/* PR tree-optimization/111845 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 --param tree-reassoc-width=2" } */
> +
> +int a, b;
> +unsigned int c, d, e;
> +
> +void
> +foo (int x)
> +{
> +  b += d;
> +  c += b < d;
> +  b += e = a < x;
> +  c += b;
> +  c += b < e;
> +}
> --- gcc/testsuite/gcc.target/i386/pr111845.c.jj	2023-10-17 16:11:44.233010594 +0200
> +++ gcc/testsuite/gcc.target/i386/pr111845.c	2023-10-17 16:11:27.944240706 +0200
> @@ -0,0 +1,47 @@
> +/* PR tree-optimization/111845 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -masm=att" } */
> +/* { dg-final { scan-assembler-times "\tadcq\t" 8 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times "\tadcl\t" 8 { target ia32 } } } */
> +
> +unsigned long l, m;
> +
> +__attribute__((noipa)) void
> +foo (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
> +{
> +  unsigned long c, d;
> +  unsigned long e = __builtin_add_overflow (x, y, &c);
> +  unsigned long f = __builtin_add_overflow (c, a < b, &d);
> +  m = ((h + i) + e) + f;
> +  l = d;
> +}
> +
> +__attribute__((noipa)) void
> +bar (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
> +{
> +  unsigned long c, d;
> +  unsigned long e = __builtin_add_overflow (x, y, &c);
> +  unsigned long f = __builtin_add_overflow (c, a < b, &d);
> +  m = (h + (i + e)) + f;
> +  l = d;
> +}
> +
> +__attribute__((noipa)) void
> +baz (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
> +{
> +  unsigned long c, d;
> +  unsigned long e = __builtin_add_overflow (x, y, &c);
> +  unsigned long f = __builtin_add_overflow (c, a < b, &d);
> +  m = h + (i + (e + f));
> +  l = d;
> +}
> +
> +__attribute__((noipa)) void
> +qux (unsigned long x, unsigned long y, unsigned long h, unsigned long i, int a, int b)
> +{
> +  unsigned long c, d;
> +  unsigned long e = __builtin_add_overflow (x, y, &c);
> +  unsigned long f = __builtin_add_overflow (c, a < b, &d);
> +  m = h + ((i + e) + f);
> +  l = d;
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-10-18 10:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-18 10:03 [PATCH] tree-ssa-math-opts: Fix up match_uaddc_usubc [PR111845] Jakub Jelinek
2023-10-18 10:36 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).