public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
@ 2019-05-07  7:21 Jakub Jelinek
  2019-05-07  7:48 ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2019-05-07  7:21 UTC (permalink / raw)
  To: Richard Biener, Jeff Law, Marc Glisse; +Cc: gcc-patches

Hi!

fold_real_zero_addition_p will fold x + (-0.0) or x - 0.0 to x
when not -frounding-math, but it will not fold the remaining forms
(x + 0.0 or x - (-0.0)) when -fsigned-zeros, and it folds none of
them when -fsignaling-nans.
If we have (x + 0.0) + 0.0, we can fold that to just x + 0.0 even
when honoring signed zeros, and IMNSHO even when honoring sNaNs,
of course unless -frounding-math, then we can't do anything.
For x other than 0.0, -0.0 and sNaN it is obviously correct, for sNaN
sNaN + 0.0 will raise an exception and turn the result into qNaN, which
will not raise further exception on the second addition, so IMHO it is ok
too (unless we want to say special case -fnon-call-exceptions and the
exception handler changing the result back to sNaN and expecting yet another
exception).  For 0.0/-0.0 if we can assume rounding other than towards
negative infinity, the results are:
  x                         x
(0.0 + 0.0) + 0.0 = 0.0 = (0.0 + 0.0)
(-0.0 + 0.0) + 0.0 = 0.0 = (-0.0 + 0.0)
(0.0 - 0.0) - 0.0 = 0.0 = (0.0 - 0.0)
(-0.0 - 0.0) - 0.0 = -0.0 = (-0.0 - 0.0)
(0.0 + 0.0) - 0.0 = 0.0 = (0.0 + 0.0)
(-0.0 + 0.0) - 0.0 = 0.0 = (-0.0 + 0.0)
For the above ones, the two operations are always equal to the inner operation
(0.0 - 0.0) + 0.0 = 0.0 = 0.0 + 0.0
(-0.0 - 0.0) + 0.0 = 0.0 = -0.0 + 0.0
For the above cases, the two operations are always equal to the outer operation

If it is y + (-0.0), it is equivalent to y - 0.0 and if it is y - (-0.0),
it is equivalent to y + 0.0 in the above.

For rounding towards negative infinity, 0.0 - 0.0 is -0.0 rather than 0.0
and so some of the above equivalencies are not true.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-05-07  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/90356
	* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

	* gcc.dg/tree-ssa/pr90356-1.c: New test.
	* gcc.dg/tree-ssa/pr90356-2.c: New test.
	* gcc.dg/tree-ssa/pr90356-3.c: New test.
	* gcc.dg/tree-ssa/pr90356-4.c: New test.

--- gcc/match.pd.jj	2019-05-03 15:22:07.370401908 +0200
+++ gcc/match.pd	2019-05-06 11:26:04.701663020 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math.  For sNaNs the first operation would raise
+   exceptions but turn the result into qNan, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op @0 real_zerop@1) real_zerop@2)
+    (if (TREE_CODE (@1) == REAL_CST
+	 && TREE_CODE (@2) == REAL_CST
+	 && HONOR_SIGNED_ZEROS (element_mode (type))
+	 && !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)))
+     (with { bool plus1 = ((inner_op == PLUS_EXPR)
+			   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+	     bool plus2 = ((outer_op == PLUS_EXPR)
+			   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+      (if (plus2 && !plus1)
+       (outer_op @0 @2)
+       (inner_op @0 @1)))))))
+
 /* Simplify x - x.
    This is unsafe for certain floats even in non-IEEE formats.
    In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj	2019-05-06 11:39:58.998288472 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c	2019-05-06 11:42:53.597489688 +0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj	2019-05-06 11:43:07.232271129 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c	2019-05-06 11:45:41.145803937 +0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj	2019-05-06 11:45:05.056382441 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c	2019-05-06 11:47:19.779222871 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj	2019-05-06 11:46:02.140467400 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c	2019-05-06 11:47:28.175088284 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
+
+#include "pr90356-1.c"

	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
  2019-05-07  7:21 [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356) Jakub Jelinek
@ 2019-05-07  7:48 ` Richard Biener
  2019-05-07  7:55   ` Jakub Jelinek
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2019-05-07  7:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, Marc Glisse, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 7418 bytes --]

On Tue, 7 May 2019, Jakub Jelinek wrote:

> Hi!
> 
> fold_real_zero_addition_p will fold x + (-0.0) or x - 0.0 to x
> when not -frounding-math, but not the rest of the options when
> -fsigned-zeros, and not when -fsignaling-nans.
> If we have (x + 0.0) + 0.0, we can fold that to just x + 0.0 even
> when honoring signed zeros, and IMNSHO even when honoring sNaNs,
> of course unless -frounding-math, then we can't do anything.
> For x other than 0.0, -0.0 and sNaN it is obviously correct, for sNaN
> sNaN + 0.0 will raise an exception and turn the result into qNaN, which
> will not raise further exception on the second addition, so IMHO it is ok
> too (unless we want to say special case -fnon-call-exceptions and the
> exception handler changing the result back to sNaN and expecting yet another
> exception).  For 0.0/-0.0 if we can assume rounding other than towards
> negative infinity, the results are:
>   x                         x
> (0.0 + 0.0) + 0.0 = 0.0 = (0.0 + 0.0)
> (-0.0 + 0.0) + 0.0 = 0.0 = (-0.0 + 0.0)
> (0.0 - 0.0) - 0.0 = 0.0 = (0.0 - 0.0)
> (-0.0 - 0.0) - 0.0 = -0.0 = (-0.0 - 0.0)
> (0.0 + 0.0) - 0.0 = 0.0 = (0.0 + 0.0)
> (-0.0 + 0.0) - 0.0 = 0.0 = (-0.0 + 0.0)
> For the above ones, the two operations are always equal to the inner operation
> (0.0 - 0.0) + 0.0 = 0.0 = 0.0 + 0.0
> (-0.0 - 0.0) + 0.0 = 0.0 = -0.0 + 0.0
> For the above cases, the two operations are always equal to the outer operation
> 
> If it is y + (-0.0), it is equivalent to y - 0.0 and if it is y - (-0.0),
> it is equivalent to y + 0.0 in the above.
> 
> For rounding towards negative infinity, 0.0 - 0.0 is -0.0 rather than 0.0
> and so some of the above equivalencies are not true.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2019-05-07  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/90356
> 	* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
> 
> 	* gcc.dg/tree-ssa/pr90356-1.c: New test.
> 	* gcc.dg/tree-ssa/pr90356-2.c: New test.
> 	* gcc.dg/tree-ssa/pr90356-3.c: New test.
> 	* gcc.dg/tree-ssa/pr90356-4.c: New test.
> 
> --- gcc/match.pd.jj	2019-05-03 15:22:07.370401908 +0200
> +++ gcc/match.pd	2019-05-06 11:26:04.701663020 +0200
> @@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
>   (if (fold_real_zero_addition_p (type, @1, 1))
>    (non_lvalue @0)))
>  
> +/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
> +   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
> +   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
> +   if not -frounding-math.  For sNaNs the first operation would raise
> +   exceptions but turn the result into qNan, so the second operation
> +   would not raise it.   */
> +(for inner_op (plus minus)
> + (for outer_op (plus minus)
> +  (simplify
> +   (outer_op (inner_op @0 real_zerop@1) real_zerop@2)
> +    (if (TREE_CODE (@1) == REAL_CST
> +	 && TREE_CODE (@2) == REAL_CST

Will leave the "correctness check" for other folks but the above is
better written as

+   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
+    (if (real_zerop (@1)
+         && real_zerop (@2)

because that gets code-generated better.  Btw, for -fsignaling-nans
can we have a literal sNaN?  Then you need :c on the inner_op since
I'm not sure we canonicalize to sNaN + 0.0 rather than 0.0 + sNaN.
Maybe not worth optimizing though (since we rule out -frounding-math
a similar case there doesn't need to be considered).

> +	 && HONOR_SIGNED_ZEROS (element_mode (type))
> +	 && !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)))

You can write HONOR_SIGNED_ZEROS (type) here for brevity.

> +     (with { bool plus1 = ((inner_op == PLUS_EXPR)
> +			   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
> +	     bool plus2 = ((outer_op == PLUS_EXPR)
> +			   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
> +      (if (plus2 && !plus1)
> +       (outer_op @0 @2)
> +       (inner_op @0 @1)))))))
> +
>  /* Simplify x - x.
>     This is unsafe for certain floats even in non-IEEE formats.
>     In IEEE, it is unsafe because it does wrong for NaNs.
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj	2019-05-06 11:39:58.998288472 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c	2019-05-06 11:42:53.597489688 +0200
> @@ -0,0 +1,23 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
> +
> +double f1 (double x) { return (x + 0.0) + 0.0; }
> +double f2 (double y) { return (y + (-0.0)) + (-0.0); }
> +double f3 (double y) { return (y - 0.0) - 0.0; }
> +double f4 (double x) { return (x - (-0.0)) - (-0.0); }
> +double f5 (double x) { return (x + 0.0) - 0.0; }
> +double f6 (double x) { return (x + (-0.0)) - (-0.0); }
> +double f7 (double x) { return (x - 0.0) + 0.0; }
> +double f8 (double x) { return (x - (-0.0)) + (-0.0); }
> +double f9 (double x) { double t = x + 0.0; return t + 0.0; }
> +double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
> +double f11 (double y) { double t = y - 0.0; return t - 0.0; }
> +double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
> +double f13 (double x) { double t = x + 0.0; return t - 0.0; }
> +double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
> +double f15 (double x) { double t = x - 0.0; return t + 0.0; }
> +double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj	2019-05-06 11:43:07.232271129 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c	2019-05-06 11:45:41.145803937 +0200
> @@ -0,0 +1,8 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
> +
> +#include "pr90356-1.c"
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj	2019-05-06 11:45:05.056382441 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c	2019-05-06 11:47:19.779222871 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
> +
> +#include "pr90356-1.c"
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj	2019-05-06 11:46:02.140467400 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c	2019-05-06 11:47:28.175088284 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
> +
> +#include "pr90356-1.c"
> 
> 	Jakub
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
  2019-05-07  7:48 ` Richard Biener
@ 2019-05-07  7:55   ` Jakub Jelinek
  2019-05-07  9:11     ` Jakub Jelinek
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2019-05-07  7:55 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Marc Glisse, gcc-patches

On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:
> Will leave the "correctness check" for other folks but the above is
> better written as
> 
> +   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
> +    (if (real_zerop (@1)
> +         && real_zerop (@2)
> 
> because that gets code-generated better.  Btw, for -fsignalling-nans

Ok, will change.  I want to introduce uniform_real_cst_p similar to
uniform_integer_cst_p incrementally and then it will change again.

> can we have a literal sNaN?  Then you need :c on the inner_op since
> I'm not sure we canonicalize to sNaN + 0.0 rather than 0.0 + sNaN.

I had :c on both initially, but that doesn't compile, because MINUS_EXPR
is not commutative.  And I wanted to avoid writing 4 patterns instead of 1.

> Maybe not worth optimizing though (since we rule out -frounding-math
> a similar case there doesn't need to be considered).
> 
> > +	 && HONOR_SIGNED_ZEROS (element_mode (type))
> > +	 && !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)))
> 
> You can write HONOR_SIGNED_ZEROS (type) here for brevity.

Ok, will do (and change it then in fold_real_zero_addition_p as well).

	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
  2019-05-07  7:55   ` Jakub Jelinek
@ 2019-05-07  9:11     ` Jakub Jelinek
  2019-05-07 11:50       ` Marc Glisse
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2019-05-07  9:11 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Marc Glisse, gcc-patches

On Tue, May 07, 2019 at 09:55:21AM +0200, Jakub Jelinek wrote:
> On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:
> > Will leave the "correctness check" for other folks
> > but the above is

BTW, as I wanted to be sure about the correctness, I wrote a simple program
(below).
And actually it seems that we could optimize the plus1 == plus2 cases
even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetround
(FE_DOWNWARD) mode the testcase prints the first two lines (in all other
rounding modes it prints all 4).

So here is also an updated version of the patch:

2019-05-07  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/90356
	* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

	* gcc.dg/tree-ssa/pr90356-1.c: New test.
	* gcc.dg/tree-ssa/pr90356-2.c: New test.
	* gcc.dg/tree-ssa/pr90356-3.c: New test.
	* gcc.dg/tree-ssa/pr90356-4.c: New test.
	* gcc.dg/tree-ssa/pr90356-5.c: New test.
	* gcc.dg/tree-ssa/pr90356-6.c: New test.

--- gcc/match.pd.jj	2019-05-06 23:47:52.642628123 +0200
+++ gcc/match.pd	2019-05-07 10:40:25.475136027 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math (for (X + 0.0) + 0.0 and (X - 0.0) - 0.0
+   even if -frounding-math).  For sNaNs the first operation would raise
+   exceptions but turn the result into qNan, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
+    (if (real_zerop (@1) && real_zerop (@2))
+     (with { bool inner_plus = ((inner_op == PLUS_EXPR)
+				^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+	     bool outer_plus
+	       = ((outer_op == PLUS_EXPR)
+		  ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+      (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) || outer_plus == inner_plus)
+       (if (outer_plus && !inner_plus)
+	(outer_op @0 @2)
+	(inner_op @0 @1))))))))
+
 /* Simplify x - x.
    This is unsafe for certain floats even in non-IEEE formats.
    In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj	2019-05-07 10:34:07.270208201 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c	2019-05-07 10:34:07.270208201 +0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj	2019-05-07 10:34:07.270208201 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c	2019-05-07 10:34:07.270208201 +0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj	2019-05-07 10:34:07.271208185 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c	2019-05-07 11:00:50.345488636 +0200
@@ -0,0 +1,15 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 8 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj	2019-05-07 10:34:07.271208185 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c	2019-05-07 11:01:11.567148473 +0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 8 "optimized" } } */
+
+#include "pr90356-3.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-5.c.jj	2019-05-07 11:01:21.704985970 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-5.c	2019-05-07 11:01:45.981596834 +0200
@@ -0,0 +1,13 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-6.c.jj	2019-05-07 11:01:58.456396880 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-6.c	2019-05-07 11:02:09.451220639 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+#include "pr90356-5.c"


/* Helpers for the experiment in main: each one adds or subtracts a
   signed zero constant.  They are marked noipa (a GCC attribute that
   disables interprocedural optimization between a function and its
   callers) so that nested calls are not combined at compile time.  */

/* x + (+0.0).  */
__attribute__((noipa)) double
f1 (double x)
{
  double sum = x + 0.0;
  return sum;
}

/* x + (-0.0).  */
__attribute__((noipa)) double
f2 (double x)
{
  double sum = x + (-0.0);
  return sum;
}

/* x - (+0.0).  */
__attribute__((noipa)) double
f3 (double x)
{
  double diff = x - 0.0;
  return diff;
}

/* x - (-0.0).  */
__attribute__((noipa)) double
f4 (double x)
{
  double diff = x - (-0.0);
  return diff;
}

/* Return nonzero when A and B have identical object representations.
   A byte-wise comparison is used rather than ==, because == treats
   0.0 and -0.0 as equal and the sign of zero is what is being tested.  */
static int
same_bits (double a, double b)
{
  return __builtin_memcmp (&a, &b, sizeof (double)) == 0;
}

/* For each pair of operations, check on a few sample values (including
   both zeros) whether applying the two operations in sequence gives
   bit-for-bit the same result as a single f1 or f2 call, and print one
   line per identity that holds for all samples.  */
int
main ()
{
  double samples[] = { -2.0, -0.0, 0.0, 2.0 };
  const int count = 4;
  int k;

  /* (x + 0.0) + 0.0 versus x + 0.0.  */
  k = 0;
  while (k < count && same_bits (f1 (samples[k]), f1 (f1 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f1 (f1) == f1\n");

  /* (x + -0.0) + -0.0 versus x + -0.0.  */
  k = 0;
  while (k < count && same_bits (f2 (samples[k]), f2 (f2 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f2 (f2) == f2\n");

  /* (x + 0.0) + -0.0 versus x + 0.0.  */
  k = 0;
  while (k < count && same_bits (f1 (samples[k]), f2 (f1 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f2 (f1) == f1\n");

  /* (x + -0.0) + 0.0 versus x + 0.0.  */
  k = 0;
  while (k < count && same_bits (f1 (samples[k]), f1 (f2 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f1 (f2) == f1\n");

  return 0;
}


	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
  2019-05-07  9:11     ` Jakub Jelinek
@ 2019-05-07 11:50       ` Marc Glisse
  2019-05-07 12:58         ` Jakub Jelinek
  0 siblings, 1 reply; 7+ messages in thread
From: Marc Glisse @ 2019-05-07 11:50 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, Jeff Law, gcc-patches

On Tue, 7 May 2019, Jakub Jelinek wrote:

> On Tue, May 07, 2019 at 09:55:21AM +0200, Jakub Jelinek wrote:
>> On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:
>>> Will leave the "correctness check" for other folks
>>> but the above is
>
> BTW, as I wanted to be sure about the correctness, I wrote a simple program
> (below).

Good idea :-)

> And actually it seems that we could optimize the plus1 == plus2 cases
> even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetenv
> (FE_DOWNWARD) mode the testcase prints the first two (in all other modes all
> 4).

It is very hard to judge what is ok with -frounding-math, because that 
mode is already unusably broken (I use a pass-through asm volatile to 
protect the arguments and result of every operation instead). One 
important aspect of the optimization is whether both operations use the 
same rounding mode, or if there may be a call to fesetround in between. 
Probably we shouldn't care about -frounding-math, since anyway it is 
likely that it will use some IFN_FANCY_PLUS instead of PLUS_EXPR if it is 
ever implemented.

> +	(inner_op @0 @1))))))))

Shouldn't you give it a name in the source pattern and return that, 
instead of creating a new statement? Or are you doing the operation a 
second time on purpose in case the rounding mode changed or to force an 
exception?

> +	(outer_op @0 @2)

With sNaN, this may raise a second exception where we used to have only 
qNaN+0, no? And the handling of exceptions may have changed in between, 
etc. Yes, -ftrapping-math is just as broken as -frounding-math.

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
  2019-05-07 11:50       ` Marc Glisse
@ 2019-05-07 12:58         ` Jakub Jelinek
  2019-05-07 13:04           ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2019-05-07 12:58 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Jeff Law

On Tue, May 07, 2019 at 01:50:23PM +0200, Marc Glisse wrote:
> > And actually it seems that we could optimize the plus1 == plus2 cases
> > even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetenv
> > (FE_DOWNWARD) mode the testcase prints the first two (in all other modes all
> > 4).
> 
> It is very hard to judge what is ok with -frounding-math, because that mode
> is already unusably broken (I use a pass-through asm volatile to protect the
> arguments and result of every operation instead). One important aspect of
> the optimization is whether both operations use the same rounding mode, or
> if there may be a call to fesetround in between. Probably we shouldn't care
> about -frounding-mode, since anyway it is likely that it will use some
> IFN_FANCY_PLUS instead of PLUS_EXPR if it is ever implemented.

I haven't thought about
 t = x + 0.0;
 fesetround (...);
 y = t + 0.0;
indeed, let's take -frounding-math out of the patch now.  If we improve
that mode, such as through explicit dependencies on the floating point state
in the IL, we can get back to this case too.

> > +	(inner_op @0 @1))))))))
> 
> Shouldn't you give it a name in the source pattern and return that, instead
> of creating a new statement? Or are you doing the operation a second time on

Good idea.

> purpose in case the rounding mode changed or to force an exception?
> 
> > +	(outer_op @0 @2)
> 
> With sNaN, this may raise a second exception where we used to have only
> qNaN+0, no? And the handling of exceptions may have changed in between, etc.

IEEE 754 I believe says that for x non-zero x + (+/-0.0) = x and the only
exception raised could be invalid exception if x is sNaN or the Intel
denormal operand exception (I think we generally don't care about that one)
and nothing else (there should be no overflow nor underflow nor inexact and
obviously no division by zero).  If the invalid exception is masked off,
then I believe one can't distinguish between the x + 0.0 and (x + 0.0) + 0.0
computations, already x + 0.0 will raise IE and turn the sNaN into qNaN and
the optional second + 0.0 will just keep that to be a qNaN without further
exceptions, unless there is some library call in between which queries the
accumulated exceptions, clears it etc.  I believe handling that case right
is only possible if we make those dependencies in the IL explicit and under
non-default flags.  In any case, I don't see a difference between the
@3 case where we keep the inner op and the case where we keep the outer op
but remove the inner op.  Both behave the same.

Here is an updated patch with your @3 idea and taking out -frounding-math
stuff.

2019-05-07  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/90356
	* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

	* gcc.dg/tree-ssa/pr90356-1.c: New test.
	* gcc.dg/tree-ssa/pr90356-2.c: New test.
	* gcc.dg/tree-ssa/pr90356-3.c: New test.
	* gcc.dg/tree-ssa/pr90356-4.c: New test.

--- gcc/match.pd.jj	2019-05-07 13:56:53.062954181 +0200
+++ gcc/match.pd	2019-05-07 14:30:36.010474285 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math.  For sNaNs the first operation would raise
+   exceptions but turn the result into qNan, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op@3 @0 REAL_CST@1) REAL_CST@2)
+    (if (real_zerop (@1)
+	 && real_zerop (@2)
+	 && !HONOR_SIGN_DEPENDENT_ROUNDING (type))
+     (with { bool inner_plus = ((inner_op == PLUS_EXPR)
+				^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+	     bool outer_plus
+	       = ((outer_op == PLUS_EXPR)
+		  ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+      (if (outer_plus && !inner_plus)
+       (outer_op @0 @2)
+       @3))))))
+
 /* Simplify x - x.
    This is unsafe for certain floats even in non-IEEE formats.
    In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj	2019-05-07 14:27:17.912654939 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c	2019-05-07 14:27:17.912654939 +0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj	2019-05-07 14:27:17.912654939 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c	2019-05-07 14:27:17.912654939 +0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj	2019-05-07 14:27:17.913654923 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c	2019-05-07 14:27:17.913654923 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj	2019-05-07 14:27:17.913654923 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c	2019-05-07 14:27:17.913654923 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
+
+#include "pr90356-1.c"


	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)
  2019-05-07 12:58         ` Jakub Jelinek
@ 2019-05-07 13:04           ` Richard Biener
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2019-05-07 13:04 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Jeff Law

[-- Attachment #1: Type: text/plain, Size: 7844 bytes --]

On Tue, 7 May 2019, Jakub Jelinek wrote:

> On Tue, May 07, 2019 at 01:50:23PM +0200, Marc Glisse wrote:
> > > And actually it seems that we could optimize the plus1 == plus2 cases
> > > even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetenv
> > > (FE_DOWNWARD) mode the testcase prints the first two (in all other modes all
> > > 4).
> > 
> > It is very hard to judge what is ok with -frounding-math, because that mode
> > is already unusably broken (I use a pass-through asm volatile to protect the
> > arguments and result of every operation instead). One important aspect of
> > the optimization is whether both operations use the same rounding mode, or
> > if there may be a call to fesetround in between. Probably we shouldn't care
> > about -frounding-math, since anyway it is likely that it will use some
> > IFN_FANCY_PLUS instead of PLUS_EXPR if it is ever implemented.
> 
> I haven't thought about
>  t = x + 0.0;
>  fesetround (...);
>  y = t + 0.0;
> indeed, let's take -frounding-math out of the patch now.  If we improve
> that mode, such as through explicit dependencies on the floating point state
> in the IL, we can get back to this case too.
> 
> > > +	(inner_op @0 @1))))))))
> > 
> > Shouldn't you give it a name in the source pattern and return that, instead
> > of creating a new statement? Or are you doing the operation a second time on
> 
> Good idea.
> 
> > purpose in case the rounding mode changed or to force an exception?
> > 
> > > +	(outer_op @0 @2)
> > 
> > With sNaN, this may raise a second exception where we used to have only
> > qNaN+0, no? And the handling of exceptions may have changed in between, etc.
> 
> IEEE 754 I believe says that for x non-zero x + (+/-0.0) = x and the only
> exception raised could be invalid exception if x is sNaN or the Intel
> denormal operand exception (I think we generally don't care about that one)
> and nothing else (there should be no overflow nor underflow nor inexact and
> obviously no division by zero).  If the invalid exception is masked off,
> then I believe one can't distinguish between the x + 0.0 and (x + 0.0) + 0.0
> computations, already x + 0.0 will raise IE and turn the sNaN into qNaN and
> the optional second + 0.0 will just keep that to be a qNaN without further
> exceptions, unless there is some library call in between which queries the
> accumulated exceptions, clears it etc.  I believe handling that case right
> is only possible if we make those dependencies in the IL explicit and under
> non-default flags.  In any case, I don't see a difference between the
> @3 case where we keep the inner op and the case where we keep the outer op
> but remove the inner op.  Both behave the same.
> 
> Here is an updated patch with your @3 idea and taking out -frounding-math
> stuff.

OK if there are no further comments.

Richard.

> 2019-05-07  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/90356
> 	* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
> 
> 	* gcc.dg/tree-ssa/pr90356-1.c: New test.
> 	* gcc.dg/tree-ssa/pr90356-2.c: New test.
> 	* gcc.dg/tree-ssa/pr90356-3.c: New test.
> 	* gcc.dg/tree-ssa/pr90356-4.c: New test.
> 
> --- gcc/match.pd.jj	2019-05-07 13:56:53.062954181 +0200
> +++ gcc/match.pd	2019-05-07 14:30:36.010474285 +0200
> @@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
>   (if (fold_real_zero_addition_p (type, @1, 1))
>    (non_lvalue @0)))
>  
> +/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
> +   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
> +   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
> +   if not -frounding-math.  For sNaNs the first operation would raise
> +   exceptions but turn the result into qNaN, so the second operation
> +   would not raise it.   */
> +(for inner_op (plus minus)
> + (for outer_op (plus minus)
> +  (simplify
> +   (outer_op (inner_op@3 @0 REAL_CST@1) REAL_CST@2)
> +    (if (real_zerop (@1)
> +	 && real_zerop (@2)
> +	 && !HONOR_SIGN_DEPENDENT_ROUNDING (type))
> +     (with { bool inner_plus = ((inner_op == PLUS_EXPR)
> +				^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
> +	     bool outer_plus
> +	       = ((outer_op == PLUS_EXPR)
> +		  ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
> +      (if (outer_plus && !inner_plus)
> +       (outer_op @0 @2)
> +       @3))))))
> +
>  /* Simplify x - x.
>     This is unsafe for certain floats even in non-IEEE formats.
>     In IEEE, it is unsafe because it does wrong for NaNs.
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj	2019-05-07 14:27:17.912654939 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c	2019-05-07 14:27:17.912654939 +0200
> @@ -0,0 +1,23 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
> +
> +double f1 (double x) { return (x + 0.0) + 0.0; }
> +double f2 (double y) { return (y + (-0.0)) + (-0.0); }
> +double f3 (double y) { return (y - 0.0) - 0.0; }
> +double f4 (double x) { return (x - (-0.0)) - (-0.0); }
> +double f5 (double x) { return (x + 0.0) - 0.0; }
> +double f6 (double x) { return (x + (-0.0)) - (-0.0); }
> +double f7 (double x) { return (x - 0.0) + 0.0; }
> +double f8 (double x) { return (x - (-0.0)) + (-0.0); }
> +double f9 (double x) { double t = x + 0.0; return t + 0.0; }
> +double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
> +double f11 (double y) { double t = y - 0.0; return t - 0.0; }
> +double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
> +double f13 (double x) { double t = x + 0.0; return t - 0.0; }
> +double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
> +double f15 (double x) { double t = x - 0.0; return t + 0.0; }
> +double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj	2019-05-07 14:27:17.912654939 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c	2019-05-07 14:27:17.912654939 +0200
> @@ -0,0 +1,8 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
> +
> +#include "pr90356-1.c"
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj	2019-05-07 14:27:17.913654923 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c	2019-05-07 14:27:17.913654923 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
> +
> +#include "pr90356-1.c"
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj	2019-05-07 14:27:17.913654923 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c	2019-05-07 14:27:17.913654923 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
> +
> +#include "pr90356-1.c"
> 
> 
> 	Jakub
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-05-07 13:04 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-07  7:21 [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356) Jakub Jelinek
2019-05-07  7:48 ` Richard Biener
2019-05-07  7:55   ` Jakub Jelinek
2019-05-07  9:11     ` Jakub Jelinek
2019-05-07 11:50       ` Marc Glisse
2019-05-07 12:58         ` Jakub Jelinek
2019-05-07 13:04           ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).