public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
@ 2015-10-13 16:28 Marek Polacek
  2015-10-14  9:10 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Marek Polacek @ 2015-10-13 16:28 UTC (permalink / raw)
  To: GCC Patches, Jakub Jelinek, Richard Biener, Joseph Myers

This patch implements the copysign optimization for reassoc I promised
I'd look into.  I.e.,

CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0

After getting familiar with reassoc a bit this wasn't that hard.  But
I'm hopeless when it comes to floating-point stuff, so I'd appreciate
if you could glance over the tests.  The reassoc-40.c should address
Joseph's comment in the audit trail (with -fno-rounding-math the
optimization would take place).

For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
reassoc, so we probably don't have to pay attention to this case.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-13  Marek Polacek  <polacek@redhat.com>

	PR tree-optimization/67815
	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
	(reassociate_bb): Call it.

	* gcc.dg/tree-ssa/reassoc-39.c: New test.
	* gcc.dg/tree-ssa/reassoc-40.c: New test.

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..b8897b7 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
   return result;
 }
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst1 = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
+    return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+    {
+      if (TREE_CODE (oe->op) == SSA_NAME)
+	{
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+	  if (is_gimple_call (def_stmt))
+	    {
+	      tree fndecl = gimple_call_fndecl (def_stmt);
+	      tree cst2;
+	      switch (DECL_FUNCTION_CODE (fndecl))
+		{
+		CASE_FLT_FN (BUILT_IN_COPYSIGN):
+		  cst2 = gimple_call_arg (def_stmt, 0);
+		  /* The first argument of copysign must be a constant,
+		     otherwise there's nothing to do.  */
+		  if (TREE_CODE (cst2) == REAL_CST)
+		    {
+		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
+					      cst1, cst2);
+		      /* If we couldn't fold to a single constant, skip it.  */
+		      if (mul == NULL_TREE)
+			break;
+		      /* We're going to replace the copysign argument with
+			 the multiplication product.  Remove the constant.  */
+		      ops->pop ();
+		      gimple_call_set_arg (def_stmt, 0, mul);
+		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
+		      /* Handle the CST1 < 0 case -- negate the result.  */
+		      if (cst1_neg)
+			{
+			  tree lhs = gimple_call_lhs (def_stmt);
+			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+			  gimple *negate_stmt
+			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
+			  insert_stmt_after (negate_stmt, def_stmt);
+			  ops->ordered_remove (i);
+			  add_to_ops_vec (ops, negrhs);
+			}
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file, "Optimizing copysign: ");
+			  print_generic_expr (dump_file, cst1, 0);
+			  fprintf (dump_file, " * ");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, cst2, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, ") into %s",
+				   cst1_neg ? "-" : "");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, mul, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, "\n");
+			}
+		      return;
+		    }
+		  break;
+		default:
+		  break;
+		}
+	    }
+	}
+    }
+}
+
 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
 
 static void
@@ -4764,6 +4853,9 @@ reassociate_bb (basic_block bb)
 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
 		optimize_range_tests (rhs_code, &ops);
 
+	      if (rhs_code == MULT_EXPR)
+		attempt_builtin_copysign (&ops);
+
 	      if (first_pass_instance
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)

	Marek

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
  2015-10-13 16:28 [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815) Marek Polacek
@ 2015-10-14  9:10 ` Richard Biener
  2015-10-14 12:20   ` Marek Polacek
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2015-10-14  9:10 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches, Jakub Jelinek, Joseph Myers

On Tue, 13 Oct 2015, Marek Polacek wrote:

> This patch implements the copysign optimization for reassoc I promised
> I'd look into.  I.e.,
> 
> CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
> CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0
> 
> After getting familiar with reassoc a bit this wasn't that hard.  But
> I'm hopeless when it comes to floating-point stuff, so I'd appreciate
> if you could glance over the tests.  The reassoc-40.c should address
> Joseph's comment in the audit trail (with -fno-rounding-math the
> optimization would take place).
> 
> For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
> reassoc, so we probably don't have to pay attention to this case.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2015-10-13  Marek Polacek  <polacek@redhat.com>
> 
> 	PR tree-optimization/67815
> 	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
> 	(reassociate_bb): Call it.
> 
> 	* gcc.dg/tree-ssa/reassoc-39.c: New test.
> 	* gcc.dg/tree-ssa/reassoc-40.c: New test.
> 
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> index e69de29..589d06b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> @@ -0,0 +1,41 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
> +
> +float
> +f0 (float x)
> +{
> +  return 7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +float
> +f1 (float x)
> +{
> +  return -7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +double
> +f2 (double x, double y)
> +{
> +  return x * ((1.0/12) * __builtin_copysign (1.0, y));
> +}
> +
> +double
> +f3 (double x, double y)
> +{
> +  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
> +}
> +
> +double
> +f4 (double x, double y, double z)
> +{
> +  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
> +}
> +
> +double
> +f5 (double x, double y, double z)
> +{
> +  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> index e69de29..d65bcc1b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
> +
> +/* Test that the copysign reassoc optimization doesn't fire for
> +   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
> +   is inexact.  */
> +
> +double
> +f1 (double y)
> +{
> +  return (1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +double
> +f2 (double y)
> +{
> +  return (-1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
> diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
> index 879722e..b8897b7 100644
> --- gcc/tree-ssa-reassoc.c
> +++ gcc/tree-ssa-reassoc.c
> @@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
>    return result;
>  }
>  
> +/* Attempt to optimize
> +   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
> +   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
> +
> +static void
> +attempt_builtin_copysign (vec<operand_entry *> *ops)
> +{
> +  operand_entry *oe;
> +  unsigned int i;
> +  unsigned int length = ops->length ();
> +  tree cst1 = ops->last ()->op;
> +
> +  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
> +    return;
> +
> +  FOR_EACH_VEC_ELT (*ops, i, oe)
> +    {
> +      if (TREE_CODE (oe->op) == SSA_NAME)

I think you need to check whether the SSA_NAME has a single use only
as you are changing its value.  Which also means you shouldn't be
"reusing" it (because existing debug stmts will then be wrong).
Thus you have to replace it.

> +	{
> +	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +	  if (is_gimple_call (def_stmt))
> +	    {
> +	      tree fndecl = gimple_call_fndecl (def_stmt);
> +	      tree cst2;
> +	      switch (DECL_FUNCTION_CODE (fndecl))
> +		{
> +		CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +		  cst2 = gimple_call_arg (def_stmt, 0);
> +		  /* The first argument of copysign must be a constant,
> +		     otherwise there's nothing to do.  */
> +		  if (TREE_CODE (cst2) == REAL_CST)
> +		    {
> +		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
> +					      cst1, cst2);
> +		      /* If we couldn't fold to a single constant, skip it.  */
> +		      if (mul == NULL_TREE)
> +			break;
> +		      /* We're going to replace the copysign argument with
> +			 the multiplication product.  Remove the constant.  */
> +		      ops->pop ();
> +		      gimple_call_set_arg (def_stmt, 0, mul);
> +		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
> +		      /* Handle the CST1 < 0 case -- negate the result.  */
> +		      if (cst1_neg)
> +			{
> +			  tree lhs = gimple_call_lhs (def_stmt);
> +			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
> +			  gimple *negate_stmt
> +			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
> +			  insert_stmt_after (negate_stmt, def_stmt);
> +			  ops->ordered_remove (i);
> +			  add_to_ops_vec (ops, negrhs);

Why use ordered remove and add_to_ops_vec here?  Just replace the entry?

Thanks,
Richard.

> +			}
> +		      if (dump_file && (dump_flags & TDF_DETAILS))
> +			{
> +			  fprintf (dump_file, "Optimizing copysign: ");
> +			  print_generic_expr (dump_file, cst1, 0);
> +			  fprintf (dump_file, " * ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, cst2, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_arg (def_stmt, 1),
> +					      0);
> +			  fprintf (dump_file, ") into %s",
> +				   cst1_neg ? "-" : "");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, mul, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_arg (def_stmt, 1),
> +					      0);
> +			  fprintf (dump_file, "\n");
> +			}
> +		      return;
> +		    }
> +		  break;
> +		default:
> +		  break;
> +		}
> +	    }
> +	}
> +    }
> +}
> +
>  /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
>  
>  static void
> @@ -4764,6 +4853,9 @@ reassociate_bb (basic_block bb)
>  	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
>  		optimize_range_tests (rhs_code, &ops);
>  
> +	      if (rhs_code == MULT_EXPR)
> +		attempt_builtin_copysign (&ops);
> +
>  	      if (first_pass_instance
>  		  && rhs_code == MULT_EXPR
>  		  && flag_unsafe_math_optimizations)
> 
> 	Marek
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
  2015-10-14  9:10 ` Richard Biener
@ 2015-10-14 12:20   ` Marek Polacek
  2015-10-14 12:44     ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Marek Polacek @ 2015-10-14 12:20 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jakub Jelinek, Joseph Myers

On Wed, Oct 14, 2015 at 11:10:38AM +0200, Richard Biener wrote:
> > +  FOR_EACH_VEC_ELT (*ops, i, oe)
> > +    {
> > +      if (TREE_CODE (oe->op) == SSA_NAME)
> 
> I think you need to check whether the SSA_NAME has a single use only
> as you are changing its value.  Which also means you shouldn't be
> "reusing" it (because existing debug stmts will then be wrong).
> Thus you have to replace it.
 
Changed as per our discussion on IRC.  I'm building a new call while
the old one is going to be cleaned up by subsequent DCE.

> > +			  ops->ordered_remove (i);
> > +			  add_to_ops_vec (ops, negrhs);
> 
> Why use ordered remove and add_to_ops_vec here?  Just replace the entry?

Fixed.

I also added a new test.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-14  Marek Polacek  <polacek@redhat.com>

	PR tree-optimization/67815
	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
	(reassociate_bb): Call it.

	* gcc.dg/tree-ssa/reassoc-39.c: New test.
	* gcc.dg/tree-ssa/reassoc-40.c: New test.
	* gcc.dg/tree-ssa/reassoc-41.c: New test.

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
index e69de29..8a18b88 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fno-rounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization does fire for
+   -fno-rounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 2 "reassoc1"} }*/
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..62438dd 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,102 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
   return result;
 }
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst) != REAL_CST)
+    return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+    {
+      if (TREE_CODE (oe->op) == SSA_NAME
+	  && has_single_use (oe->op))
+	{
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+	  if (is_gimple_call (def_stmt))
+	    {
+	      tree fndecl = gimple_call_fndecl (def_stmt);
+	      tree arg0, arg1;
+	      switch (DECL_FUNCTION_CODE (fndecl))
+		{
+		CASE_FLT_FN (BUILT_IN_COPYSIGN):
+		  arg0 = gimple_call_arg (def_stmt, 0);
+		  arg1 = gimple_call_arg (def_stmt, 1);
+		  /* The first argument of copysign must be a constant,
+		     otherwise there's nothing to do.  */
+		  if (TREE_CODE (arg0) == REAL_CST)
+		    {
+		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst),
+					      cst, arg0);
+		      /* If we couldn't fold to a single constant, skip it.
+			 That happens e.g. for inexact multiplication when
+			 -frounding-math.  */
+		      if (mul == NULL_TREE)
+			break;
+		      /* Instead of adjusting the old DEF_STMT, let's build
+			 a new call to not leak the LHS and prevent keeping
+			 bogus debug statements.  DCE will clean up the old
+			 call.  */
+		      gcall *call = gimple_build_call (fndecl, 2, mul, arg1);
+		      tree lhs = make_ssa_name (TREE_TYPE (arg0));
+		      gimple_call_set_lhs (call, lhs);
+		      gimple_set_location (call, gimple_location (def_stmt));
+		      insert_stmt_after (call, def_stmt);
+		      /* We've used the constant, get rid of it.  */
+		      ops->pop ();
+		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
+		      /* Handle the CST1 < 0 case by negating the result.  */
+		      if (cst1_neg)
+			{
+			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+			  gimple *negate_stmt
+			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
+			  insert_stmt_after (negate_stmt, call);
+			  oe->op = negrhs;
+			}
+		      else
+			oe->op = lhs;
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file, "Optimizing copysign: ");
+			  print_generic_expr (dump_file, cst, 0);
+			  fprintf (dump_file, " * ");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, arg0, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file, arg1, 0);
+			  fprintf (dump_file, ") into %s",
+				   cst1_neg ? "-" : "");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, mul, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file, arg1, 0);
+			  fprintf (dump_file, "\n");
+			}
+		      return;
+		    }
+		  break;
+		default:
+		  break;
+		}
+	    }
+	}
+    }
+}
+
 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
 
 static void
@@ -4764,6 +4860,9 @@ reassociate_bb (basic_block bb)
 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
 		optimize_range_tests (rhs_code, &ops);
 
+	      if (rhs_code == MULT_EXPR)
+		attempt_builtin_copysign (&ops);
+
 	      if (first_pass_instance
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)

	Marek

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
  2015-10-14 12:20   ` Marek Polacek
@ 2015-10-14 12:44     ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2015-10-14 12:44 UTC (permalink / raw)
  To: Marek Polacek; +Cc: GCC Patches, Jakub Jelinek, Joseph Myers

On Wed, 14 Oct 2015, Marek Polacek wrote:

> On Wed, Oct 14, 2015 at 11:10:38AM +0200, Richard Biener wrote:
> > > +  FOR_EACH_VEC_ELT (*ops, i, oe)
> > > +    {
> > > +      if (TREE_CODE (oe->op) == SSA_NAME)
> > 
> > I think you need to check whether the SSA_NAME has a single use only
> > as you are changing its value.  Which also means you shouldn't be
> > "reusing" it (because existing debug stmts will then be wrong).
> > Thus you have to replace it.
>  
> Changed as per our discussion on IRC.  I'm building a new call while
> the old one is going to be cleaned up by subsequent DCE.
> 
> > > +			  ops->ordered_remove (i);
> > > +			  add_to_ops_vec (ops, negrhs);
> > 
> > Why use ordered remove and add_to_ops_vec here?  Just replace the entry?
> 
> Fixed.
> 
> I also added a new test.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2015-10-14  Marek Polacek  <polacek@redhat.com>
> 
> 	PR tree-optimization/67815
> 	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
> 	(reassociate_bb): Call it.
> 
> 	* gcc.dg/tree-ssa/reassoc-39.c: New test.
> 	* gcc.dg/tree-ssa/reassoc-40.c: New test.
> 	* gcc.dg/tree-ssa/reassoc-41.c: New test.
> 
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> index e69de29..589d06b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> @@ -0,0 +1,41 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
> +
> +float
> +f0 (float x)
> +{
> +  return 7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +float
> +f1 (float x)
> +{
> +  return -7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +double
> +f2 (double x, double y)
> +{
> +  return x * ((1.0/12) * __builtin_copysign (1.0, y));
> +}
> +
> +double
> +f3 (double x, double y)
> +{
> +  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
> +}
> +
> +double
> +f4 (double x, double y, double z)
> +{
> +  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
> +}
> +
> +double
> +f5 (double x, double y, double z)
> +{
> +  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> index e69de29..d65bcc1b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
> +
> +/* Test that the copysign reassoc optimization doesn't fire for
> +   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
> +   is inexact.  */
> +
> +double
> +f1 (double y)
> +{
> +  return (1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +double
> +f2 (double y)
> +{
> +  return (-1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
> index e69de29..8a18b88 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fno-rounding-math -fdump-tree-reassoc1-details" } */
> +
> +/* Test that the copysign reassoc optimization does fire for
> +   -fno-rounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
> +   is inexact.  */
> +
> +double
> +f1 (double y)
> +{
> +  return (1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +double
> +f2 (double y)
> +{
> +  return (-1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing copysign" 2 "reassoc1"} }*/
> diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
> index 879722e..62438dd 100644
> --- gcc/tree-ssa-reassoc.c
> +++ gcc/tree-ssa-reassoc.c
> @@ -4622,6 +4622,102 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
>    return result;
>  }
>  
> +/* Attempt to optimize
> +   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
> +   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
> +
> +static void
> +attempt_builtin_copysign (vec<operand_entry *> *ops)
> +{
> +  operand_entry *oe;
> +  unsigned int i;
> +  unsigned int length = ops->length ();
> +  tree cst = ops->last ()->op;
> +
> +  if (length == 1 || TREE_CODE (cst) != REAL_CST)
> +    return;
> +
> +  FOR_EACH_VEC_ELT (*ops, i, oe)
> +    {
> +      if (TREE_CODE (oe->op) == SSA_NAME
> +	  && has_single_use (oe->op))
> +	{
> +	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +	  if (is_gimple_call (def_stmt))
> +	    {
> +	      tree fndecl = gimple_call_fndecl (def_stmt);
> +	      tree arg0, arg1;
> +	      switch (DECL_FUNCTION_CODE (fndecl))
> +		{
> +		CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +		  arg0 = gimple_call_arg (def_stmt, 0);
> +		  arg1 = gimple_call_arg (def_stmt, 1);
> +		  /* The first argument of copysign must be a constant,
> +		     otherwise there's nothing to do.  */
> +		  if (TREE_CODE (arg0) == REAL_CST)
> +		    {
> +		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst),
> +					      cst, arg0);
> +		      /* If we couldn't fold to a single constant, skip it.
> +			 That happens e.g. for inexact multiplication when
> +			 -frounding-math.  */
> +		      if (mul == NULL_TREE)
> +			break;
> +		      /* Instead of adjusting the old DEF_STMT, let's build
> +			 a new call to not leak the LHS and prevent keeping
> +			 bogus debug statements.  DCE will clean up the old
> +			 call.  */
> +		      gcall *call = gimple_build_call (fndecl, 2, mul, arg1);
> +		      tree lhs = make_ssa_name (TREE_TYPE (arg0));
> +		      gimple_call_set_lhs (call, lhs);
> +		      gimple_set_location (call, gimple_location (def_stmt));
> +		      insert_stmt_after (call, def_stmt);
> +		      /* We've used the constant, get rid of it.  */
> +		      ops->pop ();
> +		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
> +		      /* Handle the CST1 < 0 case by negating the result.  */
> +		      if (cst1_neg)
> +			{
> +			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
> +			  gimple *negate_stmt
> +			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
> +			  insert_stmt_after (negate_stmt, call);
> +			  oe->op = negrhs;
> +			}
> +		      else
> +			oe->op = lhs;
> +		      if (dump_file && (dump_flags & TDF_DETAILS))
> +			{
> +			  fprintf (dump_file, "Optimizing copysign: ");
> +			  print_generic_expr (dump_file, cst, 0);
> +			  fprintf (dump_file, " * ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, arg0, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file, arg1, 0);
> +			  fprintf (dump_file, ") into %s",
> +				   cst1_neg ? "-" : "");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, mul, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file, arg1, 0);
> +			  fprintf (dump_file, "\n");
> +			}
> +		      return;
> +		    }
> +		  break;
> +		default:
> +		  break;
> +		}
> +	    }
> +	}
> +    }
> +}
> +
>  /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
>  
>  static void
> @@ -4764,6 +4860,9 @@ reassociate_bb (basic_block bb)
>  	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
>  		optimize_range_tests (rhs_code, &ops);
>  
> +	      if (rhs_code == MULT_EXPR)
> +		attempt_builtin_copysign (&ops);
> +
>  	      if (first_pass_instance
>  		  && rhs_code == MULT_EXPR
>  		  && flag_unsafe_math_optimizations)
> 
> 	Marek
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-10-14 12:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-13 16:28 [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815) Marek Polacek
2015-10-14  9:10 ` Richard Biener
2015-10-14 12:20   ` Marek Polacek
2015-10-14 12:44     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).