Subject: [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
From: Marek Polacek
Date: 2015-10-13 16:28 UTC
To: GCC Patches, Jakub Jelinek, Richard Biener, Joseph Myers

This patch implements the copysign optimization in reassoc that I promised
I'd look into, i.e.,

CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0
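
To make that concrete, here is a source-level sketch of what the
transformation amounts to (illustrative only; the actual rewrite of
course happens on the reassoc ops vector, not on C source):

  /* Before: CST1 = 7.5, CST2 = 2.0.  */
  float
  before (float x)
  {
    return 7.5f * __builtin_copysignf (2.0f, x);
  }

  /* After the CST1 > 0 rule this is equivalent to:  */
  float
  after (float x)
  {
    return __builtin_copysignf (15.0f, x);
  }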

After getting a bit familiar with reassoc, this wasn't that hard.  But
I'm hopeless when it comes to floating-point stuff, so I'd appreciate it
if you could glance over the tests.  The reassoc-40.c test should address
Joseph's comment in the audit trail (with -fno-rounding-math the
optimization would take place).
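
To illustrate the distinction the two tests rely on (just an
illustration, not part of the patch):

  /* The product 1.2 * 1.1 is inexact in double, so under
     -frounding-math the folded constant could depend on the runtime
     rounding mode and the optimization must not fire (reassoc-40.c).  */
  double
  inexact (double y)
  {
    return 1.2 * __builtin_copysign (1.1, y);
  }

  /* 7.5 * 2.0 == 15.0 is exact, so folding the constants cannot
     depend on the rounding mode (reassoc-39.c).  */
  float
  exact (float x)
  {
    return 7.5f * __builtin_copysignf (2.0f, x);
  }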

For 0.0 * copysign (cst, x), the result is folded to 0.0 well before
reassoc runs, so we probably don't have to pay attention to that case here.
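
For completeness, a tiny example of that case (this assumes -Ofast,
which implies -ffinite-math-only and -fno-signed-zeros, so 0.0 * x is
simplified early):

  /* Already folded to a plain 0.0 long before reassoc runs, so
     attempt_builtin_copysign never sees the multiplication.  */
  double
  zero (double x)
  {
    return 0.0 * __builtin_copysign (2.0, x);
  }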

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-13  Marek Polacek  <polacek@redhat.com>

	PR tree-optimization/67815
	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
	(reassociate_bb): Call it.

	* gcc.dg/tree-ssa/reassoc-39.c: New test.
	* gcc.dg/tree-ssa/reassoc-40.c: New test.

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..b8897b7 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
   return result;
 }
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst1 = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
+    return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+    {
+      if (TREE_CODE (oe->op) == SSA_NAME)
+	{
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+	  if (is_gimple_call (def_stmt))
+	    {
+	      tree fndecl = gimple_call_fndecl (def_stmt);
+	      tree cst2;
+	      switch (DECL_FUNCTION_CODE (fndecl))
+		{
+		CASE_FLT_FN (BUILT_IN_COPYSIGN):
+		  cst2 = gimple_call_arg (def_stmt, 0);
+		  /* The first argument of copysign must be a constant,
+		     otherwise there's nothing to do.  */
+		  if (TREE_CODE (cst2) == REAL_CST)
+		    {
+		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
+					      cst1, cst2);
+		      /* If we couldn't fold to a single constant, skip it.  */
+		      if (mul == NULL_TREE)
+			break;
+		      /* We're going to replace the copysign argument with
+			 the multiplication product.  Remove the constant.  */
+		      ops->pop ();
+		      gimple_call_set_arg (def_stmt, 0, mul);
+		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
+		      /* Handle the CST1 < 0 case -- negate the result.  */
+		      if (cst1_neg)
+			{
+			  tree lhs = gimple_call_lhs (def_stmt);
+			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+			  gimple *negate_stmt
+			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
+			  insert_stmt_after (negate_stmt, def_stmt);
+			  ops->ordered_remove (i);
+			  add_to_ops_vec (ops, negrhs);
+			}
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file, "Optimizing copysign: ");
+			  print_generic_expr (dump_file, cst1, 0);
+			  fprintf (dump_file, " * ");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, cst2, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, ") into %s",
+				   cst1_neg ? "-" : "");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, mul, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, ")\n");
+			}
+		      return;
+		    }
+		  break;
+		default:
+		  break;
+		}
+	    }
+	}
+    }
+}
+
 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
 
 static void
@@ -4764,6 +4853,9 @@ reassociate_bb (basic_block bb)
 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
 		optimize_range_tests (rhs_code, &ops);
 
+	      if (rhs_code == MULT_EXPR)
+		attempt_builtin_copysign (&ops);
+
 	      if (first_pass_instance
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)

	Marek
