public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)
@ 2011-05-25 19:18 William J. Schmidt
  2011-05-26 10:33 ` Richard Guenther
  0 siblings, 1 reply; 6+ messages in thread
From: William J. Schmidt @ 2011-05-25 19:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.guenther

This patch adds logic to gimple_expand_builtin_pow () to optimize
pow(x,y), where y is one of 0.5, 0.25, 0.75, 1./3., or 1./6.  I noticed
that there were two missing calls to gimple_set_location () in my
previous patch, so I've corrected those here as well.

There's one TODO comment in this patch.  I don't believe the test for
TREE_SIDE_EFFECTS (arg0) should be necessary; but I'm not convinced it
was necessary in the code whence I copied it, either, so I left it in
for comment in case I'm misunderstanding something.


2011-05-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/46728
	* tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
	(powi_as_mults): Add gimple_set_location.
	(build_and_insert_call): New.
	(gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
	0.5, 0.25, 0.75, 1./3., or 1./6.
	
Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c	(revision 174199)
+++ gcc/tree-ssa-math-opts.c	(working copy)
@@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
     }
 
   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
+  gimple_set_location (mult_stmt, loc);
   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
 
   return ssa_target;
@@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target, 
 					   build_real (type, dconst1),
 					   result);
+  gimple_set_location (div_stmt, loc);
   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
 
   return target;
@@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
   return NULL_TREE;
 }
 
+/* Build a gimple call statement that calls FN with argument ARG.
+   Set the lhs of the call statement to a fresh SSA name for
+   variable VAR.  If VAR is NULL, first allocate it.  Insert the
+   statement prior to GSI's current position, and return the fresh
+   SSA name.  */
+
+static tree
+build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
+		       tree *var, location_t loc)
+{
+  gimple call_stmt;
+  tree ssa_target;
+
+  if (!*var)
+    {
+      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
+      add_referenced_var (*var);
+    }
+
+  call_stmt = gimple_build_call (fn, 1, arg);
+  ssa_target = make_ssa_name (*var, NULL);
+  gimple_set_lhs (call_stmt, ssa_target);
+  gimple_set_location (call_stmt, loc);
+  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
+
+  return ssa_target;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1035,6 +1065,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
 {
   REAL_VALUE_TYPE c, cint;
   HOST_WIDE_INT n;
+  tree type, sqrtfn, target = NULL_TREE;
+  enum machine_mode mode;
 
   /* If the exponent isn't a constant, there's nothing of interest
      to be done.  */
@@ -1054,6 +1086,108 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
 	      && powi_cost (n) <= POWI_MAX_MULTS)))
     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
 
+  /* Attempt various optimizations using sqrt and cbrt.  */
+  type = TREE_TYPE (arg0);
+  mode = TYPE_MODE (type);
+  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
+
+  if (flag_unsafe_math_optimizations && sqrtfn != NULL_TREE)
+    {
+      REAL_VALUE_TYPE dconst1_4, dconst3_4;
+      tree cbrtfn;
+      bool hw_sqrt_exists;
+
+      /* Optimize pow(x,0.5) = sqrt(x).  */
+      if (REAL_VALUES_EQUAL (c, dconsthalf))
+	return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  */
+      dconst1_4 = dconst1;
+      SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
+      hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
+
+      if (REAL_VALUES_EQUAL (c, dconst1_4) && hw_sqrt_exists)
+	{
+	  tree sqrt_arg0;
+
+	  /* sqrt(x)  */
+	  sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+	  /* sqrt(sqrt(x))  */
+	  return build_and_insert_call (gsi, sqrtfn, sqrt_arg0,	&target, loc);
+	}
+      
+      /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)).  */
+      real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
+      SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
+
+      if (optimize_function_for_speed_p (cfun)
+	  && !TREE_SIDE_EFFECTS (arg0) /* TODO: is this needed?  */
+	  && REAL_VALUES_EQUAL (c, dconst3_4)
+	  && hw_sqrt_exists)
+	{
+	  tree sqrt_arg0, sqrt_sqrt, ssa_target;
+	  gimple mult_stmt;
+
+	  /* sqrt(x)  */
+	  sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+	  /* sqrt(sqrt(x))  */
+	  sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0,
+					     &target, loc);
+	  /* sqrt(x) * sqrt(sqrt(x))  */
+	  ssa_target = make_ssa_name (target, NULL);
+	  mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
+						    sqrt_arg0, sqrt_sqrt);
+	  gimple_set_location (mult_stmt, loc);
+	  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
+
+	  return ssa_target;
+	}
+
+      /* Optimize pow(x,1./3.) = cbrt(x), and pow(x,1./6.) = cbrt(sqrt(x)).  */
+      cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
+
+      if (cbrtfn != NULL_TREE)
+	{
+	  /* pow(x,1./3.) => cbrt(x).  */
+	  const REAL_VALUE_TYPE dconst1_3
+	    = real_value_truncate (mode, dconst_third ());
+
+	  if (REAL_VALUES_EQUAL (c, dconst1_3))
+	    return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
+
+	  /* pow(x,1./6.) => cbrt(sqrt(x)).  */
+	  if (optimize_function_for_speed_p (cfun) && hw_sqrt_exists)
+	    {
+	      REAL_VALUE_TYPE dconst1_6 = dconst1_3;
+	      SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
+
+	      if (REAL_VALUES_EQUAL (c, dconst1_6))
+		{
+		  tree sqrt_arg0;
+
+		  /* sqrt(x)  */
+		  sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0,
+						     &target, loc);
+		  /* cbrt(sqrt(x))  */
+		  return build_and_insert_call (gsi, cbrtfn, sqrt_arg0,
+						&target, loc);
+		}
+	    }
+	}
+    }
+
+  /* We allow one transformation when flag_unsafe_math_optimizations is
+     false.  Replacing pow(x,0.5) with sqrt(x) is safe, provided signed
+     zeros must not be maintained.  For x = -0, the former produces +0,
+     and the latter produces -0.  */
+  else if (sqrtfn != NULL_TREE
+	   && !HONOR_SIGNED_ZEROS (mode)
+	   && REAL_VALUES_EQUAL (c, dconsthalf))
+
+    return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
   return NULL_TREE;
 }
 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)
  2011-05-25 19:18 [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3) William J. Schmidt
@ 2011-05-26 10:33 ` Richard Guenther
  2011-05-26 18:40   ` William J. Schmidt
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Guenther @ 2011-05-26 10:33 UTC (permalink / raw)
  To: William J. Schmidt; +Cc: gcc-patches

On Wed, May 25, 2011 at 7:43 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This patch adds logic to gimple_expand_builtin_pow () to optimize
> pow(x,y), where y is one of 0.5, 0.25, 0.75, 1./3., or 1./6.  I noticed
> that there were two missing calls to gimple_set_location () in my
> previous patch, so I've corrected those here as well.
>
> There's one TODO comment in this patch.  I don't believe the test for
> TREE_SIDE_EFFECTS (arg0) should be necessary; but I'm not convinced it
> was necessary in the code whence I copied it, either, so I left it in
> for comment in case I'm misunderstanding something.
>
>
> 2011-05-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/46728
>        * tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
>        (powi_as_mults): Add gimple_set_location.
>        (build_and_insert_call): New.
>        (gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
>        0.5, 0.25, 0.75, 1./3., or 1./6.
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 174199)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
>     }
>
>   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
> +  gimple_set_location (mult_stmt, loc);
>   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
>
>   return ssa_target;
> @@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
>   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
>                                           build_real (type, dconst1),
>                                           result);
> +  gimple_set_location (div_stmt, loc);
>   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
>
>   return target;
> @@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
>   return NULL_TREE;
>  }
>
> +/* Build a gimple call statement that calls FN with argument ARG.
> +   Set the lhs of the call statement to a fresh SSA name for
> +   variable VAR.  If VAR is NULL, first allocate it.  Insert the
> +   statement prior to GSI's current position, and return the fresh
> +   SSA name.  */
> +
> +static tree
> +build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
> +                      tree *var, location_t loc)
> +{
> +  gimple call_stmt;
> +  tree ssa_target;
> +
> +  if (!*var)
> +    {
> +      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
> +      add_referenced_var (*var);
> +    }
> +
> +  call_stmt = gimple_build_call (fn, 1, arg);
> +  ssa_target = make_ssa_name (*var, NULL);
> +  gimple_set_lhs (call_stmt, ssa_target);
> +  gimple_set_location (call_stmt, loc);
> +  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
> +
> +  return ssa_target;
> +}
> +
>  /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
>    with location info LOC.  If possible, create an equivalent and
>    less expensive sequence of statements prior to GSI, and return an
> @@ -1035,6 +1065,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>  {
>   REAL_VALUE_TYPE c, cint;
>   HOST_WIDE_INT n;
> +  tree type, sqrtfn, target = NULL_TREE;
> +  enum machine_mode mode;
>
>   /* If the exponent isn't a constant, there's nothing of interest
>      to be done.  */
> @@ -1054,6 +1086,108 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>              && powi_cost (n) <= POWI_MAX_MULTS)))
>     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
>
> +  /* Attempt various optimizations using sqrt and cbrt.  */
> +  type = TREE_TYPE (arg0);
> +  mode = TYPE_MODE (type);
> +  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
> +
> +  if (flag_unsafe_math_optimizations && sqrtfn != NULL_TREE)
> +    {
> +      REAL_VALUE_TYPE dconst1_4, dconst3_4;
> +      tree cbrtfn;
> +      bool hw_sqrt_exists;
> +
> +      /* Optimize pow(x,0.5) = sqrt(x).  */
> +      if (REAL_VALUES_EQUAL (c, dconsthalf))
> +       return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);

This should be done if !HONOR_SIGNED_ZEROS instead.

> +
> +      /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  */
> +      dconst1_4 = dconst1;
> +      SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
> +      hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
> +
> +      if (REAL_VALUES_EQUAL (c, dconst1_4) && hw_sqrt_exists)
> +       {
> +         tree sqrt_arg0;
> +
> +         /* sqrt(x)  */
> +         sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +         /* sqrt(sqrt(x))  */
> +         return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> +       }
> +
> +      /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)).  */
> +      real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
> +      SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
> +
> +      if (optimize_function_for_speed_p (cfun)
> +         && !TREE_SIDE_EFFECTS (arg0) /* TODO: is this needed?  */

Not needed as we are in gimple form.

> +         && REAL_VALUES_EQUAL (c, dconst3_4)
> +         && hw_sqrt_exists)
> +       {
> +         tree sqrt_arg0, sqrt_sqrt, ssa_target;
> +         gimple mult_stmt;
> +
> +         /* sqrt(x)  */
> +         sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +         /* sqrt(sqrt(x))  */
> +         sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0,
> +                                            &target, loc);
> +         /* sqrt(x) * sqrt(sqrt(x))  */
> +         ssa_target = make_ssa_name (target, NULL);
> +         mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
> +                                                   sqrt_arg0, sqrt_sqrt);
> +         gimple_set_location (mult_stmt, loc);
> +         gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
> +
> +         return ssa_target;
> +       }
> +
> +      /* Optimize pow(x,1./3.) = cbrt(x), and pow(x,1./6.) = cbrt(sqrt(x)).  */
> +      cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
> +
> +      if (cbrtfn != NULL_TREE)
> +       {
> +         /* pow(x,1./3.) => cbrt(x).  */
> +         const REAL_VALUE_TYPE dconst1_3
> +           = real_value_truncate (mode, dconst_third ());
> +
> +         if (REAL_VALUES_EQUAL (c, dconst1_3))
> +           return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
> +
> +         /* pow(x,1./6.) => cbrt(sqrt(x)).  */
> +         if (optimize_function_for_speed_p (cfun) && hw_sqrt_exists)
> +           {
> +             REAL_VALUE_TYPE dconst1_6 = dconst1_3;
> +             SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
> +
> +             if (REAL_VALUES_EQUAL (c, dconst1_6))
> +               {
> +                 tree sqrt_arg0;
> +
> +                 /* sqrt(x)  */
> +                 sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0,
> +                                                    &target, loc);
> +                 /* cbrt(sqrt(x))  */
> +                 return build_and_insert_call (gsi, cbrtfn, sqrt_arg0,
> +                                               &target, loc);
> +               }
> +           }
> +       }
> +    }

There are several extra pre-conditions in the original code in
builtins.c as well as commens for reasonings (yes, there seem
to be duplicates of several of the transforms there ...).  Please
try to preserve them.  I noticed especially

              /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
                 machines that a builtin sqrt instruction is smaller than a
                 call to pow with 0.25, so do this optimization even if
                 -Os.  */

and

      /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
         cbrt/sqrts instead of pow (x, 1./6.).  */
      if (cbrtfn && ! op
          && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))

where you omit the non-negative check and the check for NANs.
Note that -funsafe-math-optimizations does not mean you can
ignore nans or signed zero issues.  pow(x,1/3) -> cbrt(x) does not
need a -funsafe-math-optimization check either.

It would be nice if you could re-arrange this function so that before
each individual replacement all preconditions are checked (even if
that means duplicating some checks and code) - that way
reviewing and later maintainance should be much easier.

Thanks,
Richard.

> +  /* We allow one transformation when flag_unsafe_math_optimizations is
> +     false.  Replacing pow(x,0.5) with sqrt(x) is safe, provided signed
> +     zeros must not be maintained.  For x = -0, the former produces +0,
> +     and the latter produces -0.  */
> +  else if (sqrtfn != NULL_TREE
> +          && !HONOR_SIGNED_ZEROS (mode)
> +          && REAL_VALUES_EQUAL (c, dconsthalf))
> +
> +    return build_and_insert_call (gsi, sqrtfn, arg0, &target, l

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)
  2011-05-26 10:33 ` Richard Guenther
@ 2011-05-26 18:40   ` William J. Schmidt
  2011-05-26 18:41     ` Joseph S. Myers
  2011-05-27 13:10     ` Richard Guenther
  0 siblings, 2 replies; 6+ messages in thread
From: William J. Schmidt @ 2011-05-26 18:40 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches


On Thu, 2011-05-26 at 11:22 +0200, Richard Guenther wrote:

<snip>


> There are several extra pre-conditions in the original code in
> builtins.c as well as commens for reasonings (yes, there seem
> to be duplicates of several of the transforms there ...).  Please
> try to preserve them.  I noticed especially
> 
>               /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
>                  machines that a builtin sqrt instruction is smaller than a
>                  call to pow with 0.25, so do this optimization even if
>                  -Os.  */
> 
> and
> 
>       /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
>          cbrt/sqrts instead of pow (x, 1./6.).  */
>       if (cbrtfn && ! op
>           && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))
> 
> where you omit the non-negative check and the check for NANs.
> Note that -funsafe-math-optimizations does not mean you can
> ignore nans or signed zero issues.  pow(x,1/3) -> cbrt(x) does not
> need a -funsafe-math-optimization check either.

OK.  Good catch on the non-negative/NaN check -- that just slipped
through the cracks.  I've fixed this and beefed up the comments as you
suggest.

> 
> It would be nice if you could re-arrange this function so that before
> each individual replacement all preconditions are checked (even if
> that means duplicating some checks and code) - that way
> reviewing and later maintainance should be much easier.
> 

Done -- you're right, the result is much cleaner.  Hopefully this
version is easier to follow.

Thanks, 
Bill


2011-05-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/46728
	* tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
	(powi_as_mults): Add gimple_set_location.
	(build_and_insert_call): New.
	(gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
	0.5, 0.25, 0.75, 1./3., or 1./6.
	

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c	(revision 174199)
+++ gcc/tree-ssa-math-opts.c	(working copy)
@@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
     }
 
   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
+  gimple_set_location (mult_stmt, loc);
   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
 
   return ssa_target;
@@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target, 
 					   build_real (type, dconst1),
 					   result);
+  gimple_set_location (div_stmt, loc);
   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
 
   return target;
@@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
   return NULL_TREE;
 }
 
+/* Build a gimple call statement that calls FN with argument ARG.
+   Set the lhs of the call statement to a fresh SSA name for
+   variable VAR.  If VAR is NULL, first allocate it.  Insert the
+   statement prior to GSI's current position, and return the fresh
+   SSA name.  */
+
+static tree
+build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
+		       tree *var, location_t loc)
+{
+  gimple call_stmt;
+  tree ssa_target;
+
+  if (!*var)
+    {
+      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
+      add_referenced_var (*var);
+    }
+
+  call_stmt = gimple_build_call (fn, 1, arg);
+  ssa_target = make_ssa_name (*var, NULL);
+  gimple_set_lhs (call_stmt, ssa_target);
+  gimple_set_location (call_stmt, loc);
+  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
+
+  return ssa_target;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1033,16 +1063,21 @@ static tree
 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, 
 			   tree arg0, tree arg1)
 {
-  REAL_VALUE_TYPE c, cint;
+  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
   HOST_WIDE_INT n;
+  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
+  tree target = NULL_TREE;
+  enum machine_mode mode;
+  bool hw_sqrt_exists;
+  gimple mult_stmt;
 
   /* If the exponent isn't a constant, there's nothing of interest
      to be done.  */
   if (TREE_CODE (arg1) != REAL_CST)
     return NULL_TREE;
 
-  /* If the exponent is equivalent to an integer, expand it into
-     multiplies when profitable.  */
+  /* If the exponent is equivalent to an integer, expand to an optimal
+     multiplication sequence when profitable.  */
   c = TREE_REAL_CST (arg1);
   n = real_to_integer (&c);
   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
@@ -1054,6 +1089,97 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
 	      && powi_cost (n) <= POWI_MAX_MULTS)))
     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
 
+  /* Attempt various optimizations using sqrt and cbrt.  */
+  type = TREE_TYPE (arg0);
+  mode = TYPE_MODE (type);
+  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
+
+  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
+     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
+     sqrt(-0) = -0.  */
+  if (sqrtfn
+      && REAL_VALUES_EQUAL (c, dconsthalf)
+      && (flag_unsafe_math_optimizations
+	  || !HONOR_SIGNED_ZEROS (mode)))
+    return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
+     a builtin sqrt instruction is smaller than a call to pow with 0.25,
+     so do this optimization even if -Os.  Don't do this optimization
+     if we don't have a hardware sqrt insn.  */
+  dconst1_4 = dconst1;
+  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
+  hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && REAL_VALUES_EQUAL (c, dconst1_4)
+      && hw_sqrt_exists)
+    {
+      /* sqrt(x)  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* sqrt(sqrt(x))  */
+      return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
+    }
+      
+  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
+     optimizing for space.  Don't do this optimization if we don't have
+     a hardware sqrt insn.  */
+  real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
+  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && optimize_function_for_speed_p (cfun)
+      && REAL_VALUES_EQUAL (c, dconst3_4)
+      && hw_sqrt_exists)
+    {
+      /* sqrt(x)  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* sqrt(sqrt(x))  */
+      sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
+
+      /* sqrt(x) * sqrt(sqrt(x))  */
+      ssa_target = make_ssa_name (target, NULL);
+      mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
+						sqrt_arg0, sqrt_sqrt);
+      gimple_set_location (mult_stmt, loc);
+      gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
+
+      return ssa_target;
+    }
+
+  /* Optimize pow(x,1./3.) = cbrt(x).  This is always safe.  */
+  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
+  dconst1_3 = real_value_truncate (mode, dconst_third ());
+
+  if (cbrtfn
+      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
+      && REAL_VALUES_EQUAL (c, dconst1_3))
+    return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
+  
+  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
+     if we don't have a hardware sqrt insn.  */
+  dconst1_6 = dconst1_3;
+  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && cbrtfn
+      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
+      && optimize_function_for_speed_p (cfun)
+      && hw_sqrt_exists
+      && REAL_VALUES_EQUAL (c, dconst1_6))
+    {
+      /* sqrt(x)  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* cbrt(sqrt(x))  */
+      return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
+    }
+
   return NULL_TREE;
 }
 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)
  2011-05-26 18:40   ` William J. Schmidt
@ 2011-05-26 18:41     ` Joseph S. Myers
  2011-05-27 13:10     ` Richard Guenther
  1 sibling, 0 replies; 6+ messages in thread
From: Joseph S. Myers @ 2011-05-26 18:41 UTC (permalink / raw)
  To: William J. Schmidt; +Cc: Richard Guenther, gcc-patches

On Thu, 26 May 2011, William J. Schmidt wrote:

> +  /* Optimize pow(x,1./3.) = cbrt(x).  This is always safe.  */

No, it's not always safe.  1./3. isn't exactly representable; if x is 
negative (and finite), the correct value of the LHS is a NaN (with the 
"invalid" exception raised) because the value of 1./3. actually has an 
even denominator, whereas the correct value of the RHS is a negative real 
value.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)
  2011-05-26 18:40   ` William J. Schmidt
  2011-05-26 18:41     ` Joseph S. Myers
@ 2011-05-27 13:10     ` Richard Guenther
  2011-05-27 14:22       ` William J. Schmidt
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Guenther @ 2011-05-27 13:10 UTC (permalink / raw)
  To: William J. Schmidt; +Cc: gcc-patches

On Thu, May 26, 2011 at 7:50 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
>
> On Thu, 2011-05-26 at 11:22 +0200, Richard Guenther wrote:
>
> <snip>
>
>
>> There are several extra pre-conditions in the original code in
>> builtins.c as well as commens for reasonings (yes, there seem
>> to be duplicates of several of the transforms there ...).  Please
>> try to preserve them.  I noticed especially
>>
>>               /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
>>                  machines that a builtin sqrt instruction is smaller than a
>>                  call to pow with 0.25, so do this optimization even if
>>                  -Os.  */
>>
>> and
>>
>>       /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
>>          cbrt/sqrts instead of pow (x, 1./6.).  */
>>       if (cbrtfn && ! op
>>           && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))
>>
>> where you omit the non-negative check and the check for NANs.
>> Note that -funsafe-math-optimizations does not mean you can
>> ignore nans or signed zero issues.  pow(x,1/3) -> cbrt(x) does not
>> need a -funsafe-math-optimization check either.
>
> OK.  Good catch on the non-negative/NaN check -- that just slipped
> through the cracks.  I've fixed this and beefed up the comments as you
> suggest.
>
>>
>> It would be nice if you could re-arrange this function so that before
>> each individual replacement all preconditions are checked (even if
>> that means duplicating some checks and code) - that way
>> reviewing and later maintainance should be much easier.
>>
>
> Done -- you're right, the result is much cleaner.  Hopefully this
> version is easier to follow.
>
> Thanks,
> Bill
>
>
> 2011-05-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/46728
>        * tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
>        (powi_as_mults): Add gimple_set_location.
>        (build_and_insert_call): New.
>        (gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
>        0.5, 0.25, 0.75, 1./3., or 1./6.
>
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 174199)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
>     }
>
>   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
> +  gimple_set_location (mult_stmt, loc);
>   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
>
>   return ssa_target;
> @@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
>   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
>                                           build_real (type, dconst1),
>                                           result);
> +  gimple_set_location (div_stmt, loc);
>   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
>
>   return target;
> @@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
>   return NULL_TREE;
>  }
>
> +/* Build a gimple call statement that calls FN with argument ARG.
> +   Set the lhs of the call statement to a fresh SSA name for
> +   variable VAR.  If VAR is NULL, first allocate it.  Insert the
> +   statement prior to GSI's current position, and return the fresh
> +   SSA name.  */
> +
> +static tree
> +build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
> +                      tree *var, location_t loc)
> +{
> +  gimple call_stmt;
> +  tree ssa_target;
> +
> +  if (!*var)
> +    {
> +      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
> +      add_referenced_var (*var);
> +    }
> +
> +  call_stmt = gimple_build_call (fn, 1, arg);
> +  ssa_target = make_ssa_name (*var, NULL);
> +  gimple_set_lhs (call_stmt, ssa_target);
> +  gimple_set_location (call_stmt, loc);
> +  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
> +
> +  return ssa_target;
> +}
> +
>  /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
>    with location info LOC.  If possible, create an equivalent and
>    less expensive sequence of statements prior to GSI, and return an
> @@ -1033,16 +1063,21 @@ static tree
>  gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
>                           tree arg0, tree arg1)
>  {
> -  REAL_VALUE_TYPE c, cint;
> +  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
>   HOST_WIDE_INT n;
> +  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
> +  tree target = NULL_TREE;
> +  enum machine_mode mode;
> +  bool hw_sqrt_exists;
> +  gimple mult_stmt;
>
>   /* If the exponent isn't a constant, there's nothing of interest
>      to be done.  */
>   if (TREE_CODE (arg1) != REAL_CST)
>     return NULL_TREE;
>
> -  /* If the exponent is equivalent to an integer, expand it into
> -     multiplies when profitable.  */
> +  /* If the exponent is equivalent to an integer, expand to an optimal
> +     multiplication sequence when profitable.  */
>   c = TREE_REAL_CST (arg1);
>   n = real_to_integer (&c);
>   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> @@ -1054,6 +1089,97 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>              && powi_cost (n) <= POWI_MAX_MULTS)))
>     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
>
> +  /* Attempt various optimizations using sqrt and cbrt.  */
> +  type = TREE_TYPE (arg0);
> +  mode = TYPE_MODE (type);
> +  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
> +
> +  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
> +     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
> +     sqrt(-0) = -0.  */
> +  if (sqrtfn
> +      && REAL_VALUES_EQUAL (c, dconsthalf)
> +      && (flag_unsafe_math_optimizations
> +         || !HONOR_SIGNED_ZEROS (mode)))

Drop flag_unsafe_math_optimizations - the replacement isn't safe
for -funsafe-math-optimizations -fsigned-zeros.

> +    return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
> +     a builtin sqrt instruction is smaller than a call to pow with 0.25,
> +     so do this optimization even if -Os.  Don't do this optimization
> +     if we don't have a hardware sqrt insn.  */
> +  dconst1_4 = dconst1;
> +  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
> +  hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && REAL_VALUES_EQUAL (c, dconst1_4)
> +      && hw_sqrt_exists)
> +    {
> +      /* sqrt(x)  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      /* sqrt(sqrt(x))  */
> +      return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> +    }
> +
> +  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
> +     optimizing for space.  Don't do this optimization if we don't have
> +     a hardware sqrt insn.  */
> +  real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
> +  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && optimize_function_for_speed_p (cfun)
> +      && REAL_VALUES_EQUAL (c, dconst3_4)
> +      && hw_sqrt_exists)
> +    {
> +      /* sqrt(x)  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      /* sqrt(sqrt(x))  */
> +      sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> +
> +      /* sqrt(x) * sqrt(sqrt(x))  */
> +      ssa_target = make_ssa_name (target, NULL);
> +      mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
> +                                               sqrt_arg0, sqrt_sqrt);
> +      gimple_set_location (mult_stmt, loc);
> +      gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
> +
> +      return ssa_target;
> +    }
> +
> +  /* Optimize pow(x,1./3.) = cbrt(x).  This is always safe.  */
> +  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
> +  dconst1_3 = real_value_truncate (mode, dconst_third ());
> +
> +  if (cbrtfn
> +      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
> +      && REAL_VALUES_EQUAL (c, dconst1_3))
> +    return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);

As Joseph said this needs to be conditional on flag_unsafe_math_optimization
because of the inexactness of 1./3..
Please add rationale as explained by Joseph for the HONOR_NANS
check.

Now, the tree_expr_nonnegative_p check will always return false
because arg0 is a gimple value.  We would need to implement
either some propagation engine to derive properties for floating
point entities or mimic what tree_expr_nonnegative_p does by
looking at defining statements.  A very simple variant could be
like

bool
gimple_val_nonnegative_p (tree val)
{
  if (tree_expr_nonnegative_p (val))
    return true;

  if (TREE_CODE (val) == SSA_NAME)
   {
     gimple def_stmt = SSA_NAME_DEF_STMT (val);
     if (is_gimple_assign (def_stmt))
       return gimple_assign_rhs_code (def_stmt) == ABS_EXPR;
     else if (is_gimple_call (def_stmt))
       {
          tree fndecl = gimple_call_fndecl (def_stmt);
          if (fndecl
              && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
            {
               switch (DECL_FUNCTION_CODE (fndecl))
                 {
                 CASE_FLT_FN (BUILT_IN_FABS):
                     return true;
                 default:;
                 }
            }
       }
   }

  return false;
}

suitable for gimple-fold.c.

Alternatively you can drop the call and add a FIXME comment.

> +
> +  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
> +     if we don't have a hardware sqrt insn.  */
> +  dconst1_6 = dconst1_3;
> +  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && cbrtfn
> +      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))

Likewise.

Ok with these changes.

Thanks,
Richard.

> +      && optimize_function_for_speed_p (cfun)
> +      && hw_sqrt_exists
> +      && REAL_VALUES_EQUAL (c, dconst1_6))
> +    {
> +      /* sqrt(x)  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      /* cbrt(sqrt(x))  */
> +      return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
> +    }
> +
>   return NULL_TREE;
>  }

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)
  2011-05-27 13:10     ` Richard Guenther
@ 2011-05-27 14:22       ` William J. Schmidt
  0 siblings, 0 replies; 6+ messages in thread
From: William J. Schmidt @ 2011-05-27 14:22 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches


On Fri, 2011-05-27 at 12:24 +0200, Richard Guenther wrote:

<snip>

> Now, the tree_expr_nonnegative_p check will always return false
> because arg0 is a gimple value.  We would need to implement
> either some propagation engine to derive properties for floating
> point entities or mimic what tree_expr_nonnegative_p does by
> looking at defining statements.  A very simple variant could be
> like
> 
> bool
> gimple_val_nonnegative_p (tree val)
> {
>   if (tree_expr_nonnegative_p (val))
>     return true;
> 
>   if (TREE_CODE (val) == SSA_NAME)
>    {
>      gimple def_stmt = SSA_NAME_DEF_STMT (val);
>      if (is_gimple_assign (def_stmt))
>        return gimple_assign_rhs_code (def_stmt) == ABS_EXPR;
>      else if (is_gimple_call (def_stmt))
>        {
>           tree fndecl = gimple_call_fndecl (def_stmt);
>           if (fndecl
>               && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
>             {
>                switch (DECL_FUNCTION_CODE (fndecl))
>                  {
>                  CASE_FLT_FN (BUILT_IN_FABS):
>                      return true;
>                  default:;
>                  }
>             }
>        }
>    }
> 
>   return false;
> }
> 
> suitable for gimple-fold.c.
> 
> Alternatively you can drop the call and add a FIXME comment.
> 
> > +
> > +  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
> > +     if we don't have a hardware sqrt insn.  */
> > +  dconst1_6 = dconst1_3;
> > +  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
> > +
> > +  if (flag_unsafe_math_optimizations
> > +      && sqrtfn
> > +      && cbrtfn
> > +      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
> 
> Likewise.

OK, sounds good.  I will commit this patch with the FIXME comments, and
test the gimple_val_nonnegative_p changes next week after the holiday.

Thanks,
Bill

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-05-27 13:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-25 19:18 [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3) William J. Schmidt
2011-05-26 10:33 ` Richard Guenther
2011-05-26 18:40   ` William J. Schmidt
2011-05-26 18:41     ` Joseph S. Myers
2011-05-27 13:10     ` Richard Guenther
2011-05-27 14:22       ` William J. Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).