public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][GCC] Simplify to single precision where possible for binary/builtin maths operations.
@ 2019-09-02 17:29 Barnaby Wilks
  2019-09-03  8:23 ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Barnaby Wilks @ 2019-09-02 17:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, law, ian, rguenther, Tamar Christina, Wilco Dijkstra

[-- Attachment #1: Type: text/plain, Size: 1702 bytes --]

Hello,

This patch introduces an optimization for narrowing binary and builtin
math operations to the smallest type when unsafe math optimizations are
enabled (typically -Ofast or -ffast-math).

Consider the example:

   float f (float x) {
     return 1.0 / sqrt (x);
   }

   f:
     fcvt	d0, s0
     fmov	d1, 1.0e+0
     fsqrt	d0, d0
     fdiv	d0, d1, d0
     fcvt	s0, d0
     ret

Given that all outputs are of float type, we can do the whole 
calculation in single precision and avoid any potentially expensive 
conversions between single and double precision.

In other words, the expression would end up looking more like:

   float f (float x) {
     return 1.0f / sqrtf (x);
   }

   f:
     fsqrt	s0, s0
     fmov	s1, 1.0e+0
     fdiv	s0, s1, s0
     ret

This optimization will narrow casts around math builtins, and also
not try to find the widest type for calculations when processing binary
math operations (if unsafe math optimizations are enabled).

Added tests to verify that narrower math builtins are chosen and
no unnecessary casts are introduced when appropriate.

Bootstrapped and regtested on aarch64 and x86_64 with no regressions.

I don't have write access, so if OK for trunk then can someone commit on 
my behalf?

Regards,
Barney

gcc/ChangeLog:

2019-09-02  Barnaby Wilks  <barnaby.wilks@arm.com>

	* builtins.c (mathfn_built_in): Expose find implicit builtin parameter.
	* builtins.h (mathfn_built_in): Likewise.
	* match.pd: Add expressions for simplifying builtin and binary
	math expressions.

gcc/testsuite/ChangeLog:

2019-09-02  Barnaby Wilks  <barnaby.wilks@arm.com>

	* gcc.dg/fold-single-precision.c: New test.

[-- Attachment #2: gcc-float-narrow-opt.txt --]
[-- Type: text/plain, Size: 4701 bytes --]

diff --git a/gcc/builtins.h b/gcc/builtins.h
index 1ffb491d7850366c74bd694bf9e1c277bcde1da9..5cd02af3be55b041918ad6f1a44d5520f5689fee 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -108,6 +108,7 @@ extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
 extern tree mathfn_built_in (tree, enum built_in_function fn);
+extern tree mathfn_built_in (tree, enum built_in_function fn, bool implicit);
 extern tree mathfn_built_in (tree, combined_fn);
 extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
 extern rtx builtin_memset_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 695a9d191af4c4922351e3e59601a87b3fedda5c..6cfd7f4af54110fec9f53ddaf71408e7efc329da 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -2137,6 +2137,12 @@ mathfn_built_in (tree type, enum built_in_function fn)
   return mathfn_built_in_1 (type, as_combined_fn (fn), /*implicit=*/ 1);
 }
 
+tree
+mathfn_built_in (tree type, enum built_in_function fn, bool implicit)
+{
+  return mathfn_built_in_1 (type, as_combined_fn (fn), implicit);
+}
+
 /* If BUILT_IN_NORMAL function FNDECL has an associated internal function,
    return its code, otherwise return IFN_LAST.  Note that this function
    only tests whether the function is defined in internals.def, not whether
diff --git a/gcc/match.pd b/gcc/match.pd
index 0317bc704f771f626ab72189b3a54de00087ad5a..3562548de3ebcb986da20986b868d9a3d318c4ee 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5004,10 +5004,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	      && newtype == type
 	      && types_match (newtype, type))
 	    (op (convert:newtype @1) (convert:newtype @2))
-	    (with { if (TYPE_PRECISION (ty1) > TYPE_PRECISION (newtype))
+	    (with
+	      {
+		if (!flag_unsafe_math_optimizations)
+		  {
+		    if (TYPE_PRECISION (ty1) > TYPE_PRECISION (newtype))
 		      newtype = ty1;
+
 		    if (TYPE_PRECISION (ty2) > TYPE_PRECISION (newtype))
-		      newtype = ty2; }
+		      newtype = ty2;
+		  }
+	      }
+
 	       /* Sometimes this transformation is safe (cannot
 		  change results through affecting double rounding
 		  cases) and sometimes it is not.  If NEWTYPE is
@@ -5654,3 +5662,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (vec_perm vec_same_elem_p@0 @0 @1)
  @0)
+
+/* Convert expressions of the form
+   (x) math_call1 ((y) z) where (x) and z are the same type, into
+   math_call2 (z), where math_call2 is the math builtin for
+   type x.  Type x (and therefore type of z) must be a lower precision
+   than y/math_call1.  */
+(if (flag_unsafe_math_optimizations && !flag_errno_math)
+  (for op (COSH EXP EXP10 EXP2 EXPM1 GAMMA J0 J1 LGAMMA
+	   POW10 SINH TGAMMA Y0 Y1 ACOS ACOSH ASIN ASINH
+	   ATAN ATANH CBRT COS ERF ERFC LOG LOG10 LOG2
+	   LOG1P SIN TAN TANH SQRT FABS LOGB)
+    (simplify
+      (convert (op@0 (convert@1 @2)))
+	(if (SCALAR_FLOAT_TYPE_P (type) && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@1))
+	      && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
+	      && types_match (type, TREE_TYPE (@2))
+	      && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@1)))
+	  (with { enum built_in_function fcode = builtin_mathfn_code (@0);
+		  tree fn = mathfn_built_in (type, fcode, false); }
+	    (if (fn)
+	      (convert { build_call_expr (fn, 1, @2); })))))))
diff --git a/gcc/testsuite/gcc.dg/fold-single-precision.c b/gcc/testsuite/gcc.dg/fold-single-precision.c
new file mode 100644
index 0000000000000000000000000000000000000000..9209b5ce42d87cda69e84b048f0f0e3eaf0dd973
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-single-precision.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+#include <math.h>
+
+float f (float x)
+{
+  x = 1.0 / sqrt (x);
+  return x;
+}
+
+float g (float x, float y)
+{
+  double t = 1.0 / x;
+  return t * y;
+}
+
+float h (float x, float y)
+{
+  float z = pow (y, 2.0);
+  return sqrt ((x * x) + z);
+}
+
+float i (float x)
+{
+  return x * (double) sqrtf (x);
+}
+
+void j (float* x, float* y)
+{
+  double len = h (*x, *y);
+  *x = *x / len;
+  *y = *y / len;
+}
+
+float k (float x, float y)
+{
+  double t = 4.0 * x;
+  double z = t + y;
+  return z;
+}
+
+float l (float n)
+{
+  return cbrt (n);
+}
+
+float m (float n)
+{
+  float x = n * n;
+  return sqrt (x) - 1.0f;
+}
+
+/* { dg-final { scan-tree-dump "__builtin_sqrtf" "optimized" } } */
+/* { dg-final { scan-tree-dump "__builtin_cbrtf" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "\\(double\\)" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "\\(float\\)" "optimized" } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-09-05  9:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-02 17:29 [PATCH][GCC] Simplify to single precision where possible for binary/builtin maths operations Barnaby Wilks
2019-09-03  8:23 ` Richard Biener
2019-09-03 14:19   ` Richard Sandiford
2019-09-03 15:23   ` Barnaby Wilks
2019-09-05  9:50     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).