diff --git a/gcc/builtins.h b/gcc/builtins.h
index 1ffb491d7850366c74bd694bf9e1c277bcde1da9..5cd02af3be55b041918ad6f1a44d5520f5689fee 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -108,6 +108,7 @@ extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
 extern tree mathfn_built_in (tree, enum built_in_function fn);
+extern tree mathfn_built_in (tree, enum built_in_function fn, bool implicit);
 extern tree mathfn_built_in (tree, combined_fn);
 extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
 extern rtx builtin_memset_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 695a9d191af4c4922351e3e59601a87b3fedda5c..6cfd7f4af54110fec9f53ddaf71408e7efc329da 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -2137,6 +2137,15 @@ mathfn_built_in (tree type, enum built_in_function fn)
   return mathfn_built_in_1 (type, as_combined_fn (fn), /*implicit=*/ 1);
 }
 
+/* Like the overload above, but forward IMPLICIT to mathfn_built_in_1
+   instead of always passing 1, so the caller controls that argument.  */
+
+tree
+mathfn_built_in (tree type, enum built_in_function fn, bool implicit)
+{
+  return mathfn_built_in_1 (type, as_combined_fn (fn), implicit);
+}
+
 /* If BUILT_IN_NORMAL function FNDECL has an associated internal function,
    return its code, otherwise return IFN_LAST.  Note that this function
    only tests whether the function is defined in internals.def, not whether
diff --git a/gcc/match.pd b/gcc/match.pd
index 0317bc704f771f626ab72189b3a54de00087ad5a..3562548de3ebcb986da20986b868d9a3d318c4ee 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5004,10 +5004,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	      && newtype == type
 	      && types_match (newtype, type))
 	    (op (convert:newtype @1) (convert:newtype @2))
-	    (with { if (TYPE_PRECISION (ty1) > TYPE_PRECISION (newtype))
+	    (with
+	      {
+		/* Only widen NEWTYPE when unsafe math is disabled.  */
+		if (!flag_unsafe_math_optimizations)
+		  {
+		    if (TYPE_PRECISION (ty1) > TYPE_PRECISION (newtype))
 		      newtype = ty1;
+
 		    if (TYPE_PRECISION (ty2) > TYPE_PRECISION (newtype))
-		      newtype = ty2; }
+		      newtype = ty2;
+		  }
+	      }
 	       /* Sometimes this transformation is safe (cannot
 		  change results through affecting double rounding
 		  cases) and sometimes it is not.  If NEWTYPE is
@@ -5654,3 +5662,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (vec_perm vec_same_elem_p@0 @0 @1)
  @0)
+
+/* Convert (x) math_call1 ((y) z), where x and the type of z are the same
+   scalar float type of lower precision than y, into math_call2 (z), the
+   math builtin for type x.  Needs flag_unsafe_math_optimizations and
+   !flag_errno_math.  NOTE(review): builtin_mathfn_code and build_call_expr
+   look GENERIC-only; confirm this is also meant to be usable on GIMPLE.  */
+(if (flag_unsafe_math_optimizations && !flag_errno_math)
+  (for op (COSH EXP EXP10 EXP2 EXPM1 GAMMA J0 J1 LGAMMA
+	   POW10 SINH TGAMMA Y0 Y1 ACOS ACOSH ASIN ASINH
+	   ATAN ATANH CBRT COS ERF ERFC LOG LOG10 LOG2
+	   LOG1P SIN TAN TANH SQRT FABS LOGB)
+    (simplify
+      (convert (op@0 (convert@1 @2)))
+	(if (SCALAR_FLOAT_TYPE_P (type) && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@1))
+	      && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
+	      && types_match (type, TREE_TYPE (@2))
+	      && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@1)))
+	  (with { enum built_in_function fcode = builtin_mathfn_code (@0);
+		  tree fn = mathfn_built_in (type, fcode, false); }
+	    (if (fn)
+	      (convert { build_call_expr (fn, 1, @2); })))))))
diff --git a/gcc/testsuite/gcc.dg/fold-single-precision.c b/gcc/testsuite/gcc.dg/fold-single-precision.c
new file mode 100644
index 0000000000000000000000000000000000000000..9209b5ce42d87cda69e84b048f0f0e3eaf0dd973
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-single-precision.c
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+/* Each function below mixes float values with double arithmetic or
+   double-precision math calls and produces a float.  The dg-final
+   directives at the end require the "optimized" dump to mention
+   __builtin_sqrtf and __builtin_cbrtf and to contain neither "(double)"
+   nor "(float)".  */
+
+#include <math.h>
+
+/* 1.0 / sqrt (x) on a float X, stored back into a float.  */
+float f (float x)
+{
+  x = 1.0 / sqrt (x);
+  return x;
+}
+
+/* Reciprocal kept in a double, multiplied by a float, returned as float.  */
+float g (float x, float y)
+{
+  double t = 1.0 / x;
+  return t * y;
+}
+
+/* pow and sqrt applied to float operands with float results.  */
+float h (float x, float y)
+{
+  float z = pow (y, 2.0);
+  return sqrt ((x * x) + z);
+}
+
+/* Float multiplied by a sqrtf result that is explicitly cast to double.  */
+float i (float x)
+{
+  return x * (double) sqrtf (x);
+}
+
+/* Float result of h held in a double and used to divide float values.  */
+void j (float* x, float* y)
+{
+  double len = h (*x, *y);
+  *x = *x / len;
+  *y = *y / len;
+}
+
+/* Two chained double temporaries computed from floats, returned as float.  */
+float k (float x, float y)
+{
+  double t = 4.0 * x;
+  double z = t + y;
+  return z;
+}
+
+/* cbrt on a float argument with a float return value.  */
+float l (float n)
+{
+  return cbrt (n);
+}
+
+/* sqrt on a float product, minus a float constant.  */
+float m (float n)
+{
+  float x = n * n;
+  return sqrt (x) - 1.0f;
+}
+
+/* { dg-final { scan-tree-dump "__builtin_sqrtf" "optimized" } } */
+/* { dg-final { scan-tree-dump "__builtin_cbrtf" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "\\(double\\)" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "\\(float\\)" "optimized" } } */