public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
From: Keith Packard <keithp@keithp.com>
To: newlib@sourceware.org
Subject: [PATCH 3/3] libm/machine/arm: Add optimized fmaf and fma when available
Date: Sat,  8 Aug 2020 15:34:13 -0700	[thread overview]
Message-ID: <20200808223413.4015633-4-keithp@keithp.com> (raw)
In-Reply-To: <20200808223413.4015633-1-keithp@keithp.com>

When HAVE_FAST_FMAF is set, use the vfma.f32 instruction; when
HAVE_FAST_FMA is set, use the vfma.f64 instruction.

Usually the compiler built-ins will already have inlined these
instructions, but provide these symbols for cases where that doesn't
work instead of falling back to the (inaccurate) common code versions.

Signed-off-by: Keith Packard <keithp@keithp.com>
---
 newlib/libm/common/s_fma.c          |  4 +++
 newlib/libm/common/sf_fma.c         |  4 +++
 newlib/libm/machine/arm/Makefile.am |  2 ++
 newlib/libm/machine/arm/Makefile.in | 16 ++++++++++
 newlib/libm/machine/arm/s_fma.c     | 48 +++++++++++++++++++++++++++++
 newlib/libm/machine/arm/sf_fma.c    | 48 +++++++++++++++++++++++++++++
 6 files changed, 122 insertions(+)
 create mode 100644 newlib/libm/machine/arm/s_fma.c
 create mode 100644 newlib/libm/machine/arm/sf_fma.c

diff --git a/newlib/libm/common/s_fma.c b/newlib/libm/common/s_fma.c
index ab9e525b0..15c7d23f5 100644
--- a/newlib/libm/common/s_fma.c
+++ b/newlib/libm/common/s_fma.c
@@ -38,6 +38,8 @@ ANSI C, POSIX.
 
 #include "fdlibm.h"
 
+#if !HAVE_FAST_FMA
+
 #ifndef _DOUBLE_IS_32BITS
 
 #ifdef __STDC__
@@ -54,3 +56,5 @@ ANSI C, POSIX.
 }
 
 #endif /* _DOUBLE_IS_32BITS */
+
+#endif /* !HAVE_FAST_FMA */
diff --git a/newlib/libm/common/sf_fma.c b/newlib/libm/common/sf_fma.c
index 4360f400b..ce7f13bb2 100644
--- a/newlib/libm/common/sf_fma.c
+++ b/newlib/libm/common/sf_fma.c
@@ -6,6 +6,8 @@
 
 #include "fdlibm.h"
 
+#if !HAVE_FAST_FMAF
+
 #ifdef __STDC__
 	float fmaf(float x, float y, float z)
 #else
@@ -25,6 +27,8 @@
   return (float) (((double) x * (double) y) + (double) z);
 }
 
+#endif
+
 #ifdef _DOUBLE_IS_32BITS
 
 #ifdef __STDC__
diff --git a/newlib/libm/machine/arm/Makefile.am b/newlib/libm/machine/arm/Makefile.am
index 6574c56c9..09a266c43 100644
--- a/newlib/libm/machine/arm/Makefile.am
+++ b/newlib/libm/machine/arm/Makefile.am
@@ -10,12 +10,14 @@ LIB_SOURCES = \
 	ef_sqrt.c \
 	s_ceil.c \
 	s_floor.c \
+	s_fma.c \
 	s_nearbyint.c \
 	s_rint.c \
 	s_round.c \
 	s_trunc.c \
 	sf_ceil.c \
 	sf_floor.c \
+	sf_fma.c \
 	sf_nearbyint.c \
 	sf_rint.c \
 	sf_round.c \
diff --git a/newlib/libm/machine/arm/Makefile.in b/newlib/libm/machine/arm/Makefile.in
index 63de93443..16f5773d1 100644
--- a/newlib/libm/machine/arm/Makefile.in
+++ b/newlib/libm/machine/arm/Makefile.in
@@ -73,9 +73,11 @@ lib_a_AR = $(AR) $(ARFLAGS)
 lib_a_LIBADD =
 am__objects_1 = lib_a-e_sqrt.$(OBJEXT) lib_a-ef_sqrt.$(OBJEXT) \
 	lib_a-s_ceil.$(OBJEXT) lib_a-s_floor.$(OBJEXT) \
+	lib_a-s_fma.$(OBJEXT) \
 	lib_a-s_nearbyint.$(OBJEXT) lib_a-s_rint.$(OBJEXT) \
 	lib_a-s_round.$(OBJEXT) lib_a-s_trunc.$(OBJEXT) \
 	lib_a-sf_ceil.$(OBJEXT) lib_a-sf_floor.$(OBJEXT) \
+	lib_a-sf_fma.$(OBJEXT) \
 	lib_a-sf_nearbyint.$(OBJEXT) lib_a-sf_rint.$(OBJEXT) \
 	lib_a-sf_round.$(OBJEXT) lib_a-sf_trunc.$(OBJEXT) \
 	lib_a-feclearexcept.$(OBJEXT) lib_a-fe_dfl_env.$(OBJEXT) \
@@ -216,12 +218,14 @@ LIB_SOURCES = \
 	ef_sqrt.c \
 	s_ceil.c \
 	s_floor.c \
+	s_fma.c \
 	s_nearbyint.c \
 	s_rint.c \
 	s_round.c \
 	s_trunc.c \
 	sf_ceil.c \
 	sf_floor.c \
+	sf_fma.c \
 	sf_nearbyint.c \
 	sf_rint.c \
 	sf_round.c \
@@ -342,6 +346,12 @@ lib_a-s_floor.o: s_floor.c
 lib_a-s_floor.obj: s_floor.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_floor.obj `if test -f 's_floor.c'; then $(CYGPATH_W) 's_floor.c'; else $(CYGPATH_W) '$(srcdir)/s_floor.c'; fi`
 
+lib_a-s_fma.o: s_fma.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fma.o `test -f 's_fma.c' || echo '$(srcdir)/'`s_fma.c
+
+lib_a-s_fma.obj: s_fma.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fma.obj `if test -f 's_fma.c'; then $(CYGPATH_W) 's_fma.c'; else $(CYGPATH_W) '$(srcdir)/s_fma.c'; fi`
+
 lib_a-s_nearbyint.o: s_nearbyint.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_nearbyint.o `test -f 's_nearbyint.c' || echo '$(srcdir)/'`s_nearbyint.c
 
@@ -378,6 +388,12 @@ lib_a-sf_floor.o: sf_floor.c
 lib_a-sf_floor.obj: sf_floor.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_floor.obj `if test -f 'sf_floor.c'; then $(CYGPATH_W) 'sf_floor.c'; else $(CYGPATH_W) '$(srcdir)/sf_floor.c'; fi`
 
+lib_a-sf_fma.o: sf_fma.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fma.o `test -f 'sf_fma.c' || echo '$(srcdir)/'`sf_fma.c
+
+lib_a-sf_fma.obj: sf_fma.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fma.obj `if test -f 'sf_fma.c'; then $(CYGPATH_W) 'sf_fma.c'; else $(CYGPATH_W) '$(srcdir)/sf_fma.c'; fi`
+
 lib_a-sf_nearbyint.o: sf_nearbyint.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nearbyint.o `test -f 'sf_nearbyint.c' || echo '$(srcdir)/'`sf_nearbyint.c
 
diff --git a/newlib/libm/machine/arm/s_fma.c b/newlib/libm/machine/arm/s_fma.c
new file mode 100644
index 000000000..f945419b5
--- /dev/null
+++ b/newlib/libm/machine/arm/s_fma.c
@@ -0,0 +1,48 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright © 2020 Keith Packard
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials provided
+ *    with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include "math_config.h"
+
+#if HAVE_FAST_FMA
+
+double
+fma (double x, double y, double z)
+{ /* Return x * y + z.  vfma.f64 adds into its destination, so z is "+w".  */
+  asm ("vfma.f64 %P0, %P1, %P2" : "+w" (z) : "w" (x), "w" (y));
+  return z;
+}
+
+#endif
diff --git a/newlib/libm/machine/arm/sf_fma.c b/newlib/libm/machine/arm/sf_fma.c
new file mode 100644
index 000000000..4befd9017
--- /dev/null
+++ b/newlib/libm/machine/arm/sf_fma.c
@@ -0,0 +1,48 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright © 2020 Keith Packard
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials provided
+ *    with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include "math_config.h"
+
+#if HAVE_FAST_FMAF
+
+float
+fmaf (float x, float y, float z)
+{ /* Return x * y + z.  vfma.f32 adds into its destination, so z is "+t".  */
+  asm ("vfma.f32 %0, %1, %2" : "+t" (z) : "t" (x), "t" (y));
+  return z;
+}
+
+#endif
-- 
2.28.0


  parent reply	other threads:[~2020-08-08 22:34 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-08 22:34 [PATCH 0/3] ARM with only 32-bit floats do not have fast 64-bit FMA Keith Packard
2020-08-08 22:34 ` [PATCH 1/3] libm: ARM without HW double does not have fast FMA Keith Packard
2020-08-08 22:34 ` [PATCH 2/3] libm: Detect fast fmaf support Keith Packard
2020-08-08 22:34 ` Keith Packard [this message]
2020-08-10  9:30 ` [PATCH 0/3] ARM with only 32-bit floats do not have fast 64-bit FMA Corinna Vinschen
2020-08-10 14:43   ` Szabolcs Nagy
2020-08-10 15:19     ` Keith Packard
2020-08-10 19:06 ` Corinna Vinschen
2020-09-01 16:32 ` Sebastian Huber
2020-09-01 17:21   ` Sebastian Huber
2020-09-01 18:04     ` Sebastian Huber
2020-09-01 19:28       ` Keith Packard
2020-09-01 21:16         ` Joseph Myers
2020-09-01 23:06           ` Keith Packard
2020-09-02  4:41             ` Sebastian Huber
2020-09-02  5:25               ` Keith Packard
2020-09-02  5:35                 ` Keith Packard
2020-09-02 17:12               ` Joseph Myers
2020-09-02 17:59                 ` Sebastian Huber
2020-09-02 20:39                   ` Keith Packard
2020-09-01 19:50     ` Keith Packard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200808223413.4015633-4-keithp@keithp.com \
    --to=keithp@keithp.com \
    --cc=newlib@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).