From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 4B7363857C53; Mon, 10 Aug 2020 19:06:39 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4B7363857C53 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Corinna Vinschen To: newlib-cvs@sourceware.org Subject: [newlib-cygwin] libm/machine/arm: Add optimized fmaf and fma when available X-Act-Checkin: newlib-cygwin X-Git-Author: Keith Packard via Newlib X-Git-Refname: refs/heads/master X-Git-Oldrev: 0c1989070ee1d849c7a25ad2eb6c8e3fb1df6393 X-Git-Newrev: a44bc679a47403e5439ae46106a886fcb6240233 Message-Id: <20200810190639.4B7363857C53@sourceware.org> Date: Mon, 10 Aug 2020 19:06:39 +0000 (GMT) X-BeenThere: newlib-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Newlib GIT logs List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 10 Aug 2020 19:06:39 -0000 https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=a44bc679a47403e5439ae46106a886fcb6240233 commit a44bc679a47403e5439ae46106a886fcb6240233 Author: Keith Packard via Newlib Date: Sat Aug 8 15:34:13 2020 -0700 libm/machine/arm: Add optimized fmaf and fma when available When HAVE_FAST_FMAF is set, use the vfma.f32 instruction, when HAVE_FAST_FMA is set, use the vfma.f64 instruction. Usually the compiler built-ins will already have inlined these instructions, but provide these symbols for cases where that doesn't work instead of falling back to the (inaccurate) common code versions. 
Signed-off-by: Keith Packard Diff: --- newlib/libm/common/s_fma.c | 4 ++++ newlib/libm/common/sf_fma.c | 4 ++++ newlib/libm/machine/arm/Makefile.am | 2 ++ newlib/libm/machine/arm/Makefile.in | 24 +++++++++++++++---- newlib/libm/machine/arm/s_fma.c | 48 +++++++++++++++++++++++++++++++++++++ newlib/libm/machine/arm/sf_fma.c | 48 +++++++++++++++++++++++++++++++++++++ 6 files changed, 125 insertions(+), 5 deletions(-) diff --git a/newlib/libm/common/s_fma.c b/newlib/libm/common/s_fma.c index ab9e525b0..15c7d23f5 100644 --- a/newlib/libm/common/s_fma.c +++ b/newlib/libm/common/s_fma.c @@ -38,6 +38,8 @@ ANSI C, POSIX. #include "fdlibm.h" +#if !HAVE_FAST_FMA + #ifndef _DOUBLE_IS_32BITS #ifdef __STDC__ @@ -54,3 +56,5 @@ ANSI C, POSIX. } #endif /* _DOUBLE_IS_32BITS */ + +#endif /* !HAVE_FAST_FMA */ diff --git a/newlib/libm/common/sf_fma.c b/newlib/libm/common/sf_fma.c index 4360f400b..ce7f13bb2 100644 --- a/newlib/libm/common/sf_fma.c +++ b/newlib/libm/common/sf_fma.c @@ -6,6 +6,8 @@ #include "fdlibm.h" +#if !HAVE_FAST_FMAF + #ifdef __STDC__ float fmaf(float x, float y, float z) #else @@ -25,6 +27,8 @@ return (float) (((double) x * (double) y) + (double) z); } +#endif + #ifdef _DOUBLE_IS_32BITS #ifdef __STDC__ diff --git a/newlib/libm/machine/arm/Makefile.am b/newlib/libm/machine/arm/Makefile.am index 6574c56c9..09a266c43 100644 --- a/newlib/libm/machine/arm/Makefile.am +++ b/newlib/libm/machine/arm/Makefile.am @@ -10,12 +10,14 @@ LIB_SOURCES = \ ef_sqrt.c \ s_ceil.c \ s_floor.c \ + s_fma.c \ s_nearbyint.c \ s_rint.c \ s_round.c \ s_trunc.c \ sf_ceil.c \ sf_floor.c \ + sf_fma.c \ sf_nearbyint.c \ sf_rint.c \ sf_round.c \ diff --git a/newlib/libm/machine/arm/Makefile.in b/newlib/libm/machine/arm/Makefile.in index 63de93443..e7bca5f66 100644 --- a/newlib/libm/machine/arm/Makefile.in +++ b/newlib/libm/machine/arm/Makefile.in @@ -54,8 +54,7 @@ build_triplet = @build@ host_triplet = @host@ DIST_COMMON = $(srcdir)/../../../Makefile.shared $(srcdir)/Makefile.in \ 
$(srcdir)/Makefile.am $(top_srcdir)/configure \ - $(am__configure_deps) $(srcdir)/../../../../mkinstalldirs \ - $(srcdir)/../../../../mkinstalldirs + $(am__configure_deps) $(srcdir)/../../../../mkinstalldirs subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../../../acinclude.m4 \ @@ -73,9 +72,10 @@ lib_a_AR = $(AR) $(ARFLAGS) lib_a_LIBADD = am__objects_1 = lib_a-e_sqrt.$(OBJEXT) lib_a-ef_sqrt.$(OBJEXT) \ lib_a-s_ceil.$(OBJEXT) lib_a-s_floor.$(OBJEXT) \ - lib_a-s_nearbyint.$(OBJEXT) lib_a-s_rint.$(OBJEXT) \ - lib_a-s_round.$(OBJEXT) lib_a-s_trunc.$(OBJEXT) \ - lib_a-sf_ceil.$(OBJEXT) lib_a-sf_floor.$(OBJEXT) \ + lib_a-s_fma.$(OBJEXT) lib_a-s_nearbyint.$(OBJEXT) \ + lib_a-s_rint.$(OBJEXT) lib_a-s_round.$(OBJEXT) \ + lib_a-s_trunc.$(OBJEXT) lib_a-sf_ceil.$(OBJEXT) \ + lib_a-sf_floor.$(OBJEXT) lib_a-sf_fma.$(OBJEXT) \ lib_a-sf_nearbyint.$(OBJEXT) lib_a-sf_rint.$(OBJEXT) \ lib_a-sf_round.$(OBJEXT) lib_a-sf_trunc.$(OBJEXT) \ lib_a-feclearexcept.$(OBJEXT) lib_a-fe_dfl_env.$(OBJEXT) \ @@ -216,12 +216,14 @@ LIB_SOURCES = \ ef_sqrt.c \ s_ceil.c \ s_floor.c \ + s_fma.c \ s_nearbyint.c \ s_rint.c \ s_round.c \ s_trunc.c \ sf_ceil.c \ sf_floor.c \ + sf_fma.c \ sf_nearbyint.c \ sf_rint.c \ sf_round.c \ @@ -342,6 +344,12 @@ lib_a-s_floor.o: s_floor.c lib_a-s_floor.obj: s_floor.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_floor.obj `if test -f 's_floor.c'; then $(CYGPATH_W) 's_floor.c'; else $(CYGPATH_W) '$(srcdir)/s_floor.c'; fi` +lib_a-s_fma.o: s_fma.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fma.o `test -f 's_fma.c' || echo '$(srcdir)/'`s_fma.c + +lib_a-s_fma.obj: s_fma.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fma.obj `if test -f 's_fma.c'; then $(CYGPATH_W) 's_fma.c'; else $(CYGPATH_W) '$(srcdir)/s_fma.c'; fi` + 
lib_a-s_nearbyint.o: s_nearbyint.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_nearbyint.o `test -f 's_nearbyint.c' || echo '$(srcdir)/'`s_nearbyint.c @@ -378,6 +386,12 @@ lib_a-sf_floor.o: sf_floor.c lib_a-sf_floor.obj: sf_floor.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_floor.obj `if test -f 'sf_floor.c'; then $(CYGPATH_W) 'sf_floor.c'; else $(CYGPATH_W) '$(srcdir)/sf_floor.c'; fi` +lib_a-sf_fma.o: sf_fma.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fma.o `test -f 'sf_fma.c' || echo '$(srcdir)/'`sf_fma.c + +lib_a-sf_fma.obj: sf_fma.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fma.obj `if test -f 'sf_fma.c'; then $(CYGPATH_W) 'sf_fma.c'; else $(CYGPATH_W) '$(srcdir)/sf_fma.c'; fi` + lib_a-sf_nearbyint.o: sf_nearbyint.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nearbyint.o `test -f 'sf_nearbyint.c' || echo '$(srcdir)/'`sf_nearbyint.c diff --git a/newlib/libm/machine/arm/s_fma.c b/newlib/libm/machine/arm/s_fma.c new file mode 100644 index 000000000..f945419b5 --- /dev/null +++ b/newlib/libm/machine/arm/s_fma.c @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2020 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <math.h> +#include "math_config.h" + +#if HAVE_FAST_FMA +/* Compute x * y + z using the fused vfma.f64 instruction. */ +double +fma (double x, double y, double z) +{ + asm ("vfma.f64 %P0, %P1, %P2" : "+w" (z) : "w" (x), "w" (y)); /* z is read-write: vfma accumulates into its destination register */ + return z; +} + +#endif diff --git a/newlib/libm/machine/arm/sf_fma.c b/newlib/libm/machine/arm/sf_fma.c new file mode 100644 index 000000000..4befd9017 --- /dev/null +++ b/newlib/libm/machine/arm/sf_fma.c @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2020 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <math.h> +#include "math_config.h" + +#if HAVE_FAST_FMAF +/* Compute x * y + z using the fused vfma.f32 instruction. */ +float +fmaf (float x, float y, float z) +{ + asm ("vfma.f32 %0, %1, %2" : "+t" (z) : "t" (x), "t" (y)); /* z is read-write: vfma accumulates into its destination register */ + return z; +} + +#endif