public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/nsz/mathvec] Aarch64: Add simd exp/expf ABI symbols
@ 2019-07-15 10:31 Szabolcs Nagy
  0 siblings, 0 replies; 2+ messages in thread
From: Szabolcs Nagy @ 2019-07-15 10:31 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=f34706523b93e2ea126e895fb3c985c562c45b5b

commit f34706523b93e2ea126e895fb3c985c562c45b5b
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Fri Jun 28 11:29:19 2019 +0100

    Aarch64: Add simd exp/expf ABI symbols
    
    The implementation is in assembly and just calls the scalar math code.
    This ensures that old compilers without vector call ABI support can
    build libmvec. The ABI is supported since GCC 9.1; the specification is
    
    https://developer.arm.com/tools-and-software/server-and-hpc/arm-architecture-tools/arm-compiler-for-hpc/vector-function-abi
    
    Vector functions require a STO_AARCH64_VARIANT_PCS marking in the
    dynamic symbol table for lazy bound calls to work. This will be
    missing in libmvec, which works because the marking only affects
    the behaviour if there are calls to the symbols in the binary.
    
    Testing requires vector call abi support, which is detected.
    
    Header declarations are not added yet, so the symbols will not be used
    by the compiler: they are just added so the abi is in place which
    enables backporting later. Currently we cannot add correct declarations
    that only declare the specific symbols we provide: the OpenMP pragma
    mechanism would declare both AdvSIMD and SVE variants.
    
    2019-07-15  Steve Ellcey  <sellcey@marvell.com>
    	    Szabolcs Nagy  <szabolcs.nagy@arm.com>
    
    	* sysdeps/aarch64/configure.ac (build_mathvec): Enable.
    	(test-mathvec): Enable if ABI is supported.
    	* sysdeps/aarch64/configure: Regenerate.
    	* sysdeps/aarch64/fpu/Makefile
    	(libmvec-support): Add libmvec_double_vlen2_exp,
    	libmvec_float_vlen4_expf to list.
    	(libmvec-tests): Add double-vlen2, float-vlen4 to list.
    	(double-vlen2-funcs): Add new vector function name.
    	(float-vlen4-funcs): Add new vector function name.
    	* sysdeps/aarch64/fpu/Versions: New file.
    	* sysdeps/aarch64/fpu/libmvec_double_vlen2.h: New file.
    	* sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S: New file.
    	* sysdeps/aarch64/fpu/libmvec_float_vlen4.h: New file.
    	* sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S: New file.
    	* sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c: New file.
    	* sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c: New file.
    	* sysdeps/aarch64/libm-test-ulps (exp_vlen2): New entry.
    	(exp_vlen4): Likewise.
    	* sysdeps/unix/sysv/linux/aarch64/libmvec.abilist: New file.

Diff:
---
 sysdeps/aarch64/configure                        | 31 +++++++++++
 sysdeps/aarch64/configure.ac                     | 24 +++++++++
 sysdeps/aarch64/fpu/Makefile                     | 18 +++++++
 sysdeps/aarch64/fpu/Versions                     |  5 ++
 sysdeps/aarch64/fpu/libmvec_double_vlen2.h       | 59 +++++++++++++++++++++
 sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S   | 21 ++++++++
 sysdeps/aarch64/fpu/libmvec_float_vlen4.h        | 65 ++++++++++++++++++++++++
 sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S   | 21 ++++++++
 sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c | 28 ++++++++++
 sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c  | 28 ++++++++++
 sysdeps/aarch64/libm-test-ulps                   |  6 +++
 sysdeps/unix/sysv/linux/aarch64/libmvec.abilist  |  2 +
 12 files changed, 308 insertions(+)

diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
index 5bd355a..df15cdb 100644
--- a/sysdeps/aarch64/configure
+++ b/sysdeps/aarch64/configure
@@ -172,3 +172,34 @@ else
   config_vars="$config_vars
 default-abi = lp64"
 fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pcs attribute support" >&5
+$as_echo_n "checking for pcs attribute support... " >&6; }
+if ${libc_cv_gcc_pcs_attribute+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.c <<EOF
+__attribute__((aarch64_vector_pcs)) extern void foo (void);
+EOF
+libc_cv_gcc_pcs_attribute=no
+if ${CC-cc} -c -Wall -Werror conftest.c -o conftest.o 1>&5 \
+   2>&5 ; then
+  libc_cv_gcc_pcs_attribute=yes
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_gcc_pcs_attribute" >&5
+$as_echo "$libc_cv_gcc_pcs_attribute" >&6; }
+
+# Enable libmvec by default.
+if test x"$build_mathvec" = xnotset; then
+  build_mathvec=yes
+fi
+
+# Only test libmvec if the compiler supports aarch64_vector_pcs.
+if test x"$build_mathvec" = xyes; then
+  if test $libc_cv_gcc_pcs_attribute = yes; then
+    config_vars="$config_vars
+test-mathvec = yes"
+  fi
+fi
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
index 7851dd4..eab411c 100644
--- a/sysdeps/aarch64/configure.ac
+++ b/sysdeps/aarch64/configure.ac
@@ -20,3 +20,27 @@ if test $libc_cv_aarch64_be = yes; then
 else
   LIBC_CONFIG_VAR([default-abi], [lp64])
 fi
+
+AC_CACHE_CHECK([for pcs attribute support],
+               libc_cv_gcc_pcs_attribute, [dnl
+cat > conftest.c <<EOF
+__attribute__((aarch64_vector_pcs)) extern void foo (void);
+EOF
+libc_cv_gcc_pcs_attribute=no
+if ${CC-cc} -c -Wall -Werror conftest.c -o conftest.o 1>&AS_MESSAGE_LOG_FD \
+   2>&AS_MESSAGE_LOG_FD ; then
+  libc_cv_gcc_pcs_attribute=yes
+fi
+rm -f conftest*])
+
+# Enable libmvec by default.
+if test x"$build_mathvec" = xnotset; then
+  build_mathvec=yes
+fi
+
+# Only test libmvec if the compiler supports aarch64_vector_pcs.
+if test x"$build_mathvec" = xyes; then
+  if test $libc_cv_gcc_pcs_attribute = yes; then
+    LIBC_CONFIG_VAR([test-mathvec], [yes])
+  fi
+fi
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 4a182bd..f7939d0 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -12,3 +12,21 @@ CFLAGS-s_fmaxf.c += -ffinite-math-only
 CFLAGS-s_fmin.c += -ffinite-math-only
 CFLAGS-s_fminf.c += -ffinite-math-only
 endif
+
+ifeq ($(subdir),mathvec)
+libmvec-support += \
+  libmvec_double_vlen2_exp \
+  libmvec_float_vlen4_expf \
+
+libmvec-static-only-routines = non-existing-routine
+endif
+
+ifeq ($(subdir),math)
+ifeq ($(build-mathvec),yes)
+double-vlen2-funcs = exp
+float-vlen4-funcs = exp
+ifeq ($(test-mathvec),yes)
+libmvec-tests += double-vlen2 float-vlen4
+endif
+endif
+endif
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
new file mode 100644
index 0000000..da36f3c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/Versions
@@ -0,0 +1,5 @@
+libmvec {
+  GLIBC_2.30 {
+    _ZGVnN2v_exp; _ZGVnN4v_expf;
+  }
+}
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2.h b/sysdeps/aarch64/fpu/libmvec_double_vlen2.h
new file mode 100644
index 0000000..383980d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2.h
@@ -0,0 +1,59 @@
+/* Double-precision 2 element vector function template.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+ENTRY (VECTOR_FUNCTION)
+	stp	x29, x30, [sp, -288]!	// Frame: x29/x30 at 0, q8-q23 at 16..271, q0 scratch at 272.
+	cfi_adjust_cfa_offset (288)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+	mov	x29, sp
+	stp	 q8,  q9, [sp, 16]	// Spill q8-q23 around the scalar calls.
+	stp	q10, q11, [sp, 48]
+	stp	q12, q13, [sp, 80]
+	stp	q14, q15, [sp, 112]
+	stp	q16, q17, [sp, 144]
+	stp	q18, q19, [sp, 176]
+	stp	q20, q21, [sp, 208]
+	stp	q22, q23, [sp, 240]
+
+	// Use per lane load/store to avoid endianness issues.
+	str	q0, [sp, 272]	// Park the whole input vector in the scratch slot.
+	ldr	d0, [sp, 272]	// Element at byte offset 0.
+	bl SCALAR_FUNCTION
+	str	d0, [sp, 272]	// Overwrite the input element with its result.
+	ldr	d0, [sp, 280]	// Element at byte offset 8.
+	bl SCALAR_FUNCTION
+	str	d0, [sp, 280]
+	ldr	q0, [sp, 272]	// Reload the scratch slot as the vector result.
+
+	ldp	q8, q9, [sp, 16]
+	ldp	q10, q11, [sp, 48]
+	ldp	q12, q13, [sp, 80]
+	ldp	q14, q15, [sp, 112]
+	ldp	q16, q17, [sp, 144]
+	ldp	q18, q19, [sp, 176]
+	ldp	q20, q21, [sp, 208]
+	ldp	q22, q23, [sp, 240]
+	ldp	x29, x30, [sp], 288	// Pop the frame.
+	cfi_adjust_cfa_offset (-288)	// Negative: undoes the +288 from the prologue.
+	cfi_restore (x29)
+	cfi_restore (x30)
+	ret
+END (VECTOR_FUNCTION)
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S
new file mode 100644
index 0000000..644405c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S
@@ -0,0 +1,21 @@
+/* Double-precision 2 element vector e^x function.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SCALAR_FUNCTION exp
+#define VECTOR_FUNCTION _ZGVnN2v_exp
+#include "libmvec_double_vlen2.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4.h b/sysdeps/aarch64/fpu/libmvec_float_vlen4.h
new file mode 100644
index 0000000..2450309
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4.h
@@ -0,0 +1,65 @@
+/* Single-precision 4 element vector function template.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+ENTRY (VECTOR_FUNCTION)
+	stp	x29, x30, [sp, -288]!	// Frame: x29/x30 at 0, q8-q23 at 16..271, q0 scratch at 272.
+	cfi_adjust_cfa_offset (288)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+	mov	x29, sp
+	stp	 q8,  q9, [sp, 16]	// Spill q8-q23 around the scalar calls.
+	stp	q10, q11, [sp, 48]
+	stp	q12, q13, [sp, 80]
+	stp	q14, q15, [sp, 112]
+	stp	q16, q17, [sp, 144]
+	stp	q18, q19, [sp, 176]
+	stp	q20, q21, [sp, 208]
+	stp	q22, q23, [sp, 240]
+
+	// Use per lane load/store to avoid endianness issues.
+	str	q0, [sp, 272]	// Park the whole input vector in the scratch slot.
+	ldr	s0, [sp, 272]	// Element at byte offset 0.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 272]	// Overwrite the input element with its result.
+	ldr	s0, [sp, 276]	// Element at byte offset 4.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 276]
+	ldr	s0, [sp, 280]	// Element at byte offset 8.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 280]
+	ldr	s0, [sp, 284]	// Element at byte offset 12.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 284]
+	ldr	q0, [sp, 272]	// Reload the scratch slot as the vector result.
+
+	ldp	q8, q9, [sp, 16]
+	ldp	q10, q11, [sp, 48]
+	ldp	q12, q13, [sp, 80]
+	ldp	q14, q15, [sp, 112]
+	ldp	q16, q17, [sp, 144]
+	ldp	q18, q19, [sp, 176]
+	ldp	q20, q21, [sp, 208]
+	ldp	q22, q23, [sp, 240]
+	ldp	x29, x30, [sp], 288	// Pop the frame.
+	cfi_adjust_cfa_offset (-288)	// Negative: undoes the +288 from the prologue.
+	cfi_restore (x29)
+	cfi_restore (x30)
+	ret
+END (VECTOR_FUNCTION)
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S
new file mode 100644
index 0000000..ab76ea0
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S
@@ -0,0 +1,21 @@
+/* Single-precision 4 element vector e^x function.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SCALAR_FUNCTION expf
+#define VECTOR_FUNCTION _ZGVnN4v_expf
+#include "libmvec_float_vlen4.h"
diff --git a/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
new file mode 100644
index 0000000..6c6c44d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
@@ -0,0 +1,28 @@
+/* Wrapper part of tests for aarch64 double vector math functions.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <arm_neon.h>
+#include "test-double-vlen2.h"
+
+#define VEC_TYPE float64x2_t
+
+/* Hack: VECTOR_WRAPPER declares the vector function without the pcs attribute,
+   placing it here happens to work, should be fixed in test-math-vector.h.  */
+__attribute__ ((aarch64_vector_pcs))
+
+VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVnN2v_exp)
diff --git a/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
new file mode 100644
index 0000000..5117633
--- /dev/null
+++ b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
@@ -0,0 +1,28 @@
+/* Wrapper part of tests for float aarch64 vector math functions.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <arm_neon.h>
+#include "test-float-vlen4.h"
+
+#define VEC_TYPE float32x4_t
+
+/* Hack: VECTOR_WRAPPER declares the vector function without the pcs attribute,
+   placing it here happens to work, should be fixed in test-math-vector.h.  */
+__attribute__ ((aarch64_vector_pcs))
+
+VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVnN4v_expf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 585e5bb..1ed4af9 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1601,6 +1601,12 @@ float: 1
 idouble: 1
 ifloat: 1
 
+Function: "exp_vlen2":
+double: 1
+
+Function: "exp_vlen4":
+float: 1
+
 Function: "expm1":
 double: 1
 float: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
new file mode 100644
index 0000000..9e17825
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -0,0 +1,2 @@
+GLIBC_2.30 _ZGVnN2v_exp F
+GLIBC_2.30 _ZGVnN4v_expf F


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [glibc/nsz/mathvec] Aarch64: Add simd exp/expf ABI symbols
@ 2019-07-16 10:25 Szabolcs Nagy
  0 siblings, 0 replies; 2+ messages in thread
From: Szabolcs Nagy @ 2019-07-16 10:25 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=80792b61024d810c3d36e9a1dd8a166249d8b6e8

commit 80792b61024d810c3d36e9a1dd8a166249d8b6e8
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Fri Jun 28 11:29:19 2019 +0100

    Aarch64: Add simd exp/expf ABI symbols
    
    The implementation is in assembly and just calls the scalar math code.
    This ensures that old compilers without vector call ABI support can
    build libmvec. The ABI is supported since GCC 9.1; the specification is
    
    https://developer.arm.com/tools-and-software/server-and-hpc/arm-architecture-tools/arm-compiler-for-hpc/vector-function-abi
    
    Vector functions require a STO_AARCH64_VARIANT_PCS marking in the
    dynamic symbol table for lazy bound calls to work. This will be
    missing in libmvec, which works because the marking only affects
    the behaviour if there are calls to the symbols in the binary.
    
    Testing requires vector call abi support, which is detected.
    
    Header declarations are not added yet, so the symbols will not be used
    by the compiler: they are just added so the abi is in place which
    enables backporting later. Currently we cannot add correct declarations
    that only declare the specific symbols we provide: the OpenMP pragma
    mechanism would declare both AdvSIMD and SVE variants.
    
    2019-07-15  Steve Ellcey  <sellcey@marvell.com>
    	    Szabolcs Nagy  <szabolcs.nagy@arm.com>
    
    	* sysdeps/aarch64/configure.ac (build_mathvec): Enable.
    	(test-mathvec): Enable if ABI is supported.
    	* sysdeps/aarch64/configure: Regenerate.
    	* sysdeps/aarch64/fpu/Makefile
    	(libmvec-support): Add libmvec_double_vlen2_exp,
    	libmvec_float_vlen4_expf to list.
    	(libmvec_nonshared.a): Use make-dummy-lib.
    	(libmvec-tests): Add double-vlen2, float-vlen4 to list.
    	(double-vlen2-funcs): Add new vector function name.
    	(float-vlen4-funcs): Add new vector function name.
    	* sysdeps/aarch64/fpu/Versions: New file.
    	* sysdeps/aarch64/fpu/libmvec_double_vlen2.h: New file.
    	* sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S: New file.
    	* sysdeps/aarch64/fpu/libmvec_float_vlen4.h: New file.
    	* sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S: New file.
    	* sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c: New file.
    	* sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c: New file.
    	* sysdeps/aarch64/libm-test-ulps (exp_vlen2): New entry.
    	(exp_vlen4): Likewise.
    	* sysdeps/unix/sysv/linux/aarch64/libmvec.abilist: New file.

Diff:
---
 sysdeps/aarch64/configure                        | 31 +++++++++++
 sysdeps/aarch64/configure.ac                     | 24 +++++++++
 sysdeps/aarch64/fpu/Makefile                     | 19 +++++++
 sysdeps/aarch64/fpu/Versions                     |  5 ++
 sysdeps/aarch64/fpu/libmvec_double_vlen2.h       | 59 +++++++++++++++++++++
 sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S   | 21 ++++++++
 sysdeps/aarch64/fpu/libmvec_float_vlen4.h        | 65 ++++++++++++++++++++++++
 sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S   | 21 ++++++++
 sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c | 28 ++++++++++
 sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c  | 28 ++++++++++
 sysdeps/aarch64/libm-test-ulps                   |  6 +++
 sysdeps/unix/sysv/linux/aarch64/libmvec.abilist  |  2 +
 12 files changed, 309 insertions(+)

diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
index 5bd355a..df15cdb 100644
--- a/sysdeps/aarch64/configure
+++ b/sysdeps/aarch64/configure
@@ -172,3 +172,34 @@ else
   config_vars="$config_vars
 default-abi = lp64"
 fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pcs attribute support" >&5
+$as_echo_n "checking for pcs attribute support... " >&6; }
+if ${libc_cv_gcc_pcs_attribute+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.c <<EOF
+__attribute__((aarch64_vector_pcs)) extern void foo (void);
+EOF
+libc_cv_gcc_pcs_attribute=no
+if ${CC-cc} -c -Wall -Werror conftest.c -o conftest.o 1>&5 \
+   2>&5 ; then
+  libc_cv_gcc_pcs_attribute=yes
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_gcc_pcs_attribute" >&5
+$as_echo "$libc_cv_gcc_pcs_attribute" >&6; }
+
+# Enable libmvec by default.
+if test x"$build_mathvec" = xnotset; then
+  build_mathvec=yes
+fi
+
+# Only test libmvec if the compiler supports aarch64_vector_pcs.
+if test x"$build_mathvec" = xyes; then
+  if test $libc_cv_gcc_pcs_attribute = yes; then
+    config_vars="$config_vars
+test-mathvec = yes"
+  fi
+fi
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
index 7851dd4..eab411c 100644
--- a/sysdeps/aarch64/configure.ac
+++ b/sysdeps/aarch64/configure.ac
@@ -20,3 +20,27 @@ if test $libc_cv_aarch64_be = yes; then
 else
   LIBC_CONFIG_VAR([default-abi], [lp64])
 fi
+
+AC_CACHE_CHECK([for pcs attribute support],
+               libc_cv_gcc_pcs_attribute, [dnl
+cat > conftest.c <<EOF
+__attribute__((aarch64_vector_pcs)) extern void foo (void);
+EOF
+libc_cv_gcc_pcs_attribute=no
+if ${CC-cc} -c -Wall -Werror conftest.c -o conftest.o 1>&AS_MESSAGE_LOG_FD \
+   2>&AS_MESSAGE_LOG_FD ; then
+  libc_cv_gcc_pcs_attribute=yes
+fi
+rm -f conftest*])
+
+# Enable libmvec by default.
+if test x"$build_mathvec" = xnotset; then
+  build_mathvec=yes
+fi
+
+# Only test libmvec if the compiler supports aarch64_vector_pcs.
+if test x"$build_mathvec" = xyes; then
+  if test $libc_cv_gcc_pcs_attribute = yes; then
+    LIBC_CONFIG_VAR([test-mathvec], [yes])
+  fi
+fi
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 4a182bd..2841c03 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -12,3 +12,22 @@ CFLAGS-s_fmaxf.c += -ffinite-math-only
 CFLAGS-s_fmin.c += -ffinite-math-only
 CFLAGS-s_fminf.c += -ffinite-math-only
 endif
+
+ifeq ($(subdir),mathvec)
+libmvec-support += \
+  libmvec_double_vlen2_exp \
+  libmvec_float_vlen4_expf \
+
+install-lib += libmvec_nonshared.a
+$(objpfx)libmvec_nonshared.a: $(dep-dummy-lib); $(make-dummy-lib)
+endif
+
+ifeq ($(subdir),math)
+ifeq ($(build-mathvec),yes)
+double-vlen2-funcs = exp
+float-vlen4-funcs = exp
+ifeq ($(test-mathvec),yes)
+libmvec-tests += double-vlen2 float-vlen4
+endif
+endif
+endif
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
new file mode 100644
index 0000000..da36f3c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/Versions
@@ -0,0 +1,5 @@
+libmvec {
+  GLIBC_2.30 {
+    _ZGVnN2v_exp; _ZGVnN4v_expf;
+  }
+}
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2.h b/sysdeps/aarch64/fpu/libmvec_double_vlen2.h
new file mode 100644
index 0000000..383980d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2.h
@@ -0,0 +1,59 @@
+/* Double-precision 2 element vector function template.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+ENTRY (VECTOR_FUNCTION)
+	stp	x29, x30, [sp, -288]!	// Frame: x29/x30 at 0, q8-q23 at 16..271, q0 scratch at 272.
+	cfi_adjust_cfa_offset (288)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+	mov	x29, sp
+	stp	 q8,  q9, [sp, 16]	// Spill q8-q23 around the scalar calls.
+	stp	q10, q11, [sp, 48]
+	stp	q12, q13, [sp, 80]
+	stp	q14, q15, [sp, 112]
+	stp	q16, q17, [sp, 144]
+	stp	q18, q19, [sp, 176]
+	stp	q20, q21, [sp, 208]
+	stp	q22, q23, [sp, 240]
+
+	// Use per lane load/store to avoid endianness issues.
+	str	q0, [sp, 272]	// Park the whole input vector in the scratch slot.
+	ldr	d0, [sp, 272]	// Element at byte offset 0.
+	bl SCALAR_FUNCTION
+	str	d0, [sp, 272]	// Overwrite the input element with its result.
+	ldr	d0, [sp, 280]	// Element at byte offset 8.
+	bl SCALAR_FUNCTION
+	str	d0, [sp, 280]
+	ldr	q0, [sp, 272]	// Reload the scratch slot as the vector result.
+
+	ldp	q8, q9, [sp, 16]
+	ldp	q10, q11, [sp, 48]
+	ldp	q12, q13, [sp, 80]
+	ldp	q14, q15, [sp, 112]
+	ldp	q16, q17, [sp, 144]
+	ldp	q18, q19, [sp, 176]
+	ldp	q20, q21, [sp, 208]
+	ldp	q22, q23, [sp, 240]
+	ldp	x29, x30, [sp], 288	// Pop the frame.
+	cfi_adjust_cfa_offset (-288)	// Negative: undoes the +288 from the prologue.
+	cfi_restore (x29)
+	cfi_restore (x30)
+	ret
+END (VECTOR_FUNCTION)
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S
new file mode 100644
index 0000000..644405c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S
@@ -0,0 +1,21 @@
+/* Double-precision 2 element vector e^x function.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SCALAR_FUNCTION exp
+#define VECTOR_FUNCTION _ZGVnN2v_exp
+#include "libmvec_double_vlen2.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4.h b/sysdeps/aarch64/fpu/libmvec_float_vlen4.h
new file mode 100644
index 0000000..2450309
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4.h
@@ -0,0 +1,65 @@
+/* Single-precision 4 element vector function template.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+ENTRY (VECTOR_FUNCTION)
+	stp	x29, x30, [sp, -288]!	// Frame: x29/x30 at 0, q8-q23 at 16..271, q0 scratch at 272.
+	cfi_adjust_cfa_offset (288)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+	mov	x29, sp
+	stp	 q8,  q9, [sp, 16]	// Spill q8-q23 around the scalar calls.
+	stp	q10, q11, [sp, 48]
+	stp	q12, q13, [sp, 80]
+	stp	q14, q15, [sp, 112]
+	stp	q16, q17, [sp, 144]
+	stp	q18, q19, [sp, 176]
+	stp	q20, q21, [sp, 208]
+	stp	q22, q23, [sp, 240]
+
+	// Use per lane load/store to avoid endianness issues.
+	str	q0, [sp, 272]	// Park the whole input vector in the scratch slot.
+	ldr	s0, [sp, 272]	// Element at byte offset 0.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 272]	// Overwrite the input element with its result.
+	ldr	s0, [sp, 276]	// Element at byte offset 4.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 276]
+	ldr	s0, [sp, 280]	// Element at byte offset 8.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 280]
+	ldr	s0, [sp, 284]	// Element at byte offset 12.
+	bl SCALAR_FUNCTION
+	str	s0, [sp, 284]
+	ldr	q0, [sp, 272]	// Reload the scratch slot as the vector result.
+
+	ldp	q8, q9, [sp, 16]
+	ldp	q10, q11, [sp, 48]
+	ldp	q12, q13, [sp, 80]
+	ldp	q14, q15, [sp, 112]
+	ldp	q16, q17, [sp, 144]
+	ldp	q18, q19, [sp, 176]
+	ldp	q20, q21, [sp, 208]
+	ldp	q22, q23, [sp, 240]
+	ldp	x29, x30, [sp], 288	// Pop the frame.
+	cfi_adjust_cfa_offset (-288)	// Negative: undoes the +288 from the prologue.
+	cfi_restore (x29)
+	cfi_restore (x30)
+	ret
+END (VECTOR_FUNCTION)
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S
new file mode 100644
index 0000000..ab76ea0
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S
@@ -0,0 +1,21 @@
+/* Single-precision 4 element vector e^x function.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SCALAR_FUNCTION expf
+#define VECTOR_FUNCTION _ZGVnN4v_expf
+#include "libmvec_float_vlen4.h"
diff --git a/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
new file mode 100644
index 0000000..6c6c44d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
@@ -0,0 +1,28 @@
+/* Wrapper part of tests for aarch64 double vector math functions.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <arm_neon.h>
+#include "test-double-vlen2.h"
+
+#define VEC_TYPE float64x2_t
+
+/* Hack: VECTOR_WRAPPER declares the vector function without the pcs attribute,
+   placing it here happens to work, should be fixed in test-math-vector.h.  */
+__attribute__ ((aarch64_vector_pcs))
+
+VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVnN2v_exp)
diff --git a/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
new file mode 100644
index 0000000..5117633
--- /dev/null
+++ b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
@@ -0,0 +1,28 @@
+/* Wrapper part of tests for float aarch64 vector math functions.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <arm_neon.h>
+#include "test-float-vlen4.h"
+
+#define VEC_TYPE float32x4_t
+
+/* Hack: VECTOR_WRAPPER declares the vector function without the pcs attribute,
+   placing it here happens to work, should be fixed in test-math-vector.h.  */
+__attribute__ ((aarch64_vector_pcs))
+
+VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVnN4v_expf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 585e5bb..1ed4af9 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1601,6 +1601,12 @@ float: 1
 idouble: 1
 ifloat: 1
 
+Function: "exp_vlen2":
+double: 1
+
+Function: "exp_vlen4":
+float: 1
+
 Function: "expm1":
 double: 1
 float: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
new file mode 100644
index 0000000..9e17825
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -0,0 +1,2 @@
+GLIBC_2.30 _ZGVnN2v_exp F
+GLIBC_2.30 _ZGVnN4v_expf F


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-07-16 10:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-15 10:31 [glibc/nsz/mathvec] Aarch64: Add simd exp/expf ABI symbols Szabolcs Nagy
2019-07-16 10:25 Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).