[RFC V2] Enable libmvec support for RISC-V

public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed

* [RFC V2] Enable libmvec support for RISC-V
@ 2024-04-09  2:06 shiyulong
  2024-04-09  8:01 ` Andreas Schwab
  0 siblings, 1 reply; 2+ messages in thread
From: shiyulong @ 2024-04-09  2:06 UTC (permalink / raw)
  To: libc-alpha
  Cc: palmer, darius, andrew, maskray, kito.cheng, wuwei2016, jiawei,
	shihua, chenyixuan, yulong

From: yulong <shiyulong@iscas.ac.cn>

Diff: Add check for notset
This patch tries to enable libmvec on RISC-V. I also have demonstrated
how this all fits together by adding implementations for vector cos.
This patch is a try and we hope to receive valuable comments.

Thanks,
yulong

---
 sysdeps/riscv/configure                       |   4 +
 sysdeps/riscv/configure.ac                    |   4 +
 sysdeps/riscv/rvd/Makefile                    |   5 +
 sysdeps/riscv/rvd/Versions                    |   5 +
 sysdeps/riscv/rvd/bits/math-vector.h          |  29 ++++
 sysdeps/riscv/rvd/cos.c                       |  94 ++++++++++++
 sysdeps/riscv/rvd/math_private.h              |  42 ++++++
 sysdeps/riscv/rvd/v_math.h                    | 139 ++++++++++++++++++
 sysdeps/riscv/rvd/vecmath_config.h            |  33 +++++
 sysdeps/unix/sysv/linux/riscv/libmvec.abilist |   1 +
 10 files changed, 356 insertions(+)
 mode change 100644 => 100755 sysdeps/riscv/configure
 create mode 100644 sysdeps/riscv/rvd/Makefile
 create mode 100644 sysdeps/riscv/rvd/Versions
 create mode 100644 sysdeps/riscv/rvd/bits/math-vector.h
 create mode 100644 sysdeps/riscv/rvd/cos.c
 create mode 100644 sysdeps/riscv/rvd/math_private.h
 create mode 100644 sysdeps/riscv/rvd/v_math.h
 create mode 100644 sysdeps/riscv/rvd/vecmath_config.h
 create mode 100644 sysdeps/unix/sysv/linux/riscv/libmvec.abilist

diff --git a/sysdeps/riscv/configure b/sysdeps/riscv/configure
old mode 100644
new mode 100755
index c8f01709f8..a6d0b4becb
--- a/sysdeps/riscv/configure
+++ b/sysdeps/riscv/configure
@@ -80,3 +80,7 @@ if test "$libc_cv_static_pie_on_riscv" = yes; then
   printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
 
 fi
+
+if test x"$build_mathvec" = xnotset; then
+  build_mathvec=yes
+fi
diff --git a/sysdeps/riscv/configure.ac b/sysdeps/riscv/configure.ac
index ee3d1ed014..b1c1105baa 100644
--- a/sysdeps/riscv/configure.ac
+++ b/sysdeps/riscv/configure.ac
@@ -43,3 +43,7 @@ EOF
 if test "$libc_cv_static_pie_on_riscv" = yes; then
   AC_DEFINE(SUPPORT_STATIC_PIE)
 fi
+
+if test x"$build_mathvec" = xnotset; then
+  build_mathvec=yes
+fi
diff --git a/sysdeps/riscv/rvd/Makefile b/sysdeps/riscv/rvd/Makefile
new file mode 100644
index 0000000000..1adb2ee582
--- /dev/null
+++ b/sysdeps/riscv/rvd/Makefile
@@ -0,0 +1,5 @@
+libmvec-supported-funcs = cos
+
+ifeq ($(subdir),mathvec)
+libmvec-support = $(addprefix d,$(libmvec-supported-funcs))
+endif
diff --git a/sysdeps/riscv/rvd/Versions b/sysdeps/riscv/rvd/Versions
new file mode 100644
index 0000000000..c381b0b9fb
--- /dev/null
+++ b/sysdeps/riscv/rvd/Versions
@@ -0,0 +1,5 @@
+libmvec {
+  GLIBC_2.39 {
+    _ZGVnN2v_cos;
+  }
+}
\ No newline at end of file
diff --git a/sysdeps/riscv/rvd/bits/math-vector.h b/sysdeps/riscv/rvd/bits/math-vector.h
new file mode 100644
index 0000000000..b34ffc9bc1
--- /dev/null
+++ b/sysdeps/riscv/rvd/bits/math-vector.h
@@ -0,0 +1,29 @@
+/* Platform-specific SIMD declarations of math functions.
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _MATH_H
+#  error "Never include <bits/math-vector.h> directly;\
+ include <math.h> instead."
+#endif
+
+#if defined __riscv__
+# define __DECL_RVV_RISCV _Pragma
+# undef __DECL_RVV_cos
+# define __DECL_RVV_cos __DECL_RVV_RISCV
+#endif
diff --git a/sysdeps/riscv/rvd/cos.c b/sysdeps/riscv/rvd/cos.c
new file mode 100644
index 0000000000..1806acd629
--- /dev/null
+++ b/sysdeps/riscv/rvd/cos.c
@@ -0,0 +1,94 @@
+/* Double-precision vector cos function.
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+
+static const struct data
+{
+  vfloat64m2_t poly[7];
+  vfloat64m2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3;
+} data = {
+  /* Worst-case error is 3.3 ulp in [-pi/2, pi/2].  */
+  .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7),
+	    V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19),
+	    V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33),
+	    V2 (-0x1.9e9540300a1p-41) },
+  .inv_pi = V2 (0x1.45f306dc9c883p-2),
+  .half_pi = V2 (0x1.921fb54442d18p+0),
+  .pi_1 = V2 (0x1.921fb54442d18p+1),
+  .pi_2 = V2 (0x1.1a62633145c06p-53),
+  .pi_3 = V2 (0x1.c1cd129024e09p-106),
+  .shift = V2 (0x1.8p52),
+  .range_val = V2 (0x1p23)
+};
+
+#define C(i) d->poly[i]
+
+static vfloat64m2_t NOINLINE
+special_case (vfloat64m2_t x, vfloat64m2_t y, vuint64m2_t odd, vuint64m2_t cmp)
+{
+  y = vreinterpret_v_u64m2_f64m2 (vor (vreinterpret_v_f64m2_u64m2 (y), odd, 1));
+  return v_call_f64 (cos, x, y, cmp);
+}
+
+vfloat64m2_t V_NAME_D1 (cos) (vfloat64m2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  vfloat64m2_t n, r, r2, r3, r4, t1, t2, t3, y;
+  vuint64m2_t odd, cmp;
+
+  r = vfabs_v_f64m2 (x, 2);
+  cmp = (vuint64m2_t) vmsgeu (vreinterpret_v_f64m2_u64m2 (r),
+		   vreinterpret_v_f64m2_u64m2 (d->range_val));
+  if (__glibc_unlikely (v_any_u64 (cmp)))
+    /* If fenv exceptions are to be triggered correctly, set any special lanes
+       to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+       special-case handler later.  */
+    r = vmsltu (cmp, v_f64 (1.0), r);
+
+  /* n = rint((|x|+pi/2)/pi) - 0.5.  */
+  n = vfmadd (d->shift, d->inv_pi, vfadd (r, d->half_pi,2), 2);
+  odd = vshlq_n_u64 (vreinterpret_v_f64m2_u64m2 (n), 63);
+  n = vfsub (n, d->shift, 2);
+  n = vfsub (n, v_f64 (0.5), 2);
+
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
+  r = vfmsub (r, d->pi_1, n, 2);
+  r = vfmsub (r, d->pi_2, n, 2);
+  r = vfmsub (r, d->pi_3, n, 2);
+
+  /* sin(r) poly approx.  */
+  r2 = vfmul (r, r, 2);
+  r3 = vfmul (r2, r, 2);
+  r4 = vfmul (r2, r2, 2);
+
+  t1 = vfmadd (C (4), C (5), r2, 2);
+  t2 = vfmadd (C (2), C (3), r2, 2);
+  t3 = vfmadd (C (0), C (1), r2, 2);
+
+  y = vfmadd (t1, C (6), r4, 2);
+  y = vfmadd (t2, y, r4, 2);
+  y = vfmadd (t3, y, r4, 2);
+  y = vfmadd (r, y, r3, 2);
+
+  if (__glibc_unlikely (v_any_u64 (cmp)))
+    return special_case (x, y, odd, cmp);
+  return vreinterpretq_f64_u64 (vor (vreinterpret_v_f64m2_u64m2 (y), odd, 2));
+}
diff --git a/sysdeps/riscv/rvd/math_private.h b/sysdeps/riscv/rvd/math_private.h
new file mode 100644
index 0000000000..655a4dcd55
--- /dev/null
+++ b/sysdeps/riscv/rvd/math_private.h
@@ -0,0 +1,42 @@
+/* Configure optimized libm functions.  RISC-V version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef RISCV_MATH_PRIVATE_H
+#define RISCV_MATH_PRIVATE_H 1
+
+#include <stdint.h>
+#include <math.h>
+
+/* Use inline round and lround instructions.  */
+#define TOINT_INTRINSICS 1
+
+static inline double_t
+roundtoint (double_t x)
+{
+  return round (x);
+}
+
+static inline int32_t
+converttoint (double_t x)
+{
+  return lround (x);
+}
+
+#include_next <math_private.h>
+
+#endif
diff --git a/sysdeps/riscv/rvd/v_math.h b/sysdeps/riscv/rvd/v_math.h
new file mode 100644
index 0000000000..d2e821aeb2
--- /dev/null
+++ b/sysdeps/riscv/rvd/v_math.h
@@ -0,0 +1,139 @@
+/* Utilities for Advanced SIMD libmvec routines.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _V_MATH_H
+#define _V_MATH_H
+
+#include <riscv_vector.h>
+#include "vecmath_config.h"
+
+#define V_NAME_D1(fun) _ZGVnN2v_##fun
+
+/* Shorthand helpers for declaring constants.  */
+#define V2(X) { X, X }
+#define V4(X) { X, X, X, X }
+#define V8(X) { X, X, X, X, X, X, X, X }
+
+static inline vfloat32m4_t
+v_f32 (float x)
+{
+  return (vfloat32m4_t) V4 (x);
+}
+static inline vuint32m4_t
+v_u32 (uint32_t x)
+{
+  return (vuint32m4_t) V4 (x);
+}
+static inline vint32m4_t
+v_s32 (int32_t x)
+{
+  return (vint32m4_t) V4 (x);
+}
+
+/* true if any elements of a vector compare result is non-zero.  */
+static inline int
+v_any_u32 (vuint32m4_t x)
+{
+  /* assume elements in x are either 0 or -1u.  */
+  return vpaddd_u64 (vreinterpret_v_u64m2_u32m2 (x)) != 0;
+}
+static inline int
+v_any_u32h (vuint32m2_t x)
+{
+  return vget_lane_u64 (vreinterpret_v_u32m2_u64m2 (x), 0) != 0;
+}
+static inline vfloat32m4_t
+v_lookup_f32 (const float *tab, vuint32m4_t idx)
+{
+  return (vfloat32m4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
+}
+static inline vuint32m4_t
+v_lookup_u32 (const uint32_t *tab, vuint32m4_t idx)
+{
+  return (vuint32m4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
+}
+static inline vfloat32m4_t
+v_call_f32 (float (*f) (float), vfloat32m4_t x, vfloat32m4_t y, vuint32m4_t p)
+{
+  return (vfloat32m4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
+			p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
+}
+static inline vfloat32m4_t
+v_call2_f32 (float (*f) (float, float), vfloat32m4_t x1, vfloat32m4_t x2,
+	     vfloat32m4_t y, vuint32m4_t p)
+{
+  return (vfloat32m4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
+			p[1] ? f (x1[1], x2[1]) : y[1],
+			p[2] ? f (x1[2], x2[2]) : y[2],
+			p[3] ? f (x1[3], x2[3]) : y[3] };
+}
+
+static inline vfloat64m2_t
+v_f64 (double x)
+{
+  return (vfloat64m2_t) V2 (x);
+}
+static inline vuint64m2_t
+v_u64 (uint64_t x)
+{
+  return (vuint64m2_t) V2 (x);
+}
+static inline vint64m2_t
+v_s64 (int64_t x)
+{
+  return (vint64m2_t) V2 (x);
+}
+
+/* true if any elements of a vector compare result is non-zero.  */
+static inline int
+v_any_u64 (vuint64m1_t x)
+{
+  /* assume elements in x are either 0 or -1u.  */
+  return vpaddd_u64 (x) != 0;
+}
+/* true if all elements of a vector compare result is 1.  */
+static inline int
+v_all_u64 (vuint64m1_t x)
+{
+  /* assume elements in x are either 0 or -1u.  */
+  return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
+}
+static inline vfloat64m1_t
+v_lookup_f64 (const double *tab, vuint64m1_t idx)
+{
+  return (vfloat64m1_t){ tab[idx[0]], tab[idx[1]] };
+}
+static inline vuint64m1_t
+v_lookup_u64 (const uint64_t *tab, vuint64m1_t idx)
+{
+  return (vuint64m1_t){ tab[idx[0]], tab[idx[1]] };
+}
+static inline vfloat64m1_t
+v_call_f64 (double (*f) (double), vfloat64m1_t x, vfloat64m1_t y, vuint64m1_t p)
+{
+  return (vfloat64m1_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1] };
+}
+static inline vfloat64m1_t
+v_call2_f64 (double (*f) (double, double), vfloat64m1_t x1, vfloat64m1_t x2,
+	     vfloat64m1_t y, vuint64m1_t p)
+{
+  return (vfloat64m1_t){ p[0] ? f (x1[0], x2[0]) : y[0],
+			p[1] ? f (x1[1], x2[1]) : y[1] };
+}
+
+#endif
diff --git a/sysdeps/riscv/rvd/vecmath_config.h b/sysdeps/riscv/rvd/vecmath_config.h
new file mode 100644
index 0000000000..290ea1e33c
--- /dev/null
+++ b/sysdeps/riscv/rvd/vecmath_config.h
@@ -0,0 +1,33 @@
+/* Configuration for libmvec routines.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _VECMATH_CONFIG_H
+#define _VECMATH_CONFIG_H
+
+#include <math_private.h>
+
+/* Return ptr but hide its value from the compiler so accesses through it
+   cannot be optimized based on the contents.  */
+#define ptr_barrier(ptr)                                                      \
+  ({                                                                          \
+    __typeof (ptr) __ptr = (ptr);                                             \
+    __asm("" : "+r"(__ptr));                                                  \
+    __ptr;                                                                    \
+  })
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/riscv/libmvec.abilist b/sysdeps/unix/sysv/linux/riscv/libmvec.abilist
new file mode 100644
index 0000000000..7389378e58
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/libmvec.abilist
@@ -0,0 +1 @@
+GLIBC_2.39 _ZGVnN2v_cos F
\ No newline at end of file
-- 
2.34.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC V2] Enable libmvec support for RISC-V
  2024-04-09  2:06 [RFC V2] Enable libmvec support for RISC-V shiyulong
@ 2024-04-09  8:01 ` Andreas Schwab
  0 siblings, 0 replies; 2+ messages in thread
From: Andreas Schwab @ 2024-04-09  8:01 UTC (permalink / raw)
  To: shiyulong
  Cc: libc-alpha, palmer, darius, andrew, maskray, kito.cheng,
	wuwei2016, jiawei, shihua, chenyixuan

On Apr 09 2024, shiyulong@iscas.ac.cn wrote:

> \ No newline at end of file
> \ No newline at end of file

Please fix that.

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-04-09  8:01 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-09  2:06 [RFC V2] Enable libmvec support for RISC-V shiyulong
2024-04-09  8:01 ` Andreas Schwab

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).