public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1] linux: Add support for getrandom vDSO
@ 2024-07-30 13:24 Jason A. Donenfeld
  2024-07-30 20:16 ` Florian Weimer
  2024-08-02 17:32 ` [PATCH v1] " Cristian Rodríguez
  0 siblings, 2 replies; 9+ messages in thread
From: Jason A. Donenfeld @ 2024-07-30 13:24 UTC (permalink / raw)
  To: libc-alpha, zatrazz, carlos; +Cc: Jason A. Donenfeld

Linux 6.11 gains support for calling getrandom() from the vDSO. It
operates on a thread-local opaque state allocated with mmap using flags
specified by the vDSO.

Multiple states are allocated at once, as many as fit into a page, and
these are held in an array of available states to be doled out to each
thread upon first use, and recycled when a thread terminates. As these
states run low, more are allocated.

To make this procedure async-signal-safe, a simple guard is used in the
LSB of the opaque state address, falling back to the syscall if there's
reentrancy contention.

This implementation is intentionally kept somewhat basic. We can add
optimizations later, but for now, the idea is to get the bones set.

It's currently enabled for x86_64. As the kernel support gains more
platforms (arm64 is in the works), this can be easily turned on for
those.

Co-authored-by: Adhemerval Zanella <zatrazz@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
Hi folks,

This is joint work with Adhemerval. We've been iterating on this in
parallel with the kernel development, and now that the kernel part has
landed, we're ready to submit the glibc implementation here.

Note that I'll be traveling until the end of August, so v2 of this
patch, should there be a need, will likely be submitted by Adhemerval
instead.

This patch currently lives here: https://git.zx2c4.com/glibc/log/?h=vdso

Thanks for reviewing.

Thanks,
Jason

 include/sys/random.h                          |   4 +
 malloc/malloc.c                               |   4 +-
 sysdeps/generic/not-cancel.h                  |   4 +-
 sysdeps/mach/hurd/not-cancel.h                |   4 +-
 sysdeps/nptl/_Fork.c                          |   2 +
 sysdeps/nptl/fork.h                           |   2 +
 sysdeps/unix/sysv/linux/dl-vdso-setup.c       |   5 +
 sysdeps/unix/sysv/linux/dl-vdso-setup.h       |   3 +
 sysdeps/unix/sysv/linux/getrandom.c           | 199 +++++++++++++++++-
 sysdeps/unix/sysv/linux/getrandom_vdso.h      |  36 ++++
 sysdeps/unix/sysv/linux/not-cancel.h          |   7 +-
 sysdeps/unix/sysv/linux/tls-internal-struct.h |  29 +++
 sysdeps/unix/sysv/linux/tls-internal.c        |   2 +
 sysdeps/unix/sysv/linux/x86_64/sysdep.h       |   1 +
 14 files changed, 295 insertions(+), 7 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/getrandom_vdso.h
 create mode 100644 sysdeps/unix/sysv/linux/tls-internal-struct.h

diff --git a/include/sys/random.h b/include/sys/random.h
index 6aa313d35d..4cc94c572d 100644
--- a/include/sys/random.h
+++ b/include/sys/random.h
@@ -3,9 +3,13 @@
 
 # ifndef _ISOMAC
 
+# include <stdbool.h>
+
 extern ssize_t __getrandom (void *__buffer, size_t __length,
                             unsigned int __flags) __wur;
 libc_hidden_proto (__getrandom)
 
+extern void __getrandom_fork_subprocess (bool reset_states) attribute_hidden;
+
 # endif /* !_ISOMAC */
 #endif
diff --git a/malloc/malloc.c b/malloc/malloc.c
index bcb6e5b83c..9e577ab900 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -3140,8 +3140,8 @@ static void
 tcache_key_initialize (void)
 {
   /* We need to use the _nostatus version here, see BZ 29624.  */
-  if (__getrandom_nocancel_nostatus (&tcache_key, sizeof(tcache_key),
-				     GRND_NONBLOCK)
+  if (__getrandom_nocancel_nostatus_direct (&tcache_key, sizeof(tcache_key),
+					    GRND_NONBLOCK)
       != sizeof (tcache_key))
     {
       tcache_key = random_bits ();
diff --git a/sysdeps/generic/not-cancel.h b/sysdeps/generic/not-cancel.h
index 2dd1064600..95d0ba1923 100644
--- a/sysdeps/generic/not-cancel.h
+++ b/sysdeps/generic/not-cancel.h
@@ -51,7 +51,9 @@
   __fcntl64 (fd, cmd, __VA_ARGS__)
 #define __getrandom_nocancel(buf, size, flags) \
   __getrandom (buf, size, flags)
-#define __getrandom_nocancel_nostatus(buf, size, flags) \
+#define __getrandom_nocancel_direct(buf, size, flags) \
+  __getrandom_nocancel (buf, size, flags)
+#define __getrandom_nocancel_nostatus_direct(buf, size, flags) \
   __getrandom (buf, size, flags)
 #define __poll_infinity_nocancel(fds, nfds) \
   __poll (fds, nfds, -1)
diff --git a/sysdeps/mach/hurd/not-cancel.h b/sysdeps/mach/hurd/not-cancel.h
index 69fb3c00ef..393febd225 100644
--- a/sysdeps/mach/hurd/not-cancel.h
+++ b/sysdeps/mach/hurd/not-cancel.h
@@ -79,7 +79,7 @@ __typeof (__fcntl) __fcntl_nocancel;
 /* Non cancellable getrandom syscall that does not also set errno in case of
    failure.  */
 static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
 {
   int save_errno = errno;
   ssize_t r = __getrandom (buf, buflen, flags);
@@ -90,6 +90,8 @@ __getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
 
 #define __getrandom_nocancel(buf, size, flags) \
   __getrandom (buf, size, flags)
+#define __getrandom_nocancel_direct(buf, size, flags) \
+  __getrandom_nocancel (buf, size, flags)
 
 #define __poll_infinity_nocancel(fds, nfds) \
   __poll (fds, nfds, -1)
diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c
index ef199ddbc3..8375b18535 100644
--- a/sysdeps/nptl/_Fork.c
+++ b/sysdeps/nptl/_Fork.c
@@ -18,6 +18,7 @@
 
 #include <arch-fork.h>
 #include <pthreadP.h>
+#include <sys/random.h>
 
 pid_t
 _Fork (void)
@@ -43,6 +44,7 @@ _Fork (void)
       self->robust_head.list = &self->robust_head;
       INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
 			     sizeof (struct robust_list_head));
+      call_function_static_weak (__getrandom_fork_subprocess, false);
     }
   return pid;
 }
diff --git a/sysdeps/nptl/fork.h b/sysdeps/nptl/fork.h
index 7643926df9..858129f3d2 100644
--- a/sysdeps/nptl/fork.h
+++ b/sysdeps/nptl/fork.h
@@ -26,6 +26,7 @@
 #include <mqueue.h>
 #include <pthreadP.h>
 #include <sysdep.h>
+#include <sys/random.h>
 
 static inline void
 fork_system_setup (void)
@@ -46,6 +47,7 @@ fork_system_setup_after_fork (void)
 
   call_function_static_weak (__mq_notify_fork_subprocess);
   call_function_static_weak (__timer_fork_subprocess);
+  call_function_static_weak (__getrandom_fork_subprocess, true);
 }
 
 /* In case of a fork() call the memory allocation in the child will be
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.c b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
index 3a44944dbb..476c6db75a 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.c
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
@@ -66,6 +66,11 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres) (clockid_t,
 PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t,
 						    struct __timespec64 *) RELRO;
 # endif
+# ifdef HAVE_GETRANDOM_VSYSCALL
+PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len,
+                                              unsigned int flags, void *state,
+                                              size_t state_len) RELRO;
+# endif
 
 /* PowerPC specific ones.  */
 # ifdef HAVE_GET_TBFREQ
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.h b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
index 8aee5a8212..cde99f608c 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.h
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
@@ -50,6 +50,9 @@ setup_vdso_pointers (void)
 #ifdef HAVE_RISCV_HWPROBE
   GLRO(dl_vdso_riscv_hwprobe) = dl_vdso_vsym (HAVE_RISCV_HWPROBE);
 #endif
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL);
+#endif
 }
 
 #endif
diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
index 777d1decf0..b84cd72939 100644
--- a/sysdeps/unix/sysv/linux/getrandom.c
+++ b/sysdeps/unix/sysv/linux/getrandom.c
@@ -21,12 +21,209 @@
 #include <unistd.h>
 #include <sysdep-cancel.h>
 
+#ifdef HAVE_GETRANDOM_VSYSCALL
+# include <getrandom_vdso.h>
+# include <ldsodefs.h>
+# include <libc-lock.h>
+# include <list.h>
+# include <setvmaname.h>
+# include <sys/mman.h>
+# include <sys/sysinfo.h>
+# include <tls-internal.h>
+
+# define ALIGN_PAGE(p) PTR_ALIGN_UP (p, GLRO (dl_pagesize))
+# define READ_ONCE(p) (*((volatile typeof (p) *) (&(p))))
+# define WRITE_ONCE(p, v) (*((volatile typeof (p) *) (&(p))) = (v))
+# define RESERVE_PTR(p) ((void *) ((uintptr_t) (p) | 1UL))
+# define RELEASE_PTR(p) ((void *) ((uintptr_t) (p) & ~1UL))
+# define IS_RESERVED_PTR(p) (!!((uintptr_t) (p) & 1UL))
+
+static struct
+{
+  __libc_lock_define (, lock);
+  void **states;
+  size_t state_size;
+  size_t len;
+  size_t total;
+  size_t cap;
+} grnd_alloc = { .lock = LLL_LOCK_INITIALIZER };
+
+static struct grnd_alloc_state *
+vgetrandom_get_state (void)
+{
+  void *state = NULL;
+
+  __libc_lock_lock (grnd_alloc.lock);
+
+  if (grnd_alloc.len == 0)
+    {
+      struct vgetrandom_opaque_params params;
+      size_t block_size, num = __get_nprocs (); /* Just a decent heuristic. */
+      void *block;
+
+      if (GLRO (dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL))
+        goto out;
+      grnd_alloc.state_size = params.size_of_opaque_state;
+
+      block_size = ALIGN_PAGE (num * grnd_alloc.state_size);
+      num = (GLRO (dl_pagesize) / grnd_alloc.state_size) *
+            (block_size / GLRO (dl_pagesize));
+      block = __mmap (0, block_size, params.mmap_prot,
+                      params.mmap_flags, -1, 0);
+      if (block == NULL)
+        goto out;
+      __set_vma_name (block, block_size, " glibc: getrandom");
+
+      if (grnd_alloc.total + num > grnd_alloc.cap)
+        {
+          void **states;
+          size_t states_size = ALIGN_PAGE (sizeof(*grnd_alloc.states) *
+                                           (grnd_alloc.total + num));
+          if (grnd_alloc.states)
+            states = __mremap (grnd_alloc.states,
+                               ALIGN_PAGE (sizeof (*grnd_alloc.states) *
+                                           grnd_alloc.cap),
+                               states_size, MREMAP_MAYMOVE);
+          else
+            states = __mmap (NULL, states_size, PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+          if (states == MAP_FAILED)
+            goto unmap;
+          __set_vma_name (states, states_size, " glibc: getrandom");
+          grnd_alloc.states = states;
+          grnd_alloc.cap = states_size / sizeof (*grnd_alloc.states);
+        }
+
+      for (size_t i = 0; i < num; ++i)
+        {
+          if (((uintptr_t) block & (GLRO (dl_pagesize) - 1)) +
+              grnd_alloc.state_size > GLRO (dl_pagesize))
+            block = ALIGN_PAGE (block);
+          grnd_alloc.states[i] = block;
+          block += grnd_alloc.state_size;
+        }
+      grnd_alloc.len = num;
+      grnd_alloc.total += num;
+      goto success;
+
+    unmap:
+      __munmap (block, block_size);
+      goto out;
+    }
+
+success:
+  state = grnd_alloc.states[--grnd_alloc.len];
+
+out:
+  __libc_lock_unlock (grnd_alloc.lock);
+  return state;
+}
+
+/* Return true if the syscall fallback should be issued in the case the vDSO
+   is not present, in the case of reentrancy, or if any memory allocation
+   fails.  */
+static bool
+__getrandom_internal (ssize_t *ret, void *buffer, size_t length,
+                      unsigned int flags)
+{
+  if (GLRO (dl_vdso_getrandom) == NULL)
+    return false;
+
+  struct tls_internal_t *ti = __glibc_tls_internal ();
+  void *state = READ_ONCE (ti->getrandom_buf);
+  if (IS_RESERVED_PTR (state))
+    return false;
+  WRITE_ONCE (ti->getrandom_buf, RESERVE_PTR (state));
+
+  bool r = false;
+  if (state == NULL)
+    {
+      state = vgetrandom_get_state ();
+      if (state == NULL)
+        goto out;
+    }
+
+  *ret = GLRO (dl_vdso_getrandom) (buffer, length, flags, state,
+                                   grnd_alloc.state_size);
+  if (INTERNAL_SYSCALL_ERROR_P (*ret))
+    {
+      __set_errno (INTERNAL_SYSCALL_ERRNO (*ret));
+      *ret = -1;
+    }
+  r = true;
+
+out:
+  WRITE_ONCE (ti->getrandom_buf, state);
+  return r;
+}
+
+static void
+reset_other_threads (list_t *list)
+{
+  list_t *runp;
+  list_for_each (runp, list)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == THREAD_SELF || !t->tls_state.getrandom_buf)
+        continue;
+      grnd_alloc.states[grnd_alloc.len++]
+          = RELEASE_PTR (t->tls_state.getrandom_buf);
+      t->tls_state.getrandom_buf = NULL;
+    }
+}
+#endif
+
+void
+__getrandom_vdso_release (void)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  void *state = __glibc_tls_internal ()->getrandom_buf;
+  if (state == NULL)
+    return;
+
+  __libc_lock_lock (grnd_alloc.lock);
+  grnd_alloc.states[grnd_alloc.len++] = state;
+  __libc_lock_unlock (grnd_alloc.lock);
+#endif
+}
+
+void
+__getrandom_fork_subprocess (bool reset_states)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  grnd_alloc.lock = LLL_LOCK_INITIALIZER;
+  if (reset_states)
+    {
+      reset_other_threads (&GL (dl_stack_used));
+      reset_other_threads (&GL (dl_stack_user));
+    }
+#endif
+}
+
+ssize_t
+__getrandom_nocancel (void *buffer, size_t length, unsigned int flags)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  ssize_t r;
+  if (__getrandom_internal (&r, buffer, length, flags))
+    return r;
+#endif
+
+  return SYSCALL_CANCEL (getrandom, buffer, length, flags);
+}
+
 /* Write up to LENGTH bytes of randomness starting at BUFFER.
    Return the number of bytes written, or -1 on error.  */
 ssize_t
 __getrandom (void *buffer, size_t length, unsigned int flags)
 {
-  return SYSCALL_CANCEL (getrandom, buffer, length, flags);
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  ssize_t r;
+  if (__getrandom_internal (&r, buffer, length, flags))
+    return r;
+#endif
+
+  return INTERNAL_SYSCALL_CALL (getrandom, buffer, length, flags);
 }
 libc_hidden_def (__getrandom)
 weak_alias (__getrandom, getrandom)
diff --git a/sysdeps/unix/sysv/linux/getrandom_vdso.h b/sysdeps/unix/sysv/linux/getrandom_vdso.h
new file mode 100644
index 0000000000..d3130c1038
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/getrandom_vdso.h
@@ -0,0 +1,36 @@
+/* Linux getrandom vDSO support.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _GETRANDOM_VDSO_H
+#define _GETRANDOM_VDSO_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+extern void __getrandom_vdso_release (void) attribute_hidden;
+
+struct vgetrandom_opaque_params
+{
+  uint32_t size_of_opaque_state;
+  uint32_t mmap_prot;
+  uint32_t mmap_flags;
+  uint32_t reserved[13];
+};
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h
index 2a7585b73f..12f26912d3 100644
--- a/sysdeps/unix/sysv/linux/not-cancel.h
+++ b/sysdeps/unix/sysv/linux/not-cancel.h
@@ -27,6 +27,7 @@
 #include <sys/syscall.h>
 #include <sys/wait.h>
 #include <time.h>
+#include <sys/random.h>
 
 /* Non cancellable open syscall.  */
 __typeof (open) __open_nocancel;
@@ -84,15 +85,17 @@ __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt)
 }
 
 static inline ssize_t
-__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_direct (void *buf, size_t buflen, unsigned int flags)
 {
   return INLINE_SYSCALL_CALL (getrandom, buf, buflen, flags);
 }
 
+__typeof (getrandom) __getrandom_nocancel attribute_hidden;
+
 /* Non cancellable getrandom syscall that does not also set errno in case of
    failure.  */
 static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
 {
   return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags);
 }
diff --git a/sysdeps/unix/sysv/linux/tls-internal-struct.h b/sysdeps/unix/sysv/linux/tls-internal-struct.h
new file mode 100644
index 0000000000..f8d561c987
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tls-internal-struct.h
@@ -0,0 +1,29 @@
+/* Per-thread state.  Linux version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _TLS_INTERNAL_STRUCT_H
+#define _TLS_INTERNAL_STRUCT_H 1
+
+struct tls_internal_t
+{
+  char *strsignal_buf;
+  char *strerror_l_buf;
+  void *getrandom_buf;
+};
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
index 81ebcdfbf8..886b55daf9 100644
--- a/sysdeps/unix/sysv/linux/tls-internal.c
+++ b/sysdeps/unix/sysv/linux/tls-internal.c
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <getrandom_vdso.h>
 #include <string.h>
 #include <tls-internal.h>
 
@@ -25,4 +26,5 @@ __glibc_tls_internal_free (void)
   struct pthread *self = THREAD_SELF;
   free (self->tls_state.strsignal_buf);
   free (self->tls_state.strerror_l_buf);
+  call_function_static_weak (__getrandom_vdso_release);
 }
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index a2b021bd86..7dc072ae2d 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -376,6 +376,7 @@
 # define HAVE_TIME_VSYSCALL             "__vdso_time"
 # define HAVE_GETCPU_VSYSCALL		"__vdso_getcpu"
 # define HAVE_CLOCK_GETRES64_VSYSCALL   "__vdso_clock_getres"
+# define HAVE_GETRANDOM_VSYSCALL        "__vdso_getrandom"
 
 # define HAVE_CLONE3_WRAPPER			1
 
-- 
2.45.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] linux: Add support for getrandom vDSO
  2024-07-30 13:24 [PATCH v1] linux: Add support for getrandom vDSO Jason A. Donenfeld
@ 2024-07-30 20:16 ` Florian Weimer
  2024-07-31 12:38   ` Adhemerval Zanella Netto
  2024-08-01  1:47   ` Jason A. Donenfeld
  2024-08-02 17:32 ` [PATCH v1] " Cristian Rodríguez
  1 sibling, 2 replies; 9+ messages in thread
From: Florian Weimer @ 2024-07-30 20:16 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: libc-alpha, zatrazz, carlos

* Jason A. Donenfeld:

> This is joint work with Adhemerval. We've been iterating on this in
> parallel with the kernel development, and now that the kernel part has
> landed, we're ready to submit the glibc implementation here.

I find the fork handling part puzzling.  This needs comments.  I expect
that we need to reset the state pointer in threads that do not exist.

The vgetrandom_get_state function does not look async-signal-safe, and
safe against concurrent fork (given that its lock is just
reinitialized).  It's on the slow path, so maybe it could block signals.

It seems wrong that __getrandom_nocancel uses SYSCALL_CANCEL.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] linux: Add support for getrandom vDSO
  2024-07-30 20:16 ` Florian Weimer
@ 2024-07-31 12:38   ` Adhemerval Zanella Netto
  2024-08-01  1:47   ` Jason A. Donenfeld
  1 sibling, 0 replies; 9+ messages in thread
From: Adhemerval Zanella Netto @ 2024-07-31 12:38 UTC (permalink / raw)
  To: Florian Weimer, Jason A. Donenfeld; +Cc: libc-alpha, carlos



On 30/07/24 17:16, Florian Weimer wrote:
> * Jason A. Donenfeld:
> 
>> This is joint work with Adhemerval. We've been iterating on this in
>> parallel with the kernel development, and now that the kernel part has
>> landed, we're ready to submit the glibc implementation here.
> 
> I find the fork handling part puzzling.  This needs comments.  I expect
> that we need to reset the state pointer in threads that do not exist.

What do you mean by 'thread that do not exist'? Either the opaque state pointer
is in the free-list or allocated to a thread at the moment of the fork() (thus
on getrandom_buf).

> 
> The vgetrandom_get_state function does not look async-signal-safe, and
> safe against concurrent fork (given that its lock is just
> reinitialized).  It's on the slow path, so maybe it could block signals.

I think you mean _Fork() here, since fork() is not async-signal-safe. The
idea is re-entrant getrandom() will always fallback to the syscall, since 
getrandom_buf reserved bit will be set.  And since _Fork() idea is to
eventually either exit() or call execve(), I think this is fair compromise.

The signal block / unblock would be required if we want to return from 
the signal handler in this case, but I also think it was not defined since
we dot not re-initilize a bunch of stuff at _Fork() anyway.

> 
> It seems wrong that __getrandom_nocancel uses SYSCALL_CANCEL.

Indeed, the __getrandom_nocancel and __getrandom inline macros are switched.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] linux: Add support for getrandom vDSO
  2024-07-30 20:16 ` Florian Weimer
  2024-07-31 12:38   ` Adhemerval Zanella Netto
@ 2024-08-01  1:47   ` Jason A. Donenfeld
  2024-08-01  2:11     ` [PATCH v2] " Jason A. Donenfeld
  1 sibling, 1 reply; 9+ messages in thread
From: Jason A. Donenfeld @ 2024-08-01  1:47 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libc-alpha, zatrazz, carlos

On Tue, Jul 30, 2024 at 10:16 PM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Jason A. Donenfeld:
>
> > This is joint work with Adhemerval. We've been iterating on this in
> > parallel with the kernel development, and now that the kernel part has
> > landed, we're ready to submit the glibc implementation here.
>
> I find the fork handling part puzzling.  This needs comments.  I expect
> that we need to reset the state pointer in threads that do not exist.
>
> The vgetrandom_get_state function does not look async-signal-safe, and
> safe against concurrent fork (given that its lock is just
> reinitialized).  It's on the slow path, so maybe it could block signals.

What Adhemerval said is my thinking on this too. I'll add a bunch of
comments though to make it more clear, and submit a v2.


>
> It seems wrong that __getrandom_nocancel uses SYSCALL_CANCEL.

Thanks. Will fix in v2.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2] linux: Add support for getrandom vDSO
  2024-08-01  1:47   ` Jason A. Donenfeld
@ 2024-08-01  2:11     ` Jason A. Donenfeld
  2024-08-01  6:47       ` Florian Weimer
  0 siblings, 1 reply; 9+ messages in thread
From: Jason A. Donenfeld @ 2024-08-01  2:11 UTC (permalink / raw)
  To: libc-alpha, zatrazz, carlos, fweimer; +Cc: Jason A. Donenfeld

Linux 6.11 gains support for calling getrandom() from the vDSO. It
operates on a thread-local opaque state allocated with mmap using flags
specified by the vDSO.

Multiple states are allocated at once, as many as fit into a page, and
these are held in an array of available states to be doled out to each
thread upon first use, and recycled when a thread terminates. As these
states run low, more are allocated.

To make this procedure async-signal-safe, a simple guard is used in the
LSB of the opaque state address, falling back to the syscall if there's
reentrancy contention.

This implementation is intentionally kept somewhat basic. We can add
optimizations later, but for now, the idea is to get the bones set.

It's currently enabled for x86_64. As the kernel support gains more
platforms (arm64 is in the works), this can be easily turned on for
those.

Co-authored-by: Adhemerval Zanella <zatrazz@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
Hi folks,

This is joint work with Adhemerval. We've been iterating on this in
parallel with the kernel development, and now that the kernel part has
landed, we're ready to submit the glibc implementation here.

Note that I'll be traveling until the end of August, so v2 of this
patch, should there be a need, will likely be submitted by Adhemerval
instead.

This patch currently lives here: https://git.zx2c4.com/glibc/log/?h=vdso

Thanks for reviewing.

Thanks,
Jason

Changes v1->v2:
- use INLINE_SYSCALL_CALL instead of SYSCALL_CANCEL for
  getrandom_nocancel variant
- add lots of comment documentation on various parts, including
  reentrancy guard

 include/sys/random.h                          |   4 +
 malloc/malloc.c                               |   4 +-
 sysdeps/generic/not-cancel.h                  |   4 +-
 sysdeps/mach/hurd/not-cancel.h                |   4 +-
 sysdeps/nptl/_Fork.c                          |   2 +
 sysdeps/nptl/fork.h                           |   2 +
 sysdeps/unix/sysv/linux/dl-vdso-setup.c       |   5 +
 sysdeps/unix/sysv/linux/dl-vdso-setup.h       |   3 +
 sysdeps/unix/sysv/linux/getrandom.c           | 263 +++++++++++++++++-
 sysdeps/unix/sysv/linux/getrandom_vdso.h      |  36 +++
 sysdeps/unix/sysv/linux/not-cancel.h          |   7 +-
 sysdeps/unix/sysv/linux/tls-internal-struct.h |  29 ++
 sysdeps/unix/sysv/linux/tls-internal.c        |   2 +
 sysdeps/unix/sysv/linux/x86_64/sysdep.h       |   1 +
 14 files changed, 359 insertions(+), 7 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/getrandom_vdso.h
 create mode 100644 sysdeps/unix/sysv/linux/tls-internal-struct.h

diff --git a/include/sys/random.h b/include/sys/random.h
index 6aa313d35d..4cc94c572d 100644
--- a/include/sys/random.h
+++ b/include/sys/random.h
@@ -3,9 +3,13 @@
 
 # ifndef _ISOMAC
 
+# include <stdbool.h>
+
 extern ssize_t __getrandom (void *__buffer, size_t __length,
                             unsigned int __flags) __wur;
 libc_hidden_proto (__getrandom)
 
+extern void __getrandom_fork_subprocess (bool reset_states) attribute_hidden;
+
 # endif /* !_ISOMAC */
 #endif
diff --git a/malloc/malloc.c b/malloc/malloc.c
index bcb6e5b83c..9e577ab900 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -3140,8 +3140,8 @@ static void
 tcache_key_initialize (void)
 {
   /* We need to use the _nostatus version here, see BZ 29624.  */
-  if (__getrandom_nocancel_nostatus (&tcache_key, sizeof(tcache_key),
-				     GRND_NONBLOCK)
+  if (__getrandom_nocancel_nostatus_direct (&tcache_key, sizeof(tcache_key),
+					    GRND_NONBLOCK)
       != sizeof (tcache_key))
     {
       tcache_key = random_bits ();
diff --git a/sysdeps/generic/not-cancel.h b/sysdeps/generic/not-cancel.h
index 2dd1064600..8e3f49cc07 100644
--- a/sysdeps/generic/not-cancel.h
+++ b/sysdeps/generic/not-cancel.h
@@ -51,7 +51,9 @@
   __fcntl64 (fd, cmd, __VA_ARGS__)
 #define __getrandom_nocancel(buf, size, flags) \
   __getrandom (buf, size, flags)
-#define __getrandom_nocancel_nostatus(buf, size, flags) \
+#define __getrandom_nocancel_direct(buf, size, flags) \
+  __getrandom (buf, size, flags)
+#define __getrandom_nocancel_nostatus_direct(buf, size, flags) \
   __getrandom (buf, size, flags)
 #define __poll_infinity_nocancel(fds, nfds) \
   __poll (fds, nfds, -1)
diff --git a/sysdeps/mach/hurd/not-cancel.h b/sysdeps/mach/hurd/not-cancel.h
index 69fb3c00ef..ec5f5aa895 100644
--- a/sysdeps/mach/hurd/not-cancel.h
+++ b/sysdeps/mach/hurd/not-cancel.h
@@ -79,7 +79,7 @@ __typeof (__fcntl) __fcntl_nocancel;
 /* Non cancellable getrandom syscall that does not also set errno in case of
    failure.  */
 static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
 {
   int save_errno = errno;
   ssize_t r = __getrandom (buf, buflen, flags);
@@ -90,6 +90,8 @@ __getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
 
 #define __getrandom_nocancel(buf, size, flags) \
   __getrandom (buf, size, flags)
+#define __getrandom_nocancel_direct(buf, size, flags) \
+  __getrandom (buf, size, flags)
 
 #define __poll_infinity_nocancel(fds, nfds) \
   __poll (fds, nfds, -1)
diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c
index ef199ddbc3..8375b18535 100644
--- a/sysdeps/nptl/_Fork.c
+++ b/sysdeps/nptl/_Fork.c
@@ -18,6 +18,7 @@
 
 #include <arch-fork.h>
 #include <pthreadP.h>
+#include <sys/random.h>
 
 pid_t
 _Fork (void)
@@ -43,6 +44,7 @@ _Fork (void)
       self->robust_head.list = &self->robust_head;
       INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
 			     sizeof (struct robust_list_head));
+      call_function_static_weak (__getrandom_fork_subprocess, false);
     }
   return pid;
 }
diff --git a/sysdeps/nptl/fork.h b/sysdeps/nptl/fork.h
index 7643926df9..858129f3d2 100644
--- a/sysdeps/nptl/fork.h
+++ b/sysdeps/nptl/fork.h
@@ -26,6 +26,7 @@
 #include <mqueue.h>
 #include <pthreadP.h>
 #include <sysdep.h>
+#include <sys/random.h>
 
 static inline void
 fork_system_setup (void)
@@ -46,6 +47,7 @@ fork_system_setup_after_fork (void)
 
   call_function_static_weak (__mq_notify_fork_subprocess);
   call_function_static_weak (__timer_fork_subprocess);
+  call_function_static_weak (__getrandom_fork_subprocess, true);
 }
 
 /* In case of a fork() call the memory allocation in the child will be
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.c b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
index 3a44944dbb..476c6db75a 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.c
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
@@ -66,6 +66,11 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres) (clockid_t,
 PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t,
 						    struct __timespec64 *) RELRO;
 # endif
+# ifdef HAVE_GETRANDOM_VSYSCALL
+PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len,
+                                              unsigned int flags, void *state,
+                                              size_t state_len) RELRO;
+# endif
 
 /* PowerPC specific ones.  */
 # ifdef HAVE_GET_TBFREQ
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.h b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
index 8aee5a8212..cde99f608c 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.h
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
@@ -50,6 +50,9 @@ setup_vdso_pointers (void)
 #ifdef HAVE_RISCV_HWPROBE
   GLRO(dl_vdso_riscv_hwprobe) = dl_vdso_vsym (HAVE_RISCV_HWPROBE);
 #endif
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL);
+#endif
 }
 
 #endif
diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
index 777d1decf0..ba3092d7ca 100644
--- a/sysdeps/unix/sysv/linux/getrandom.c
+++ b/sysdeps/unix/sysv/linux/getrandom.c
@@ -21,12 +21,273 @@
 #include <unistd.h>
 #include <sysdep-cancel.h>
 
+#ifdef HAVE_GETRANDOM_VSYSCALL
+# include <getrandom_vdso.h>
+# include <ldsodefs.h>
+# include <libc-lock.h>
+# include <list.h>
+# include <setvmaname.h>
+# include <sys/mman.h>
+# include <sys/sysinfo.h>
+# include <tls-internal.h>
+
+# define ALIGN_PAGE(p) PTR_ALIGN_UP (p, GLRO (dl_pagesize))
+# define READ_ONCE(p) (*((volatile typeof (p) *) (&(p))))
+# define WRITE_ONCE(p, v) (*((volatile typeof (p) *) (&(p))) = (v))
+# define RESERVE_PTR(p) ((void *) ((uintptr_t) (p) | 1UL))
+# define RELEASE_PTR(p) ((void *) ((uintptr_t) (p) & ~1UL))
+# define IS_RESERVED_PTR(p) (!!((uintptr_t) (p) & 1UL))
+
+static struct
+{
+  /* Must be held always on access, as this is used by multiple threads.  */
+  __libc_lock_define (, lock);
+
+  /* Stack of opaque states for use in vgetrandom.  */
+  void **states;
+
+  /* Size of each opaque state, copied from vgetrandom_opaque_params.  */
+  size_t state_size;
+
+  /* Number of states available in the queue.  */
+  size_t len;
+
+  /* Number of states in the queue plus the number of states used in threads.  */
+  size_t total;
+
+  /* Number of states that the states array can hold before needing to be resized.  */
+  size_t cap;
+} grnd_alloc = { .lock = LLL_LOCK_INITIALIZER };
+
+/* Allocate an opaque state for vgetrandom. If the allocator doesn't have any,
+   mmap() another page of them. This function alone is not async-signal-safe.
+   In order to be safe reentrantly, this function *must* be called with the
+   high bit of ti->getrandom_buf set (using RESERVE_PTR()), and not called if
+   it has already been set.  */
+static void *
+vgetrandom_get_state (void)
+{
+  void *state = NULL;
+
+  __libc_lock_lock (grnd_alloc.lock);
+
+  if (grnd_alloc.len == 0)
+    {
+      struct vgetrandom_opaque_params params;
+      size_t block_size, num = __get_nprocs (); /* Just a decent heuristic. */
+      void *block;
+
+      if (GLRO (dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL))
+        goto out;
+      grnd_alloc.state_size = params.size_of_opaque_state;
+
+      block_size = ALIGN_PAGE (num * grnd_alloc.state_size);
+      num = (GLRO (dl_pagesize) / grnd_alloc.state_size) *
+            (block_size / GLRO (dl_pagesize));
+      block = __mmap (0, block_size, params.mmap_prot,
+                      params.mmap_flags, -1, 0);
+      if (block == NULL)
+        goto out;
+      __set_vma_name (block, block_size, " glibc: getrandom");
+
+      if (grnd_alloc.total + num > grnd_alloc.cap)
+        {
+          void **states;
+          size_t states_size = ALIGN_PAGE (sizeof(*grnd_alloc.states) *
+                                           (grnd_alloc.total + num));
+          if (grnd_alloc.states)
+            states = __mremap (grnd_alloc.states,
+                               ALIGN_PAGE (sizeof (*grnd_alloc.states) *
+                                           grnd_alloc.cap),
+                               states_size, MREMAP_MAYMOVE);
+          else
+            states = __mmap (NULL, states_size, PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+          if (states == MAP_FAILED)
+            goto unmap;
+          __set_vma_name (states, states_size, " glibc: getrandom");
+          grnd_alloc.states = states;
+          grnd_alloc.cap = states_size / sizeof (*grnd_alloc.states);
+        }
+
+      for (size_t i = 0; i < num; ++i)
+        {
+          /* States should not straddle a page.  */
+          if (((uintptr_t) block & (GLRO (dl_pagesize) - 1)) +
+              grnd_alloc.state_size > GLRO (dl_pagesize))
+            block = ALIGN_PAGE (block);
+          grnd_alloc.states[i] = block;
+          block += grnd_alloc.state_size;
+        }
+      grnd_alloc.len = num;
+      grnd_alloc.total += num;
+      goto success;
+
+    unmap:
+      __munmap (block, block_size);
+      goto out;
+    }
+
+success:
+  state = grnd_alloc.states[--grnd_alloc.len];
+
+out:
+  __libc_lock_unlock (grnd_alloc.lock);
+  return state;
+}
+
+/* Returns true when vgetrandom is used successfully. Returns false if the
+   syscall fallback should be issued in the case the vDSO is not present, in
+   the case of reentrancy, or if any memory allocation fails.  */
+static bool
+__getrandom_vdso (ssize_t *ret, void *buffer, size_t length,
+                  unsigned int flags)
+{
+  if (GLRO (dl_vdso_getrandom) == NULL)
+    return false;
+
+  struct tls_internal_t *ti = __glibc_tls_internal ();
+
+  /* If the LSB of getrandom_buf is set, then this function is already being
+     called, and we've reentered it reentrantly from a signal handler, in which
+     case, this is not safe, so just fallback to the syscall. If the LSB is not
+     set, then we're the first caller, so immediately set the LSB before
+     continuing onward. Note that this is not a matter of concurrency but one
+     of reentrancy. As an example of its safety, observe:
+
+       [MAIN THREAD]
+           if (IS_RESERVED_PTR(getrandom_buf))
+               ; // continue
+
+           [SIGNAL 1]
+               if (IS_RESERVED_PTR(getrandom_buf))
+                   ; // continue
+
+               [SIGNAL 2]
+                   if (IS_RESERVED_PTR(getrandom_buf))
+                       ; // continue
+                   getrandom_buf = RESERVE_PTR(getrandom_buf);
+
+                   [SIGNAL 3]
+                       if (IS_RESERVED_PTR(getrandom_buf))
+                           return; // Already reserved prior in SIGNAL 2
+
+                   // do stuff
+                   getrandom_buf = RELEASE_PTR(getrandom_buf);
+
+               getrandom_buf = RESERVE_PTR(getrandom_buf);
+               // do stuff
+               getrandom_buf = RELEASE_PTR(getrandom_buf);
+
+           getrandom_buf = RESERVE_PTR(getrandom_buf);
+           // do stuff
+           getrandom_buf = RELEASE_PTR(getrandom_buf);
+
+     As you can see, even in the case of multiple signals interrupting each
+     other, the execution remains consistent.  */
+
+  void *state = READ_ONCE (ti->getrandom_buf);
+  if (IS_RESERVED_PTR (state))
+    return false;
+  WRITE_ONCE (ti->getrandom_buf, RESERVE_PTR (state));
+
+  bool r = false;
+  if (state == NULL)
+    {
+      state = vgetrandom_get_state ();
+      if (state == NULL)
+        goto out;
+    }
+
+  *ret = GLRO (dl_vdso_getrandom) (buffer, length, flags, state,
+                                   grnd_alloc.state_size);
+  if (INTERNAL_SYSCALL_ERROR_P (*ret))
+    {
+      __set_errno (INTERNAL_SYSCALL_ERRNO (*ret));
+      *ret = -1;
+    }
+  r = true;
+
+out:
+  WRITE_ONCE (ti->getrandom_buf, state);
+  return r;
+}
+
+/* Re-add states of stale threads different from our own that have a
+   getrandom_buf in use at the time of a fork().  */
+static void
+reset_other_threads (list_t *list)
+{
+  list_t *runp;
+  list_for_each (runp, list)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == THREAD_SELF || !t->tls_state.getrandom_buf)
+        continue;
+      grnd_alloc.states[grnd_alloc.len++]
+          = RELEASE_PTR (t->tls_state.getrandom_buf);
+      t->tls_state.getrandom_buf = NULL;
+    }
+}
+#endif
+
+/* Called when a thread terminates, and adds its random buffer back into the
+   allocator pool for use in a future thread.  */
+void
+__getrandom_vdso_release (void)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  void *state = __glibc_tls_internal ()->getrandom_buf;
+  if (state == NULL)
+    return;
+
+  __libc_lock_lock (grnd_alloc.lock);
+  grnd_alloc.states[grnd_alloc.len++] = state;
+  __libc_lock_unlock (grnd_alloc.lock);
+#endif
+}
+
+/* When called post-fork(), reset_states is true, so stale threads give their
+   getrandom_buf back to the allocator. When called post-_Fork(), this step is
+   skipped, as is the norm. In both cases, the allocator lock is set back to
+   unlocked, in case another thread had locked this while our thread forked.  */
+void
+__getrandom_fork_subprocess (bool reset_states)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  grnd_alloc.lock = LLL_LOCK_INITIALIZER;
+  if (reset_states)
+    {
+      reset_other_threads (&GL (dl_stack_used));
+      reset_other_threads (&GL (dl_stack_user));
+    }
+#endif
+}
+
+ssize_t
+__getrandom_nocancel (void *buffer, size_t length, unsigned int flags)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  ssize_t r;
+  if (__getrandom_vdso (&r, buffer, length, flags))
+    return r;
+#endif
+
+  return INLINE_SYSCALL_CALL (getrandom, buffer, length, flags);
+}
+
 /* Write up to LENGTH bytes of randomness starting at BUFFER.
    Return the number of bytes written, or -1 on error.  */
 ssize_t
 __getrandom (void *buffer, size_t length, unsigned int flags)
 {
-  return SYSCALL_CANCEL (getrandom, buffer, length, flags);
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  ssize_t r;
+  if (__getrandom_vdso (&r, buffer, length, flags))
+    return r;
+#endif
+
+  return INTERNAL_SYSCALL_CALL (getrandom, buffer, length, flags);
 }
 libc_hidden_def (__getrandom)
 weak_alias (__getrandom, getrandom)
diff --git a/sysdeps/unix/sysv/linux/getrandom_vdso.h b/sysdeps/unix/sysv/linux/getrandom_vdso.h
new file mode 100644
index 0000000000..d3130c1038
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/getrandom_vdso.h
@@ -0,0 +1,36 @@
+/* Linux getrandom vDSO support.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _GETRANDOM_VDSO_H
+#define _GETRANDOM_VDSO_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+extern void __getrandom_vdso_release (void) attribute_hidden;
+
+struct vgetrandom_opaque_params
+{
+  uint32_t size_of_opaque_state;
+  uint32_t mmap_prot;
+  uint32_t mmap_flags;
+  uint32_t reserved[13];
+};
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h
index 2a7585b73f..12f26912d3 100644
--- a/sysdeps/unix/sysv/linux/not-cancel.h
+++ b/sysdeps/unix/sysv/linux/not-cancel.h
@@ -27,6 +27,7 @@
 #include <sys/syscall.h>
 #include <sys/wait.h>
 #include <time.h>
+#include <sys/random.h>
 
 /* Non cancellable open syscall.  */
 __typeof (open) __open_nocancel;
@@ -84,15 +85,17 @@ __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt)
 }
 
 static inline ssize_t
-__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_direct (void *buf, size_t buflen, unsigned int flags)
 {
   return INLINE_SYSCALL_CALL (getrandom, buf, buflen, flags);
 }
 
+__typeof (getrandom) __getrandom_nocancel attribute_hidden;
+
 /* Non cancellable getrandom syscall that does not also set errno in case of
    failure.  */
 static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
 {
   return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags);
 }
diff --git a/sysdeps/unix/sysv/linux/tls-internal-struct.h b/sysdeps/unix/sysv/linux/tls-internal-struct.h
new file mode 100644
index 0000000000..f8d561c987
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tls-internal-struct.h
@@ -0,0 +1,29 @@
+/* Per-thread state.  Linux version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _TLS_INTERNAL_STRUCT_H
+#define _TLS_INTERNAL_STRUCT_H 1
+
+struct tls_internal_t
+{
+  char *strsignal_buf;
+  char *strerror_l_buf;
+  void *getrandom_buf;
+};
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
index 81ebcdfbf8..886b55daf9 100644
--- a/sysdeps/unix/sysv/linux/tls-internal.c
+++ b/sysdeps/unix/sysv/linux/tls-internal.c
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <getrandom_vdso.h>
 #include <string.h>
 #include <tls-internal.h>
 
@@ -25,4 +26,5 @@ __glibc_tls_internal_free (void)
   struct pthread *self = THREAD_SELF;
   free (self->tls_state.strsignal_buf);
   free (self->tls_state.strerror_l_buf);
+  call_function_static_weak (__getrandom_vdso_release);
 }
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index a2b021bd86..7dc072ae2d 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -376,6 +376,7 @@
 # define HAVE_TIME_VSYSCALL             "__vdso_time"
 # define HAVE_GETCPU_VSYSCALL		"__vdso_getcpu"
 # define HAVE_CLOCK_GETRES64_VSYSCALL   "__vdso_clock_getres"
+# define HAVE_GETRANDOM_VSYSCALL        "__vdso_getrandom"
 
 # define HAVE_CLONE3_WRAPPER			1
 
-- 
2.45.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] linux: Add support for getrandom vDSO
  2024-08-01  2:11     ` [PATCH v2] " Jason A. Donenfeld
@ 2024-08-01  6:47       ` Florian Weimer
  2024-08-19 19:35         ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 9+ messages in thread
From: Florian Weimer @ 2024-08-01  6:47 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: libc-alpha, zatrazz, carlos

* Jason A. Donenfeld:

> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index bcb6e5b83c..9e577ab900 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -3140,8 +3140,8 @@ static void
>  tcache_key_initialize (void)
>  {
>    /* We need to use the _nostatus version here, see BZ 29624.  */
> -  if (__getrandom_nocancel_nostatus (&tcache_key, sizeof(tcache_key),
> -				     GRND_NONBLOCK)
> +  if (__getrandom_nocancel_nostatus_direct (&tcache_key, sizeof(tcache_key),
> +					    GRND_NONBLOCK)
>        != sizeof (tcache_key))
>      {
>        tcache_key = random_bits ();

Sorry, the comment needs updating.

> diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
> index 777d1decf0..ba3092d7ca 100644
> --- a/sysdeps/unix/sysv/linux/getrandom.c
> +++ b/sysdeps/unix/sysv/linux/getrandom.c

> +static void *
> +vgetrandom_get_state (void)
> +{
> +  void *state = NULL;
> +
> +  __libc_lock_lock (grnd_alloc.lock);
> +
> +  if (grnd_alloc.len == 0)
> +    {
> +      struct vgetrandom_opaque_params params;
> +      size_t block_size, num = __get_nprocs (); /* Just a decent heuristic. */
> +      void *block;
> +
> +      if (GLRO (dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL))
> +        goto out;
> +      grnd_alloc.state_size = params.size_of_opaque_state;
> +
> +      block_size = ALIGN_PAGE (num * grnd_alloc.state_size);
> +      num = (GLRO (dl_pagesize) / grnd_alloc.state_size) *
> +            (block_size / GLRO (dl_pagesize));
> +      block = __mmap (0, block_size, params.mmap_prot,
> +                      params.mmap_flags, -1, 0);
> +      if (block == NULL)
> +        goto out;
> +      __set_vma_name (block, block_size, " glibc: getrandom");
> +
> +      if (grnd_alloc.total + num > grnd_alloc.cap)
> +        {
> +          void **states;
> +          size_t states_size = ALIGN_PAGE (sizeof(*grnd_alloc.states) *
> +                                           (grnd_alloc.total + num));
> +          if (grnd_alloc.states)
> +            states = __mremap (grnd_alloc.states,
> +                               ALIGN_PAGE (sizeof (*grnd_alloc.states) *
> +                                           grnd_alloc.cap),
> +                               states_size, MREMAP_MAYMOVE);

I think if we fork after this point (after the mremap), the new process
will no longer have the pointer array at grnd_alloc.states.  Currently,
it is not safe to simply reset grnd_alloc.lock in the subprocess.

It may be easier to just use a list of pages, so that a fork at the
wrong time merely loses track of the newly allocated page.

> +/* Re-add states of stale threads different from our own that have a
> +   getrandom_buf in use at the time of a fork().  */
> +static void
> +reset_other_threads (list_t *list)
> +{
> +  list_t *runp;
> +  list_for_each (runp, list)
> +    {
> +      struct pthread *t = list_entry (runp, struct pthread, list);
> +      if (t == THREAD_SELF || !t->tls_state.getrandom_buf)
> +        continue;
> +      grnd_alloc.states[grnd_alloc.len++]
> +          = RELEASE_PTR (t->tls_state.getrandom_buf);
> +      t->tls_state.getrandom_buf = NULL;
> +    }
> +}

I thought that the kernel would drop those mappings on fork?  Or do they
stick around and just get replaced with the zero page?

As I said before, the whole thing is rather suspicious.  We should not
need to walk the thread list after fork (or _Fork).  We know that only
one thread exists, and where all the getrandom states are.  We can put
all states except that of the current thread (if any) on the freelist.
This only needs the freelist itself to be handled in a fork-safe manner.

> +/* Called when a thread terminates, and adds its random buffer back into the
> +   allocator pool for use in a future thread.  */
> +void
> +__getrandom_vdso_release (void)
> +{
> +#ifdef HAVE_GETRANDOM_VSYSCALL
> +  void *state = __glibc_tls_internal ()->getrandom_buf;
> +  if (state == NULL)
> +    return;
> +
> +  __libc_lock_lock (grnd_alloc.lock);
> +  grnd_alloc.states[grnd_alloc.len++] = state;
> +  __libc_lock_unlock (grnd_alloc.lock);
> +#endif
> +}

At present, this has a race if a signal handler installs a state pointer
after the null check (see below for start_thread).

> diff --git a/sysdeps/unix/sysv/linux/getrandom_vdso.h b/sysdeps/unix/sysv/linux/getrandom_vdso.h
> new file mode 100644
> index 0000000000..d3130c1038
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/getrandom_vdso.h

> +struct vgetrandom_opaque_params
> +{
> +  uint32_t size_of_opaque_state;
> +  uint32_t mmap_prot;
> +  uint32_t mmap_flags;
> +  uint32_t reserved[13];
> +};

This struct is shared with the kernel, right?  Needs a comment (perhaps
pointing to kernel documentation).

> diff --git a/sysdeps/unix/sysv/linux/tls-internal-struct.h b/sysdeps/unix/sysv/linux/tls-internal-struct.h
> new file mode 100644
> index 0000000000..f8d561c987
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tls-internal-struct.h

> +struct tls_internal_t
> +{
> +  char *strsignal_buf;
> +  char *strerror_l_buf;
> +  void *getrandom_buf;
> +};

The getrandom_buf member should go directly into struct pthread in
nptl/descr.h because it's Linux-specific.  We want to keep
tls_internal_t generic.

> diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
> index 81ebcdfbf8..886b55daf9 100644
> --- a/sysdeps/unix/sysv/linux/tls-internal.c
> +++ b/sysdeps/unix/sysv/linux/tls-internal.c

> @@ -25,4 +26,5 @@ __glibc_tls_internal_free (void)
>    struct pthread *self = THREAD_SELF;
>    free (self->tls_state.strsignal_buf);
>    free (self->tls_state.strerror_l_buf);
> +  call_function_static_weak (__getrandom_vdso_release);
>  }

The __glibc_tls_internal_free function is not called late enough, and
user code can still run at this point.  You need to call
__getrandom_vdso_release after signals are disabled in start_thread in
nptl/pthread_create.c.

We need to discuss what we should do about getrandom as a cancellation
point.  If no longer want it to be a cancellation point, that should be
changed first, in a separate patch, I think.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] linux: Add support for getrandom vDSO
  2024-07-30 13:24 [PATCH v1] linux: Add support for getrandom vDSO Jason A. Donenfeld
  2024-07-30 20:16 ` Florian Weimer
@ 2024-08-02 17:32 ` Cristian Rodríguez
  1 sibling, 0 replies; 9+ messages in thread
From: Cristian Rodríguez @ 2024-08-02 17:32 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: libc-alpha, zatrazz, carlos

[-- Attachment #1: Type: text/plain, Size: 1294 bytes --]

On Tue, Jul 30, 2024 at 9:25 AM Jason A. Donenfeld <Jason@zx2c4.com> wrote:

> Linux 6.11 gains support for calling getrandom() from the vDSO. It
> operates on a thread-local opaque state allocated with mmap using flags
> specified by the vDSO.
>
> Multiple states are allocated at once, as many as fit into a page, and
> these are held in an array of available states to be doled out to each
> thread upon first use, and recycled when a thread terminates. As these
> states run low, more are allocated.
>
> To make this procedure async-signal-safe, a simple guard is used in the
> LSB of the opaque state address, falling back to the syscall if there's
> reentrancy contention.
>
> This implementation is intentionally kept somewhat basic. We can add
> optimizations later, but for now, the idea is to get the bones set.
>
> It's currently enabled for x86_64. As the kernel support gains more
> platforms (arm64 is in the works), this can be easily turned on for
> those.
>
> Co-authored-by: Adhemerval Zanella <zatrazz@gmail.com>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
>

Worked well enough to find applications that call the raw syscall and start
filling bug reports. nothing crashed yet. a silly microbenchmark shows a 2x
performance increase.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] linux: Add support for getrandom vDSO
  2024-08-01  6:47       ` Florian Weimer
@ 2024-08-19 19:35         ` Adhemerval Zanella Netto
  2024-08-20  4:13           ` Jason A. Donenfeld
  0 siblings, 1 reply; 9+ messages in thread
From: Adhemerval Zanella Netto @ 2024-08-19 19:35 UTC (permalink / raw)
  To: Florian Weimer, Jason A. Donenfeld; +Cc: libc-alpha, zatrazz, carlos



On 01/08/24 03:47, Florian Weimer wrote:
> * Jason A. Donenfeld:
> 
>> diff --git a/malloc/malloc.c b/malloc/malloc.c
>> index bcb6e5b83c..9e577ab900 100644
>> --- a/malloc/malloc.c
>> +++ b/malloc/malloc.c
>> @@ -3140,8 +3140,8 @@ static void
>>  tcache_key_initialize (void)
>>  {
>>    /* We need to use the _nostatus version here, see BZ 29624.  */
>> -  if (__getrandom_nocancel_nostatus (&tcache_key, sizeof(tcache_key),
>> -				     GRND_NONBLOCK)
>> +  if (__getrandom_nocancel_nostatus_direct (&tcache_key, sizeof(tcache_key),
>> +					    GRND_NONBLOCK)
>>        != sizeof (tcache_key))
>>      {
>>        tcache_key = random_bits ();
> 
> Sorry, the comment needs updating.

Ack.

> 
>> diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
>> index 777d1decf0..ba3092d7ca 100644
>> --- a/sysdeps/unix/sysv/linux/getrandom.c
>> +++ b/sysdeps/unix/sysv/linux/getrandom.c
> 
>> +static void *
>> +vgetrandom_get_state (void)
>> +{
>> +  void *state = NULL;
>> +
>> +  __libc_lock_lock (grnd_alloc.lock);
>> +
>> +  if (grnd_alloc.len == 0)
>> +    {
>> +      struct vgetrandom_opaque_params params;
>> +      size_t block_size, num = __get_nprocs (); /* Just a decent heuristic. */
>> +      void *block;
>> +
>> +      if (GLRO (dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL))
>> +        goto out;
>> +      grnd_alloc.state_size = params.size_of_opaque_state;
>> +
>> +      block_size = ALIGN_PAGE (num * grnd_alloc.state_size);
>> +      num = (GLRO (dl_pagesize) / grnd_alloc.state_size) *
>> +            (block_size / GLRO (dl_pagesize));
>> +      block = __mmap (0, block_size, params.mmap_prot,
>> +                      params.mmap_flags, -1, 0);
>> +      if (block == NULL)
>> +        goto out;
>> +      __set_vma_name (block, block_size, " glibc: getrandom");
>> +
>> +      if (grnd_alloc.total + num > grnd_alloc.cap)
>> +        {
>> +          void **states;
>> +          size_t states_size = ALIGN_PAGE (sizeof(*grnd_alloc.states) *
>> +                                           (grnd_alloc.total + num));
>> +          if (grnd_alloc.states)
>> +            states = __mremap (grnd_alloc.states,
>> +                               ALIGN_PAGE (sizeof (*grnd_alloc.states) *
>> +                                           grnd_alloc.cap),
>> +                               states_size, MREMAP_MAYMOVE);
> 
> I think if we fork after this point (after the mremap), the new process
> will no longer have the pointer array at grnd_alloc.states.  Currently,
> it is not safe to simply reset grnd_alloc.lock in the subprocess.
> 
> It may be easier to just use a list of pages, so that a fork at the
> wrong time merely loses track of the newly allocated page.

The list of pages would have similar issues where it would need to put
the address somewhere, either in the free-state itself (which adds
extra complexity in the free-state tracking) or by adding a static array
to track it.

Instead I think that since the idea it to keep the getrandom call 
*consistent* after fork and not really avoid possible resource leakage
(simiar to the thread stack cache), reallocate the free list with
mmap, atomically replace the pointer, and munmap will be suffice.

> 
>> +/* Re-add states of stale threads different from our own that have a
>> +   getrandom_buf in use at the time of a fork().  */
>> +static void
>> +reset_other_threads (list_t *list)
>> +{
>> +  list_t *runp;
>> +  list_for_each (runp, list)
>> +    {
>> +      struct pthread *t = list_entry (runp, struct pthread, list);
>> +      if (t == THREAD_SELF || !t->tls_state.getrandom_buf)
>> +        continue;
>> +      grnd_alloc.states[grnd_alloc.len++]
>> +          = RELEASE_PTR (t->tls_state.getrandom_buf);
>> +      t->tls_state.getrandom_buf = NULL;
>> +    }
>> +}
> 
> I thought that the kernel would drop those mappings on fork?  Or do they
> stick around and just get replaced with the zero page?

The kernel does not drop, afaik MAP_DROPPABLE (used by getrandom vDSO
pages) follows usual mmap semantic regarding fork().

> 
> As I said before, the whole thing is rather suspicious.  We should not
> need to walk the thread list after fork (or _Fork).  We know that only
> one thread exists, and where all the getrandom states are.  We can put
> all states except that of the current thread (if any) on the freelist.
> This only needs the freelist itself to be handled in a fork-safe manner.

We do not track the *pages* allocated for the opaque states because they
will be either in the free states list (grnd_alloc.states) or allocated
to a running thread.  So on fork(), it is way simpler to just iterate
over the thread cache than tracking the pages.

I have move the logic to reclaim the getrandom opaque states to
reclaim_stacks, so it is the only place where fork() iterates over the
list.

> 
>> +/* Called when a thread terminates, and adds its random buffer back into the
>> +   allocator pool for use in a future thread.  */
>> +void
>> +__getrandom_vdso_release (void)
>> +{
>> +#ifdef HAVE_GETRANDOM_VSYSCALL
>> +  void *state = __glibc_tls_internal ()->getrandom_buf;
>> +  if (state == NULL)
>> +    return;
>> +
>> +  __libc_lock_lock (grnd_alloc.lock);
>> +  grnd_alloc.states[grnd_alloc.len++] = state;
>> +  __libc_lock_unlock (grnd_alloc.lock);
>> +#endif
>> +}
> 
> At present, this has a race if a signal handler installs a state pointer
> after the null check (see below for start_thread).
> 
>> diff --git a/sysdeps/unix/sysv/linux/getrandom_vdso.h b/sysdeps/unix/sysv/linux/getrandom_vdso.h
>> new file mode 100644
>> index 0000000000..d3130c1038
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/getrandom_vdso.h
> 
>> +struct vgetrandom_opaque_params
>> +{
>> +  uint32_t size_of_opaque_state;
>> +  uint32_t mmap_prot;
>> +  uint32_t mmap_flags;
>> +  uint32_t reserved[13];
>> +};
> 
> This struct is shared with the kernel, right?  Needs a comment (perhaps
> pointing to kernel documentation).

Ack.

> 
>> diff --git a/sysdeps/unix/sysv/linux/tls-internal-struct.h b/sysdeps/unix/sysv/linux/tls-internal-struct.h
>> new file mode 100644
>> index 0000000000..f8d561c987
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/tls-internal-struct.h
> 
>> +struct tls_internal_t
>> +{
>> +  char *strsignal_buf;
>> +  char *strerror_l_buf;
>> +  void *getrandom_buf;
>> +};
> 
> The getrandom_buf member should go directly into struct pthread in
> nptl/descr.h because it's Linux-specific.  We want to keep
> tls_internal_t generic.

Ack.

> 
>> diff --git a/sysdeps/unix/sysv/linux/tls-internal.c b/sysdeps/unix/sysv/linux/tls-internal.c
>> index 81ebcdfbf8..886b55daf9 100644
>> --- a/sysdeps/unix/sysv/linux/tls-internal.c
>> +++ b/sysdeps/unix/sysv/linux/tls-internal.c
> 
>> @@ -25,4 +26,5 @@ __glibc_tls_internal_free (void)
>>    struct pthread *self = THREAD_SELF;
>>    free (self->tls_state.strsignal_buf);
>>    free (self->tls_state.strerror_l_buf);
>> +  call_function_static_weak (__getrandom_vdso_release);
>>  }
> 
> The __glibc_tls_internal_free function is not called late enough, and
> user code can still run at this point.  You need to call
> __getrandom_vdso_release after signals are disabled in start_thread in
> nptl/pthread_create.c.

Ack.

> 
> We need to discuss what we should do about getrandom as a cancellation
> point.  If no longer want it to be a cancellation point, that should be
> changed first, in a separate patch, I think.

Indeed, the vDSO currently always issues the syscall if it can not use
the opaque state for any reason (invalid size, RNG is not ready, reentracy
protection, etc) and this breaks the SYSCALL_CANCEL macro.

I don't have a strong opinion here, maybe a better option would to just
avoid calling the syscall on the vDSO (but this would make it differ from
other vDSO symbols).  Jason, what do you think?  Would current getrandom
implementation block?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] linux: Add support for getrandom vDSO
  2024-08-19 19:35         ` Adhemerval Zanella Netto
@ 2024-08-20  4:13           ` Jason A. Donenfeld
  0 siblings, 0 replies; 9+ messages in thread
From: Jason A. Donenfeld @ 2024-08-20  4:13 UTC (permalink / raw)
  To: Adhemerval Zanella Netto; +Cc: Florian Weimer, libc-alpha, zatrazz, carlos

> >> +            states = __mremap (grnd_alloc.states,
> >> +                               ALIGN_PAGE (sizeof (*grnd_alloc.states) *
> >> +                                           grnd_alloc.cap),
> >> +                               states_size, MREMAP_MAYMOVE);
> > 
> > I think if we fork after this point (after the mremap), the new process
> > will no longer have the pointer array at grnd_alloc.states.  Currently,
> > it is not safe to simply reset grnd_alloc.lock in the subprocess.
> > 
> > It may be easier to just use a list of pages, so that a fork at the
> > wrong time merely loses track of the newly allocated page.
> 
> The list of pages would have similar issues where it would need to put
> the address somewhere, either in the free-state itself (which adds
> extra complexity in the free-state tracking) or by adding a static array
> to track it.
> 
> Instead I think that since the idea it to keep the getrandom call 
> *consistent* after fork and not really avoid possible resource leakage
> (simiar to the thread stack cache), reallocate the free list with
> mmap, atomically replace the pointer, and munmap will be suffice.

Or, in order to avoid already allocated states from having invalid
addresses post-unmap, if you want more states, just mmap a new set of
states. Because we're already allocating lots per chunk, I don't think
practically speaking this should pose a real issue.


> > I thought that the kernel would drop those mappings on fork?  Or do they
> > stick around and just get replaced with the zero page?
> 
> The kernel does not drop, afaik MAP_DROPPABLE (used by getrandom vDSO
> pages) follows usual mmap semantic regarding fork().

Right, they get zeroed.

> > We need to discuss what we should do about getrandom as a cancellation
> > point.  If no longer want it to be a cancellation point, that should be
> > changed first, in a separate patch, I think.
> 
> Indeed, the vDSO currently always issues the syscall if it can not use
> the opaque state for any reason (invalid size, RNG is not ready, reentracy
> protection, etc) and this breaks the SYSCALL_CANCEL macro.
> 
> I don't have a strong opinion here, maybe a better option would to just
> avoid calling the syscall on the vDSO (but this would make it differ from
> other vDSO symbols).  Jason, what do you think?  Would current getrandom
> implementation block?

The vDSO will sometimes make the syscall, particularly in the case when
the RNG is not yet initialized. On modern kernels, the RNG gets
initialized very fast, and there's generally not an issue with "blocking
forever and ever" like there used to be. But with that said, at early
boot it still _might_ block for a quarter second or so, though it will
only do so once.

So I think we could preserve its cancelability, but we'll only need to
do so for the first invocation of it. So we could optimize it to
something like this:

diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
index c7806810..75a52c37 100644
--- a/sysdeps/unix/sysv/linux/getrandom.c
+++ b/sysdeps/unix/sysv/linux/getrandom.c
@@ -180,8 +180,21 @@ __getrandom_vdso (ssize_t *ret, void *buffer, size_t length,
         goto out;
     }
 
-  *ret = GLRO (dl_vdso_getrandom) (buffer, length, flags, state,
-                                   grnd_alloc.state_size);
+  static bool did_succeed = false;
+  bool could_block = !did_succeed && !(flags & (GRND_NONBLOCK | GRND_INSECURE));
+  if (NO_SYSCALL_CANCEL_CHECKING || !could_block)
+    *ret = glro (dl_vdso_getrandom) (buffer, length, flags, state,
+                                     grnd_alloc.state_size);
+  else
+    {
+      int sc_cancel_oldtype = LIBC_CANCEL_ASYNC ();
+      *ret = glro (dl_vdso_getrandom) (buffer, length, flags, state,
+                                       grnd_alloc.state_size);
+      LIBC_CANCEL_RESET (sc_cancel_oldtype);
+      if (!INTERNAL_SYSCALL_ERROR_P (*ret))
+        did_succeed = true;
+    }
+
   if (INTERNAL_SYSCALL_ERROR_P (*ret))
     {
       __set_errno (INTERNAL_SYSCALL_ERRNO (*ret));


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-08-20  4:13 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-30 13:24 [PATCH v1] linux: Add support for getrandom vDSO Jason A. Donenfeld
2024-07-30 20:16 ` Florian Weimer
2024-07-31 12:38   ` Adhemerval Zanella Netto
2024-08-01  1:47   ` Jason A. Donenfeld
2024-08-01  2:11     ` [PATCH v2] " Jason A. Donenfeld
2024-08-01  6:47       ` Florian Weimer
2024-08-19 19:35         ` Adhemerval Zanella Netto
2024-08-20  4:13           ` Jason A. Donenfeld
2024-08-02 17:32 ` [PATCH v1] " Cristian Rodríguez

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).