public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] x86-64: Allocate state buffer space for RDI, RSI and RBX
@ 2024-03-16 22:00 H.J. Lu
  0 siblings, 0 replies; only message in thread
From: H.J. Lu @ 2024-03-16 22:00 UTC (permalink / raw)
  To: libc-alpha; +Cc: fweimer

_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
TLSDESC_CALL_STATE_SAVE_OFFSET to allocate space for all integer registers
and round up the state size to 64 bytes to avoid clobbering saved RDI, RSI
and RBX values on stack by xsave to STATE_SAVE_OFFSET(%rsp).  This fixes
BZ #31501.
---
 sysdeps/x86/cpu-features.c                 |  11 +-
 sysdeps/x86/sysdep.h                       |   8 ++
 sysdeps/x86_64/Makefile                    |  19 ++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c |  31 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S |  77 +++++++++++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c |  31 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c      | 126 +++++++++++++++++++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64.h      |  36 ++++++
 8 files changed, 335 insertions(+), 4 deletions(-)
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64.h

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4ea373dffa..5e9c167417 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
 	      /* NB: On AMX capable processors, ebx always includes AMX
 		 states.  */
 	      unsigned int xsave_state_full_size
-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+		= ALIGN_UP (ebx + TLSDESC_CALL_STATE_SAVE_OFFSET, 64);
 
 	      cpu_features->xsave_state_size
 		= xsave_state_full_size;
@@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
 		      unsigned int amx_size
 			= (xstate_amx_comp_offsets[31]
 			   + xstate_amx_comp_sizes[31]);
-		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
-					   64);
+		      amx_size
+			= ALIGN_UP ((amx_size
+				     + TLSDESC_CALL_STATE_SAVE_OFFSET),
+				    64);
 		      /* Set xsave_state_full_size to the compact AMX
 			 state size for XSAVEC.  NB: xsave_state_full_size
 			 is only used in _dl_tlsdesc_dynamic_xsave and
@@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
 		      cpu_features->xsave_state_full_size = amx_size;
 #endif
 		      cpu_features->xsave_state_size
-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+			= ALIGN_UP (size + TLSDESC_CALL_STATE_SAVE_OFFSET,
+				    64);
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91..262d4083e2 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -46,6 +46,13 @@
    red-zone into account.  */
 # define STATE_SAVE_OFFSET (8 * 7 + 8)
 
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+   R11.  Allocate space for all integer registers and round up the state
+   size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
+   stack by xsave on STATE_SAVE_OFFSET(%rsp).  */
+# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
+
 /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
    registers are mutually exclusive.  */
 # define STATE_SAVE_MASK		\
@@ -68,6 +75,7 @@
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
    doesn't have red-zone, use 0 here.  */
 # define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_STATE_SAVE_OFFSET 0
 
 /* Save SSE, AVX, AXV512, mask and bound registers.   */
 # define STATE_SAVE_MASK		\
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 66b21954f3..e21e4b96ab 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -217,6 +217,25 @@ valgrind-suppressions-tst-valgrind-smoke = \
   --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
 endif
 
+tests += \
+  tst-gnu2-tls2-x86-64 \
+# tests
+
+modules-names += \
+  tst-gnu2-tls2-x86-64-mod0 \
+  tst-gnu2-tls2-x86-64-mod1 \
+  tst-gnu2-tls2-x86-64-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-x86-64: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-x86-64.out: \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod0.so \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod1.so \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod2.so
+
+CFLAGS-tst-gnu2-tls2-x86-64-mod0.c += -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-x86-64-mod2.c += -mtls-dialect=gnu2
+
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
new file mode 100644
index 0000000000..496ad0767f
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
@@ -0,0 +1,31 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "tst-gnu2-tls2-x86-64.h"
+
+__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
+
+struct tls *
+apply_tls (struct tls *p, intptr_t offset)
+{
+  BEFORE_TLSDESC_CALL ();
+  tls_var0 = *p;
+  struct tls *ret = &tls_var0;
+  AFTER_TLSDESC_CALL ();
+  return ret;
+}
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
new file mode 100644
index 0000000000..7a9b5766cf
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
@@ -0,0 +1,77 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.text
+	.p2align 4
+	.globl	apply_tls
+	.type	apply_tls, @function
+apply_tls:
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes.  */
+	and	$-64, %RSP_LP
+	sub	%RSI_LP, %RSP_LP
+	pushq	%rbx
+	/* Set %ebx to 0xbadbeef.  */
+	movl	$0xbadbeef, %ebx
+	movq	%rdi, saved_rdi(%rip)
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP
+	movups	%xmm0, 32(%RAX_LP)
+	movdqu	16(%RDI_LP), %xmm1
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)
+	lea	32(%RBX_LP), %RAX_LP
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
+	.hidden	tls_var1
+	.globl	tls_var1
+	.section	.tbss,"awT",@nobits
+	.align 16
+	.type	tls_var1, @object
+	.size	tls_var1, 3200
+tls_var1:
+	.zero	3200
+	.local	saved_rdi
+	.comm	saved_rdi,8,8
+	.local	saved_rsi
+	.comm	saved_rsi,8,8
+	.section	.note.GNU-stack,"",@progbits
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
new file mode 100644
index 0000000000..632a8dce80
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
@@ -0,0 +1,31 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "tst-gnu2-tls2-x86-64.h"
+
+__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
+
+struct tls *
+apply_tls (struct tls *p, intptr_t offset)
+{
+  BEFORE_TLSDESC_CALL ();
+  tls_var2 = *p;
+  struct tls *ret = &tls_var2;
+  AFTER_TLSDESC_CALL ();
+  return ret;
+}
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
new file mode 100644
index 0000000000..acb38a66c1
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
@@ -0,0 +1,126 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+#include <support/check.h>
+#include <support/test-driver.h>
+#include "tst-gnu2-tls2-x86-64.h"
+
+#ifndef IS_SUPPORTED
+# define IS_SUPPORTED() true
+#endif
+
+/* An architecture can define it to clobber caller-saved registers in
+   malloc below to verify that the implicit TLSDESC call won't change
+   caller-saved registers.  */
+#ifndef PREPARE_MALLOC
+# define PREPARE_MALLOC()
+#endif
+
+extern void * __libc_malloc (size_t);
+
+size_t malloc_counter = 0;
+
+void *
+malloc (size_t n)
+{
+  PREPARE_MALLOC ();
+  malloc_counter++;
+  return __libc_malloc (n);
+}
+
+static void *mod[3];
+#ifndef MOD
+# define MOD(i) "tst-gnu2-tls2-x86-64-mod" #i ".so"
+#endif
+static const char *modname[3] = { MOD(0), MOD(1), MOD(2) };
+#undef MOD
+
+static void
+open_mod (int i)
+{
+  mod[i] = xdlopen (modname[i], RTLD_LAZY);
+  printf ("open %s\n", modname[i]);
+}
+
+static void
+close_mod (int i)
+{
+  xdlclose (mod[i]);
+  mod[i] = NULL;
+  printf ("close %s\n", modname[i]);
+}
+
+static void
+access_mod (int i, const char *sym)
+{
+  struct tls var = { -1, -1, -1, -1 };
+  struct tls *(*f) (struct tls *, intptr_t) = xdlsym (mod[i], sym);
+  /* Check that our malloc is called.  */
+  malloc_counter = 0;
+  /* apply_tls will align stack to 64 bytes and adjust stack by OFFSET
+     and followed by "pushq %rbx".  On AVX512 machines, OFFSET == 104
+     causes _dl_tlsdesc_dynamic_xsavec to clobber %rdi, %rsi and %rbx.  */
+  intptr_t offset = 104;
+  struct tls *p = f (&var, offset);
+  TEST_VERIFY (malloc_counter != 0);
+  printf ("access %s: %s() = %p\n", modname[i], sym, p);
+  TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
+  ++(p->a);
+}
+
+static void *
+start (void *arg)
+{
+  /* The DTV generation is at the last dlopen of mod0 and the
+     entry for mod1 is NULL.  */
+
+  open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS.  */
+
+  /* Force the slow path in GNU2 TLS descriptor call.  */
+  access_mod (1, "apply_tls");
+
+  return arg;
+}
+
+static int
+do_test (void)
+{
+  if (!IS_SUPPORTED ())
+    return EXIT_UNSUPPORTED;
+
+  open_mod (0);
+  open_mod (1);
+  open_mod (2);
+  close_mod (0);
+  close_mod (1); /* Create modid gap at mod1.  */
+  open_mod (0); /* Reuse modid of mod0, bump generation count.  */
+
+  /* Create a thread where DTV of mod1 is NULL.  */
+  pthread_t t = xpthread_create (NULL, start, NULL);
+  xpthread_join (t);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64.h b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.h
new file mode 100644
index 0000000000..16055c77aa
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.h
@@ -0,0 +1,36 @@
+/* Test TLSDESC relocation.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+
+struct tls
+{
+  int64_t a, b, c, d;
+};
+
+extern struct tls *apply_tls (struct tls *, intptr_t);
+
+/* An architecture can define them to verify that clobber caller-saved
+   registers aren't changed by the implicit TLSDESC call.  */
+#ifndef BEFORE_TLSDESC_CALL
+# define BEFORE_TLSDESC_CALL()
+#endif
+
+#ifndef AFTER_TLSDESC_CALL
+# define AFTER_TLSDESC_CALL()
+#endif
-- 
2.44.0


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-03-16 22:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-16 22:00 [PATCH] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).