public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v3] x86-64: Allocate state buffer space for RDI, RSI and RBX
@ 2024-03-17 11:37 H.J. Lu
  0 siblings, 0 replies; only message in thread
From: H.J. Lu @ 2024-03-17 11:37 UTC (permalink / raw)
  To: libc-alpha; +Cc: fweimer

_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
STATE_SAVE_OFFSET(%rsp).

   +==================+<- stack frame start aligned at 8 or 16 bytes
   |                  |<- RDI
   |                  |<- RSI
   |                  |<- RBX
   |                  |<- paddings from stack realignment of 64 bytes
   |------------------|<- xsave buffer end aligned at 64 bytes
   |                  |<-
   |                  |<-
   |                  |<-
   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
   |                  |<- 8-byte padding
   |                  |<- 8-byte padding
   |                  |<- R11
   |                  |<- R10
   |                  |<- R9
   |                  |<- R8
   |                  |<- RDX
   |                  |<- RCX
   +==================+<- State buffer start aligned at 64 bytes

This fixes BZ #31501.
---
 sysdeps/x86/cpu-features.c                 | 11 ++-
 sysdeps/x86/sysdep.h                       | 29 ++++++++
 sysdeps/x86_64/Makefile                    | 19 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c | 19 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S | 87 ++++++++++++++++++++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c | 19 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c      | 21 ++++++
 7 files changed, 201 insertions(+), 4 deletions(-)
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4ea373dffa..3d7c2819d7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
 	      /* NB: On AMX capable processors, ebx always includes AMX
 		 states.  */
 	      unsigned int xsave_state_full_size
-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+		= ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
 
 	      cpu_features->xsave_state_size
 		= xsave_state_full_size;
@@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
 		      unsigned int amx_size
 			= (xstate_amx_comp_offsets[31]
 			   + xstate_amx_comp_sizes[31]);
-		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
-					   64);
+		      amx_size
+			= ALIGN_UP ((amx_size
+				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
+				    64);
 		      /* Set xsave_state_full_size to the compact AMX
 			 state size for XSAVEC.  NB: xsave_state_full_size
 			 is only used in _dl_tlsdesc_dynamic_xsave and
@@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
 		      cpu_features->xsave_state_full_size = amx_size;
 #endif
 		      cpu_features->xsave_state_size
-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+				    64);
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91..46fcd27345 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -46,6 +46,34 @@
    red-zone into account.  */
 # define STATE_SAVE_OFFSET (8 * 7 + 8)
 
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
+   RDI, RSI and RBX values on stack by xsave.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- RDI
+   |                  |<- RSI
+   |                  |<- RBX
+   |                  |<- paddings from stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding
+   |                  |<- 8-byte padding
+   |                  |<- R11
+   |                  |<- R10
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDX
+   |                  |<- RCX
+   +==================+<- State buffer start aligned at 64 bytes
+
+*/
+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
+
 /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
    registers are mutually exclusive.  */
 # define STATE_SAVE_MASK		\
@@ -68,6 +96,7 @@
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
    doesn't have red-zone, use 0 here.  */
 # define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
 
 /* Save SSE, AVX, AXV512, mask and bound registers.   */
 # define STATE_SAVE_MASK		\
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 66b21954f3..e21e4b96ab 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -217,6 +217,25 @@ valgrind-suppressions-tst-valgrind-smoke = \
   --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
 endif
 
+tests += \
+  tst-gnu2-tls2-x86-64 \
+# tests
+
+modules-names += \
+  tst-gnu2-tls2-x86-64-mod0 \
+  tst-gnu2-tls2-x86-64-mod1 \
+  tst-gnu2-tls2-x86-64-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-x86-64: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-x86-64.out: \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod0.so \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod1.so \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod2.so
+
+CFLAGS-tst-gnu2-tls2-x86-64-mod0.c += -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-x86-64-mod2.c += -mtls-dialect=gnu2
+
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
new file mode 100644
index 0000000000..40b3ec5c82
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
@@ -0,0 +1,19 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2mod0.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
new file mode 100644
index 0000000000..449ddd5c9d
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
@@ -0,0 +1,87 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* On AVX512 machines, OFFSET == 104 caused _dl_tlsdesc_dynamic_xsavec
+   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
+   is 960 bytes and this test didn't fail.  It may be due to the unused
+   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
+   this test might fail without the fix.  */
+#ifndef OFFSET
+# define OFFSET 104
+#endif
+
+	.text
+	.p2align 4
+	.globl	apply_tls
+	.type	apply_tls, @function
+apply_tls:
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes.  */
+	and	$-64, %RSP_LP
+	sub	$OFFSET, %RSP_LP
+	pushq	%rbx
+	/* Set %ebx to 0xbadbeef.  */
+	movl	$0xbadbeef, %ebx
+	movl	$0xbadbeef, %esi
+	movq	%rdi, saved_rdi(%rip)
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP
+	movups	%xmm0, 32(%RAX_LP)
+	movdqu	16(%RDI_LP), %xmm1
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)
+	lea	32(%RBX_LP), %RAX_LP
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
+	.hidden	tls_var1
+	.globl	tls_var1
+	.section	.tbss,"awT",@nobits
+	.align 16
+	.type	tls_var1, @object
+	.size	tls_var1, 3200
+tls_var1:
+	.zero	3200
+	.local	saved_rdi
+	.comm	saved_rdi,8,8
+	.local	saved_rsi
+	.comm	saved_rsi,8,8
+	.section	.note.GNU-stack,"",@progbits
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
new file mode 100644
index 0000000000..c12b81a49b
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
@@ -0,0 +1,19 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2mod2.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
new file mode 100644
index 0000000000..7d51f488bd
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
@@ -0,0 +1,21 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define MOD(i) "tst-gnu2-tls2-x86-64-mod" #i ".so"
+
+#include <elf/tst-gnu2-tls2.c>
-- 
2.44.0


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-03-17 11:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-17 11:37 [PATCH v3] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).