public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
@ 2024-03-17  3:13 H.J. Lu
  2024-03-17  8:26 ` Florian Weimer
  0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2024-03-17  3:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: fweimer

_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning the stack.
After realigning the stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
TLSDESC_CALL_STATE_SAVE_OFFSET to allocate space for all integer registers
and round up the state size to a multiple of 64 bytes, so that xsave to
STATE_SAVE_OFFSET(%rsp) does not clobber the RDI, RSI and RBX values saved
on the stack.  This fixes BZ #31501.
---
 sysdeps/x86/cpu-features.c                 | 11 ++-
 sysdeps/x86/sysdep.h                       |  8 ++
 sysdeps/x86_64/Makefile                    | 19 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c | 19 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S | 87 ++++++++++++++++++++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c | 19 +++++
 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c      | 21 ++++++
 7 files changed, 180 insertions(+), 4 deletions(-)
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4ea373dffa..5e9c167417 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
 	      /* NB: On AMX capable processors, ebx always includes AMX
 		 states.  */
 	      unsigned int xsave_state_full_size
-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+		= ALIGN_UP (ebx + TLSDESC_CALL_STATE_SAVE_OFFSET, 64);
 
 	      cpu_features->xsave_state_size
 		= xsave_state_full_size;
@@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
 		      unsigned int amx_size
 			= (xstate_amx_comp_offsets[31]
 			   + xstate_amx_comp_sizes[31]);
-		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
-					   64);
+		      amx_size
+			= ALIGN_UP ((amx_size
+				     + TLSDESC_CALL_STATE_SAVE_OFFSET),
+				    64);
 		      /* Set xsave_state_full_size to the compact AMX
 			 state size for XSAVEC.  NB: xsave_state_full_size
 			 is only used in _dl_tlsdesc_dynamic_xsave and
@@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
 		      cpu_features->xsave_state_full_size = amx_size;
 #endif
 		      cpu_features->xsave_state_size
-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+			= ALIGN_UP (size + TLSDESC_CALL_STATE_SAVE_OFFSET,
+				    64);
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91..262d4083e2 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -46,6 +46,13 @@
    red-zone into account.  */
 # define STATE_SAVE_OFFSET (8 * 7 + 8)
 
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+   R11.  Allocate space for all integer registers and round up the state
+   size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
+   stack by xsave on STATE_SAVE_OFFSET(%rsp).  */
+# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
+
 /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
    registers are mutually exclusive.  */
 # define STATE_SAVE_MASK		\
@@ -68,6 +75,7 @@
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
    doesn't have red-zone, use 0 here.  */
 # define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_STATE_SAVE_OFFSET 0
 
 /* Save SSE, AVX, AXV512, mask and bound registers.   */
 # define STATE_SAVE_MASK		\
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 66b21954f3..e21e4b96ab 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -217,6 +217,25 @@ valgrind-suppressions-tst-valgrind-smoke = \
   --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
 endif
 
+tests += \
+  tst-gnu2-tls2-x86-64 \
+# tests
+
+modules-names += \
+  tst-gnu2-tls2-x86-64-mod0 \
+  tst-gnu2-tls2-x86-64-mod1 \
+  tst-gnu2-tls2-x86-64-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-x86-64: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-x86-64.out: \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod0.so \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod1.so \
+  $(objpfx)tst-gnu2-tls2-x86-64-mod2.so
+
+CFLAGS-tst-gnu2-tls2-x86-64-mod0.c += -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-x86-64-mod2.c += -mtls-dialect=gnu2
+
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
new file mode 100644
index 0000000000..40b3ec5c82
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
@@ -0,0 +1,19 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2mod0.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
new file mode 100644
index 0000000000..449ddd5c9d
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
@@ -0,0 +1,87 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* On AVX512 machines, OFFSET == 104 caused _dl_tlsdesc_dynamic_xsavec
+   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
+   is 960 bytes and this test didn't fail.  It may be due to the unused
+   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
+   this test might fail without the fix.  */
+#ifndef OFFSET
+# define OFFSET 104
+#endif
+
+	.text
+	.p2align 4
+	.globl	apply_tls
+	.type	apply_tls, @function
+apply_tls:
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes.  */
+	and	$-64, %RSP_LP
+	sub	$OFFSET, %RSP_LP
+	pushq	%rbx
+	/* Set %ebx and %esi to 0xbadbeef.  */
+	movl	$0xbadbeef, %ebx
+	movl	$0xbadbeef, %esi
+	movq	%rdi, saved_rdi(%rip)
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP
+	movups	%xmm0, 32(%RAX_LP)
+	movdqu	16(%RDI_LP), %xmm1
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)
+	lea	32(%RBX_LP), %RAX_LP
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
+	.hidden	tls_var1
+	.globl	tls_var1
+	.section	.tbss,"awT",@nobits
+	.align 16
+	.type	tls_var1, @object
+	.size	tls_var1, 3200
+tls_var1:
+	.zero	3200
+	.local	saved_rdi
+	.comm	saved_rdi,8,8
+	.local	saved_rsi
+	.comm	saved_rsi,8,8
+	.section	.note.GNU-stack,"",@progbits
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
new file mode 100644
index 0000000000..c12b81a49b
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
@@ -0,0 +1,19 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2mod2.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
new file mode 100644
index 0000000000..7d51f488bd
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
@@ -0,0 +1,21 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define MOD(i) "tst-gnu2-tls2-x86-64-mod" #i ".so"
+
+#include <elf/tst-gnu2-tls2.c>
-- 
2.44.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
  2024-03-17  3:13 [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu
@ 2024-03-17  8:26 ` Florian Weimer
  2024-03-17 11:39   ` H.J. Lu
  0 siblings, 1 reply; 4+ messages in thread
From: Florian Weimer @ 2024-03-17  8:26 UTC (permalink / raw)
  To: H.J. Lu; +Cc: libc-alpha

* H. J. Lu:

> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index db8e576e91..262d4083e2 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -46,6 +46,13 @@
>     red-zone into account.  */
>  # define STATE_SAVE_OFFSET (8 * 7 + 8)
>  
> +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> +   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> +   R11.  Allocate space for all integer registers and round up the state
> +   size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
> +   stack by xsave on STATE_SAVE_OFFSET(%rsp).  */
> +# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)

Why 64?  The red zone is 128 bytes, and the three registers only need 24
bytes.  I think 24 has to be rounded up to 64 to preserve the XSAVE area
alignment.

The macro is also misnamed because it is not an offset.  The real offset
depends on what the stack realignment did.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
  2024-03-17  8:26 ` Florian Weimer
@ 2024-03-17 11:39   ` H.J. Lu
  2024-03-17 12:57     ` H.J. Lu
  0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2024-03-17 11:39 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libc-alpha

On Sun, Mar 17, 2024 at 1:26 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * H. J. Lu:
>
> > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > index db8e576e91..262d4083e2 100644
> > --- a/sysdeps/x86/sysdep.h
> > +++ b/sysdeps/x86/sysdep.h
> > @@ -46,6 +46,13 @@
> >     red-zone into account.  */
> >  # define STATE_SAVE_OFFSET (8 * 7 + 8)
> >
> > +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> > +   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> > +   R11.  Allocate space for all integer registers and round up the state
> > +   size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
> > +   stack by xsave on STATE_SAVE_OFFSET(%rsp).  */
> > +# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
>
> Why 64?  The red zone is 128 bytes, and the three registers only need 24
> bytes.  I think 24 has to be rounded up to 64 to preserve the XSAVE area
> alignment.

I changed it to 24.

> The macro is also misnamed because it is not an offset.  The real offset
> depends on what the stack realignment did.
>

I renamed it to TLSDESC_CALL_REGISTER_SAVE_AREA in the v3 patch:

https://patchwork.sourceware.org/project/glibc/list/?series=31971

-- 
H.J.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
  2024-03-17 11:39   ` H.J. Lu
@ 2024-03-17 12:57     ` H.J. Lu
  0 siblings, 0 replies; 4+ messages in thread
From: H.J. Lu @ 2024-03-17 12:57 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libc-alpha

On Sun, Mar 17, 2024 at 4:39 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Mar 17, 2024 at 1:26 AM Florian Weimer <fweimer@redhat.com> wrote:
> >
> > * H. J. Lu:
> >
> > > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > > index db8e576e91..262d4083e2 100644
> > > --- a/sysdeps/x86/sysdep.h
> > > +++ b/sysdeps/x86/sysdep.h
> > > @@ -46,6 +46,13 @@
> > >     red-zone into account.  */
> > >  # define STATE_SAVE_OFFSET (8 * 7 + 8)
> > >
> > > +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> > > +   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> > > +   R11.  Allocate space for all integer registers and round up the state
> > > +   size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
> > > +   stack by xsave on STATE_SAVE_OFFSET(%rsp).  */
> > > +# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
> >
> > Why 64?  The red zone is 128 bytes, and the three registers only need 24
> > bytes.  I think 24 has to be rounded up to 64 to preserve the XSAVE area
> > alignment.
>
> I changed it to 24.
>
> > The macro is also misnamed because it is not an offset.  The real offset
> > depends on what the stack realignment did.
> >
>
> I renamed it to TLSDESC_CALL_REGISTER_SAVE_AREA in the v3 patch:
>
> https://patchwork.sourceware.org/project/glibc/list/?series=31971
>

No need to add a new test.  Only sysdeps/x86_64/tst-gnu2-tls2mod1.S
is needed:

https://patchwork.sourceware.org/project/glibc/list/?series=31975

-- 
H.J.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-03-17 12:57 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-17  3:13 [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu
2024-03-17  8:26 ` Florian Weimer
2024-03-17 11:39   ` H.J. Lu
2024-03-17 12:57     ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).