* [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
@ 2024-03-17 3:13 H.J. Lu
2024-03-17 8:26 ` Florian Weimer
0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2024-03-17 3:13 UTC (permalink / raw)
To: libc-alpha; +Cc: fweimer
_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning the stack.
After realigning the stack, it saves RCX, RDX, R8, R9, R10 and R11. Define
TLSDESC_CALL_STATE_SAVE_OFFSET to allocate space for all integer registers
and round the state size up to a multiple of 64 bytes, so that xsave to
STATE_SAVE_OFFSET(%rsp) does not clobber the RDI, RSI and RBX values saved
on the stack. This fixes BZ #31501.
---
sysdeps/x86/cpu-features.c | 11 ++-
sysdeps/x86/sysdep.h | 8 ++
sysdeps/x86_64/Makefile | 19 +++++
sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c | 19 +++++
sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S | 87 ++++++++++++++++++++++
sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c | 19 +++++
sysdeps/x86_64/tst-gnu2-tls2-x86-64.c | 21 ++++++
7 files changed, 180 insertions(+), 4 deletions(-)
create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
create mode 100644 sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4ea373dffa..5e9c167417 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
/* NB: On AMX capable processors, ebx always includes AMX
states. */
unsigned int xsave_state_full_size
- = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+ = ALIGN_UP (ebx + TLSDESC_CALL_STATE_SAVE_OFFSET, 64);
cpu_features->xsave_state_size
= xsave_state_full_size;
@@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
unsigned int amx_size
= (xstate_amx_comp_offsets[31]
+ xstate_amx_comp_sizes[31]);
- amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
- 64);
+ amx_size
+ = ALIGN_UP ((amx_size
+ + TLSDESC_CALL_STATE_SAVE_OFFSET),
+ 64);
/* Set xsave_state_full_size to the compact AMX
state size for XSAVEC. NB: xsave_state_full_size
is only used in _dl_tlsdesc_dynamic_xsave and
@@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
cpu_features->xsave_state_full_size = amx_size;
#endif
cpu_features->xsave_state_size
- = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+ = ALIGN_UP (size + TLSDESC_CALL_STATE_SAVE_OFFSET,
+ 64);
CPU_FEATURE_SET (cpu_features, XSAVEC);
}
}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91..262d4083e2 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -46,6 +46,13 @@
red-zone into account. */
# define STATE_SAVE_OFFSET (8 * 7 + 8)
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+ stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+ R11. Allocate space for all integer registers and round up the state
+ size to a multiple of 64 bytes to avoid clobbering saved RDI, RSI and
+ RBX values on stack by xsave to STATE_SAVE_OFFSET(%rsp). */
+# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
+
/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
registers are mutually exclusive. */
# define STATE_SAVE_MASK \
@@ -68,6 +75,7 @@
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
doesn't have red-zone, use 0 here. */
# define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_STATE_SAVE_OFFSET 0
/* Save SSE, AVX, AXV512, mask and bound registers. */
# define STATE_SAVE_MASK \
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 66b21954f3..e21e4b96ab 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -217,6 +217,25 @@ valgrind-suppressions-tst-valgrind-smoke = \
--suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
endif
+tests += \
+ tst-gnu2-tls2-x86-64 \
+# tests
+
+modules-names += \
+ tst-gnu2-tls2-x86-64-mod0 \
+ tst-gnu2-tls2-x86-64-mod1 \
+ tst-gnu2-tls2-x86-64-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-x86-64: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-x86-64.out: \
+ $(objpfx)tst-gnu2-tls2-x86-64-mod0.so \
+ $(objpfx)tst-gnu2-tls2-x86-64-mod1.so \
+ $(objpfx)tst-gnu2-tls2-x86-64-mod2.so
+
+CFLAGS-tst-gnu2-tls2-x86-64-mod0.c += -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-x86-64-mod2.c += -mtls-dialect=gnu2
+
endif # $(subdir) == elf
ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
new file mode 100644
index 0000000000..40b3ec5c82
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod0.c
@@ -0,0 +1,19 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-gnu2-tls2mod0.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
new file mode 100644
index 0000000000..449ddd5c9d
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod1.S
@@ -0,0 +1,87 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* On AVX512 machines, OFFSET == 104 caused _dl_tlsdesc_dynamic_xsavec
+ to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size
+ is 960 bytes and this test didn't fail. It may be due to the unused
+ last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and
+ this test might fail without the fix. */
+#ifndef OFFSET
+# define OFFSET 104
+#endif
+
+ .text
+ .p2align 4
+ .globl apply_tls
+ .type apply_tls, @function
+apply_tls:
+ cfi_startproc
+ _CET_ENDBR
+ pushq %rbp
+ cfi_def_cfa_offset (16)
+ cfi_offset (6, -16)
+ movdqu (%RDI_LP), %xmm0
+ lea tls_var1@TLSDESC(%rip), %RAX_LP
+ mov %RSP_LP, %RBP_LP
+ cfi_def_cfa_register (6)
+ /* Align stack to 64 bytes. */
+ and $-64, %RSP_LP
+ sub $OFFSET, %RSP_LP
+ pushq %rbx
+ /* Set %ebx and %esi to 0xbadbeef. */
+ movl $0xbadbeef, %ebx
+ movl $0xbadbeef, %esi
+ movq %rdi, saved_rdi(%rip)
+ movq %rsi, saved_rsi(%rip)
+ call *tls_var1@TLSCALL(%RAX_LP)
+ /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */
+ cmpq saved_rdi(%rip), %rdi
+ jne L(hlt)
+ cmpq saved_rsi(%rip), %rsi
+ jne L(hlt)
+ cmpl $0xbadbeef, %ebx
+ jne L(hlt)
+ add %fs:0, %RAX_LP
+ movups %xmm0, 32(%RAX_LP)
+ movdqu 16(%RDI_LP), %xmm1
+ mov %RAX_LP, %RBX_LP
+ movups %xmm1, 48(%RAX_LP)
+ lea 32(%RBX_LP), %RAX_LP
+ pop %rbx
+ leave
+ cfi_def_cfa (7, 8)
+ ret
+L(hlt):
+ hlt
+ cfi_endproc
+ .size apply_tls, .-apply_tls
+ .hidden tls_var1
+ .globl tls_var1
+ .section .tbss,"awT",@nobits
+ .align 16
+ .type tls_var1, @object
+ .size tls_var1, 3200
+tls_var1:
+ .zero 3200
+ .local saved_rdi
+ .comm saved_rdi,8,8
+ .local saved_rsi
+ .comm saved_rsi,8,8
+ .section .note.GNU-stack,"",@progbits
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
new file mode 100644
index 0000000000..c12b81a49b
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64-mod2.c
@@ -0,0 +1,19 @@
+/* DSO used by tst-gnu2-tls2-x86-64.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-gnu2-tls2mod2.c>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
new file mode 100644
index 0000000000..7d51f488bd
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2-x86-64.c
@@ -0,0 +1,21 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define MOD(i) "tst-gnu2-tls2-x86-64-mod" #i ".so"
+
+#include <elf/tst-gnu2-tls2.c>
--
2.44.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
2024-03-17 3:13 [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu
@ 2024-03-17 8:26 ` Florian Weimer
2024-03-17 11:39 ` H.J. Lu
0 siblings, 1 reply; 4+ messages in thread
From: Florian Weimer @ 2024-03-17 8:26 UTC (permalink / raw)
To: H.J. Lu; +Cc: libc-alpha
* H. J. Lu:
> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index db8e576e91..262d4083e2 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -46,6 +46,13 @@
> red-zone into account. */
> # define STATE_SAVE_OFFSET (8 * 7 + 8)
>
> +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> + stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> + R11. Allocate space for all integer registers and round up the state
> + size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
> + stack by xsave on STATE_SAVE_OFFSET(%rsp). */
> +# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
Why 64? The red zone is 128 bytes, and the three registers only need 24
bytes. I think 24 has to be rounded up to 64 to preserve the XSAVE area
alignment.
The macro is also misnamed because it is not an offset. The real offset
depends on what the stack realignment did.
Thanks,
Florian
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
2024-03-17 8:26 ` Florian Weimer
@ 2024-03-17 11:39 ` H.J. Lu
2024-03-17 12:57 ` H.J. Lu
0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2024-03-17 11:39 UTC (permalink / raw)
To: Florian Weimer; +Cc: libc-alpha
On Sun, Mar 17, 2024 at 1:26 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * H. J. Lu:
>
> > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > index db8e576e91..262d4083e2 100644
> > --- a/sysdeps/x86/sysdep.h
> > +++ b/sysdeps/x86/sysdep.h
> > @@ -46,6 +46,13 @@
> > red-zone into account. */
> > # define STATE_SAVE_OFFSET (8 * 7 + 8)
> >
> > +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> > + stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> > + R11. Allocate space for all integer registers and round up the state
> > + size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
> > + stack by xsave on STATE_SAVE_OFFSET(%rsp). */
> > +# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
>
> Why 64? The red zone is 128 bytes, and the three registers only need 24
> bytes. I think 24 has to be rounded up to 64 to preserve the XSAVE area
> alignment.
I changed it to 24.
> The macro is also misnamed because it is not an offset. The real offset
> depends on what the stack realignment did.
>
I renamed it to TLSDESC_CALL_REGISTER_SAVE_AREA in the v3 patch:
https://patchwork.sourceware.org/project/glibc/list/?series=31971
--
H.J.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX
2024-03-17 11:39 ` H.J. Lu
@ 2024-03-17 12:57 ` H.J. Lu
0 siblings, 0 replies; 4+ messages in thread
From: H.J. Lu @ 2024-03-17 12:57 UTC (permalink / raw)
To: Florian Weimer; +Cc: libc-alpha
On Sun, Mar 17, 2024 at 4:39 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Mar 17, 2024 at 1:26 AM Florian Weimer <fweimer@redhat.com> wrote:
> >
> > * H. J. Lu:
> >
> > > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > > index db8e576e91..262d4083e2 100644
> > > --- a/sysdeps/x86/sysdep.h
> > > +++ b/sysdeps/x86/sysdep.h
> > > @@ -46,6 +46,13 @@
> > > red-zone into account. */
> > > # define STATE_SAVE_OFFSET (8 * 7 + 8)
> > >
> > > +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> > > + stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> > > + R11. Allocate space for all integer registers and round up the state
> > > + size to 64 bytes to avoid clobbering saved RDI, RSI and RBX values on
> > > + stack by xsave on STATE_SAVE_OFFSET(%rsp). */
> > > +# define TLSDESC_CALL_STATE_SAVE_OFFSET (STATE_SAVE_OFFSET + 64)
> >
> > Why 64? The red zone is 128 bytes, and the three registers only need 24
> > bytes. I think 24 has to be rounded up to 64 to preserve the XSAVE area
> > alignment.
>
> I changed it to 24.
>
> > The macro is also misnamed because it is not an offset. The real offset
> > depends on what the stack realignment did.
> >
>
> I renamed it to TLSDESC_CALL_REGISTER_SAVE_AREA in the v3 patch:
>
> https://patchwork.sourceware.org/project/glibc/list/?series=31971
>
No need to add a new test. Only sysdeps/x86_64/tst-gnu2-tls2mod1.S
is needed:
https://patchwork.sourceware.org/project/glibc/list/?series=31975
--
H.J.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-03-17 12:57 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-17 3:13 [PATCH v2] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu
2024-03-17 8:26 ` Florian Weimer
2024-03-17 11:39 ` H.J. Lu
2024-03-17 12:57 ` H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).