I pushed Amrita's Power11 patch to the 2.39 release branch. There is no change to the ABI with this patch. The following three changes have been added to provide initial Power11 support. 1. Add the directories to hold Power11 files. 2. Add support to select Power11 libraries based on AT_PLATFORM. 3. Let submachine=power11 be set automatically. Reviewed-by: Florian Weimer <fweimer@redhat.com> Reviewed-by: Peter Bergner <bergner@linux.ibm.com> (cherry picked from commit 1ea051145612f199d8716ecdf78b084b00b5a727) --- sysdeps/powerpc/dl-procinfo.h | 8 +++++++- sysdeps/powerpc/powerpc32/power11/Implies | 2 ++ sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies | 1 + sysdeps/powerpc/powerpc32/power11/multiarch/Implies | 1 + sysdeps/powerpc/powerpc64/be/power11/Implies | 2 ++ sysdeps/powerpc/powerpc64/be/power11/fpu/Implies | 1 + .../powerpc/powerpc64/be/power11/fpu/multiarch/Implies | 1 + sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies | 1 + sysdeps/powerpc/powerpc64/le/power11/Implies | 2 ++ sysdeps/powerpc/powerpc64/le/power11/fpu/Implies | 1 + .../powerpc/powerpc64/le/power11/fpu/multiarch/Implies | 1 + sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies | 1 + sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c | 6 ++++-- sysdeps/powerpc/preconfigure | 2 +- sysdeps/powerpc/preconfigure.ac | 2 +- 15 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 sysdeps/powerpc/powerpc32/power11/Implies create mode 100644 sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies create mode 100644 sysdeps/powerpc/powerpc32/power11/multiarch/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/power11/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/power11/fpu/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies create mode 100644 sysdeps/powerpc/powerpc64/le/power11/Implies create mode 100644 sysdeps/powerpc/powerpc64/le/power11/fpu/Implies 
create mode 100644 sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies create mode 100644 sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h index f8cb343877..b36697ba44 100644 --- a/sysdeps/powerpc/dl-procinfo.h +++ b/sysdeps/powerpc/dl-procinfo.h @@ -38,7 +38,7 @@ #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + PPC_FEATURE_HAS_DFP) -#define _DL_PLATFORMS_COUNT 16 +#define _DL_PLATFORMS_COUNT 17 #define _DL_FIRST_PLATFORM 32 /* Mask to filter out platforms. */ @@ -62,6 +62,7 @@ #define PPC_PLATFORM_POWER8 13 #define PPC_PLATFORM_POWER9 14 #define PPC_PLATFORM_POWER10 15 +#define PPC_PLATFORM_POWER11 16 static inline const char * __attribute__ ((unused)) @@ -89,6 +90,11 @@ _dl_string_platform (const char *str) ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10; str++; } + else if (str[1] == '1') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11; + str++; + } else return -1; break; diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies new file mode 100644 index 0000000000..051cbe0f79 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power11/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power10/fpu +powerpc/powerpc32/power10 diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies new file mode 100644 index 0000000000..58edb2861d --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power10/fpu/multiarch diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies new file mode 100644 index 0000000000..c70f0428ba --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power10/multiarch diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies new file mode 100644 
index 0000000000..de481d1c13 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/be/power10/fpu +powerpc/powerpc64/be/power10 diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies new file mode 100644 index 0000000000..dff0e13064 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/be/power10/fpu diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies new file mode 100644 index 0000000000..c3f259e009 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/be/power10/fpu/multiarch diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies new file mode 100644 index 0000000000..9491a394c9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/be/power10/multiarch diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies new file mode 100644 index 0000000000..e18182dcc1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/le/power10/fpu +powerpc/powerpc64/le/power10 diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies new file mode 100644 index 0000000000..e41bd55684 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/le/power10/fpu diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies new file mode 100644 index 0000000000..c838d50931 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/le/power10/fpu/multiarch diff --git 
a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies new file mode 100644 index 0000000000..687248c3c2 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/le/power10/multiarch diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c index 77465d9133..65d3e69303 100644 --- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c +++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c @@ -36,9 +36,11 @@ compute_level (void) return 9; if (strcmp (platform, "power10") == 0) return 10; + if (strcmp (platform, "power11") == 0) + return 11; printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform); - /* Assume that the new platform supports POWER10. */ - return 10; + /* Assume that the new platform supports POWER11. */ + return 11; } static int diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure index 4de94089a3..9e5a07ab6d 100644 --- a/sysdeps/powerpc/preconfigure +++ b/sysdeps/powerpc/preconfigure @@ -58,7 +58,7 @@ fi ;; - a2|970|power[4-9]|power5x|power6+|power10) + a2|970|power[4-9]|power5x|power6+|power10|power11) submachine=${archcpu} if test ${libc_cv_cc_submachine+y} then : diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac index 6c63bd8257..14b6dafd4a 100644 --- a/sysdeps/powerpc/preconfigure.ac +++ b/sysdeps/powerpc/preconfigure.ac @@ -46,7 +46,7 @@ case "${machine}:${submachine}" in AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="") ;; - a2|970|power[[4-9]]|power5x|power6+|power10) + a2|970|power[[4-9]]|power5x|power6+|power10|power11) submachine=${archcpu} AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="") ;; -- 2.39.3
I pushed the cherry-pick of Manjunath's patch #2 which adds the support for HWCAP3/HWCAP4 to the powerpc port. This is not an ABI changing patch. Patch #1 was pushed before 2.39 branched. This patch adds a new feature for powerpc. In order to get faster access to the HWCAP3/HWCAP4 masks, similar to HWCAP/HWCAP2 (i.e. for implementing __builtin_cpu_supports() in GCC) without the overhead of reading them from the auxiliary vector, we now reserve space for them in the TCB. Suggested-by: Peter Bergner <bergner@linux.ibm.com> Reviewed-by: Peter Bergner <bergner@linux.ibm.com> (cherry picked from commit 3ab9b88e2ac91062b6d493fe32bd101a55006c6a) --- elf/dl-diagnostics.c | 2 + elf/dl-support.c | 2 + elf/elf.h | 4 ++ sysdeps/generic/ldsodefs.h | 2 + sysdeps/powerpc/dl-procinfo.c | 6 ++- sysdeps/powerpc/dl-procinfo.h | 52 +++++++++++++------ sysdeps/powerpc/hwcapinfo.c | 11 ++-- sysdeps/unix/sysv/linux/dl-parse_auxv.h | 2 + sysdeps/unix/sysv/linux/dl-sysdep.c | 2 + .../unix/sysv/linux/powerpc/cpu-features.c | 2 + .../unix/sysv/linux/powerpc/cpu-features.h | 2 + sysdeps/unix/sysv/linux/powerpc/libc-start.c | 6 +++ 12 files changed, 74 insertions(+), 19 deletions(-) diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c index 7345ebc4e5..aaf67b87e8 100644 --- a/elf/dl-diagnostics.c +++ b/elf/dl-diagnostics.c @@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ) _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap)); _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT); _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2)); + _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3)); + _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4)); _dl_diagnostics_print_labeled_string ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs); _dl_diagnostics_print_labeled_value diff --git a/elf/dl-support.c b/elf/dl-support.c index 2f502c8b0d..451932dd03 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -158,6 +158,8 
@@ const ElfW(Phdr) *_dl_phdr; size_t _dl_phnum; uint64_t _dl_hwcap; uint64_t _dl_hwcap2; +uint64_t _dl_hwcap3; +uint64_t _dl_hwcap4; enum dso_sort_algorithm _dl_dso_sort_algo; diff --git a/elf/elf.h b/elf/elf.h index 455731663c..1c394c64cd 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -1234,6 +1234,10 @@ typedef struct #define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size. */ #define AT_RSEQ_ALIGN 28 /* rseq allocation alignment. */ +/* More machine-dependent hints about processor capabilities. */ +#define AT_HWCAP3 29 /* extension of AT_HWCAP. */ +#define AT_HWCAP4 30 /* extension of AT_HWCAP. */ + #define AT_EXECFN 31 /* Filename of executable. */ /* Pointer to the global system page used for system calls and other diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 117c901ccc..50f58a60e3 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -646,6 +646,8 @@ struct rtld_global_ro /* Mask for more hardware capabilities that are available on some platforms. */ EXTERN uint64_t _dl_hwcap2; + EXTERN uint64_t _dl_hwcap3; + EXTERN uint64_t _dl_hwcap4; EXTERN enum dso_sort_algorithm _dl_dso_sort_algo; diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c index a76bb6e5b0..8cf00aa7e3 100644 --- a/sysdeps/powerpc/dl-procinfo.c +++ b/sysdeps/powerpc/dl-procinfo.c @@ -38,6 +38,10 @@ needed. */ +/* The total number of available bits (including those prior to + _DL_HWCAP_FIRST). Some of these bits might not be used. 
*/ +#define _DL_HWCAP_COUNT 128 + #ifndef PROCINFO_CLASS # define PROCINFO_CLASS #endif @@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features #if !defined PROCINFO_DECL && defined SHARED ._dl_powerpc_cap_flags #else -PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15] +PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15] #endif #ifndef PROCINFO_DECL = { diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h index 68f4241095..f8cb343877 100644 --- a/sysdeps/powerpc/dl-procinfo.h +++ b/sysdeps/powerpc/dl-procinfo.h @@ -22,16 +22,17 @@ #include <ldsodefs.h> #include <sysdep.h> /* This defines the PPC_FEATURE[2]_* macros. */ -/* The total number of available bits (including those prior to - _DL_HWCAP_FIRST). Some of these bits might not be used. */ -#define _DL_HWCAP_COUNT 64 +/* Feature masks are all 32-bits in size. */ +#define _DL_HWCAP_SIZE 32 -/* Features started at bit 31 and decremented as new features were added. */ -#define _DL_HWCAP_LAST 31 +/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings. */ +#define _DL_HWCAP2_OFFSET _DL_HWCAP_SIZE -/* AT_HWCAP2 features started at bit 31 and decremented as new features were - added. HWCAP2 feature bits start at bit 0. */ -#define _DL_HWCAP2_LAST 31 +/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings. */ +#define _DL_HWCAP3_OFFSET (_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE) + +/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings. */ +#define _DL_HWCAP4_OFFSET (_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE) /* These bits influence library search. 
*/ #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ @@ -187,21 +188,42 @@ _dl_procinfo (unsigned int type, unsigned long int word) case AT_HWCAP: _dl_printf ("AT_HWCAP: "); - for (int i = 0; i <= _DL_HWCAP_LAST; ++i) + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) if (word & (1 << i)) _dl_printf (" %s", _dl_hwcap_string (i)); break; case AT_HWCAP2: { - unsigned int offset = _DL_HWCAP_LAST + 1; _dl_printf ("AT_HWCAP2: "); - /* We have to go through them all because the kernel added the - AT_HWCAP2 features starting with the high bits. */ - for (int i = 0; i <= _DL_HWCAP2_LAST; ++i) - if (word & (1 << i)) - _dl_printf (" %s", _dl_hwcap_string (offset + i)); + /* We have to go through them all because the kernel added the + AT_HWCAP2 features starting with the high bits. */ + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i)); + break; + } + case AT_HWCAP3: + { + _dl_printf ("AT_HWCAP3: "); + + /* We have to go through them all because the kernel added the + AT_HWCAP3 features starting with the high bits. */ + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i)); + break; + } + case AT_HWCAP4: + { + _dl_printf ("AT_HWCAP4: "); + + /* We have to go through them all because the kernel added the + AT_HWCAP4 features starting with the high bits. */ + for (int i = 0; i <= _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i)); break; } case AT_L1I_CACHEGEOMETRY: diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c index 76344f285a..f6fede15a7 100644 --- a/sysdeps/powerpc/hwcapinfo.c +++ b/sysdeps/powerpc/hwcapinfo.c @@ -31,7 +31,7 @@ void __tcb_parse_hwcap_and_convert_at_platform (void) { - uint64_t h1, h2; + uint64_t h1, h2, h3, h4; /* Read AT_PLATFORM string from auxv and convert it to a number. 
*/ __tcb.at_platform = _dl_string_platform (GLRO (dl_platform)); @@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void) /* Read HWCAP and HWCAP2 from auxv. */ h1 = GLRO (dl_hwcap); h2 = GLRO (dl_hwcap2); + h3 = GLRO (dl_hwcap3); + h4 = GLRO (dl_hwcap4); /* hwcap contains only the latest supported ISA, the code checks which is and fills the previous supported ones. */ @@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void) else if (h1 & PPC_FEATURE_POWER5) h1 |= PPC_FEATURE_POWER4; - uint64_t array_hwcaps[] = { h1, h2 }; + uint64_t array_hwcaps[] = { h1, h2, h3, h4 }; init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps); /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that we can read both in a single load later. */ __tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff); - __tcb.hwcap_extn = 0x0; + + /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that + we can read both in a single load later. */ + __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff); } #if IS_IN (rtld) diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h index e3d758b163..ea2a58ecb1 100644 --- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h +++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h @@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values) GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM]; GLRO(dl_hwcap) = auxv_values[AT_HWCAP]; GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2]; + GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3]; + GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4]; GLRO(dl_clktck) = auxv_values[AT_CLKTCK]; GLRO(dl_fpu_control) = auxv_values[AT_FPUCW]; _dl_random = (void *) auxv_values[AT_RANDOM]; diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c index ad3692d738..e1b14e9eb3 100644 --- a/sysdeps/unix/sysv/linux/dl-sysdep.c +++ b/sysdeps/unix/sysv/linux/dl-sysdep.c @@ -197,6 +197,8 @@ _dl_show_auxv (void) [AT_SYSINFO_EHDR - 2] = { 
"SYSINFO_EHDR: 0x", hex }, [AT_RANDOM - 2] = { "RANDOM: 0x", hex }, [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex }, + [AT_HWCAP3 - 2] = { "HWCAP3: 0x", hex }, + [AT_HWCAP4 - 2] = { "HWCAP4: 0x", hex }, [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec }, [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec }, [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex }, diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c index 8e8a5ec2ea..a947d62db6 100644 --- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c +++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c @@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[]) which are set by __tcb_parse_hwcap_and_convert_at_platform. */ cpu_features->hwcap = hwcaps[0]; cpu_features->hwcap2 = hwcaps[1]; + cpu_features->hwcap3 = hwcaps[2]; + cpu_features->hwcap4 = hwcaps[3]; /* Default is to use aligned memory access on optimized function unless tunables is enable, since for this case user can explicit disable unaligned optimizations. 
*/ diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h index 1294f0b601..e9eb6a13c8 100644 --- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h +++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h @@ -26,6 +26,8 @@ struct cpu_features bool use_cached_memopt; unsigned long int hwcap; unsigned long int hwcap2; + unsigned long int hwcap3; + unsigned long int hwcap4; }; static const char hwcap_names[] = { diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c index a4705daf1c..6a00cd88cd 100644 --- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c +++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c @@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv, case AT_HWCAP2: _dl_hwcap2 = (unsigned long int) av->a_un.a_val; break; + case AT_HWCAP3: + _dl_hwcap3 = (unsigned long int) av->a_un.a_val; + break; + case AT_HWCAP4: + _dl_hwcap4 = (unsigned long int) av->a_un.a_val; + break; case AT_PLATFORM: _dl_platform = (void *) av->a_un.a_val; break; -- 2.39.3
--- sysdeps/loongarch/fpu/e_scalbf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c index 9f05485236..7c0395fbb5 100644 --- a/sysdeps/loongarch/fpu/e_scalbf.c +++ b/sysdeps/loongarch/fpu/e_scalbf.c @@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn) return x; } -libm_alias_finite (__ieee754_scalb, __scalb) +libm_alias_finite (__ieee754_scalbf, __scalbf) -- 2.39.3
On Fri, Mar 8, 2024 at 2:09 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Sat, Feb 24, 2024 at 11:01 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > Add APX registers to STATE_SAVE_MASK so that APX registers are saved in > > > ld.so trampoline. This fixes BZ #31371. > > > > > > Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will > > > be used by i386 _dl_tlsdesc_dynamic. > > > --- > > > sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++----- > > > 1 file changed, 46 insertions(+), 6 deletions(-) > > > > > > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h > > > index 85d0a8c943..837fd28734 100644 > > > --- a/sysdeps/x86/sysdep.h > > > +++ b/sysdeps/x86/sysdep.h > > > @@ -21,14 +21,54 @@ > > > > > > #include <sysdeps/generic/sysdep.h> > > > > > > +/* The extended state feature IDs in the state component bitmap. */ > > > +#define X86_XSTATE_X87_ID 0 > > > +#define X86_XSTATE_SSE_ID 1 > > > +#define X86_XSTATE_AVX_ID 2 > > > +#define X86_XSTATE_BNDREGS_ID 3 > > > +#define X86_XSTATE_BNDCFG_ID 4 > > > +#define X86_XSTATE_K_ID 5 > > > +#define X86_XSTATE_ZMM_H_ID 6 > > > +#define X86_XSTATE_ZMM_ID 7 > > > +#define X86_XSTATE_PKRU_ID 9 > > > +#define X86_XSTATE_TILECFG_ID 17 > > > +#define X86_XSTATE_TILEDATA_ID 18 > > > +#define X86_XSTATE_APX_F_ID 19 > > > + > > > +#ifdef __x86_64__ > > > /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need > > > space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be > > > - aligned to 16 bytes for fxsave and 64 bytes for xsave. */ > > > -#define STATE_SAVE_OFFSET (8 * 7 + 8) > > > - > > > -/* Save SSE, AVX, AVX512, mask and bound registers. */ > > > -#define STATE_SAVE_MASK \ > > > - ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) > > > + aligned to 16 bytes for fxsave and 64 bytes for xsave. 
> > > + > > > + NB: It is non-zero because of the 128-byte red-zone. Some registers > > > + are saved on stack without adjusting stack pointer first. When we > > > + update stack pointer to allocate more space, we need to take the > > > + red-zone into account. */ > > > +# define STATE_SAVE_OFFSET (8 * 7 + 8) > > > + > > > +/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX > > > + registers are mutually exclusive. */ > > > +# define STATE_SAVE_MASK \ > > > + ((1 << X86_XSTATE_SSE_ID) \ > > > + | (1 << X86_XSTATE_AVX_ID) \ > > > + | (1 << X86_XSTATE_BNDREGS_ID) \ > > > + | (1 << X86_XSTATE_K_ID) \ > > > + | (1 << X86_XSTATE_ZMM_H_ID) \ > > > + | (1 << X86_XSTATE_ZMM_ID) \ > > > + | (1 << X86_XSTATE_APX_F_ID)) > > > +#else > > > +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 > > > + doesn't have red-zone, use 0 here. */ > > > +# define STATE_SAVE_OFFSET 0 > > > + > > > +/* Save SSE, AVX, AVX512, mask and bound registers. */ > > > +# define STATE_SAVE_MASK \ > > > + ((1 << X86_XSTATE_SSE_ID) \ > > > + | (1 << X86_XSTATE_AVX_ID) \ > > > + | (1 << X86_XSTATE_BNDREGS_ID) \ > > > + | (1 << X86_XSTATE_K_ID) \ > > > + | (1 << X86_XSTATE_ZMM_H_ID)) > > > +#endif > > > > > > /* Constants for bits in __x86_string_control: */ > > > > > > -- > > > 2.43.0 > > > > > > > > > LGTM. > > Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com> > > OK to backport it to release branches? > > Thanks. Okay with it, but imo this type of system level change should have a bit longer being tested in the real world before backport. Maybe give it another week first? > > -- > H.J.
On Tue, Feb 27, 2024 at 9:00 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Mon, Feb 26, 2024 at 1:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Feb 26, 2024 at 11:02 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Mon, Feb 26, 2024 at 1:02 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > On Mon, Feb 26, 2024 at 8:37 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > Compiler generates the following instruction sequence for GNU2 dynamic
> > > > > TLS access:
> > > > >
> > > > > leaq tls_var@TLSDESC(%rip), %rax
> > > > > call *tls_var@TLSCALL(%rax)
> > > > >
> > > > > or
> > > > >
> > > > > leal tls_var@TLSDESC(%ebx), %eax
> > > > > call *tls_var@TLSCALL(%eax)
> > > > >
> > > > > CALL instruction is transparent to compiler which assumes all registers,
> > > > > except for EFLAGS and RAX/EAX, are unchanged after CALL. When
> > > > > _dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow
> > > > > path. __tls_get_addr is a normal function which doesn't preserve any
> > > > > caller-saved registers. _dl_tlsdesc_dynamic saved and restored integer
> > > > > caller-saved registers, but didn't preserve any other caller-saved
> > > > > registers. Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE,
> > > > > XSAVE and XSAVEC to save and restore all caller-saved registers. This
> > > > > fixes BZ #31372.
> > > > >
> > > > > Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic)
> > > > > to optimize elf_machine_runtime_setup.
> > > > > ---
> > > > > elf/Makefile | 18 ++
> > > > > elf/tst-gnu2-tls2.c | 122 ++++++++++++
> > > > > elf/tst-gnu2-tls2.h | 36 ++++
> > > > > elf/tst-gnu2-tls2mod0.c | 31 +++
> > > > > elf/tst-gnu2-tls2mod1.c | 31 +++
> > > > > elf/tst-gnu2-tls2mod2.c | 31 +++
> > > > > sysdeps/i386/dl-machine.h | 2 +-
> > > > > sysdeps/i386/dl-tlsdesc-dynamic.h | 190 +++++++++++++++++++
> > > > > sysdeps/i386/dl-tlsdesc.S | 115 +++++------
> > > > > sysdeps/x86/Makefile | 7 +-
> > > > > sysdeps/x86/cpu-features.c | 56 +++++-
> > > > > sysdeps/x86/dl-procinfo.c | 16 ++
> > > > > sysdeps/{x86_64 => x86}/features-offsets.sym | 2 +
> > > > > sysdeps/x86/sysdep.h | 6 +
> > > > > sysdeps/x86/tst-gnu2-tls2.c | 20 ++
> > > > > sysdeps/x86_64/Makefile | 2 +-
> > > > > sysdeps/x86_64/dl-machine.h | 19 +-
> > > > > sysdeps/x86_64/dl-procinfo.c | 16 ++
> > > > > sysdeps/x86_64/dl-tlsdesc-dynamic.h | 166 ++++++++++++++++
> > > > > sysdeps/x86_64/dl-tlsdesc.S | 108 ++++-------
> > > > > sysdeps/x86_64/dl-trampoline-save.h | 34 ++++
> > > > > sysdeps/x86_64/dl-trampoline-state.h | 51 +++++
> > > > > sysdeps/x86_64/dl-trampoline.S | 20 +-
> > > > > sysdeps/x86_64/dl-trampoline.h | 34 +---
> > > > > 24 files changed, 920 insertions(+), 213 deletions(-)
> > > > > create mode 100644 elf/tst-gnu2-tls2.c
> > > > > create mode 100644 elf/tst-gnu2-tls2.h
> > > > > create mode 100644 elf/tst-gnu2-tls2mod0.c
> > > > > create mode 100644 elf/tst-gnu2-tls2mod1.c
> > > > > create mode 100644 elf/tst-gnu2-tls2mod2.c
> > > > > create mode 100644 sysdeps/i386/dl-tlsdesc-dynamic.h
> > > > > rename sysdeps/{x86_64 => x86}/features-offsets.sym (89%)
> > > > > create mode 100644 sysdeps/x86/tst-gnu2-tls2.c
> > > > > create mode 100644 sysdeps/x86_64/dl-tlsdesc-dynamic.h
> > > > > create mode 100644 sysdeps/x86_64/dl-trampoline-save.h
> > > > > create mode 100644 sysdeps/x86_64/dl-trampoline-state.h
> > > > >
> > > > > diff --git a/elf/Makefile b/elf/Makefile
> > > > > index 36c04baf02..02dc476e27 100644
> > > > > --- a/elf/Makefile
> > > > > +++ b/elf/Makefile
> > > > > @@ -424,6 +424,7 @@ tests += \
> > > > > tst-glibc-hwcaps-prepend \
> > > > > tst-global1 \
> > > > > tst-global2 \
> > > > > + tst-gnu2-tls2 \
> > > > > tst-initfinilazyfail \
> > > > > tst-initorder \
> > > > > tst-initorder2 \
> > > > > @@ -846,6 +847,9 @@ modules-names += \
> > > > > tst-filterobj-flt \
> > > > > tst-finilazyfailmod \
> > > > > tst-globalmod2 \
> > > > > + tst-gnu2-tls2mod0 \
> > > > > + tst-gnu2-tls2mod1 \
> > > > > + tst-gnu2-tls2mod2 \
> > > > > tst-initlazyfailmod \
> > > > > tst-initorder2a \
> > > > > tst-initorder2b \
> > > > > @@ -3044,8 +3048,22 @@ $(objpfx)tst-tlsgap.out: \
> > > > > $(objpfx)tst-tlsgap-mod0.so \
> > > > > $(objpfx)tst-tlsgap-mod1.so \
> > > > > $(objpfx)tst-tlsgap-mod2.so
> > > > > +
> > > > > +$(objpfx)tst-gnu2-tls2: $(shared-thread-library)
> > > > > +$(objpfx)tst-gnu2-tls2.out: \
> > > > > + $(objpfx)tst-gnu2-tls2mod0.so \
> > > > > + $(objpfx)tst-gnu2-tls2mod1.so \
> > > > > + $(objpfx)tst-gnu2-tls2mod2.so
> > > > > +
> > > > > ifeq (yes,$(have-mtls-dialect-gnu2))
> > > > > > +# This test fails if _dl_tlsdesc_dynamic doesn't preserve all caller-saved
> > > > > +# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
> > > > > +test-xfail-tst-gnu2-tls2 = yes
> > > > > +
> > > > > CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
> > > > > CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
> > > > > CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
> > > > > +CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
> > > > > +CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
> > > > > +CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
> > > > > endif
> > > > > diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c
> > > > > new file mode 100644
> > > > > index 0000000000..7ac04d7f33
> > > > > --- /dev/null
> > > > > +++ b/elf/tst-gnu2-tls2.c
> > > > > @@ -0,0 +1,122 @@
> > > > > +/* Test TLSDESC relocation.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <http://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#include <stdio.h>
> > > > > +#include <stdlib.h>
> > > > > +#include <string.h>
> > > > > +#include <dlfcn.h>
> > > > > +#include <pthread.h>
> > > > > +#include <support/xdlfcn.h>
> > > > > +#include <support/xthread.h>
> > > > > +#include <support/check.h>
> > > > > +#include <support/test-driver.h>
> > > > > +#include "tst-gnu2-tls2.h"
> > > > > +
> > > > > +#ifndef IS_SUPPORTED
> > > > > +# define IS_SUPPORTED() true
> > > > > +#endif
> > > > > +
> > > > > +/* An architecture can define it to clobber caller-saved registers in
> > > > > + malloc below to verify that the implicit TLSDESC call won't change
> > > > > + caller-saved registers. */
> > > > > +#ifndef PREPARE_MALLOC
> > > > > +# define PREPARE_MALLOC()
> > > > > +#endif
> > > > > +
> > > > > +extern void * __libc_malloc (size_t);
> > > > > +
> > > > > +size_t malloc_counter = 0;
> > > > > +
> > > > > +void *
> > > > > +malloc (size_t n)
> > > > > +{
> > > > > + PREPARE_MALLOC ();
> > > > > + malloc_counter++;
> > > > > + return __libc_malloc (n);
> > > > > +}
> > > > > +
> > > > > +static void *mod[3];
> > > > > +#ifndef MOD
> > > > > +# define MOD(i) "tst-gnu2-tls2mod" #i ".so"
> > > > > +#endif
> > > > > +static const char *modname[3] = { MOD(0), MOD(1), MOD(2) };
> > > > > +#undef MOD
> > > > > +
> > > > > +static void
> > > > > +open_mod (int i)
> > > > > +{
> > > > > + mod[i] = xdlopen (modname[i], RTLD_LAZY);
> > > > > + printf ("open %s\n", modname[i]);
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +close_mod (int i)
> > > > > +{
> > > > > + xdlclose (mod[i]);
> > > > > + mod[i] = NULL;
> > > > > + printf ("close %s\n", modname[i]);
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +access_mod (int i, const char *sym)
> > > > > +{
> > > > > + struct tls var = { -1, -1, -1, -1 };
> > > > > + struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym);
> > > > > + /* Check that our malloc is called. */
> > > > > + malloc_counter = 0;
> > > > > + struct tls *p = f (&var);
> > > > > + TEST_VERIFY (malloc_counter != 0);
> > > > > + printf ("access %s: %s() = %p\n", modname[i], sym, p);
> > > > > + TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
> > > > > + ++(p->a);
> > > > > +}
> > > > > +
> > > > > +static void *
> > > > > +start (void *arg)
> > > > > +{
> > > > > + /* The DTV generation is at the last dlopen of mod0 and the
> > > > > + entry for mod1 is NULL. */
> > > > > +
> > > > > + open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS. */
> > > > > +
> > > > > + /* Force the slow path in GNU2 TLS descriptor call. */
> > > > > + access_mod (1, "apply_tls");
> > > > > +
> > > > > + return arg;
> > > > > +}
> > > > > +
> > > > > +static int
> > > > > +do_test (void)
> > > > > +{
> > > > > + if (!IS_SUPPORTED ())
> > > > > + return EXIT_UNSUPPORTED;
> > > > > +
> > > > > + open_mod (0);
> > > > > + open_mod (1);
> > > > > + open_mod (2);
> > > > > + close_mod (0);
> > > > > + close_mod (1); /* Create modid gap at mod1. */
> > > > > + open_mod (0); /* Reuse modid of mod0, bump generation count. */
> > > > > +
> > > > > + /* Create a thread where DTV of mod1 is NULL. */
> > > > > + pthread_t t = xpthread_create (NULL, start, NULL);
> > > > > + xpthread_join (t);
> > > > > + return 0;
> > > > > +}
> > > > > +
> > > > > +#include <support/test-driver.c>
> > > > > diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
> > > > > new file mode 100644
> > > > > index 0000000000..77964a57a3
> > > > > --- /dev/null
> > > > > +++ b/elf/tst-gnu2-tls2.h
> > > > > @@ -0,0 +1,36 @@
> > > > > +/* Test TLSDESC relocation.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +
> > > > > +struct tls
> > > > > +{
> > > > > + int64_t a, b, c, d;
> > > > > +};
> > > > > +
> > > > > +extern struct tls *apply_tls (struct tls *);
> > > > > +
> > > > > +/* An architecture can define these to verify that caller-saved
> > > > > + registers aren't clobbered by the implicit TLSDESC call. */
> > > > > +#ifndef BEFORE_TLSDESC_CALL
> > > > > +# define BEFORE_TLSDESC_CALL()
> > > > > +#endif
> > > > > +
> > > > > +#ifndef AFTER_TLSDESC_CALL
> > > > > +# define AFTER_TLSDESC_CALL()
> > > > > +#endif
> > > > > diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
> > > > > new file mode 100644
> > > > > index 0000000000..45556a0e17
> > > > > --- /dev/null
> > > > > +++ b/elf/tst-gnu2-tls2mod0.c
> > > > > @@ -0,0 +1,31 @@
> > > > > +/* DSO used by tst-gnu2-tls2.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#include "tst-gnu2-tls2.h"
> > > > > +
> > > > > +__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
> > > > > +
> > > > > +struct tls *
> > > > > +apply_tls (struct tls *p)
> > > > > +{
> > > > > + BEFORE_TLSDESC_CALL ();
> > > > > + tls_var0 = *p;
> > > > > + struct tls *ret = &tls_var0;
> > > > > + AFTER_TLSDESC_CALL ();
> > > > > + return ret;
> > > > > +}
> > > > > diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
> > > > > new file mode 100644
> > > > > index 0000000000..e10b9dbc0a
> > > > > --- /dev/null
> > > > > +++ b/elf/tst-gnu2-tls2mod1.c
> > > > > @@ -0,0 +1,31 @@
> > > > > +/* DSO used by tst-gnu2-tls2.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#include "tst-gnu2-tls2.h"
> > > > > +
> > > > > +__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
> > > > > +
> > > > > +struct tls *
> > > > > +apply_tls (struct tls *p)
> > > > > +{
> > > > > + BEFORE_TLSDESC_CALL ();
> > > > > + tls_var1[1] = *p;
> > > > > + struct tls *ret = &tls_var1[1];
> > > > > + AFTER_TLSDESC_CALL ();
> > > > > + return ret;
> > > > > +}
> > > > > diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
> > > > > new file mode 100644
> > > > > index 0000000000..141af51e55
> > > > > --- /dev/null
> > > > > +++ b/elf/tst-gnu2-tls2mod2.c
> > > > > @@ -0,0 +1,31 @@
> > > > > +/* DSO used by tst-gnu2-tls2.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#include "tst-gnu2-tls2.h"
> > > > > +
> > > > > +__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
> > > > > +
> > > > > +struct tls *
> > > > > +apply_tls (struct tls *p)
> > > > > +{
> > > > > + BEFORE_TLSDESC_CALL ();
> > > > > + tls_var2 = *p;
> > > > > + struct tls *ret = &tls_var2;
> > > > > + AFTER_TLSDESC_CALL ();
> > > > > + return ret;
> > > > > +}
> > > > > diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
> > > > > index fc1ef96587..50d74fe6e9 100644
> > > > > --- a/sysdeps/i386/dl-machine.h
> > > > > +++ b/sysdeps/i386/dl-machine.h
> > > > > @@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n",
> > > > > {
> > > > > td->arg = _dl_make_tlsdesc_dynamic
> > > > > (sym_map, sym->st_value + (ElfW(Word))td->arg);
> > > > > - td->entry = _dl_tlsdesc_dynamic;
> > > > > + td->entry = GLRO(dl_x86_tlsdesc_dynamic);
> > > > > }
> > > > > else
> > > > > # endif
> > > > > diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
> > > > > new file mode 100644
> > > > > index 0000000000..3627028577
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
> > > > > @@ -0,0 +1,190 @@
> > > > > +/* Thread-local storage handling in the ELF dynamic linker. i386 version.
> > > > > + Copyright (C) 2004-2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#undef REGISTER_SAVE_AREA
> > > > > +
> > > > > +#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
> > > > > +# error STATE_SAVE_ALIGNMENT must be multiple of 16
> > > > > +#endif
> > > > > +
> > > > > +#if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > +# ifdef USE_FNSAVE
> > > > > +# error USE_FNSAVE shouldn't be defined
> > > > > +# endif
> > > > > +# ifdef USE_FXSAVE
> > > > > +/* Use fxsave to save all registers. */
> > > > > +# define REGISTER_SAVE_AREA 512
> > > > > +# endif
> > > > > +#else
> > > > > +# ifdef USE_FNSAVE
> > > > > +/* Use fnsave to save x87 FPU stack registers. */
> > > > > +# define REGISTER_SAVE_AREA 108
> > > > > +# else
> > > > > +# ifndef USE_FXSAVE
> > > > > +# error USE_FXSAVE must be defined
> > > > > +# endif
> > > > > +/* Use fxsave to save all registers. Add 12 bytes to align the stack
> > > > > + to 16 bytes. */
> > > > > +# define REGISTER_SAVE_AREA (512 + 12)
> > > > > +# endif
> > > > > +#endif
> > > > > +
> > > > > + .hidden _dl_tlsdesc_dynamic
> > > > > + .global _dl_tlsdesc_dynamic
> > > > > + .type _dl_tlsdesc_dynamic,@function
> > > > > +
> > > > > + /* This function is used for symbols that need dynamic TLS.
> > > > > +
> > > > > + %eax points to the TLS descriptor, such that 0(%eax) points to
> > > > > + _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
> > > > > + tlsdesc_dynamic_arg object. It must return in %eax the offset
> > > > > + between the thread pointer and the object denoted by the
> > > > > + argument, without clobbering any registers.
> > > > > +
> > > > > + The assembly code that follows is a rendition of the following
> > > > > + C code, hand-optimized a little bit.
> > > > > +
> > > > > +ptrdiff_t
> > > > > +__attribute__ ((__regparm__ (1)))
> > > > > +_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> > > > > +{
> > > > > + struct tlsdesc_dynamic_arg *td = tdp->arg;
> > > > > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
> > > > > + if (__builtin_expect (td->gen_count <= dtv[0].counter
> > > > > + && (dtv[td->tlsinfo.ti_module].pointer.val
> > > > > + != TLS_DTV_UNALLOCATED),
> > > > > + 1))
> > > > > + return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> > > > > + - __thread_pointer;
> > > > > +
> > > > > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> > > > > +}
> > > > > +*/
> > > > > + cfi_startproc
> > > > > + .align 16
> > > > > +_dl_tlsdesc_dynamic:
> > > > > + /* Like all TLS resolvers, preserve call-clobbered registers.
> > > > > + We need two scratch regs anyway. */
> > > > > + subl $32, %esp
> > > > > + cfi_adjust_cfa_offset (32)
> > > > > + movl %ecx, 20(%esp)
> > > > > + movl %edx, 24(%esp)
> > > > > + movl TLSDESC_ARG(%eax), %eax
> > > > > + movl %gs:DTV_OFFSET, %edx
> > > > > + movl TLSDESC_GEN_COUNT(%eax), %ecx
> > > > > + cmpl (%edx), %ecx
> > > > > + ja 2f
> > > > > + movl TLSDESC_MODID(%eax), %ecx
> > > > > + movl (%edx,%ecx,8), %edx
> > > > > + cmpl $-1, %edx
> > > > > + je 2f
> > > > > + movl TLSDESC_MODOFF(%eax), %eax
> > > > > + addl %edx, %eax
> > > > > +1:
> > > > > + movl 20(%esp), %ecx
> > > > > + subl %gs:0, %eax
> > > > > + movl 24(%esp), %edx
> > > > > + addl $32, %esp
> > > > > + cfi_adjust_cfa_offset (-32)
> > > > > + ret
> > > > > + .p2align 4,,7
> > > > > +2:
> > > > > + cfi_adjust_cfa_offset (32)
> > > > > +#if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > + movl %ebx, -28(%esp)
> > > > > + movl %esp, %ebx
> > > > > + cfi_def_cfa_register(%ebx)
> > > > > + and $-STATE_SAVE_ALIGNMENT, %esp
> > > > > +#endif
> > > > > +#ifdef REGISTER_SAVE_AREA
> > > > > + subl $REGISTER_SAVE_AREA, %esp
> > > > > +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
> > > > > +# endif
> > > > > +#else
> > > > > +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > +# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
> > > > > +# endif
> > > > > + /* Allocate stack space of the required size to save the state. */
> > > > > + LOAD_PIC_REG (cx)
> > > > > + subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
> > > > > +#endif
> > > > > +#ifdef USE_FNSAVE
> > > > > + fnsave (%esp)
> > > > > +#elif defined USE_FXSAVE
> > > > > + fxsave (%esp)
> > > > > +#else
> > > > > + /* Save the argument for ___tls_get_addr in EAX. */
> > > > > + movl %eax, %ecx
> > > > > + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
> > > > > + xorl %edx, %edx
> > > > > + /* Clear the XSAVE Header. */
> > > > > +# ifdef USE_XSAVE
> > > > > + movl %edx, (512)(%esp)
> > > > > + movl %edx, (512 + 4 * 1)(%esp)
> > > > > + movl %edx, (512 + 4 * 2)(%esp)
> > > > > + movl %edx, (512 + 4 * 3)(%esp)
> > > > > +# endif
> > > > > + movl %edx, (512 + 4 * 4)(%esp)
> > > > > + movl %edx, (512 + 4 * 5)(%esp)
> > > > > + movl %edx, (512 + 4 * 6)(%esp)
> > > > > + movl %edx, (512 + 4 * 7)(%esp)
> > > > > + movl %edx, (512 + 4 * 8)(%esp)
> > > > > + movl %edx, (512 + 4 * 9)(%esp)
> > > > > + movl %edx, (512 + 4 * 10)(%esp)
> > > > > + movl %edx, (512 + 4 * 11)(%esp)
> > > > > + movl %edx, (512 + 4 * 12)(%esp)
> > > > > + movl %edx, (512 + 4 * 13)(%esp)
> > > > > + movl %edx, (512 + 4 * 14)(%esp)
> > > > > + movl %edx, (512 + 4 * 15)(%esp)
> > > > > +# ifdef USE_XSAVE
> > > > > + xsave (%esp)
> > > > > +# else
> > > > > + xsavec (%esp)
> > > > > +# endif
> > > > > + /* Restore the argument for ___tls_get_addr in EAX. */
> > > > > + movl %ecx, %eax
> > > > > +#endif
> > > > > + call HIDDEN_JUMPTARGET (___tls_get_addr)
> > > > > + /* Get register content back. */
> > > > > +#ifdef USE_FNSAVE
> > > > > + frstor (%esp)
> > > > > +#elif defined USE_FXSAVE
> > > > > + fxrstor (%esp)
> > > > > +#else
> > > > > + /* Save and restore ___tls_get_addr return value stored in EAX. */
> > > > > + movl %eax, %ecx
> > > > > + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
> > > > > + xorl %edx, %edx
> > > > > + xrstor (%esp)
> > > > > + movl %ecx, %eax
> > > > > +#endif
> > > > > +#if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > + mov %ebx, %esp
> > > > > + cfi_def_cfa_register(%esp)
> > > > > + movl -28(%esp), %ebx
> > > > > + cfi_restore(%ebx)
> > > > > +#else
> > > > > + addl $REGISTER_SAVE_AREA, %esp
> > > > > + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
> > > > > +#endif
> > > > > + jmp 1b
> > > > > + cfi_endproc
> > > > > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> > > > > +
> > > > > +#undef STATE_SAVE_ALIGNMENT
> > > > > diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
> > > > > index 90d93caa0c..f002feee56 100644
> > > > > --- a/sysdeps/i386/dl-tlsdesc.S
> > > > > +++ b/sysdeps/i386/dl-tlsdesc.S
> > > > > @@ -18,8 +18,27 @@
> > > > >
> > > > > #include <sysdep.h>
> > > > > #include <tls.h>
> > > > > +#include <cpu-features-offsets.h>
> > > > > +#include <features-offsets.h>
> > > > > #include "tlsdesc.h"
> > > > >
> > > > > +#ifndef DL_STACK_ALIGNMENT
> > > > > +/* Due to GCC bug:
> > > > > +
> > > > > + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
> > > > > +
> > > > > + __tls_get_addr may be called with 4-byte stack alignment. Although
> > > > > + this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
> > > > > + that stack will be always aligned at 16 bytes. */
> > > > > +# define DL_STACK_ALIGNMENT 4
> > > > > +#endif
> > > > > +
> > > > > +/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
> > > > > + stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */
> > > > > +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
> > > > > + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
> > > > > + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
> > > > > +
> > > > > .text
> > > > >
> > > > > /* This function is used to compute the TP offset for symbols in
> > > > > @@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak:
> > > > > .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> > > > >
> > > > > #ifdef SHARED
> > > > > - .hidden _dl_tlsdesc_dynamic
> > > > > - .global _dl_tlsdesc_dynamic
> > > > > - .type _dl_tlsdesc_dynamic,@function
> > > > > -
> > > > > - /* This function is used for symbols that need dynamic TLS.
> > > > > -
> > > > > - %eax points to the TLS descriptor, such that 0(%eax) points to
> > > > > - _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
> > > > > - tlsdesc_dynamic_arg object. It must return in %eax the offset
> > > > > - between the thread pointer and the object denoted by the
> > > > > - argument, without clobbering any registers.
> > > > > -
> > > > > - The assembly code that follows is a rendition of the following
> > > > > - C code, hand-optimized a little bit.
> > > > > -
> > > > > -ptrdiff_t
> > > > > -__attribute__ ((__regparm__ (1)))
> > > > > -_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> > > > > -{
> > > > > - struct tlsdesc_dynamic_arg *td = tdp->arg;
> > > > > - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
> > > > > - if (__builtin_expect (td->gen_count <= dtv[0].counter
> > > > > - && (dtv[td->tlsinfo.ti_module].pointer.val
> > > > > - != TLS_DTV_UNALLOCATED),
> > > > > - 1))
> > > > > - return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> > > > > - - __thread_pointer;
> > > > > -
> > > > > - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> > > > > -}
> > > > > -*/
> > > > > - cfi_startproc
> > > > > - .align 16
> > > > > -_dl_tlsdesc_dynamic:
> > > > > - /* Like all TLS resolvers, preserve call-clobbered registers.
> > > > > - We need two scratch regs anyway. */
> > > > > - subl $28, %esp
> > > > > - cfi_adjust_cfa_offset (28)
> > > > > - movl %ecx, 20(%esp)
> > > > > - movl %edx, 24(%esp)
> > > > > - movl TLSDESC_ARG(%eax), %eax
> > > > > - movl %gs:DTV_OFFSET, %edx
> > > > > - movl TLSDESC_GEN_COUNT(%eax), %ecx
> > > > > - cmpl (%edx), %ecx
> > > > > - ja .Lslow
> > > > > - movl TLSDESC_MODID(%eax), %ecx
> > > > > - movl (%edx,%ecx,8), %edx
> > > > > - cmpl $-1, %edx
> > > > > - je .Lslow
> > > > > - movl TLSDESC_MODOFF(%eax), %eax
> > > > > - addl %edx, %eax
> > > > > -.Lret:
> > > > > - movl 20(%esp), %ecx
> > > > > - subl %gs:0, %eax
> > > > > - movl 24(%esp), %edx
> > > > > - addl $28, %esp
> > > > > - cfi_adjust_cfa_offset (-28)
> > > > > - ret
> > > > > - .p2align 4,,7
> > > > > -.Lslow:
> > > > > - cfi_adjust_cfa_offset (28)
> > > > > - call HIDDEN_JUMPTARGET (___tls_get_addr)
> > > > > - jmp .Lret
> > > > > - cfi_endproc
> > > > > - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> > > > > +# define USE_FNSAVE
> > > > > +# define MINIMUM_ALIGNMENT 4
> > > > > +# define STATE_SAVE_ALIGNMENT 4
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fnsave
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef MINIMUM_ALIGNMENT
> > > > > +# undef USE_FNSAVE
> > > > > +
> > > > > +# define MINIMUM_ALIGNMENT 16
> > > > > +
> > > > > +# define USE_FXSAVE
> > > > > +# define STATE_SAVE_ALIGNMENT 16
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef USE_FXSAVE
> > > > > +
> > > > > +# define USE_XSAVE
> > > > > +# define STATE_SAVE_ALIGNMENT 64
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef USE_XSAVE
> > > > > +
> > > > > +# define USE_XSAVEC
> > > > > +# define STATE_SAVE_ALIGNMENT 64
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef USE_XSAVEC
> > > > > #endif /* SHARED */
> > > > > diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
> > > > > index 73b29cc78c..5311b594af 100644
> > > > > --- a/sysdeps/x86/Makefile
> > > > > +++ b/sysdeps/x86/Makefile
> > > > > @@ -1,5 +1,5 @@
> > > > > ifeq ($(subdir),csu)
> > > > > -gen-as-const-headers += cpu-features-offsets.sym
> > > > > +gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym
> > > > > endif
> > > > >
> > > > > ifeq ($(subdir),elf)
> > > > > @@ -86,6 +86,11 @@ endif
> > > > > tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F
> > > > > tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
> > > > > tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
> > > > > +
> > > > > +CFLAGS-tst-gnu2-tls2.c += -msse
> > > > > +CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
> > > > > +CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
> > > > > +CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
> > > > > endif
> > > > >
> > > > > ifeq ($(subdir),math)
> > > > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > > > index 25e6622a79..835113b42f 100644
> > > > > --- a/sysdeps/x86/cpu-features.c
> > > > > +++ b/sysdeps/x86/cpu-features.c
> > > > > @@ -27,8 +27,13 @@
> > > > > extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> > > > > attribute_hidden;
> > > > >
> > > > > -#if defined SHARED && defined __x86_64__
> > > > > -# include <dl-plt-rewrite.h>
> > > > > +#if defined SHARED
> > > > > +extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
> > > > > +extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
> > > > > +extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;
> > > > > +
> > > > > +# ifdef __x86_64__
> > > > > +# include <dl-plt-rewrite.h>
> > > > >
> > > > > static void
> > > > > TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > > > @@ -47,6 +52,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > > > : plt_rewrite_jmp);
> > > > > }
> > > > > }
> > > > > +# else
> > > > > +extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
> > > > > +# endif
> > > > > +#endif
> > > > > +
> > > > > +#ifdef __x86_64__
> > > > > +extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
> > > > > +extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
> > > > > +extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
> > > > > #endif
> > > > >
> > > > > #ifdef __LP64__
> > > > > @@ -1130,6 +1144,44 @@ no_cpuid:
> > > > > TUNABLE_CALLBACK (set_x86_shstk));
> > > > > #endif
> > > > >
> > > > > + if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> > > > > + {
> > > > > + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> > > > > + {
> > > > > +#ifdef __x86_64__
> > > > > + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
> > > > > +#endif
> > > > > +#ifdef SHARED
> > > > > + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
> > > > > +#endif
> > > > > + }
> > > > > + else
> > > > > + {
> > > > > +#ifdef __x86_64__
> > > > > + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
> > > > > +#endif
> > > > > +#ifdef SHARED
> > > > > + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
> > > > > +#endif
> > > > > + }
> > > > > + }
> > > > > + else
> > > > > + {
> > > > > +#ifdef __x86_64__
> > > > > + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
> > > > > +# ifdef SHARED
> > > > > + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
> > > > > +# endif
> > > > > +#else
> > > > > +# ifdef SHARED
> > > > > + if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
> > > > > + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
> > > > > + else
> > > > > + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
> > > > > +# endif
> > > > > +#endif
> > > > > + }
> > > > > +
> > > > > #ifdef SHARED
> > > > > # ifdef __x86_64__
> > > > > TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > > > > diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c
> > > > > index ee957b4d70..5920d4b320 100644
> > > > > --- a/sysdeps/x86/dl-procinfo.c
> > > > > +++ b/sysdeps/x86/dl-procinfo.c
> > > > > @@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9]
> > > > > #else
> > > > > ,
> > > > > #endif
> > > > > +
> > > > > +#if defined SHARED && !IS_IN (ldconfig)
> > > > > +# if !defined PROCINFO_DECL
> > > > > + ._dl_x86_tlsdesc_dynamic
> > > > > +# else
> > > > > +PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic
> > > > > +# endif
> > > > > +# ifndef PROCINFO_DECL
> > > > > += NULL
> > > > > +# endif
> > > > > +# ifdef PROCINFO_DECL
> > > > > +;
> > > > > +# else
> > > > > +,
> > > > > +# endif
> > > > > +#endif
> > > > > diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86/features-offsets.sym
> > > > > similarity index 89%
> > > > > rename from sysdeps/x86_64/features-offsets.sym
> > > > > rename to sysdeps/x86/features-offsets.sym
> > > > > index 9e4be3393a..77e990c705 100644
> > > > > --- a/sysdeps/x86_64/features-offsets.sym
> > > > > +++ b/sysdeps/x86/features-offsets.sym
> > > > > @@ -3,4 +3,6 @@
> > > > > #include <ldsodefs.h>
> > > > >
> > > > > RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
> > > > > +#ifdef __x86_64__
> > > > > RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)
> > > > > +#endif
> > > > > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > > > > index 837fd28734..485cad9c02 100644
> > > > > --- a/sysdeps/x86/sysdep.h
> > > > > +++ b/sysdeps/x86/sysdep.h
> > > > > @@ -70,6 +70,12 @@
> > > > > | (1 << X86_XSTATE_ZMM_H_ID))
> > > > > #endif
> > > > >
> > > > > +/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
> > > > > + Compiler assumes that all registers, including x87 FPU stack registers,
> > > > > + are unchanged after CALL, except for EFLAGS and RAX/EAX. */
> > > > > +#define TLSDESC_CALL_STATE_SAVE_MASK \
> > > > > + (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
> > > > > +
> > > > > /* Constants for bits in __x86_string_control: */
> > > > >
> > > > > /* Avoid short distance REP MOVSB. */
> > > > > diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
> > > > > new file mode 100644
> > > > > index 0000000000..de900a423b
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86/tst-gnu2-tls2.c
> > > > > @@ -0,0 +1,20 @@
> > > > > +#ifndef __x86_64__
> > > > > +#include <sys/platform/x86.h>
> > > > > +
> > > > > +#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
> > > > > +#endif
> > > > > +
> > > > > +/* Clear XMM0...XMM7 */
> > > > > +#define PREPARE_MALLOC() \
> > > > > +{ \
> > > > > + asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \
> > > > > + asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \
> > > > > + asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \
> > > > > + asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \
> > > > > + asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \
> > > > > + asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \
> > > > > + asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \
> > > > > + asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \
> > > > > +}
> > > > > +
> > > > > +#include <elf/tst-gnu2-tls2.c>
> > > > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > > > > index 145f25e7f6..9337e95093 100644
> > > > > --- a/sysdeps/x86_64/Makefile
> > > > > +++ b/sysdeps/x86_64/Makefile
> > > > > @@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt
> > > > > endif
> > > > >
> > > > > ifeq ($(subdir),csu)
> > > > > -gen-as-const-headers += features-offsets.sym link-defines.sym
> > > > > +gen-as-const-headers += link-defines.sym
> > > > > endif
> > > > >
> > > > > ifeq ($(subdir),gmon)
> > > > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > > > > index 6d605d0d32..ff5d45f7cb 100644
> > > > > --- a/sysdeps/x86_64/dl-machine.h
> > > > > +++ b/sysdeps/x86_64/dl-machine.h
> > > > > @@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> > > > > int lazy, int profile)
> > > > > {
> > > > > Elf64_Addr *got;
> > > > > - extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
> > > > > - extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
> > > > > - extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
> > > > > extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
> > > > > extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
> > > > > extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
> > > > > @@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> > > > > /* Identify this shared object. */
> > > > > *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
> > > > >
> > > > > - const struct cpu_features* cpu_features = __get_cpu_features ();
> > > > > -
> > > > > #ifdef SHARED
> > > > > /* The got[2] entry contains the address of a function which gets
> > > > > called to get the address of a so far unresolved function and
> > > > > @@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> > > > > end in this function. */
> > > > > if (__glibc_unlikely (profile))
> > > > > {
> > > > > + const struct cpu_features* cpu_features = __get_cpu_features ();
> > > > > if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
> > > > > *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
> > > > > else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
> > > > > @@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> > > > > /* This function will get called to fix up the GOT entry
> > > > > indicated by the offset on the stack, and then jump to
> > > > > the resolved address. */
> > > > > - if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
> > > > > - || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> > > > > - *(ElfW(Addr) *) (got + 2)
> > > > > - = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
> > > > > - ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
> > > > > - : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
> > > > > - else
> > > > > - *(ElfW(Addr) *) (got + 2)
> > > > > - = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
> > > > > + *(ElfW(Addr) *) (got + 2)
> > > > > + = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve);
> > > > > }
> > > > > }
> > > > >
> > > > > @@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n",
> > > > > {
> > > > > td->arg = _dl_make_tlsdesc_dynamic
> > > > > (sym_map, sym->st_value + reloc->r_addend);
> > > > > - td->entry = _dl_tlsdesc_dynamic;
> > > > > + td->entry = GLRO(dl_x86_tlsdesc_dynamic);
> > > > > }
> > > > > else
> > > > > # endif
> > > > > diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
> > > > > index 4d1d790fbb..06637a8154 100644
> > > > > --- a/sysdeps/x86_64/dl-procinfo.c
> > > > > +++ b/sysdeps/x86_64/dl-procinfo.c
> > > > > @@ -41,5 +41,21 @@
> > > > >
> > > > > #include <sysdeps/x86/dl-procinfo.c>
> > > > >
> > > > > +#if !IS_IN (ldconfig)
> > > > > +# if !defined PROCINFO_DECL && defined SHARED
> > > > > + ._dl_x86_64_runtime_resolve
> > > > > +# else
> > > > > +PROCINFO_CLASS void * _dl_x86_64_runtime_resolve
> > > > > +# endif
> > > > > +# ifndef PROCINFO_DECL
> > > > > += NULL
> > > > > +# endif
> > > > > +# if !defined SHARED || defined PROCINFO_DECL
> > > > > +;
> > > > > +# else
> > > > > +,
> > > > > +# endif
> > > > > +#endif
> > > > > +
> > > > > #undef PROCINFO_DECL
> > > > > #undef PROCINFO_CLASS
> > > > > diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
> > > > > new file mode 100644
> > > > > index 0000000000..0c2e8d5320
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
> > > > > @@ -0,0 +1,166 @@
> > > > > +/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
> > > > > + Copyright (C) 2004-2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#ifndef SECTION
> > > > > +# define SECTION(p) p
> > > > > +#endif
> > > > > +
> > > > > +#undef REGISTER_SAVE_AREA
> > > > > +#undef LOCAL_STORAGE_AREA
> > > > > +#undef BASE
> > > > > +
> > > > > +#include "dl-trampoline-state.h"
> > > > > +
> > > > > + .section SECTION(.text),"ax",@progbits
> > > > > +
> > > > > + .hidden _dl_tlsdesc_dynamic
> > > > > + .global _dl_tlsdesc_dynamic
> > > > > + .type _dl_tlsdesc_dynamic,@function
> > > > > +
> > > > > + /* %rax points to the TLS descriptor, such that 0(%rax) points to
> > > > > + _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
> > > > > + tlsdesc_dynamic_arg object. It must return in %rax the offset
> > > > > + between the thread pointer and the object denoted by the
> > > > > + argument, without clobbering any registers.
> > > > > +
> > > > > + The assembly code that follows is a rendition of the following
> > > > > + C code, hand-optimized a little bit.
> > > > > +
> > > > > +ptrdiff_t
> > > > > +_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
> > > > > +{
> > > > > + struct tlsdesc_dynamic_arg *td = tdp->arg;
> > > > > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
> > > > > + if (__builtin_expect (td->gen_count <= dtv[0].counter
> > > > > + && (dtv[td->tlsinfo.ti_module].pointer.val
> > > > > + != TLS_DTV_UNALLOCATED),
> > > > > + 1))
> > > > > + return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> > > > > + - __thread_pointer;
> > > > > +
> > > > > + return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
> > > > > +}
> > > > > +*/
> > > > > + cfi_startproc
> > > > > + .align 16
> > > > > +_dl_tlsdesc_dynamic:
> > > > > + _CET_ENDBR
> > > > > + /* Preserve call-clobbered registers that we modify.
> > > > > + We need two scratch regs anyway. */
> > > > > + movq %rsi, -16(%rsp)
> > > > > + mov %fs:DTV_OFFSET, %RSI_LP
> > > > > + movq %rdi, -8(%rsp)
> > > > > + movq TLSDESC_ARG(%rax), %rdi
> > > > > + movq (%rsi), %rax
> > > > > + cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
> > > > > + ja 2f
> > > > > + movq TLSDESC_MODID(%rdi), %rax
> > > > > + salq $4, %rax
> > > > > + movq (%rax,%rsi), %rax
> > > > > + cmpq $-1, %rax
> > > > > + je 2f
> > > > > + addq TLSDESC_MODOFF(%rdi), %rax
> > > > > +1:
> > > > > + movq -16(%rsp), %rsi
> > > > > + sub %fs:0, %RAX_LP
> > > > > + movq -8(%rsp), %rdi
> > > > > + ret
> > > > > +2:
> > > > > +#if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > + movq %rbx, -24(%rsp)
> > > > > + mov %RSP_LP, %RBX_LP
> > > > > + cfi_def_cfa_register(%rbx)
> > > > > + and $-STATE_SAVE_ALIGNMENT, %RSP_LP
> > > > > +#endif
> > > > > +#ifdef REGISTER_SAVE_AREA
> > > > > +# if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > + /* STATE_SAVE_OFFSET has space for 8 integer registers. But we
> > > > > + need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus
> > > > > + RBX above. */
> > > > > + sub $(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP
> > > > > +# else
> > > > > + sub $REGISTER_SAVE_AREA, %RSP_LP
> > > > > + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
> > > > > +# endif
> > > > > +#else
> > > > > + /* Allocate stack space of the required size to save the state. */
> > > > > + sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
> > > > > +#endif
> > > > > + /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
> > > > > + r10 and r11. */
> > > > > + movq %rcx, REGISTER_SAVE_RCX(%rsp)
> > > > > + movq %rdx, REGISTER_SAVE_RDX(%rsp)
> > > > > + movq %r8, REGISTER_SAVE_R8(%rsp)
> > > > > + movq %r9, REGISTER_SAVE_R9(%rsp)
> > > > > + movq %r10, REGISTER_SAVE_R10(%rsp)
> > > > > + movq %r11, REGISTER_SAVE_R11(%rsp)
> > > > > +#ifdef USE_FXSAVE
> > > > > + fxsave STATE_SAVE_OFFSET(%rsp)
> > > > > +#else
> > > > > + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
> > > > > + xorl %edx, %edx
> > > > > + /* Clear the XSAVE Header. */
> > > > > +# ifdef USE_XSAVE
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
> > > > > +# endif
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
> > > > > + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
> > > > > +# ifdef USE_XSAVE
> > > > > + xsave STATE_SAVE_OFFSET(%rsp)
> > > > > +# else
> > > > > + xsavec STATE_SAVE_OFFSET(%rsp)
> > > > > +# endif
> > > > > +#endif
> > > > > + /* %rdi already points to the tlsinfo data structure. */
> > > > > + call HIDDEN_JUMPTARGET (__tls_get_addr)
> > > > > + # Get register content back.
> > > > > +#ifdef USE_FXSAVE
> > > > > + fxrstor STATE_SAVE_OFFSET(%rsp)
> > > > > +#else
> > > > > > + /* Save and restore __tls_get_addr return value stored in RAX. */
> > > > > + mov %RAX_LP, %RCX_LP
> > > > > + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
> > > > > + xorl %edx, %edx
> > > > > + xrstor STATE_SAVE_OFFSET(%rsp)
> > > > > + mov %RCX_LP, %RAX_LP
> > > > > +#endif
> > > > > + movq REGISTER_SAVE_R11(%rsp), %r11
> > > > > + movq REGISTER_SAVE_R10(%rsp), %r10
> > > > > + movq REGISTER_SAVE_R9(%rsp), %r9
> > > > > + movq REGISTER_SAVE_R8(%rsp), %r8
> > > > > + movq REGISTER_SAVE_RDX(%rsp), %rdx
> > > > > + movq REGISTER_SAVE_RCX(%rsp), %rcx
> > > > > +#if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > + mov %RBX_LP, %RSP_LP
> > > > > + cfi_def_cfa_register(%rsp)
> > > > > + movq -24(%rsp), %rbx
> > > > > + cfi_restore(%rbx)
> > > > > +#else
> > > > > + add $REGISTER_SAVE_AREA, %RSP_LP
> > > > > + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
> > > > > +#endif
> > > > > + jmp 1b
> > > > > + cfi_endproc
> > > > > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> > > > > +
> > > > > +#undef STATE_SAVE_ALIGNMENT
> > > > > diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
> > > > > index f748af2ece..ea69f5223a 100644
> > > > > --- a/sysdeps/x86_64/dl-tlsdesc.S
> > > > > +++ b/sysdeps/x86_64/dl-tlsdesc.S
> > > > > @@ -18,7 +18,19 @@
> > > > >
> > > > > #include <sysdep.h>
> > > > > #include <tls.h>
> > > > > +#include <cpu-features-offsets.h>
> > > > > +#include <features-offsets.h>
> > > > > #include "tlsdesc.h"
> > > > > +#include "dl-trampoline-save.h"
> > > > > +
> > > > > +/* Area on stack to save and restore registers used for parameter
> > > > > + passing when calling _dl_tlsdesc_dynamic. */
> > > > > +#define REGISTER_SAVE_RCX 0
> > > > > +#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
> > > > > +#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDX + 8)
> > > > > +#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
> > > > > +#define REGISTER_SAVE_R10 (REGISTER_SAVE_R9 + 8)
> > > > > +#define REGISTER_SAVE_R11 (REGISTER_SAVE_R10 + 8)
> > > > >
> > > > > .text
> > > > >
> > > > > @@ -67,80 +79,24 @@ _dl_tlsdesc_undefweak:
> > > > > .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> > > > >
> > > > > #ifdef SHARED
> > > > > - .hidden _dl_tlsdesc_dynamic
> > > > > - .global _dl_tlsdesc_dynamic
> > > > > - .type _dl_tlsdesc_dynamic,@function
> > > > > -
> > > > > - /* %rax points to the TLS descriptor, such that 0(%rax) points to
> > > > > - _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
> > > > > - tlsdesc_dynamic_arg object. It must return in %rax the offset
> > > > > - between the thread pointer and the object denoted by the
> > > > > - argument, without clobbering any registers.
> > > > > -
> > > > > - The assembly code that follows is a rendition of the following
> > > > > - C code, hand-optimized a little bit.
> > > > > -
> > > > > -ptrdiff_t
> > > > > -_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
> > > > > -{
> > > > > - struct tlsdesc_dynamic_arg *td = tdp->arg;
> > > > > - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
> > > > > - if (__builtin_expect (td->gen_count <= dtv[0].counter
> > > > > - && (dtv[td->tlsinfo.ti_module].pointer.val
> > > > > - != TLS_DTV_UNALLOCATED),
> > > > > - 1))
> > > > > - return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> > > > > - - __thread_pointer;
> > > > > -
> > > > > - return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
> > > > > -}
> > > > > -*/
> > > > > - cfi_startproc
> > > > > - .align 16
> > > > > -_dl_tlsdesc_dynamic:
> > > > > - _CET_ENDBR
> > > > > - /* Preserve call-clobbered registers that we modify.
> > > > > - We need two scratch regs anyway. */
> > > > > - movq %rsi, -16(%rsp)
> > > > > - mov %fs:DTV_OFFSET, %RSI_LP
> > > > > - movq %rdi, -8(%rsp)
> > > > > - movq TLSDESC_ARG(%rax), %rdi
> > > > > - movq (%rsi), %rax
> > > > > - cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
> > > > > - ja .Lslow
> > > > > - movq TLSDESC_MODID(%rdi), %rax
> > > > > - salq $4, %rax
> > > > > - movq (%rax,%rsi), %rax
> > > > > - cmpq $-1, %rax
> > > > > - je .Lslow
> > > > > - addq TLSDESC_MODOFF(%rdi), %rax
> > > > > -.Lret:
> > > > > - movq -16(%rsp), %rsi
> > > > > - sub %fs:0, %RAX_LP
> > > > > - movq -8(%rsp), %rdi
> > > > > - ret
> > > > > -.Lslow:
> > > > > - /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
> > > > > - r10 and r11. Also, align the stack, that's off by 8 bytes. */
> > > > > - subq $72, %rsp
> > > > > - cfi_adjust_cfa_offset (72)
> > > > > - movq %rdx, 8(%rsp)
> > > > > - movq %rcx, 16(%rsp)
> > > > > - movq %r8, 24(%rsp)
> > > > > - movq %r9, 32(%rsp)
> > > > > - movq %r10, 40(%rsp)
> > > > > - movq %r11, 48(%rsp)
> > > > > - /* %rdi already points to the tlsinfo data structure. */
> > > > > - call HIDDEN_JUMPTARGET (__tls_get_addr)
> > > > > - movq 8(%rsp), %rdx
> > > > > - movq 16(%rsp), %rcx
> > > > > - movq 24(%rsp), %r8
> > > > > - movq 32(%rsp), %r9
> > > > > - movq 40(%rsp), %r10
> > > > > - movq 48(%rsp), %r11
> > > > > - addq $72, %rsp
> > > > > - cfi_adjust_cfa_offset (-72)
> > > > > - jmp .Lret
> > > > > - cfi_endproc
> > > > > - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> > > > > +# define USE_FXSAVE
> > > > > +# define STATE_SAVE_ALIGNMENT 16
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef USE_FXSAVE
> > > > > +
> > > > > +# define USE_XSAVE
> > > > > +# define STATE_SAVE_ALIGNMENT 64
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef USE_XSAVE
> > > > > +
> > > > > +# define USE_XSAVEC
> > > > > +# define STATE_SAVE_ALIGNMENT 64
> > > > > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec
> > > > > +# include "dl-tlsdesc-dynamic.h"
> > > > > +# undef _dl_tlsdesc_dynamic
> > > > > +# undef USE_XSAVEC
> > > > > #endif /* SHARED */
> > > > > diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
> > > > > new file mode 100644
> > > > > index 0000000000..84eac4a8ac
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/dl-trampoline-save.h
> > > > > @@ -0,0 +1,34 @@
> > > > > +/* x86-64 PLT trampoline register save macros.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#ifndef DL_STACK_ALIGNMENT
> > > > > +/* Due to GCC bug:
> > > > > +
> > > > > + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
> > > > > +
> > > > > + __tls_get_addr may be called with 8-byte stack alignment. Although
> > > > > + this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
> > > > > + that stack will be always aligned at 16 bytes. */
> > > > > +# define DL_STACK_ALIGNMENT 8
> > > > > +#endif
> > > > > +
> > > > > +/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
> > > > > + stack to 16 bytes before calling _dl_fixup. */
> > > > > +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
> > > > > + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
> > > > > + || 16 > DL_STACK_ALIGNMENT)
> > > > > diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h
> > > > > new file mode 100644
> > > > > index 0000000000..575f120797
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/dl-trampoline-state.h
> > > > > @@ -0,0 +1,51 @@
> > > > > +/* x86-64 PLT dl-trampoline state macros.
> > > > > + Copyright (C) 2024 Free Software Foundation, Inc.
> > > > > + This file is part of the GNU C Library.
> > > > > +
> > > > > + The GNU C Library is free software; you can redistribute it and/or
> > > > > + modify it under the terms of the GNU Lesser General Public
> > > > > + License as published by the Free Software Foundation; either
> > > > > + version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > + The GNU C Library is distributed in the hope that it will be useful,
> > > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > > > > + Lesser General Public License for more details.
> > > > > +
> > > > > + You should have received a copy of the GNU Lesser General Public
> > > > > + License along with the GNU C Library; if not, see
> > > > > + <https://www.gnu.org/licenses/>. */
> > > > > +
> > > > > +#if (STATE_SAVE_ALIGNMENT % 16) != 0
> > > > > +# error STATE_SAVE_ALIGNMENT must be multiple of 16
> > > > > +#endif
> > > > > +
> > > > > +#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
> > > > > +# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
> > > > > +#endif
> > > > > +
> > > > > +#if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > +/* Local stack area before jumping to function address: RBX. */
> > > > > +# define LOCAL_STORAGE_AREA 8
> > > > > +# define BASE rbx
> > > > > +# ifdef USE_FXSAVE
> > > > > +/* Use fxsave to save XMM registers. */
> > > > > +# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
> > > > > +# if (REGISTER_SAVE_AREA % 16) != 0
> > > > > +# error REGISTER_SAVE_AREA must be multiple of 16
> > > > > +# endif
> > > > > +# endif
> > > > > +#else
> > > > > +# ifndef USE_FXSAVE
> > > > > +# error USE_FXSAVE must be defined
> > > > > +# endif
> > > > > +/* Use fxsave to save XMM registers. */
> > > > > +# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
> > > > > +/* Local stack area before jumping to function address: All saved
> > > > > + registers. */
> > > > > +# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
> > > > > +# define BASE rsp
> > > > > +# if (REGISTER_SAVE_AREA % 16) != 8
> > > > > +# error REGISTER_SAVE_AREA must be odd multiple of 8
> > > > > +# endif
> > > > > +#endif
> > > > > diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
> > > > > index b2e7e0f69b..87c5137837 100644
> > > > > --- a/sysdeps/x86_64/dl-trampoline.S
> > > > > +++ b/sysdeps/x86_64/dl-trampoline.S
> > > > > @@ -22,25 +22,7 @@
> > > > > #include <features-offsets.h>
> > > > > #include <link-defines.h>
> > > > > #include <isa-level.h>
> > > > > -
> > > > > -#ifndef DL_STACK_ALIGNMENT
> > > > > -/* Due to GCC bug:
> > > > > -
> > > > > - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
> > > > > -
> > > > > - __tls_get_addr may be called with 8-byte stack alignment. Although
> > > > > - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
> > > > > - that stack will be always aligned at 16 bytes. We use unaligned
> > > > > - 16-byte move to load and store SSE registers, which has no penalty
> > > > > - on modern processors if stack is 16-byte aligned. */
> > > > > -# define DL_STACK_ALIGNMENT 8
> > > > > -#endif
> > > > > -
> > > > > -/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
> > > > > - stack to 16 bytes before calling _dl_fixup. */
> > > > > -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
> > > > > - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
> > > > > - || 16 > DL_STACK_ALIGNMENT)
> > > > > +#include "dl-trampoline-save.h"
> > > > >
> > > > > /* Area on stack to save and restore registers used for parameter
> > > > > passing when calling _dl_fixup. */
> > > > > diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
> > > > > index f55c6ea040..d9ccfb40d4 100644
> > > > > --- a/sysdeps/x86_64/dl-trampoline.h
> > > > > +++ b/sysdeps/x86_64/dl-trampoline.h
> > > > > @@ -27,39 +27,7 @@
> > > > > # undef LOCAL_STORAGE_AREA
> > > > > # undef BASE
> > > > >
> > > > > -# if (STATE_SAVE_ALIGNMENT % 16) != 0
> > > > > -# error STATE_SAVE_ALIGNMENT must be multiple of 16
> > > > > -# endif
> > > > > -
> > > > > -# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
> > > > > -# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
> > > > > -# endif
> > > > > -
> > > > > -# if DL_RUNTIME_RESOLVE_REALIGN_STACK
> > > > > -/* Local stack area before jumping to function address: RBX. */
> > > > > -# define LOCAL_STORAGE_AREA 8
> > > > > -# define BASE rbx
> > > > > -# ifdef USE_FXSAVE
> > > > > -/* Use fxsave to save XMM registers. */
> > > > > -# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
> > > > > -# if (REGISTER_SAVE_AREA % 16) != 0
> > > > > -# error REGISTER_SAVE_AREA must be multiple of 16
> > > > > -# endif
> > > > > -# endif
> > > > > -# else
> > > > > -# ifndef USE_FXSAVE
> > > > > -# error USE_FXSAVE must be defined
> > > > > -# endif
> > > > > -/* Use fxsave to save XMM registers. */
> > > > > -# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
> > > > > -/* Local stack area before jumping to function address: All saved
> > > > > - registers. */
> > > > > -# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
> > > > > -# define BASE rsp
> > > > > -# if (REGISTER_SAVE_AREA % 16) != 8
> > > > > -# error REGISTER_SAVE_AREA must be odd multiple of 8
> > > > > -# endif
> > > > > -# endif
> > > > > +# include "dl-trampoline-state.h"
> > > > >
> > > > > .globl _dl_runtime_resolve
> > > > > .hidden _dl_runtime_resolve
> > > > > --
> > > > > 2.43.2
> > > > >
> > > >
> > > > This still LGTM.
> > > > Feel free to add my reviewed-by tag to future posts of the patch.
> > > > Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > >
> > > Please wait for community consensus before pushing
> >
> > It was agreed that I should send out the v10 patch with xfail on
> > the new test. Someone just needs to double check that my
> > v10 patch does it.
> Okay, give it 24 hours. If no further concerns are brought up, I
> think it's okay to push.
OK to backport it to release branches?
--
H.J.
On Sat, Feb 24, 2024 at 11:01 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> > ld.so trampoline. This fixes BZ #31371.
> >
> > Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> > be used by i386 _dl_tlsdesc_dynamic.
> > ---
> > sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 46 insertions(+), 6 deletions(-)
> >
> > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > index 85d0a8c943..837fd28734 100644
> > --- a/sysdeps/x86/sysdep.h
> > +++ b/sysdeps/x86/sysdep.h
> > @@ -21,14 +21,54 @@
> >
> > #include <sysdeps/generic/sysdep.h>
> >
> > +/* The extended state feature IDs in the state component bitmap. */
> > +#define X86_XSTATE_X87_ID 0
> > +#define X86_XSTATE_SSE_ID 1
> > +#define X86_XSTATE_AVX_ID 2
> > +#define X86_XSTATE_BNDREGS_ID 3
> > +#define X86_XSTATE_BNDCFG_ID 4
> > +#define X86_XSTATE_K_ID 5
> > +#define X86_XSTATE_ZMM_H_ID 6
> > +#define X86_XSTATE_ZMM_ID 7
> > +#define X86_XSTATE_PKRU_ID 9
> > +#define X86_XSTATE_TILECFG_ID 17
> > +#define X86_XSTATE_TILEDATA_ID 18
> > +#define X86_XSTATE_APX_F_ID 19
> > +
> > +#ifdef __x86_64__
> > /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
> > space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
> > - aligned to 16 bytes for fxsave and 64 bytes for xsave. */
> > -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> > -
> > -/* Save SSE, AVX, AVX512, mask and bound registers. */
> > -#define STATE_SAVE_MASK \
> > - ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> > + aligned to 16 bytes for fxsave and 64 bytes for xsave.
> > +
> > + NB: It is non-zero because of the 128-byte red-zone. Some registers
> > + are saved on stack without adjusting stack pointer first. When we
> > + update stack pointer to allocate more space, we need to take the
> > + red-zone into account. */
> > +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> > +
> > +/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
> > + registers are mutually exclusive. */
> > +# define STATE_SAVE_MASK \
> > + ((1 << X86_XSTATE_SSE_ID) \
> > + | (1 << X86_XSTATE_AVX_ID) \
> > + | (1 << X86_XSTATE_BNDREGS_ID) \
> > + | (1 << X86_XSTATE_K_ID) \
> > + | (1 << X86_XSTATE_ZMM_H_ID) \
> > + | (1 << X86_XSTATE_ZMM_ID) \
> > + | (1 << X86_XSTATE_APX_F_ID))
> > +#else
> > +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
> > + doesn't have red-zone, use 0 here. */
> > +# define STATE_SAVE_OFFSET 0
> > +
> > +/* Save SSE, AVX, AVX512, mask and bound registers. */
> > +# define STATE_SAVE_MASK \
> > + ((1 << X86_XSTATE_SSE_ID) \
> > + | (1 << X86_XSTATE_AVX_ID) \
> > + | (1 << X86_XSTATE_BNDREGS_ID) \
> > + | (1 << X86_XSTATE_K_ID) \
> > + | (1 << X86_XSTATE_ZMM_H_ID))
> > +#endif
> >
> > /* Constants for bits in __x86_string_control: */
> >
> > --
> > 2.43.0
> >
>
>
> LGTM.
> Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
OK to backport it to release branches?
Thanks.
--
H.J.
Per recommendation from Siddhesh, I've cherry-picked this patch to 2.39 branch (with c62b6265a641aaec4b0fa1b3fe622c2edf9c3821 fix squashed into it). This isn't ABI changing and improves stdbit.h not just to types which weren't handled before, but also avoids using the macro arguments multiple times: The following patch uses the GCC 14 __builtin_stdc_* builtins in stdbit.h for the type-generic macros, so that when compiled with GCC 14 or later, it supports not just 8/16/32/64-bit unsigned integers, but also 128-bit (if target supports them) and unsigned _BitInt (any supported precision). And so that the macros don't expand arguments multiple times and can be evaluated in constant expressions. The new testcase is gcc's gcc/testsuite/gcc.dg/builtin-stdc-bit-1.c adjusted to test stdbit.h and the type-generic macros in there instead of the builtins and adjusted to use glibc test framework rather than gcc style tests with __builtin_abort (). Signed-off-by: Jakub Jelinek <jakub@redhat.com> Reviewed-by: Joseph Myers <josmyers@redhat.com> (cherry picked from commit da89496337b97e6a2aaf1e81d55cf998f6db1070) --- manual/stdbit.texi | 8 +- stdlib/Makefile | 1 + stdlib/stdbit.h | 84 +++- stdlib/tst-stdbit-builtins.c | 778 +++++++++++++++++++++++++++++++++++ 4 files changed, 856 insertions(+), 15 deletions(-) create mode 100644 stdlib/tst-stdbit-builtins.c diff --git a/manual/stdbit.texi b/manual/stdbit.texi index fe41c671d8..6c75ed9a20 100644 --- a/manual/stdbit.texi +++ b/manual/stdbit.texi @@ -32,7 +32,13 @@ and @code{unsigned long long int}. In addition, there is a corresponding type-generic macro (not listed below), named the same as the functions but without any suffix such as @samp{_uc}. The type-generic macro can only be used with an argument of an unsigned -integer type with a width of 8, 16, 32 or 64 bits. 
+integer type with a width of 8, 16, 32 or 64 bits, or when using +a compiler with support for +@uref{https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html,@code{__builtin_stdc_bit_ceil}}, +etc.@:, built-in functions such as GCC 14.1 or later +any unsigned integer type those built-in functions support. +In GCC 14.1 that includes support for @code{unsigned __int128} and +@code{unsigned _BitInt(@var{n})} if supported by the target. @deftypefun {unsigned int} stdc_leading_zeros_uc (unsigned char @var{x}) @deftypefunx {unsigned int} stdc_leading_zeros_us (unsigned short @var{x}) diff --git a/stdlib/Makefile b/stdlib/Makefile index d587f054d1..9898cc5d8a 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -308,6 +308,7 @@ tests := \ tst-setcontext10 \ tst-setcontext11 \ tst-stdbit-Wconversion \ + tst-stdbit-builtins \ tst-stdc_bit_ceil \ tst-stdc_bit_floor \ tst-stdc_bit_width \ diff --git a/stdlib/stdbit.h b/stdlib/stdbit.h index f334eb174d..2801590c63 100644 --- a/stdlib/stdbit.h +++ b/stdlib/stdbit.h @@ -64,9 +64,13 @@ extern unsigned int stdc_leading_zeros_ul (unsigned long int __x) __extension__ extern unsigned int stdc_leading_zeros_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_leading_zeros(x) \ +#if __glibc_has_builtin (__builtin_stdc_leading_zeros) +# define stdc_leading_zeros(x) (__builtin_stdc_leading_zeros (x)) +#else +# define stdc_leading_zeros(x) \ (stdc_leading_zeros_ull (x) \ - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x)))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -116,9 +120,13 @@ extern unsigned int stdc_leading_ones_ul (unsigned long int __x) __extension__ extern unsigned int stdc_leading_ones_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_leading_ones(x) \ +#if __glibc_has_builtin (__builtin_stdc_leading_ones) +# define stdc_leading_ones(x) (__builtin_stdc_leading_ones (x)) +#else +# define stdc_leading_ones(x) \ 
(stdc_leading_ones_ull ((unsigned long long int) (x) \ << 8 * (sizeof (0ULL) - sizeof (x)))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -168,11 +176,15 @@ extern unsigned int stdc_trailing_zeros_ul (unsigned long int __x) __extension__ extern unsigned int stdc_trailing_zeros_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_trailing_zeros(x) \ +#if __glibc_has_builtin (__builtin_stdc_trailing_zeros) +# define stdc_trailing_zeros(x) (__builtin_stdc_trailing_zeros (x)) +#else +# define stdc_trailing_zeros(x) \ (sizeof (x) == 8 ? stdc_trailing_zeros_ull (x) \ : sizeof (x) == 4 ? stdc_trailing_zeros_ui (x) \ : sizeof (x) == 2 ? stdc_trailing_zeros_us (__pacify_uint16 (x)) \ : stdc_trailing_zeros_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -222,7 +234,11 @@ extern unsigned int stdc_trailing_ones_ul (unsigned long int __x) __extension__ extern unsigned int stdc_trailing_ones_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_trailing_ones) +# define stdc_trailing_ones(x) (__builtin_stdc_trailing_ones (x)) +#else +# define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -272,11 +288,15 @@ extern unsigned int stdc_first_leading_zero_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_leading_zero_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_leading_zero(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_leading_zero) +# define stdc_first_leading_zero(x) (__builtin_stdc_first_leading_zero (x)) +#else +# define stdc_first_leading_zero(x) \ (sizeof (x) == 8 ? 
stdc_first_leading_zero_ull (x) \ : sizeof (x) == 4 ? stdc_first_leading_zero_ui (x) \ : sizeof (x) == 2 ? stdc_first_leading_zero_us (__pacify_uint16 (x)) \ : stdc_first_leading_zero_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -326,11 +346,15 @@ extern unsigned int stdc_first_leading_one_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_leading_one_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_leading_one(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_leading_one) +# define stdc_first_leading_one(x) (__builtin_stdc_first_leading_one (x)) +#else +# define stdc_first_leading_one(x) \ (sizeof (x) == 8 ? stdc_first_leading_one_ull (x) \ : sizeof (x) == 4 ? stdc_first_leading_one_ui (x) \ : sizeof (x) == 2 ? stdc_first_leading_one_us (__pacify_uint16 (x)) \ : stdc_first_leading_one_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -380,11 +404,15 @@ extern unsigned int stdc_first_trailing_zero_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_trailing_zero_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_trailing_zero(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_trailing_zero) +# define stdc_first_trailing_zero(x) (__builtin_stdc_first_trailing_zero (x)) +#else +# define stdc_first_trailing_zero(x) \ (sizeof (x) == 8 ? stdc_first_trailing_zero_ull (x) \ : sizeof (x) == 4 ? stdc_first_trailing_zero_ui (x) \ : sizeof (x) == 2 ? 
stdc_first_trailing_zero_us (__pacify_uint16 (x)) \ : stdc_first_trailing_zero_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -434,11 +462,15 @@ extern unsigned int stdc_first_trailing_one_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_trailing_one_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_trailing_one(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_trailing_one) +# define stdc_first_trailing_one(x) (__builtin_stdc_first_trailing_one (x)) +#else +# define stdc_first_trailing_one(x) \ (sizeof (x) == 8 ? stdc_first_trailing_one_ull (x) \ : sizeof (x) == 4 ? stdc_first_trailing_one_ui (x) \ : sizeof (x) == 2 ? stdc_first_trailing_one_us (__pacify_uint16 (x)) \ : stdc_first_trailing_one_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -488,9 +520,13 @@ extern unsigned int stdc_count_zeros_ul (unsigned long int __x) __extension__ extern unsigned int stdc_count_zeros_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_count_zeros(x) \ +#if __glibc_has_builtin (__builtin_stdc_count_zeros) +# define stdc_count_zeros(x) (__builtin_stdc_count_zeros (x)) +#else +# define stdc_count_zeros(x) \ (stdc_count_zeros_ull (x) \ - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x)))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll) static __always_inline unsigned int @@ -540,7 +576,11 @@ extern unsigned int stdc_count_ones_ul (unsigned long int __x) __extension__ extern unsigned int stdc_count_ones_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_count_ones(x) (stdc_count_ones_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_count_ones) +# define stdc_count_ones(x) (__builtin_stdc_count_ones (x)) +#else +# define stdc_count_ones(x) (stdc_count_ones_ull (x)) 
+#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll) static __always_inline unsigned int @@ -590,10 +630,14 @@ extern bool stdc_has_single_bit_ul (unsigned long int __x) __extension__ extern bool stdc_has_single_bit_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_has_single_bit(x) \ +#if __glibc_has_builtin (__builtin_stdc_has_single_bit) +# define stdc_has_single_bit(x) (__builtin_stdc_has_single_bit (x)) +#else +# define stdc_has_single_bit(x) \ ((bool) (sizeof (x) <= sizeof (unsigned int) \ ? stdc_has_single_bit_ui (x) \ : stdc_has_single_bit_ull (x))) +#endif static __always_inline bool __hsb64_inline (uint64_t __x) @@ -641,7 +685,11 @@ extern unsigned int stdc_bit_width_ul (unsigned long int __x) __extension__ extern unsigned int stdc_bit_width_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_bit_width(x) (stdc_bit_width_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_bit_width) +# define stdc_bit_width(x) (__builtin_stdc_bit_width (x)) +#else +# define stdc_bit_width(x) (stdc_bit_width_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -691,7 +739,11 @@ extern unsigned long int stdc_bit_floor_ul (unsigned long int __x) __extension__ extern unsigned long long int stdc_bit_floor_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_bit_floor) +# define stdc_bit_floor(x) (__builtin_stdc_bit_floor (x)) +#else +# define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline uint64_t @@ -743,7 +795,11 @@ extern unsigned long int stdc_bit_ceil_ul (unsigned long int __x) __extension__ extern unsigned long long int stdc_bit_ceil_ull (unsigned long long int __x) __THROW __attribute_const__; -#define 
stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_bit_ceil) +# define stdc_bit_ceil(x) (__builtin_stdc_bit_ceil (x)) +#else +# define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline uint64_t diff --git a/stdlib/tst-stdbit-builtins.c b/stdlib/tst-stdbit-builtins.c new file mode 100644 index 0000000000..536841ca8a --- /dev/null +++ b/stdlib/tst-stdbit-builtins.c @@ -0,0 +1,778 @@ +/* Test <stdbit.h> type-generic macros with compiler __builtin_stdc_* support. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <stdbit.h> +#include <limits.h> +#include <support/check.h> + +#if __glibc_has_builtin (__builtin_stdc_leading_zeros) \ + && __glibc_has_builtin (__builtin_stdc_leading_ones) \ + && __glibc_has_builtin (__builtin_stdc_trailing_zeros) \ + && __glibc_has_builtin (__builtin_stdc_trailing_ones) \ + && __glibc_has_builtin (__builtin_stdc_first_leading_zero) \ + && __glibc_has_builtin (__builtin_stdc_first_leading_one) \ + && __glibc_has_builtin (__builtin_stdc_first_trailing_zero) \ + && __glibc_has_builtin (__builtin_stdc_first_trailing_one) \ + && __glibc_has_builtin (__builtin_stdc_count_zeros) \ + && __glibc_has_builtin (__builtin_stdc_count_ones) \ + && __glibc_has_builtin (__builtin_stdc_has_single_bit) \ + && __glibc_has_builtin (__builtin_stdc_bit_width) \ + && __glibc_has_builtin (__builtin_stdc_bit_floor) \ + && __glibc_has_builtin (__builtin_stdc_bit_ceil) + +# if !defined (BITINT_MAXWIDTH) && defined (__BITINT_MAXWIDTH__) +# define BITINT_MAXWIDTH __BITINT_MAXWIDTH__ +# endif + +typedef unsigned char uc; +typedef unsigned short us; +typedef unsigned int ui; +typedef unsigned long int ul; +typedef unsigned long long int ull; + +# define expr_has_type(e, t) _Generic (e, default : 0, t : 1) + +static int +do_test (void) +{ + TEST_COMPARE (stdc_leading_zeros ((uc) 0), CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((uc) 0), ui), 1); + TEST_COMPARE (stdc_leading_zeros ((us) 0), sizeof (short) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((us) 0), ui), 1); + TEST_COMPARE (stdc_leading_zeros (0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0U), ui), 1); + TEST_COMPARE (stdc_leading_zeros (0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0UL), ui), 1); + TEST_COMPARE (stdc_leading_zeros (0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0ULL), ui), 1); + TEST_COMPARE (stdc_leading_zeros ((uc) ~0U), 0); + 
TEST_COMPARE (stdc_leading_zeros ((us) ~0U), 0); + TEST_COMPARE (stdc_leading_zeros (~0U), 0); + TEST_COMPARE (stdc_leading_zeros (~0UL), 0); + TEST_COMPARE (stdc_leading_zeros (~0ULL), 0); + TEST_COMPARE (stdc_leading_zeros ((uc) 3), CHAR_BIT - 2); + TEST_COMPARE (stdc_leading_zeros ((us) 9), sizeof (short) * CHAR_BIT - 4); + TEST_COMPARE (stdc_leading_zeros (34U), sizeof (int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_leading_zeros (130UL), sizeof (long int) * CHAR_BIT - 8); + TEST_COMPARE (stdc_leading_zeros (512ULL), + sizeof (long long int) * CHAR_BIT - 10); + TEST_COMPARE (stdc_leading_ones ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((uc) 0), ui), 1); + TEST_COMPARE (stdc_leading_ones ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((us) 0), ui), 1); + TEST_COMPARE (stdc_leading_ones (0U), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0U), ui), 1); + TEST_COMPARE (stdc_leading_ones (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0UL), ui), 1); + TEST_COMPARE (stdc_leading_ones (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0ULL), ui), 1); + TEST_COMPARE (stdc_leading_ones ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_leading_ones ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones ((uc) ~3), CHAR_BIT - 2); + TEST_COMPARE (stdc_leading_ones ((us) ~9), sizeof (short) * CHAR_BIT - 4); + TEST_COMPARE (stdc_leading_ones (~34U), sizeof (int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_leading_ones (~130UL), sizeof (long int) * CHAR_BIT - 8); + TEST_COMPARE (stdc_leading_ones (~512ULL), + sizeof (long long int) * CHAR_BIT - 10); + TEST_COMPARE (stdc_trailing_zeros ((uc) 0), CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((uc) 0), ui), 1); + TEST_COMPARE 
(stdc_trailing_zeros ((us) 0), sizeof (short) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((us) 0), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0U), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0UL), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0ULL), ui), 1); + TEST_COMPARE (stdc_trailing_zeros ((uc) ~0U), 0); + TEST_COMPARE (stdc_trailing_zeros ((us) ~0U), 0); + TEST_COMPARE (stdc_trailing_zeros (~0U), 0); + TEST_COMPARE (stdc_trailing_zeros (~0UL), 0); + TEST_COMPARE (stdc_trailing_zeros (~0ULL), 0); + TEST_COMPARE (stdc_trailing_zeros ((uc) 2), 1); + TEST_COMPARE (stdc_trailing_zeros ((us) 24), 3); + TEST_COMPARE (stdc_trailing_zeros (32U), 5); + TEST_COMPARE (stdc_trailing_zeros (128UL), 7); + TEST_COMPARE (stdc_trailing_zeros (512ULL), 9); + TEST_COMPARE (stdc_trailing_ones ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((uc) 0), ui), 1); + TEST_COMPARE (stdc_trailing_ones ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((us) 0), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0U), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0U), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0UL), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0ULL), ui), 1); + TEST_COMPARE (stdc_trailing_ones ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE 
(stdc_trailing_ones ((uc) 5), 1); + TEST_COMPARE (stdc_trailing_ones ((us) 15), 4); + TEST_COMPARE (stdc_trailing_ones (127U), 7); + TEST_COMPARE (stdc_trailing_ones (511UL), 9); + TEST_COMPARE (stdc_trailing_ones (~0ULL >> 2), + sizeof (long long int) * CHAR_BIT - 2); + TEST_COMPARE (stdc_first_leading_zero ((uc) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero ((us) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0U), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0U), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0UL), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0UL), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0ULL), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0ULL), ui), 1); + TEST_COMPARE (stdc_first_leading_zero ((uc) ~0U), 0); + TEST_COMPARE (stdc_first_leading_zero ((us) ~0U), 0); + TEST_COMPARE (stdc_first_leading_zero (~0U), 0); + TEST_COMPARE (stdc_first_leading_zero (~0UL), 0); + TEST_COMPARE (stdc_first_leading_zero (~0ULL), 0); + TEST_COMPARE (stdc_first_leading_zero ((uc) ~3U), CHAR_BIT - 1); + TEST_COMPARE (stdc_first_leading_zero ((us) ~15U), + sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_first_leading_zero (~63U), sizeof (int) * CHAR_BIT - 5); + TEST_COMPARE (stdc_first_leading_zero (~255UL), + sizeof (long int) * CHAR_BIT - 7); + TEST_COMPARE (stdc_first_leading_zero (~1023ULL), + sizeof (long long int) * CHAR_BIT - 9); + TEST_COMPARE (stdc_first_leading_one ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0U), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (0U), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0UL), 0); + 
TEST_COMPARE (expr_has_type (stdc_first_leading_one (0UL), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (0ULL), ui), 1); + TEST_COMPARE (stdc_first_leading_one ((uc) ~0U), 1); + TEST_COMPARE (stdc_first_leading_one ((us) ~0U), 1); + TEST_COMPARE (stdc_first_leading_one (~0U), 1); + TEST_COMPARE (stdc_first_leading_one (~0UL), 1); + TEST_COMPARE (stdc_first_leading_one (~0ULL), 1); + TEST_COMPARE (stdc_first_leading_one ((uc) 3), CHAR_BIT - 1); + TEST_COMPARE (stdc_first_leading_one ((us) 9), + sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_first_leading_one (34U), sizeof (int) * CHAR_BIT - 5); + TEST_COMPARE (stdc_first_leading_one (130UL), + sizeof (long int) * CHAR_BIT - 7); + TEST_COMPARE (stdc_first_leading_one (512ULL), + sizeof (long long int) * CHAR_BIT - 9); + TEST_COMPARE (stdc_first_trailing_zero ((uc) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero ((us) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0U), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0U), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0UL), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0UL), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0ULL), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0ULL), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero ((uc) ~0U), 0); + TEST_COMPARE (stdc_first_trailing_zero ((us) ~0U), 0); + TEST_COMPARE (stdc_first_trailing_zero (~0U), 0); + TEST_COMPARE (stdc_first_trailing_zero (~0UL), 0); + TEST_COMPARE (stdc_first_trailing_zero (~0ULL), 0); + TEST_COMPARE (stdc_first_trailing_zero ((uc) 2), 1); + TEST_COMPARE (stdc_first_trailing_zero ((us) 15), 5); + TEST_COMPARE (stdc_first_trailing_zero (63U), 7); + TEST_COMPARE (stdc_first_trailing_zero (128UL), 1); + TEST_COMPARE 
(stdc_first_trailing_zero (511ULL), 10); + TEST_COMPARE (stdc_first_trailing_one ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0U), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0U), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0UL), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0ULL), ui), 1); + TEST_COMPARE (stdc_first_trailing_one ((uc) ~0U), 1); + TEST_COMPARE (stdc_first_trailing_one ((us) ~0U), 1); + TEST_COMPARE (stdc_first_trailing_one (~0U), 1); + TEST_COMPARE (stdc_first_trailing_one (~0UL), 1); + TEST_COMPARE (stdc_first_trailing_one (~0ULL), 1); + TEST_COMPARE (stdc_first_trailing_one ((uc) 4), 3); + TEST_COMPARE (stdc_first_trailing_one ((us) 96), 6); + TEST_COMPARE (stdc_first_trailing_one (127U), 1); + TEST_COMPARE (stdc_first_trailing_one (511UL), 1); + TEST_COMPARE (stdc_first_trailing_one (~0ULL << 12), 13); + TEST_COMPARE (stdc_count_zeros ((uc) 0), CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((uc) 0), ui), 1); + TEST_COMPARE (stdc_count_zeros ((us) 0), sizeof (short) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((us) 0), ui), 1); + TEST_COMPARE (stdc_count_zeros (0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0U), ui), 1); + TEST_COMPARE (stdc_count_zeros (0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0UL), ui), 1); + TEST_COMPARE (stdc_count_zeros (0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0ULL), ui), 1); + TEST_COMPARE (stdc_count_zeros ((uc) ~0U), 0); + TEST_COMPARE (stdc_count_zeros ((us) ~0U), 0); + TEST_COMPARE (stdc_count_zeros 
(~0U), 0); + TEST_COMPARE (stdc_count_zeros (~0UL), 0); + TEST_COMPARE (stdc_count_zeros (~0ULL), 0); + TEST_COMPARE (stdc_count_zeros ((uc) 1U), CHAR_BIT - 1); + TEST_COMPARE (stdc_count_zeros ((us) 42), sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_count_zeros (291U), sizeof (int) * CHAR_BIT - 4); + TEST_COMPARE (stdc_count_zeros (~1315UL), 5); + TEST_COMPARE (stdc_count_zeros (3363ULL), + sizeof (long long int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_count_ones ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((uc) 0), ui), 1); + TEST_COMPARE (stdc_count_ones ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((us) 0), ui), 1); + TEST_COMPARE (stdc_count_ones (0U), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0U), ui), 1); + TEST_COMPARE (stdc_count_ones (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0UL), ui), 1); + TEST_COMPARE (stdc_count_ones (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0ULL), ui), 1); + TEST_COMPARE (stdc_count_ones ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_count_ones ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones ((uc) ~1U), CHAR_BIT - 1); + TEST_COMPARE (stdc_count_ones ((us) ~42), sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_count_ones (~291U), sizeof (int) * CHAR_BIT - 4); + TEST_COMPARE (stdc_count_ones (1315UL), 5); + TEST_COMPARE (stdc_count_ones (~3363ULL), + sizeof (long long int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_has_single_bit ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((uc) 0), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((us) 0), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (0U), 0); + TEST_COMPARE (expr_has_type 
(stdc_has_single_bit (0U), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (0UL), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (0ULL), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit ((uc) 2), 1); + TEST_COMPARE (stdc_has_single_bit ((us) 8), 1); + TEST_COMPARE (stdc_has_single_bit (32U), 1); + TEST_COMPARE (stdc_has_single_bit (128UL), 1); + TEST_COMPARE (stdc_has_single_bit (512ULL), 1); + TEST_COMPARE (stdc_has_single_bit ((uc) 7), 0); + TEST_COMPARE (stdc_has_single_bit ((us) 96), 0); + TEST_COMPARE (stdc_has_single_bit (513U), 0); + TEST_COMPARE (stdc_has_single_bit (1022UL), 0); + TEST_COMPARE (stdc_has_single_bit (12ULL), 0); + TEST_COMPARE (stdc_bit_width ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((uc) 0), ui), 1); + TEST_COMPARE (stdc_bit_width ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((us) 0), ui), 1); + TEST_COMPARE (stdc_bit_width (0U), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0U), ui), 1); + TEST_COMPARE (stdc_bit_width (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0UL), ui), 1); + TEST_COMPARE (stdc_bit_width (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0ULL), ui), 1); + TEST_COMPARE (stdc_bit_width ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_bit_width ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width ((uc) ((uc) ~0U >> 1)), CHAR_BIT - 1); + TEST_COMPARE (stdc_bit_width ((uc) 6), 3); + TEST_COMPARE (stdc_bit_width ((us) 12U), 4); + TEST_COMPARE (stdc_bit_width ((us) ((us) ~0U >> 5)), + sizeof (short) * CHAR_BIT - 5); + TEST_COMPARE (stdc_bit_width (137U), 8); + TEST_COMPARE (stdc_bit_width (269U), 9); + TEST_COMPARE 
(stdc_bit_width (39UL), 6); + TEST_COMPARE (stdc_bit_width (~0UL >> 2), sizeof (long int) * CHAR_BIT - 2); + TEST_COMPARE (stdc_bit_width (1023ULL), 10); + TEST_COMPARE (stdc_bit_width (1024ULL), 11); + TEST_COMPARE (stdc_bit_floor ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((uc) 0), uc), 1); + TEST_COMPARE (stdc_bit_floor ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((us) 0), us), 1); + TEST_COMPARE (stdc_bit_floor (0U), 0U); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0U), ui), 1); + TEST_COMPARE (stdc_bit_floor (0UL), 0UL); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0UL), ul), 1); + TEST_COMPARE (stdc_bit_floor (0ULL), 0ULL); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0ULL), ull), 1); + TEST_COMPARE (stdc_bit_floor ((uc) ~0U), (1U << (CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor ((us) ~0U), + (1U << (sizeof (short) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor (~0U), (1U << (sizeof (int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor (~0UL), + (1UL << (sizeof (long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor (~0ULL), + (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor ((uc) 4), 4); + TEST_COMPARE (stdc_bit_floor ((uc) 7), 4); + TEST_COMPARE (stdc_bit_floor ((us) 8U), 8); + TEST_COMPARE (stdc_bit_floor ((us) 31U), 16); + TEST_COMPARE (stdc_bit_floor (137U), 128U); + TEST_COMPARE (stdc_bit_floor (269U), 256U); + TEST_COMPARE (stdc_bit_floor (511UL), 256UL); + TEST_COMPARE (stdc_bit_floor (512UL), 512UL); + TEST_COMPARE (stdc_bit_floor (513UL), 512ULL); + TEST_COMPARE (stdc_bit_floor (1024ULL), 1024ULL); + TEST_COMPARE (stdc_bit_ceil ((uc) 0), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((uc) 0), uc), 1); + TEST_COMPARE (stdc_bit_ceil ((us) 0), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((us) 0), us), 1); + TEST_COMPARE (stdc_bit_ceil (0U), 1U); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0U), ui), 1); + TEST_COMPARE (stdc_bit_ceil (0UL), 1UL); + 
TEST_COMPARE (expr_has_type (stdc_bit_ceil (0UL), ul), 1); + TEST_COMPARE (stdc_bit_ceil (0ULL), 1ULL); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0ULL), ull), 1); + TEST_COMPARE (stdc_bit_ceil ((uc) ~0U), 0); + TEST_COMPARE (stdc_bit_ceil ((us) ~0U), 0); + TEST_COMPARE (stdc_bit_ceil (~0U), 0U); + TEST_COMPARE (stdc_bit_ceil (~0UL), 0UL); + TEST_COMPARE (stdc_bit_ceil (~0ULL), 0ULL); + TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)), + (1U << (sizeof (short) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)), + (1U << (sizeof (short) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0U >> 1), + (1U << (sizeof (int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (1U << (sizeof (int) * CHAR_BIT - 1)), + (1U << (sizeof (int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0UL >> 1), + (1UL << (sizeof (long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0UL >> 1), + (1UL << (sizeof (long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (1ULL + << (sizeof (long long int) * CHAR_BIT - 1)), + (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0ULL >> 1), + (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((uc) 1), 1); + TEST_COMPARE (stdc_bit_ceil ((uc) 2), 2); + TEST_COMPARE (stdc_bit_ceil ((us) 3U), 4); + TEST_COMPARE (stdc_bit_ceil ((us) 4U), 4); + TEST_COMPARE (stdc_bit_ceil (5U), 8U); + TEST_COMPARE (stdc_bit_ceil (269U), 512U); + TEST_COMPARE (stdc_bit_ceil (511UL), 512UL); + TEST_COMPARE (stdc_bit_ceil (512UL), 512UL); + TEST_COMPARE (stdc_bit_ceil (513ULL), 1024ULL); + TEST_COMPARE (stdc_bit_ceil (1025ULL), 2048ULL); +# ifdef __SIZEOF_INT128__ + TEST_COMPARE (stdc_leading_zeros ((unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned 
__int128) 0), ui), + 1); + TEST_COMPARE (stdc_leading_zeros (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_leading_ones ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_leading_ones (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_zeros ((unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_zeros (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_trailing_ones ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_trailing_ones (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_first_leading_zero ((unsigned __int128) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_first_leading_one ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_first_leading_one (~(unsigned __int128) 0), 1); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned __int128) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned __int128) + 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_first_trailing_one ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_first_trailing_one (~(unsigned __int128) 0), 1); + TEST_COMPARE (stdc_count_zeros ((unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_count_zeros (~(unsigned __int128) 0), 0); + TEST_COMPARE 
(stdc_count_ones ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_count_ones (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_has_single_bit ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned __int128) 0), + _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_bit_width ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned __int128) 0), ui), 1); + TEST_COMPARE (stdc_bit_width (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_bit_floor ((unsigned __int128) 0) != 0, 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned __int128) 0), + unsigned __int128), 1); + TEST_COMPARE (stdc_bit_floor (~(unsigned __int128) 0) + != ((unsigned __int128) 1) << (sizeof (__int128) + * CHAR_BIT - 1), 0); + TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 0) != 1, 0); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned __int128) 0), + unsigned __int128), 1); + TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 1) != 1, 0); + TEST_COMPARE (stdc_bit_ceil ((~(unsigned __int128) 0) >> 1) + != ((unsigned __int128) 1) << (sizeof (__int128) + * CHAR_BIT - 1), 0); + TEST_COMPARE (stdc_bit_ceil (~(unsigned __int128) 0) != 0, 0); +# endif + uc a = 0; + TEST_COMPARE (stdc_bit_width (a++), 0); + TEST_COMPARE (a, 1); + ull b = 0; + TEST_COMPARE (stdc_bit_width (b++), 0); + TEST_COMPARE (b, 1); + TEST_COMPARE (stdc_bit_floor (a++), 1); + TEST_COMPARE (a, 2); + TEST_COMPARE (stdc_bit_floor (b++), 1); + TEST_COMPARE (b, 2); + TEST_COMPARE (stdc_bit_ceil (a++), 2); + TEST_COMPARE (a, 3); + TEST_COMPARE (stdc_bit_ceil (b++), 2); + TEST_COMPARE (b, 3); + TEST_COMPARE (stdc_leading_zeros (a++), CHAR_BIT - 2); + TEST_COMPARE (a, 4); + TEST_COMPARE (stdc_leading_zeros (b++), + sizeof (long long int) * CHAR_BIT - 2); + TEST_COMPARE (b, 4); + 
TEST_COMPARE (stdc_leading_ones (a++), 0); + TEST_COMPARE (a, 5); + TEST_COMPARE (stdc_leading_ones (b++), 0); + TEST_COMPARE (b, 5); + TEST_COMPARE (stdc_trailing_zeros (a++), 0); + TEST_COMPARE (a, 6); + TEST_COMPARE (stdc_trailing_zeros (b++), 0); + TEST_COMPARE (b, 6); + TEST_COMPARE (stdc_trailing_ones (a++), 0); + TEST_COMPARE (a, 7); + TEST_COMPARE (stdc_trailing_ones (b++), 0); + TEST_COMPARE (b, 7); + TEST_COMPARE (stdc_first_leading_zero (a++), 1); + TEST_COMPARE (a, 8); + TEST_COMPARE (stdc_first_leading_zero (b++), 1); + TEST_COMPARE (b, 8); + TEST_COMPARE (stdc_first_leading_one (a++), CHAR_BIT - 3); + TEST_COMPARE (a, 9); + TEST_COMPARE (stdc_first_leading_one (b++), + sizeof (long long int) * CHAR_BIT - 3); + TEST_COMPARE (b, 9); + TEST_COMPARE (stdc_first_trailing_zero (a++), 2); + TEST_COMPARE (a, 10); + TEST_COMPARE (stdc_first_trailing_zero (b++), 2); + TEST_COMPARE (b, 10); + TEST_COMPARE (stdc_first_trailing_one (a++), 2); + TEST_COMPARE (a, 11); + TEST_COMPARE (stdc_first_trailing_one (b++), 2); + TEST_COMPARE (b, 11); + TEST_COMPARE (stdc_count_zeros (a++), CHAR_BIT - 3); + TEST_COMPARE (a, 12); + TEST_COMPARE (stdc_count_zeros (b++), + sizeof (long long int) * CHAR_BIT - 3); + TEST_COMPARE (b, 12); + TEST_COMPARE (stdc_count_ones (a++), 2); + TEST_COMPARE (a, 13); + TEST_COMPARE (stdc_count_ones (b++), 2); + TEST_COMPARE (b, 13); + TEST_COMPARE (stdc_has_single_bit (a++), 0); + TEST_COMPARE (a, 14); + TEST_COMPARE (stdc_has_single_bit (b++), 0); + TEST_COMPARE (b, 14); +# ifdef BITINT_MAXWIDTH +# if BITINT_MAXWIDTH >= 64 + TEST_COMPARE (stdc_leading_zeros (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0uwb), ui), 1); + TEST_COMPARE (stdc_leading_zeros (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (1uwb), ui), 1); + TEST_COMPARE (stdc_leading_ones (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0uwb), ui), 1); + TEST_COMPARE (stdc_leading_ones (1uwb), 1); + TEST_COMPARE (expr_has_type 
(stdc_leading_ones (1uwb), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0uwb), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (1uwb), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0uwb), ui), 1); + TEST_COMPARE (stdc_trailing_ones (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (1uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (1uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (0uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_one (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (1uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (1uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (1uwb), ui), 1); + TEST_COMPARE (stdc_count_zeros (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0uwb), ui), 1); + TEST_COMPARE (stdc_count_zeros (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_count_zeros (1uwb), ui), 1); + TEST_COMPARE (stdc_count_ones (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0uwb), ui), 1); + TEST_COMPARE (stdc_count_ones (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_count_ones (1uwb), ui), 1); + TEST_COMPARE (stdc_has_single_bit (0uwb), 0); + TEST_COMPARE 
(expr_has_type (stdc_has_single_bit (0uwb), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (1uwb), _Bool), 1); + TEST_COMPARE (stdc_bit_width (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0uwb), ui), 1); + TEST_COMPARE (stdc_bit_width (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_width (1uwb), ui), 1); + TEST_COMPARE (stdc_bit_floor (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0uwb), unsigned _BitInt(1)), 1); + TEST_COMPARE (stdc_bit_floor (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_floor (1uwb), unsigned _BitInt(1)), 1); + TEST_COMPARE (stdc_bit_ceil (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0uwb), unsigned _BitInt(1)), 1); + TEST_COMPARE (stdc_bit_ceil (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (1uwb), unsigned _BitInt(1)), 1); + unsigned _BitInt(1) c = 0; + TEST_COMPARE (stdc_bit_floor (c++), 0); + TEST_COMPARE (c, 1); + TEST_COMPARE (stdc_bit_floor (c++), 1); + TEST_COMPARE (c, 0); + TEST_COMPARE (stdc_bit_ceil (c++), 1); + TEST_COMPARE (c, 1); + TEST_COMPARE (stdc_bit_ceil (c++), 1); + TEST_COMPARE (c, 0); +# endif +# if BITINT_MAXWIDTH >= 512 + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 275), 512 - 9); + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 512), 373 - 10); + TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE 
(stdc_leading_ones ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 275), 512 - 9); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 512), 373 - 10); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 256), 8); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 512), 9); + TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 255), 8); + TEST_COMPARE (stdc_trailing_ones ((~(unsigned _BitInt(373)) 0) >> 2), + 373 - 2); + TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(512)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(373)) + 0), ui), 1); + 
TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 511), + 512 - 8); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 1023), + 373 - 9); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(512)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(373)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 275), 512 - 8); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 512), 373 - 9); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned + _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned + _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 255), 9); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 511), 10); + TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(512)) + 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(373)) + 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (~(unsigned 
_BitInt(512)) 0), 1); + TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (stdc_first_trailing_one (((unsigned _BitInt(512)) 255) << 175), + 176); + TEST_COMPARE (stdc_first_trailing_one ((~(unsigned _BitInt(373)) 0) << 311), + 312); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 1315), 512 - 5); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 3363), 373 - 6); + TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 1315), 512 - 5); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 3363), 373 - 6); + TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(512)) 0), + _Bool), 1); + TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(373)) 0), + _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(512)) 1022) << 279), 
+ 0); + TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(373)) 12) << 305), 0); + TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(512)) 1023) << 405), + 405 + 10); + TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(373)) 1024) << 242), + 242 + 11); + TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(512)) 0) != 0, 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(512)) 0), + unsigned _BitInt(512)), 1); + TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(373)) 0) != 0, 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(373)) 0), + unsigned _BitInt(373)), 1); + TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(512)) 0) + != ((unsigned _BitInt(512)) 1) << (512 - 1), 0); + TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(373)) 0) + != ((unsigned _BitInt(373)) 1) << (373 - 1), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(512)) 511) << 405) + != (((unsigned _BitInt(512)) 256) << 405), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(373)) 512) << 242) + != (((unsigned _BitInt(512)) 512) << 242), 0); + TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(512)) 0) != 1, 0); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(512)) 0), + unsigned _BitInt(512)), 1); + TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(373)) 0) != 1, 0); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(373)) 0), + unsigned _BitInt(373)), 1); + TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(512)) 0) != 0, 0); + TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(373)) 0) != 0, 0); + TEST_COMPARE 
(stdc_bit_ceil (((unsigned _BitInt(512)) 1) << (512 - 1)) + != ((unsigned _BitInt(512)) 1) << (512 - 1), 0); + TEST_COMPARE (stdc_bit_ceil ((~(unsigned _BitInt(373)) 0) >> 1) + != ((unsigned _BitInt(373)) 1) << (373 - 1), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 512) << 405) + != (((unsigned _BitInt(512)) 512) << 405), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(373)) 513) << 242) + != (((unsigned _BitInt(512)) 1024) << 242), 0); + TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0, + 0); + TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) + != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH + - 1), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 511) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 256) << 405), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0); + TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 1, 0); + TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0, + 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 1) + << (BITINT_MAXWIDTH - 1)) + != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH + - 1), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 513) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 1024) << 405), 0); +# endif +# endif + return 0; +} +#else +static int +do_test (void) +{ + return 0; +} +#endif + +#include <support/test-driver.c> -- 2.36.1
Starting with commit e57d8fc97b90127de4ed3e3a9cdf663667580935 "S390: Always use svc 0", clone clobbers the call-saved register r7 in the error case where the function or stack argument is NULL. This patch also restores the saved registers in the error case. Furthermore, the existing test misc/tst-clone is extended to check all error cases and to verify that clone does not clobber registers in these error cases. (cherry picked from commit 02782fd12849b6673cb5c2728cb750e8ec295aa3) --- sysdeps/unix/sysv/linux/s390/s390-32/clone.S | 1 + sysdeps/unix/sysv/linux/s390/s390-64/clone.S | 1 + sysdeps/unix/sysv/linux/tst-clone.c | 73 ++++++++++++++++---- 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S index 4c882ef2ee..a7a863242c 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S +++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S @@ -53,6 +53,7 @@ ENTRY(__clone) br %r14 error: lhi %r2,-EINVAL + lm %r6,%r7,24(%r15) /* Load registers. */ j SYSCALL_ERROR_LABEL PSEUDO_END (__clone) diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S index 4eb104be71..c552a6b8de 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S +++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S @@ -54,6 +54,7 @@ ENTRY(__clone) br %r14 error: lghi %r2,-EINVAL + lmg %r6,%r7,48(%r15) /* Restore registers. */ jg SYSCALL_ERROR_LABEL PSEUDO_END (__clone) diff --git a/sysdeps/unix/sysv/linux/tst-clone.c b/sysdeps/unix/sysv/linux/tst-clone.c index 470676ab2b..2bc7124983 100644 --- a/sysdeps/unix/sysv/linux/tst-clone.c +++ b/sysdeps/unix/sysv/linux/tst-clone.c @@ -16,12 +16,16 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -/* BZ #2386 */ +/* BZ #2386, BZ #31402 */ #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sched.h> +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. 
*/ +#include <support/check.h> + +volatile unsigned v = 0xdeadbeef; int child_fn(void *arg) { @@ -30,22 +34,67 @@ int child_fn(void *arg) } static int -do_test (void) +__attribute__((noinline)) +do_clone (int (*fn)(void *), void *stack) { int result; + unsigned int a = v; + unsigned int b = v; + unsigned int c = v; + unsigned int d = v; + unsigned int e = v; + unsigned int f = v; + unsigned int g = v; + unsigned int h = v; + unsigned int i = v; + unsigned int j = v; + unsigned int k = v; + unsigned int l = v; + unsigned int m = v; + unsigned int n = v; + unsigned int o = v; + + result = clone (fn, stack, 0, NULL); + + /* Check that clone does not clobber call-saved registers. */ + TEST_VERIFY (a == v && b == v && c == v && d == v && e == v && f == v + && g == v && h == v && i == v && j == v && k == v && l == v + && m == v && n == v && o == v); + + return result; +} + +static void +__attribute__((noinline)) +do_test_single (int (*fn)(void *), void *stack) +{ + printf ("%s (fn=%p, stack=%p)\n", __FUNCTION__, fn, stack); + errno = 0; + + int result = do_clone (fn, stack); + + TEST_COMPARE (errno, EINVAL); + TEST_COMPARE (result, -1); +} - result = clone (child_fn, NULL, 0, NULL); +static int +do_test (void) +{ + char st[128 * 1024] __attribute__ ((aligned)); + void *stack = NULL; +#if _STACK_GROWS_DOWN + stack = st + sizeof (st); +#elif _STACK_GROWS_UP + stack = st; +#else +# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP" +#endif - if (errno != EINVAL || result != -1) - { - printf ("FAIL: clone()=%d (wanted -1) errno=%d (wanted %d)\n", - result, errno, EINVAL); - return 1; - } + do_test_single (child_fn, NULL); + do_test_single (NULL, stack); + do_test_single (NULL, NULL); - puts ("All OK"); return 0; } -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" +#include <support/test-driver.c> -- 2.43.2
Starting with commits - 7ea510127e2067efa07865158ac92c330c379950 string: Add libc_hidden_proto for strchrnul - 22999b2f0fb62eed1af4095d062bd1272d6afeb1 string: Add libc_hidden_proto for memrchr building glibc on s390x with --disable-multi-arch fails if only the C-variant of strchrnul / memrchr is used. This is the case if gcc uses -march < z13. The build fails with: ../sysdeps/s390/strchrnul-c.c:28:49: error: ‘__strchrnul_c’ undeclared here (not in a function); did you mean ‘__strchrnul’? 28 | __hidden_ver1 (__strchrnul_c, __GI___strchrnul, __strchrnul_c); With --disable-multi-arch, __strchrnul_c is not available as string/strchrnul.c is just included without defining STRCHRNUL and thus we also don't have to create the internal hidden symbol. Tested-by: Andreas K. Hüttel <dilfridge@gentoo.org> (cherry picked from commit cc1b91eabd806057aa7e3058a84bf129ed36e157) --- sysdeps/s390/memrchr-c.c | 4 +++- sysdeps/s390/strchrnul-c.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sysdeps/s390/memrchr-c.c b/sysdeps/s390/memrchr-c.c index bdf3c7bbe0..fadd63087a 100644 --- a/sysdeps/s390/memrchr-c.c +++ b/sysdeps/s390/memrchr-c.c @@ -25,7 +25,9 @@ # include <string/memrchr.c> -# if defined SHARED && IS_IN (libc) +# if HAVE_MEMRCHR_IFUNC +# if defined SHARED && IS_IN (libc) __hidden_ver1 (__memrchr_c, __GI___memrchr, __memrchr_c); +# endif # endif #endif diff --git a/sysdeps/s390/strchrnul-c.c b/sysdeps/s390/strchrnul-c.c index f6f5bae311..97fbc16edb 100644 --- a/sysdeps/s390/strchrnul-c.c +++ b/sysdeps/s390/strchrnul-c.c @@ -24,7 +24,9 @@ # endif # include <string/strchrnul.c> -# if defined SHARED && IS_IN (libc) +# if HAVE_STRCHRNUL_IFUNC +# if defined SHARED && IS_IN (libc) __hidden_ver1 (__strchrnul_c, __GI___strchrnul, __strchrnul_c); +# endif # endif #endif -- 2.43.0
Subject: New GNU C Library (glibc) security flaw reported on 30 Jan 2024 Good day from Singapore, I recently stumbled upon this insightful article and wanted to share it with you. Article: New Linux glibc flaw lets attackers get root on major distros Link: https://www.bleepingcomputer.com/news/security/new-linux-glibc-flaw-lets-attackers-get-root-on-major-distros/ Thank you. Regards, Mr. Turritopsis Dohrnii Teo En Ming Targeted Individual in Singapore Blogs: https://tdtemcerts.blogspot.com https://tdtemcerts.wordpress.com GIMP also stands for Government-Induced Medical Problems.
On Wed, Dec 20, 2023 at 7:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> _dl_tlsdesc_undefweak and _dl_tlsdesc_dynamic access the thread pointer
> via the tcb field in TCB:
>
> _dl_tlsdesc_undefweak:
> _CET_ENDBR
> movq 8(%rax), %rax
> subq %fs:0, %rax
> ret
>
> _dl_tlsdesc_dynamic:
> ...
> subq %fs:0, %rax
> movq -8(%rsp), %rdi
> ret
>
> Since the tcb field in TCB is a pointer, %fs:0 is a 32-bit location on x32,
> not 64-bit. It should use "sub %fs:0, %RAX_LP" instead. Since
> _dl_tlsdesc_undefweak returns ptrdiff_t and _dl_make_tlsdesc_dynamic
> returns void *, RAX_LP is appropriate here for x32 and x86-64. This
> fixes BZ #31185.
> ---
> sysdeps/x86_64/dl-tlsdesc.S | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
> index c4823547d7..4579424bf7 100644
> --- a/sysdeps/x86_64/dl-tlsdesc.S
> +++ b/sysdeps/x86_64/dl-tlsdesc.S
> @@ -61,7 +61,7 @@ _dl_tlsdesc_return:
> _dl_tlsdesc_undefweak:
> _CET_ENDBR
> movq 8(%rax), %rax
> - subq %fs:0, %rax
> + sub %fs:0, %RAX_LP
> ret
> cfi_endproc
> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> @@ -116,7 +116,7 @@ _dl_tlsdesc_dynamic:
> addq TLSDESC_MODOFF(%rdi), %rax
> .Lret:
> movq -16(%rsp), %rsi
> - subq %fs:0, %rax
> + sub %fs:0, %RAX_LP
> movq -8(%rsp), %rdi
> ret
> .Lslow:
> --
> 2.43.0
>
I will check it in tomorrow and backport it to release branches later.
Thanks.
--
H.J.
On Wed, Dec 20, 2023 at 4:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On x32, I got
>
> FAIL: elf/tst-tlsgap
>
> $ gdb elf/tst-tlsgap
> ...
> open tst-tlsgap-mod1.so
>
> Thread 2 "tst-tlsgap" received signal SIGSEGV, Segmentation fault.
> [Switching to LWP 2268754]
> _dl_tlsdesc_dynamic () at ../sysdeps/x86_64/dl-tlsdesc.S:108
> 108 movq (%rsi), %rax
> (gdb) p/x $rsi
> $4 = 0xf7dbf9005655fb18
> (gdb)
>
> This is caused by
>
> _dl_tlsdesc_dynamic:
> _CET_ENDBR
> /* Preserve call-clobbered registers that we modify.
> We need two scratch regs anyway. */
> movq %rsi, -16(%rsp)
> movq %fs:DTV_OFFSET, %rsi
>
> Since the dtv field in TCB is a pointer, %fs:DTV_OFFSET is a 32-bit
> location, not 64-bit. Load the dtv field to RSI_LP instead of rsi.
> This fixes BZ #31184.
> ---
> sysdeps/x86_64/dl-tlsdesc.S | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
> index 5593897e29..c4823547d7 100644
> --- a/sysdeps/x86_64/dl-tlsdesc.S
> +++ b/sysdeps/x86_64/dl-tlsdesc.S
> @@ -102,7 +102,7 @@ _dl_tlsdesc_dynamic:
> /* Preserve call-clobbered registers that we modify.
> We need two scratch regs anyway. */
> movq %rsi, -16(%rsp)
> - movq %fs:DTV_OFFSET, %rsi
> + mov %fs:DTV_OFFSET, %RSI_LP
> movq %rdi, -8(%rsp)
> movq TLSDESC_ARG(%rax), %rdi
> movq (%rsi), %rax
> --
> 2.43.0
>
I will check it in tomorrow and backport it to release branches later.
Thanks.
--
H.J.
Backported from glibc 2.39 development. --- sysdeps/loongarch/dl-trampoline.h | 68 +++++++++++++++---------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h index 02375286f8..99fcacab76 100644 --- a/sysdeps/loongarch/dl-trampoline.h +++ b/sysdeps/loongarch/dl-trampoline.h @@ -19,9 +19,9 @@ /* Assembler veneer called from the PLT header code for lazy loading. The PLT header passes its own args in t0-t2. */ #ifdef USE_LASX -# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK)) +# define FRAME_SIZE (-((-9 * SZREG - 8 * SZXREG) & ALMASK)) #elif defined USE_LSX -# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZVREG) & ALMASK)) +# define FRAME_SIZE (-((-9 * SZREG - 8 * SZVREG) & ALMASK)) #elif !defined __loongarch_soft_float # define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG) & ALMASK)) #else @@ -44,23 +44,23 @@ ENTRY (_dl_runtime_resolve) REG_S a7, sp, 8*SZREG #ifdef USE_LASX - xvst xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG - xvst xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG - xvst xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG - xvst xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG - xvst xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG - xvst xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG - xvst xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG - xvst xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG + xvst xr0, sp, 9*SZREG + 0*SZXREG + xvst xr1, sp, 9*SZREG + 1*SZXREG + xvst xr2, sp, 9*SZREG + 2*SZXREG + xvst xr3, sp, 9*SZREG + 3*SZXREG + xvst xr4, sp, 9*SZREG + 4*SZXREG + xvst xr5, sp, 9*SZREG + 5*SZXREG + xvst xr6, sp, 9*SZREG + 6*SZXREG + xvst xr7, sp, 9*SZREG + 7*SZXREG #elif defined USE_LSX - vst vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG - vst vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG - vst vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG - vst vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG - vst vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG - vst vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG - vst vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG - vst vr7, sp, 
9*SZREG + 8*SZFREG + 7*SZVREG + vst vr0, sp, 9*SZREG + 0*SZVREG + vst vr1, sp, 9*SZREG + 1*SZVREG + vst vr2, sp, 9*SZREG + 2*SZVREG + vst vr3, sp, 9*SZREG + 3*SZVREG + vst vr4, sp, 9*SZREG + 4*SZVREG + vst vr5, sp, 9*SZREG + 5*SZVREG + vst vr6, sp, 9*SZREG + 6*SZVREG + vst vr7, sp, 9*SZREG + 7*SZVREG #elif !defined __loongarch_soft_float FREG_S fa0, sp, 9*SZREG + 0*SZFREG FREG_S fa1, sp, 9*SZREG + 1*SZFREG @@ -92,23 +92,23 @@ ENTRY (_dl_runtime_resolve) REG_L a7, sp, 8*SZREG #ifdef USE_LASX - xvld xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG - xvld xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG - xvld xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG - xvld xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG - xvld xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG - xvld xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG - xvld xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG - xvld xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG + xvld xr0, sp, 9*SZREG + 0*SZXREG + xvld xr1, sp, 9*SZREG + 1*SZXREG + xvld xr2, sp, 9*SZREG + 2*SZXREG + xvld xr3, sp, 9*SZREG + 3*SZXREG + xvld xr4, sp, 9*SZREG + 4*SZXREG + xvld xr5, sp, 9*SZREG + 5*SZXREG + xvld xr6, sp, 9*SZREG + 6*SZXREG + xvld xr7, sp, 9*SZREG + 7*SZXREG #elif defined USE_LSX - vld vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG - vld vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG - vld vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG - vld vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG - vld vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG - vld vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG - vld vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG - vld vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG + vld vr0, sp, 9*SZREG + 0*SZVREG + vld vr1, sp, 9*SZREG + 1*SZVREG + vld vr2, sp, 9*SZREG + 2*SZVREG + vld vr3, sp, 9*SZREG + 3*SZVREG + vld vr4, sp, 9*SZREG + 4*SZVREG + vld vr5, sp, 9*SZREG + 5*SZVREG + vld vr6, sp, 9*SZREG + 6*SZVREG + vld vr7, sp, 9*SZREG + 7*SZVREG #elif !defined __loongarch_soft_float FREG_L fa0, sp, 9*SZREG + 0*SZFREG FREG_L fa1, sp, 9*SZREG + 1*SZFREG -- 2.39.3
From: Sergio Durigan Junior <sergiodj@sergiodj.net> When invoking sem_open with O_CREAT as one of its flags, we'll end up in the second part of sem_open's "if ((oflag & O_CREAT) == 0 || (oflag & O_EXCL) == 0)", which means that we don't expect the semaphore file to exist. In that part, open_flags is initialized as "O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC" and there's an attempt to open(2) the file, which will likely fail because it won't exist. After that first (expected) failure, some cleanup is done and we go back to the label "try_again", which lives in the first part of the aforementioned "if". The problem is that, in that part of the code, we expect the semaphore file to exist, and as such O_CREAT (this time the flag we pass to open(2)) needs to be cleaned from open_flags, otherwise we'll see another failure (this time unexpected) when trying to open the file, which will lead the call to sem_open to fail as well. This can cause very strange bugs, especially with OpenMPI, which makes extensive use of semaphores. Fix the bug by simplifying the logic when choosing open(2) flags and making sure O_CREAT is not set when the semaphore file is expected to exist. A regression test for this issue would require complex and CPU-time-consuming logic, since triggering the wrong code path is not straightforward due to the race condition. There is a somewhat reliable reproducer in the bug, but it requires using OpenMPI. This resolves BZ #30789. 
See also: https://bugs.launchpad.net/ubuntu/+source/h5py/+bug/2031912 Signed-off-by: Sergio Durigan Junior <sergiodj@sergiodj.net> Co-Authored-By: Simon Chopin <simon.chopin@canonical.com> Co-Authored-By: Adhemerval Zanella Netto <adhemerval.zanella@linaro.org> Fixes: 533deafbdf189f5fbb280c28562dd43ace2f4b0f ("Use O_CLOEXEC in more places (BZ #15722)") (cherry picked from commit f957f47df75b9fab995754011491edebc6feb147) --- NEWS | 2 ++ sysdeps/pthread/sem_open.c | 10 ++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index f117874e34..5ac488bf9b 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,8 @@ Security related changes: The following bugs are resolved with this release: [30723] posix_memalign repeatedly scans long bin lists + [30789] sem_open will fail on multithreaded scenarios when semaphore + file doesn't exist (O_CREAT) [30804] F_GETLK, F_SETLK, and F_SETLKW value change for powerpc64 with -D_FILE_OFFSET_BITS=64 [30842] Stack read overflow in getaddrinfo in no-aaaa mode (CVE-2023-4527) diff --git a/sysdeps/pthread/sem_open.c b/sysdeps/pthread/sem_open.c index e5db929d20..0e331a7445 100644 --- a/sysdeps/pthread/sem_open.c +++ b/sysdeps/pthread/sem_open.c @@ -32,11 +32,12 @@ # define __unlink unlink #endif +#define SEM_OPEN_FLAGS (O_RDWR | O_NOFOLLOW | O_CLOEXEC) + sem_t * __sem_open (const char *name, int oflag, ...) { int fd; - int open_flags; sem_t *result; /* Check that shared futexes are supported. */ @@ -65,10 +66,8 @@ __sem_open (const char *name, int oflag, ...) /* If the semaphore object has to exist simply open it. */ if ((oflag & O_CREAT) == 0 || (oflag & O_EXCL) == 0) { - open_flags = O_RDWR | O_NOFOLLOW | O_CLOEXEC; - open_flags |= (oflag & ~(O_CREAT|O_ACCMODE)); try_again: - fd = __open (dirname.name, open_flags); + fd = __open (dirname.name, (oflag & O_EXCL) | SEM_OPEN_FLAGS); if (fd == -1) { @@ -135,8 +134,7 @@ __sem_open (const char *name, int oflag, ...) } /* Open the file. Make sure we do not overwrite anything. 
*/ - open_flags = O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC; - fd = __open (tmpfname, open_flags, mode); + fd = __open (tmpfname, O_CREAT | O_EXCL | SEM_OPEN_FLAGS, mode); if (fd == -1) { if (errno == EEXIST) -- 2.42.0
The string parsing routine may end up writing beyond the bounds of tunestr if the input tunable string is malformed, of the form name=name=val. This gets processed twice, first as name=name=val and next as name=val, resulting in tunestr being name=name=val:name=val, thus overflowing tunestr. Terminate the parsing loop at the first instance itself so that tunestr does not overflow. This also fixes up tst-env-setuid-tunables to actually handle failures correctly and adds new tests to validate the fix for this CVE. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> Reviewed-by: Carlos O'Donell <carlos@redhat.com> (cherry picked from commit 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa) --- NEWS | 5 +++++ elf/dl-tunables.c | 17 +++++++++------- elf/tst-env-setuid-tunables.c | 37 +++++++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index e16f9968f3..625780a01a 100644 --- a/NEWS +++ b/NEWS @@ -55,6 +55,11 @@ Security related changes: an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED flags set. + CVE-2023-4911: If a tunable of the form NAME=NAME=VAL is passed in the + environment of a setuid program and NAME is valid, it may result in a + buffer overflow, which could be exploited to achieve escalated + privileges. This flaw was introduced in glibc 2.34. + The following bugs are resolved with this release: [11053] regex: Wrong results with backreferences diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c index 8009e54ee5..837474b504 100644 --- a/elf/dl-tunables.c +++ b/elf/dl-tunables.c @@ -188,11 +188,7 @@ parse_tunables (char *tunestr, char *valstring) /* If we reach the end of the string before getting a valid name-value pair, bail out. */ if (p[len] == '\0') - { - if (__libc_enable_secure) - tunestr[off] = '\0'; - return; - } + break; /* We did not find a valid name-value pair before encountering the colon. 
*/ @@ -252,9 +248,16 @@ parse_tunables (char *tunestr, char *valstring) } } - if (p[len] != '\0') - p += len + 1; + /* We reached the end while processing the tunable string. */ + if (p[len] == '\0') + break; + + p += len + 1; } + + /* Terminate tunestr before we leave. */ + if (__libc_enable_secure) + tunestr[off] = '\0'; } #endif diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index 05619c9adc..cd4e843640 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -52,6 +52,8 @@ const char *teststrings[] = "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2", "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", ":glibc.malloc.garbage=2:glibc.malloc.check=1", @@ -70,6 +72,8 @@ const char *resultstrings[] = "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "", "", "", "", @@ -84,11 +88,18 @@ test_child (int off) const char *val = getenv ("GLIBC_TUNABLES"); #if HAVE_TUNABLES + printf (" [%d] GLIBC_TUNABLES is %s\n", off, val); + fflush (stdout); if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); + printf (" [%d] Unexpected GLIBC_TUNABLES VALUE %s, expected %s\n", + off, val, resultstrings[off]); + else + printf (" [%d] GLIBC_TUNABLES environment variable absent\n", off); + + fflush (stdout); return 1; #else @@ -117,21 +128,26 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - exit (EXIT_SUCCESS); + /* Special return 
code to make sure that the child executed all the way + through. */ + exit (42); } else { - int ret = 0; - /* Spawn tests. */ for (int i = 0; i < array_length (teststrings); i++) { char buf[INT_BUFSIZE_BOUND (int)]; - printf ("Spawned test for %s (%d)\n", teststrings[i], i); + printf ("[%d] Spawned test for %s\n", i, teststrings[i]); snprintf (buf, sizeof (buf), "%d\n", i); + fflush (stdout); if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) - exit (1); + { + printf (" [%d] Failed to set GLIBC_TUNABLES: %m", i); + support_record_failure (); + continue; + } int status = support_capture_subprogram_self_sgid (buf); @@ -139,9 +155,14 @@ do_test (int argc, char **argv) if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) return EXIT_UNSUPPORTED; - ret |= status; + if (WEXITSTATUS (status) != 42) + { + printf (" [%d] child failed with status %d\n", i, + WEXITSTATUS (status)); + support_record_failure (); + } } - return ret; + return 0; } } -- 2.41.0
The string parsing routine may end up writing beyond the bounds of tunestr if the input tunable string is malformed, of the form name=name=val. This gets processed twice, first as name=name=val and next as name=val, resulting in tunestr being name=name=val:name=val, thus overflowing tunestr. Terminate the parsing loop at the first instance itself so that tunestr does not overflow. This also fixes up tst-env-setuid-tunables to actually handle failures correctly and adds new tests to validate the fix for this CVE. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> Reviewed-by: Carlos O'Donell <carlos@redhat.com> (cherry picked from commit 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa) --- NEWS | 5 +++++ elf/dl-tunables.c | 17 +++++++++------- elf/tst-env-setuid-tunables.c | 37 +++++++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index ae55ffb53a..5358e0cbe3 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,11 @@ Security related changes: an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED flags set. + CVE-2023-4911: If a tunable of the form NAME=NAME=VAL is passed in the + environment of a setuid program and NAME is valid, it may result in a + buffer overflow, which could be exploited to achieve escalated + privileges. This flaw was introduced in glibc 2.34. + The following bugs are resolved with this release: [12154] Do not fail DNS resolution for CNAMEs which are not host names diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c index 8e7ee9df10..76cf8b9da3 100644 --- a/elf/dl-tunables.c +++ b/elf/dl-tunables.c @@ -187,11 +187,7 @@ parse_tunables (char *tunestr, char *valstring) /* If we reach the end of the string before getting a valid name-value pair, bail out. */ if (p[len] == '\0') - { - if (__libc_enable_secure) - tunestr[off] = '\0'; - return; - } + break; /* We did not find a valid name-value pair before encountering the colon. 
*/ @@ -251,9 +247,16 @@ parse_tunables (char *tunestr, char *valstring) } } - if (p[len] != '\0') - p += len + 1; + /* We reached the end while processing the tunable string. */ + if (p[len] == '\0') + break; + + p += len + 1; } + + /* Terminate tunestr before we leave. */ + if (__libc_enable_secure) + tunestr[off] = '\0'; } #endif diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index 88182b7b25..5e9e4c5756 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -52,6 +52,8 @@ const char *teststrings[] = "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2", "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", ":glibc.malloc.garbage=2:glibc.malloc.check=1", @@ -70,6 +72,8 @@ const char *resultstrings[] = "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "", "", "", "", @@ -84,11 +88,18 @@ test_child (int off) const char *val = getenv ("GLIBC_TUNABLES"); #if HAVE_TUNABLES + printf (" [%d] GLIBC_TUNABLES is %s\n", off, val); + fflush (stdout); if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); + printf (" [%d] Unexpected GLIBC_TUNABLES VALUE %s, expected %s\n", + off, val, resultstrings[off]); + else + printf (" [%d] GLIBC_TUNABLES environment variable absent\n", off); + + fflush (stdout); return 1; #else @@ -117,21 +128,26 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - exit (EXIT_SUCCESS); + /* Special return 
code to make sure that the child executed all the way + through. */ + exit (42); } else { - int ret = 0; - /* Spawn tests. */ for (int i = 0; i < array_length (teststrings); i++) { char buf[INT_BUFSIZE_BOUND (int)]; - printf ("Spawned test for %s (%d)\n", teststrings[i], i); + printf ("[%d] Spawned test for %s\n", i, teststrings[i]); snprintf (buf, sizeof (buf), "%d\n", i); + fflush (stdout); if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) - exit (1); + { + printf (" [%d] Failed to set GLIBC_TUNABLES: %m", i); + support_record_failure (); + continue; + } int status = support_capture_subprogram_self_sgid (buf); @@ -139,9 +155,14 @@ do_test (int argc, char **argv) if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) return EXIT_UNSUPPORTED; - ret |= status; + if (WEXITSTATUS (status) != 42) + { + printf (" [%d] child failed with status %d\n", i, + WEXITSTATUS (status)); + support_record_failure (); + } } - return ret; + return 0; } } -- 2.41.0
The string parsing routine may end up writing beyond bounds of tunestr if the input tunable string is malformed, of the form name=name=val. This gets processed twice, first as name=name=val and next as name=val, resulting in tunestr being name=name=val:name=val, thus overflowing tunestr. Terminate the parsing loop at the first instance itself so that tunestr does not overflow. This also fixes up tst-env-setuid-tunables to actually handle failures correctly and adds new tests to validate the fix for this CVE. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> Reviewed-by: Carlos O'Donell <carlos@redhat.com> (cherry picked from commit 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa) --- NEWS | 5 +++++ elf/dl-tunables.c | 17 +++++++++------- elf/tst-env-setuid-tunables.c | 37 +++++++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index f1b1b0a3b4..bfcd46efa9 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,11 @@ Security related changes: an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED flags set. + CVE-2023-4911: If a tunable of the form NAME=NAME=VAL is passed in the + environment of a setuid program and NAME is valid, it may result in a + buffer overflow, which could be exploited to achieve escalated + privileges. This flaw was introduced in glibc 2.34. + The following bugs are resolved with this release: [30723] posix_memalign repeatedly scans long bin lists diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c index 62b7332d95..cae67efa0a 100644 --- a/elf/dl-tunables.c +++ b/elf/dl-tunables.c @@ -180,11 +180,7 @@ parse_tunables (char *tunestr, char *valstring) /* If we reach the end of the string before getting a valid name-value pair, bail out. */ if (p[len] == '\0') - { - if (__libc_enable_secure) - tunestr[off] = '\0'; - return; - } + break; /* We did not find a valid name-value pair before encountering the colon.
*/ @@ -244,9 +240,16 @@ parse_tunables (char *tunestr, char *valstring) } } - if (p[len] != '\0') - p += len + 1; + /* We reached the end while processing the tunable string. */ + if (p[len] == '\0') + break; + + p += len + 1; } + + /* Terminate tunestr before we leave. */ + if (__libc_enable_secure) + tunestr[off] = '\0'; } /* Enable the glibc.malloc.check tunable in SETUID/SETGID programs only when diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index 7dfb0e073a..f0b92c97e7 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -50,6 +50,8 @@ const char *teststrings[] = "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2", "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", ":glibc.malloc.garbage=2:glibc.malloc.check=1", @@ -68,6 +70,8 @@ const char *resultstrings[] = "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "", "", "", "", @@ -81,11 +85,18 @@ test_child (int off) { const char *val = getenv ("GLIBC_TUNABLES"); + printf (" [%d] GLIBC_TUNABLES is %s\n", off, val); + fflush (stdout); if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); + printf (" [%d] Unexpected GLIBC_TUNABLES VALUE %s, expected %s\n", + off, val, resultstrings[off]); + else + printf (" [%d] GLIBC_TUNABLES environment variable absent\n", off); + + fflush (stdout); return 1; } @@ -106,21 +117,26 @@ do_test (int argc, char **argv) if (ret != 0) 
exit (1); - exit (EXIT_SUCCESS); + /* Special return code to make sure that the child executed all the way + through. */ + exit (42); } else { - int ret = 0; - /* Spawn tests. */ for (int i = 0; i < array_length (teststrings); i++) { char buf[INT_BUFSIZE_BOUND (int)]; - printf ("Spawned test for %s (%d)\n", teststrings[i], i); + printf ("[%d] Spawned test for %s\n", i, teststrings[i]); snprintf (buf, sizeof (buf), "%d\n", i); + fflush (stdout); if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) - exit (1); + { + printf (" [%d] Failed to set GLIBC_TUNABLES: %m", i); + support_record_failure (); + continue; + } int status = support_capture_subprogram_self_sgid (buf); @@ -128,9 +144,14 @@ do_test (int argc, char **argv) if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) return EXIT_UNSUPPORTED; - ret |= status; + if (WEXITSTATUS (status) != 42) + { + printf (" [%d] child failed with status %d\n", i, + WEXITSTATUS (status)); + support_record_failure (); + } } - return ret; + return 0; } } -- 2.41.0
The string parsing routine may end up writing beyond bounds of tunestr if the input tunable string is malformed, of the form name=name=val. This gets processed twice, first as name=name=val and next as name=val, resulting in tunestr being name=name=val:name=val, thus overflowing tunestr. Terminate the parsing loop at the first instance itself so that tunestr does not overflow. This also fixes up tst-env-setuid-tunables to actually handle failures correctly and adds new tests to validate the fix for this CVE. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> Reviewed-by: Carlos O'Donell <carlos@redhat.com> (cherry picked from commit 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa) --- NEWS | 5 +++++ elf/dl-tunables.c | 17 +++++++++------- elf/tst-env-setuid-tunables.c | 37 +++++++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 3725cc4820..b351537a78 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,11 @@ Security related changes: an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED flags set. + CVE-2023-4911: If a tunable of the form NAME=NAME=VAL is passed in the + environment of a setuid program and NAME is valid, it may result in a + buffer overflow, which could be exploited to achieve escalated + privileges. This flaw was introduced in glibc 2.34. + The following bugs are resolved with this release: [20975] Deferred cancellation triggers in __check_pf and looses lock leading to deadlock diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c index 327b9eb52f..985b69c180 100644 --- a/elf/dl-tunables.c +++ b/elf/dl-tunables.c @@ -187,11 +187,7 @@ parse_tunables (char *tunestr, char *valstring) /* If we reach the end of the string before getting a valid name-value pair, bail out. */ if (p[len] == '\0') - { - if (__libc_enable_secure) - tunestr[off] = '\0'; - return; - } + break; /* We did not find a valid name-value pair before encountering the colon.
*/ @@ -251,9 +247,16 @@ parse_tunables (char *tunestr, char *valstring) } } - if (p[len] != '\0') - p += len + 1; + /* We reached the end while processing the tunable string. */ + if (p[len] == '\0') + break; + + p += len + 1; } + + /* Terminate tunestr before we leave. */ + if (__libc_enable_secure) + tunestr[off] = '\0'; } #endif diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index 807b426012..1f5e7f4f06 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -52,6 +52,8 @@ const char *teststrings[] = "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2", "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", ":glibc.malloc.garbage=2:glibc.malloc.check=1", @@ -70,6 +72,8 @@ const char *resultstrings[] = "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "", "", "", "", @@ -84,11 +88,18 @@ test_child (int off) const char *val = getenv ("GLIBC_TUNABLES"); #if HAVE_TUNABLES + printf (" [%d] GLIBC_TUNABLES is %s\n", off, val); + fflush (stdout); if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); + printf (" [%d] Unexpected GLIBC_TUNABLES VALUE %s, expected %s\n", + off, val, resultstrings[off]); + else + printf (" [%d] GLIBC_TUNABLES environment variable absent\n", off); + + fflush (stdout); return 1; #else @@ -117,21 +128,26 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - exit (EXIT_SUCCESS); + /* Special return 
code to make sure that the child executed all the way + through. */ + exit (42); } else { - int ret = 0; - /* Spawn tests. */ for (int i = 0; i < array_length (teststrings); i++) { char buf[INT_BUFSIZE_BOUND (int)]; - printf ("Spawned test for %s (%d)\n", teststrings[i], i); + printf ("[%d] Spawned test for %s\n", i, teststrings[i]); snprintf (buf, sizeof (buf), "%d\n", i); + fflush (stdout); if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) - exit (1); + { + printf (" [%d] Failed to set GLIBC_TUNABLES: %m", i); + support_record_failure (); + continue; + } int status = support_capture_subprogram_self_sgid (buf); @@ -139,9 +155,14 @@ do_test (int argc, char **argv) if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) return EXIT_UNSUPPORTED; - ret |= status; + if (WEXITSTATUS (status) != 42) + { + printf (" [%d] child failed with status %d\n", i, + WEXITSTATUS (status)); + support_record_failure (); + } } - return ret; + return 0; } } -- 2.41.0
The string parsing routine may end up writing beyond bounds of tunestr if the input tunable string is malformed, of the form name=name=val. This gets processed twice, first as name=name=val and next as name=val, resulting in tunestr being name=name=val:name=val, thus overflowing tunestr. Terminate the parsing loop at the first instance itself so that tunestr does not overflow. This also fixes up tst-env-setuid-tunables to actually handle failures correctly and adds new tests to validate the fix for this CVE. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> Reviewed-by: Carlos O'Donell <carlos@redhat.com> (cherry picked from commit 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa) --- NEWS | 5 +++++ elf/dl-tunables.c | 17 +++++++++------- elf/tst-env-setuid-tunables.c | 37 +++++++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 36da15a922..be1c4ffaee 100644 --- a/NEWS +++ b/NEWS @@ -97,6 +97,11 @@ Security related changes: an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED flags set. + CVE-2023-4911: If a tunable of the form NAME=NAME=VAL is passed in the + environment of a setuid program and NAME is valid, it may result in a + buffer overflow, which could be exploited to achieve escalated + privileges. This flaw was introduced in glibc 2.34. + \f Version 2.35 diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c index 8e7ee9df10..76cf8b9da3 100644 --- a/elf/dl-tunables.c +++ b/elf/dl-tunables.c @@ -187,11 +187,7 @@ parse_tunables (char *tunestr, char *valstring) /* If we reach the end of the string before getting a valid name-value pair, bail out. */ if (p[len] == '\0') - { - if (__libc_enable_secure) - tunestr[off] = '\0'; - return; - } + break; /* We did not find a valid name-value pair before encountering the colon.
*/ @@ -251,9 +247,16 @@ parse_tunables (char *tunestr, char *valstring) } } - if (p[len] != '\0') - p += len + 1; + /* We reached the end while processing the tunable string. */ + if (p[len] == '\0') + break; + + p += len + 1; } + + /* Terminate tunestr before we leave. */ + if (__libc_enable_secure) + tunestr[off] = '\0'; } #endif diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index 88182b7b25..5e9e4c5756 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -52,6 +52,8 @@ const char *teststrings[] = "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2", "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", ":glibc.malloc.garbage=2:glibc.malloc.check=1", @@ -70,6 +72,8 @@ const char *resultstrings[] = "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=glibc.malloc.mmap_threshold=4096", + "", "", "", "", @@ -84,11 +88,18 @@ test_child (int off) const char *val = getenv ("GLIBC_TUNABLES"); #if HAVE_TUNABLES + printf (" [%d] GLIBC_TUNABLES is %s\n", off, val); + fflush (stdout); if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); + printf (" [%d] Unexpected GLIBC_TUNABLES VALUE %s, expected %s\n", + off, val, resultstrings[off]); + else + printf (" [%d] GLIBC_TUNABLES environment variable absent\n", off); + + fflush (stdout); return 1; #else @@ -117,21 +128,26 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - exit (EXIT_SUCCESS); + /* Special return 
code to make sure that the child executed all the way + through. */ + exit (42); } else { - int ret = 0; - /* Spawn tests. */ for (int i = 0; i < array_length (teststrings); i++) { char buf[INT_BUFSIZE_BOUND (int)]; - printf ("Spawned test for %s (%d)\n", teststrings[i], i); + printf ("[%d] Spawned test for %s\n", i, teststrings[i]); snprintf (buf, sizeof (buf), "%d\n", i); + fflush (stdout); if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) - exit (1); + { + printf (" [%d] Failed to set GLIBC_TUNABLES: %m", i); + support_record_failure (); + continue; + } int status = support_capture_subprogram_self_sgid (buf); @@ -139,9 +155,14 @@ do_test (int argc, char **argv) if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) return EXIT_UNSUPPORTED; - ret |= status; + if (WEXITSTATUS (status) != 42) + { + printf (" [%d] child failed with status %d\n", i, + WEXITSTATUS (status)); + support_record_failure (); + } } - return ret; + return 0; } } -- 2.41.0
GLIBC_TUNABLES scrubbing happens earlier than envvar scrubbing, and some tunables are required to propagate past the setxid boundary, like their env_alias. Rely on tunable scrubbing to clean out GLIBC_TUNABLES like before, restoring the behaviour of glibc 2.37 and earlier. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> Reviewed-by: Carlos O'Donell <carlos@redhat.com> (cherry picked from commit 0d5f9ea97f1b39f2a855756078771673a68497e1) --- sysdeps/generic/unsecvars.h | 1 - 1 file changed, 1 deletion(-) diff --git a/sysdeps/generic/unsecvars.h b/sysdeps/generic/unsecvars.h index 81397fb90b..8278c50a84 100644 --- a/sysdeps/generic/unsecvars.h +++ b/sysdeps/generic/unsecvars.h @@ -4,7 +4,6 @@ #define UNSECURE_ENVVARS \ "GCONV_PATH\0" \ "GETCONF_DIR\0" \ - "GLIBC_TUNABLES\0" \ "HOSTALIASES\0" \ "LD_AUDIT\0" \ "LD_DEBUG\0" \ -- 2.41.0
Backport fixes for privilege escalation due to buffer overwrite in tunables processing. Thanks, Sid -- 2.41.0
Linux dilfridge-amd64-stable 6.1.41-gentoo-dist #1 SMP PREEMPT_DYNAMIC Tue Jul 25 09:26:34 -00 2023 x86_64 AMD Ryzen 7 3700X 8-Core Processor AuthenticAMD GNU/Linux 32bit build on x86-64 Signed-off-by: Andreas K. Hüttel <dilfridge@gentoo.org> --- sysdeps/i386/i686/fpu/multiarch/libm-test-ulps | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index 8705a7822c..6f50cec761 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -1617,7 +1617,7 @@ ldouble: 5 Function: "y0_towardzero": double: 4 -float: 8 +float: 9 float128: 3 ldouble: 8 -- 2.41.0
These are tracked in BZ #30884 and BZ #30843. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> (cherry picked from commit fd134feba35fa839018965733b34d28a09a075dd) --- NEWS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS b/NEWS index dfee278a9c..f1b1b0a3b4 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,15 @@ Security related changes: 2048 bytes, getaddrinfo may potentially disclose stack contents via the returned address data, or crash. + CVE-2023-4806: When an NSS plugin only implements the + _gethostbyname2_r and _getcanonname_r callbacks, getaddrinfo could use + memory that was freed during buffer resizing, potentially causing a + crash or read or write to arbitrary memory. + + CVE-2023-5156: The fix for CVE-2023-4806 introduced a memory leak when + an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, + AI_ALL and AI_V4MAPPED flags set. + The following bugs are resolved with this release: [30723] posix_memalign repeatedly scans long bin lists -- 2.41.0
These are tracked in BZ #30884 and BZ #30843. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> (cherry picked from commit fd134feba35fa839018965733b34d28a09a075dd) --- NEWS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS b/NEWS index d6cd902bd3..3725cc4820 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,15 @@ Security related changes: 2048 bytes, getaddrinfo may potentially disclose stack contents via the returned address data, or crash. + CVE-2023-4806: When an NSS plugin only implements the + _gethostbyname2_r and _getcanonname_r callbacks, getaddrinfo could use + memory that was freed during buffer resizing, potentially causing a + crash or read or write to arbitrary memory. + + CVE-2023-5156: The fix for CVE-2023-4806 introduced a memory leak when + an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, + AI_ALL and AI_V4MAPPED flags set. + The following bugs are resolved with this release: [20975] Deferred cancellation triggers in __check_pf and looses lock leading to deadlock -- 2.41.0
These are tracked in BZ #30884 and BZ #30843. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> (cherry picked from commit fd134feba35fa839018965733b34d28a09a075dd) --- NEWS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/NEWS b/NEWS index 94adcee263..36da15a922 100644 --- a/NEWS +++ b/NEWS @@ -86,6 +86,17 @@ The following bugs are resolved with this release: [30804] F_GETLK, F_SETLK, and F_SETLKW value change for powerpc64 with -D_FILE_OFFSET_BITS=64 +Security related changes: + + CVE-2023-4806: When an NSS plugin only implements the + _gethostbyname2_r and _getcanonname_r callbacks, getaddrinfo could use + memory that was freed during buffer resizing, potentially causing a + crash or read or write to arbitrary memory. + + CVE-2023-5156: The fix for CVE-2023-4806 introduced a memory leak when + an application calls getaddrinfo for AF_INET6 with AI_CANONNAME, + AI_ALL and AI_V4MAPPED flags set. + \f Version 2.35 -- 2.41.0