public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] AArch64: Check kernel version for SVE ifuncs
@ 2024-03-18 14:14 Wilco Dijkstra
  2024-03-20 15:05 ` Szabolcs Nagy
  2024-10-07 18:14 ` Aurelien Jarno
  0 siblings, 2 replies; 9+ messages in thread
From: Wilco Dijkstra @ 2024-03-18 14:14 UTC (permalink / raw)
  To: 'GNU C Library'; +Cc: Florian Weimer, Adhemerval Zanella, Szabolcs Nagy


v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.

Old Linux kernels disable SVE after every system call.  Calling the
SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
As a result, applications with a high use of syscalls may run slower with
the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
except for 5.14.0 which was patched.  Avoid this by checking the kernel
version and selecting the SVE ifunc on modern kernels.

Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
value without calling any library functions.  If uname() is not supported or
if the version format is not recognized, assume the kernel is modern.

Passes regress, OK for commit?

---

diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
index 77a782422af1b6e4b2af32bfebfda37874111510..5f2da91ebbd0adafb0d84ec503b0f902f566da5a 100644
--- a/sysdeps/aarch64/cpu-features.h
+++ b/sysdeps/aarch64/cpu-features.h
@@ -71,6 +71,7 @@ struct cpu_features
   /* Currently, the GLIBC memory tagging tunable only defines 8 bits.  */
   uint8_t mte_state;
   bool sve;
+  bool prefer_sve_ifuncs;
   bool mops;
 };
 
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index c52860efb22d70eb4bdf356781f51c7de8ec67dc..61dc40088f4d9e5e06b57bdc7d54bde1e2a686a4 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -36,5 +36,7 @@
     MTE_ENABLED ();							      \
   bool __attribute__((unused)) sve =					      \
     GLRO(dl_aarch64_cpu_features).sve;					      \
+  bool __attribute__((unused)) prefer_sve_ifuncs =			      \
+    GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs;			      \
   bool __attribute__((unused)) mops =					      \
     GLRO(dl_aarch64_cpu_features).mops;
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index d12eccfca51f4bcfef6ccf5aa286edb301e361ac..ce53567dab33c2f00b89b4069235abd4651811a6 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
     {
       if (IS_A64FX (midr))
 	return __memcpy_a64fx;
-      return __memcpy_sve;
+      return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
     }
 
   if (IS_THUNDERX (midr))
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index 2081eeb4d40e0240e67a7b26b64576f44eaf18e3..fe95037be391896c7670ef606bf4d3ba7dfb6a00 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -47,7 +47,7 @@ select_memmove_ifunc (void)
     {
       if (IS_A64FX (midr))
 	return __memmove_a64fx;
-      return __memmove_sve;
+      return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
     }
 
   if (IS_THUNDERX (midr))
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index b1a3f673f067280bdacfddd92723a81e418023e5..c0b047bc0dbeae428c89e12688b7d802e4cb3a43 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -21,6 +21,7 @@
 #include <sys/auxv.h>
 #include <elf/dl-hwcaps.h>
 #include <sys/prctl.h>
+#include <sys/utsname.h>
 #include <dl-tunables-parse.h>
 
 #define DCZID_DZP_MASK (1 << 4)
@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu)
   return UINT64_MAX;
 }
 
+#if __LINUX_KERNEL_VERSION < 0x060200
+
+/* Return true if we prefer using SVE in string ifuncs.  Old kernels disable
+   SVE after every system call which results in unnecessary traps if memcpy
+   uses SVE.  This is true for kernels between 4.15.0 and before 6.2.0, except
+   for 5.14.0 which was patched.  For these versions return false to avoid using
+   SVE ifuncs.
+   Parse the kernel version into a 24-bit kernel.major.minor value without
+   calling any library functions.  If uname() is not supported or if the version
+   format is not recognized, assume the kernel is modern and return true.  */
+
+static inline bool
+prefer_sve_ifuncs (void)
+{
+  struct utsname buf;
+  const char *p = &buf.release[0];
+  int kernel = 0;
+  int val;
+
+  if (__uname (&buf) < 0)
+    return true;
+
+  for (int shift = 16; shift >= 0; shift -= 8)
+    {
+      for (val = 0; *p >= '0' && *p <= '9'; p++)
+	val = val * 10 + *p - '0';
+      kernel |= (val & 255) << shift;
+      if (*p++ != '.')
+	break;
+    }
+
+  if (kernel >= 0x060200 || kernel == 0x050e00)
+    return true;
+  if (kernel >= 0x040f00)
+    return false;
+  return true;
+}
+
+#endif
+
 static inline void
 init_cpu_features (struct cpu_features *cpu_features)
 {
@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features)
   /* Check if SVE is supported.  */
   cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
 
+  cpu_features->prefer_sve_ifuncs = cpu_features->sve;
+
+#if __LINUX_KERNEL_VERSION < 0x060200
+  if (cpu_features->sve)
+    cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
+#endif
+
   /* Check if MOPS is supported.  */
   cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
 }


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-18 14:14 [PATCH v2] AArch64: Check kernel version for SVE ifuncs Wilco Dijkstra
@ 2024-03-20 15:05 ` Szabolcs Nagy
  2024-03-20 15:39   ` Florian Weimer
  2024-10-07 18:14 ` Aurelien Jarno
  1 sibling, 1 reply; 9+ messages in thread
From: Szabolcs Nagy @ 2024-03-20 15:05 UTC (permalink / raw)
  To: Wilco Dijkstra, 'GNU C Library'
  Cc: Florian Weimer, Adhemerval Zanella

The 03/18/2024 14:14, Wilco Dijkstra wrote:
> 
> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
> 
> Old Linux kernels disable SVE after every system call.  Calling the
> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
> As a result, applications with a high use of syscalls may run slower with
> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
> except for 5.14.0 which was patched.  Avoid this by checking the kernel
> version and selecting the SVE ifunc on modern kernels.
> 
> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
> value without calling any library functions.  If uname() is not supported or
> if the version format is not recognized, assume the kernel is modern.
> 
> Passes regress, OK for commit?

OK to commit. (clearly a hack but what can we do..)

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>



> 
> ---
> 
> diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
> index 77a782422af1b6e4b2af32bfebfda37874111510..5f2da91ebbd0adafb0d84ec503b0f902f566da5a 100644
> --- a/sysdeps/aarch64/cpu-features.h
> +++ b/sysdeps/aarch64/cpu-features.h
> @@ -71,6 +71,7 @@ struct cpu_features
>    /* Currently, the GLIBC memory tagging tunable only defines 8 bits.  */
>    uint8_t mte_state;
>    bool sve;
> +  bool prefer_sve_ifuncs;
>    bool mops;
>  };
> 
> diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
> index c52860efb22d70eb4bdf356781f51c7de8ec67dc..61dc40088f4d9e5e06b57bdc7d54bde1e2a686a4 100644
> --- a/sysdeps/aarch64/multiarch/init-arch.h
> +++ b/sysdeps/aarch64/multiarch/init-arch.h
> @@ -36,5 +36,7 @@
>      MTE_ENABLED ();                                                          \
>    bool __attribute__((unused)) sve =                                         \
>      GLRO(dl_aarch64_cpu_features).sve;                                       \
> +  bool __attribute__((unused)) prefer_sve_ifuncs =                           \
> +    GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs;                         \
>    bool __attribute__((unused)) mops =                                        \
>      GLRO(dl_aarch64_cpu_features).mops;
> diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
> index d12eccfca51f4bcfef6ccf5aa286edb301e361ac..ce53567dab33c2f00b89b4069235abd4651811a6 100644
> --- a/sysdeps/aarch64/multiarch/memcpy.c
> +++ b/sysdeps/aarch64/multiarch/memcpy.c
> @@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
>      {
>        if (IS_A64FX (midr))
>         return __memcpy_a64fx;
> -      return __memcpy_sve;
> +      return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
>      }
> 
>    if (IS_THUNDERX (midr))
> diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
> index 2081eeb4d40e0240e67a7b26b64576f44eaf18e3..fe95037be391896c7670ef606bf4d3ba7dfb6a00 100644
> --- a/sysdeps/aarch64/multiarch/memmove.c
> +++ b/sysdeps/aarch64/multiarch/memmove.c
> @@ -47,7 +47,7 @@ select_memmove_ifunc (void)
>      {
>        if (IS_A64FX (midr))
>         return __memmove_a64fx;
> -      return __memmove_sve;
> +      return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
>      }
> 
>    if (IS_THUNDERX (midr))
> diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
> index b1a3f673f067280bdacfddd92723a81e418023e5..c0b047bc0dbeae428c89e12688b7d802e4cb3a43 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
> +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
> @@ -21,6 +21,7 @@
>  #include <sys/auxv.h>
>  #include <elf/dl-hwcaps.h>
>  #include <sys/prctl.h>
> +#include <sys/utsname.h>
>  #include <dl-tunables-parse.h>
> 
>  #define DCZID_DZP_MASK (1 << 4)
> @@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu)
>    return UINT64_MAX;
>  }
> 
> +#if __LINUX_KERNEL_VERSION < 0x060200
> +
> +/* Return true if we prefer using SVE in string ifuncs.  Old kernels disable
> +   SVE after every system call which results in unnecessary traps if memcpy
> +   uses SVE.  This is true for kernels between 4.15.0 and before 6.2.0, except
> +   for 5.14.0 which was patched.  For these versions return false to avoid using
> +   SVE ifuncs.
> +   Parse the kernel version into a 24-bit kernel.major.minor value without
> +   calling any library functions.  If uname() is not supported or if the version
> +   format is not recognized, assume the kernel is modern and return true.  */
> +
> +static inline bool
> +prefer_sve_ifuncs (void)
> +{
> +  struct utsname buf;
> +  const char *p = &buf.release[0];
> +  int kernel = 0;
> +  int val;
> +
> +  if (__uname (&buf) < 0)
> +    return true;
> +
> +  for (int shift = 16; shift >= 0; shift -= 8)
> +    {
> +      for (val = 0; *p >= '0' && *p <= '9'; p++)
> +       val = val * 10 + *p - '0';
> +      kernel |= (val & 255) << shift;
> +      if (*p++ != '.')
> +       break;
> +    }
> +
> +  if (kernel >= 0x060200 || kernel == 0x050e00)
> +    return true;
> +  if (kernel >= 0x040f00)
> +    return false;
> +  return true;
> +}
> +
> +#endif
> +
>  static inline void
>  init_cpu_features (struct cpu_features *cpu_features)
>  {
> @@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features)
>    /* Check if SVE is supported.  */
>    cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
> 
> +  cpu_features->prefer_sve_ifuncs = cpu_features->sve;
> +
> +#if __LINUX_KERNEL_VERSION < 0x060200
> +  if (cpu_features->sve)
> +    cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
> +#endif
> +
>    /* Check if MOPS is supported.  */
>    cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
>  }
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-20 15:05 ` Szabolcs Nagy
@ 2024-03-20 15:39   ` Florian Weimer
  2024-03-20 16:13     ` Szabolcs Nagy
  0 siblings, 1 reply; 9+ messages in thread
From: Florian Weimer @ 2024-03-20 15:39 UTC (permalink / raw)
  To: Szabolcs Nagy; +Cc: Wilco Dijkstra, 'GNU C Library', Adhemerval Zanella

* Szabolcs Nagy:

> The 03/18/2024 14:14, Wilco Dijkstra wrote:
>> 
>> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
>> 
>> Old Linux kernels disable SVE after every system call.  Calling the
>> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
>> As a result, applications with a high use of syscalls may run slower with
>> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
>> except for 5.14.0 which was patched.  Avoid this by checking the kernel
>> version and selecting the SVE ifunc on modern kernels.
>> 
>> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
>> value without calling any library functions.  If uname() is not supported or
>> if the version format is not recognized, assume the kernel is modern.
>> 
>> Passes regress, OK for commit?
>
> OK to commit. (clearly a hack but what can we do..)
>
> Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>

I have not had a chance to test this yet with the el9 kernel.

I will try to do this tomorrow.

Do we need to include other distribution LTS kernels in the version
check?

Thanks,
Florian


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-20 15:39   ` Florian Weimer
@ 2024-03-20 16:13     ` Szabolcs Nagy
  2024-03-21 11:44       ` Florian Weimer
  0 siblings, 1 reply; 9+ messages in thread
From: Szabolcs Nagy @ 2024-03-20 16:13 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Wilco Dijkstra, 'GNU C Library', Adhemerval Zanella

The 03/20/2024 16:39, Florian Weimer wrote:
> * Szabolcs Nagy:
> 
> > The 03/18/2024 14:14, Wilco Dijkstra wrote:
> >> 
> >> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
> >> 
> >> Old Linux kernels disable SVE after every system call.  Calling the
> >> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
> >> As a result, applications with a high use of syscalls may run slower with
> >> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
> >> except for 5.14.0 which was patched.  Avoid this by checking the kernel
> >> version and selecting the SVE ifunc on modern kernels.
> >> 
> >> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
> >> value without calling any library functions.  If uname() is not supported or
> >> if the version format is not recognized, assume the kernel is modern.
> >> 
> >> Passes regress, OK for commit?
> >
> > OK to commit. (clearly a hack but what can we do..)
> >
> > Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
> 
> I have not had a chance to test this yet with the el9 kernel.
> 
> I will try to do this tomorrow.
> 
> Do we need to include other distribution LTS kernels in the version
> check?

i checked some (suse,ubuntu,debian) and they didn't have the
SVE backport (maybe we should request those backports, but
today the version check looks ok)


> 
> Thanks,
> Florian
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-20 16:13     ` Szabolcs Nagy
@ 2024-03-21 11:44       ` Florian Weimer
  2024-03-21 15:43         ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 9+ messages in thread
From: Florian Weimer @ 2024-03-21 11:44 UTC (permalink / raw)
  To: Szabolcs Nagy; +Cc: Wilco Dijkstra, 'GNU C Library', Adhemerval Zanella

* Szabolcs Nagy:

> The 03/20/2024 16:39, Florian Weimer wrote:
>> * Szabolcs Nagy:
>> 
>> > The 03/18/2024 14:14, Wilco Dijkstra wrote:
>> >> 
>> >> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
>> >> 
>> >> Old Linux kernels disable SVE after every system call.  Calling the
>> >> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
>> >> As a result, applications with a high use of syscalls may run slower with
>> >> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
>> >> except for 5.14.0 which was patched.  Avoid this by checking the kernel
>> >> version and selecting the SVE ifunc on modern kernels.
>> >> 
>> >> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
>> >> value without calling any library functions.  If uname() is not supported or
>> >> if the version format is not recognized, assume the kernel is modern.
>> >> 
>> >> Passes regress, OK for commit?
>> >
>> > OK to commit. (clearly a hack but what can we do..)
>> >
>> > Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
>> 
>> I have not had a chance to test this yet with the el9 kernel.
>> 
>> I will try to do this tomorrow.
>> 
>> Do we need to include other distribution LTS kernels in the version
>> check?
>
> i checked some (suse,ubuntu,debian) and they didn't have the
> SVE backport (maybe we should request those backports, but
> today the version check looks ok)

I think we should make this change in one commit if at all possible,
otherwise we risk creating too much divergence.  But if you consider
further kernel backports unlikely, the v2 patch is okay.

Anyway, I verified this against 4.18 and 5.14 kernels, and got the
expected results (__memcpy_generic and __memcpy_sve).

Tested-by: Florian Weimer <fweimer@redhat.com>

Thanks,
Florian


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-21 11:44       ` Florian Weimer
@ 2024-03-21 15:43         ` Adhemerval Zanella Netto
  2024-03-21 16:11           ` Florian Weimer
  0 siblings, 1 reply; 9+ messages in thread
From: Adhemerval Zanella Netto @ 2024-03-21 15:43 UTC (permalink / raw)
  To: Florian Weimer, Szabolcs Nagy; +Cc: Wilco Dijkstra, 'GNU C Library'



On 21/03/24 08:44, Florian Weimer wrote:
> * Szabolcs Nagy:
> 
>> The 03/20/2024 16:39, Florian Weimer wrote:
>>> * Szabolcs Nagy:
>>>
>>>> The 03/18/2024 14:14, Wilco Dijkstra wrote:
>>>>>
>>>>> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
>>>>>
>>>>> Old Linux kernels disable SVE after every system call.  Calling the
>>>>> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
>>>>> As a result, applications with a high use of syscalls may run slower with
>>>>> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
>>>>> except for 5.14.0 which was patched.  Avoid this by checking the kernel
>>>>> version and selecting the SVE ifunc on modern kernels.
>>>>>
>>>>> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
>>>>> value without calling any library functions.  If uname() is not supported or
>>>>> if the version format is not recognized, assume the kernel is modern.
>>>>>
>>>>> Passes regress, OK for commit?
>>>>
>>>> OK to commit. (clearly a hack but what can we do..)
>>>>
>>>> Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
>>>
>>> I have not had a chance to test this yet with the el9 kernel.
>>>
>>> I will try to do this tomorrow.
>>>
>>> Do we need to include other distribution LTS kernels in the version
>>> check?
>>
>> i checked some (suse,ubuntu,debian) and they didn't have the
>> SVE backport (maybe we should request those backports, but
>> today the version check looks ok)
> 
> I think we should make this change in one commit if at all possible,
> otherwise we risk creating too much divergence.  But if you consider
> further kernel backports unlikely, the v2 patch is okay.
> 
> Anyway, I verified this against 4.18 and 5.14 kernels, and got the
> expected results (__memcpy_generic and __memcpy_sve).

If you have check through the string tests, you might not get the 
correct answer since it uses the ifunc-impl-list.c and this patch
does not changes the reported functions.

> 
> Tested-by: Florian Weimer <fweimer@redhat.com>
> 
> Thanks,
> Florian
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-21 15:43         ` Adhemerval Zanella Netto
@ 2024-03-21 16:11           ` Florian Weimer
  0 siblings, 0 replies; 9+ messages in thread
From: Florian Weimer @ 2024-03-21 16:11 UTC (permalink / raw)
  To: Adhemerval Zanella Netto
  Cc: Szabolcs Nagy, Wilco Dijkstra, 'GNU C Library'

* Adhemerval Zanella Netto:

>> Anyway, I verified this against 4.18 and 5.14 kernels, and got the
>> expected results (__memcpy_generic and __memcpy_sve).
>
> If you have check through the string tests, you might not get the 
> correct answer since it uses the ifunc-impl-list.c and this patch
> does not changes the reported functions.

No, I used GDB to resolve the addresses returned by dlsym back to a
symbol.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-03-18 14:14 [PATCH v2] AArch64: Check kernel version for SVE ifuncs Wilco Dijkstra
  2024-03-20 15:05 ` Szabolcs Nagy
@ 2024-10-07 18:14 ` Aurelien Jarno
  2024-10-07 19:33   ` Florian Weimer
  1 sibling, 1 reply; 9+ messages in thread
From: Aurelien Jarno @ 2024-10-07 18:14 UTC (permalink / raw)
  To: Wilco Dijkstra
  Cc: 'GNU C Library',
	Florian Weimer, Adhemerval Zanella, Szabolcs Nagy, libc-stable

Hi

On 2024-03-18 14:14, Wilco Dijkstra wrote:
> 
> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
> 
> Old Linux kernels disable SVE after every system call.  Calling the
> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
> As a result, applications with a high use of syscalls may run slower with
> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
> except for 5.14.0 which was patched.  Avoid this by checking the kernel
> version and selecting the SVE ifunc on modern kernels.
> 
> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
> value without calling any library functions.  If uname() is not supported or
> if the version format is not recognized, assume the kernel is modern.
> 
> Passes regress, OK for commit?
> 

This commit breaks getpw* functions when using "compat" in nsswitch.conf
and static binaries. This has been reported in Debian as bug #1083095
and involving bash. Here is a simple reproducer:

| #include <pwd.h>
| #include <sys/types.h>
| #include <unistd.h>
|
| int main()
| {
|     return getpwuid(getuid()) != NULL;
| }

When compiled statically against a libc without this patch and executed
against a libc with this patch, this leads to a segmentation fault with
the following backtrace (note that libnss_nis.so.2 is not available on
the system):

| #0  0x0000000000000000 in ?? ()
| #1  0x0000fffff7dda718 in _dl_open (file=0xfffff7cd0650 "libnss_nis.so.2", mode=-2147483646, caller_dlopen=0xfffff7f32748 <module_load+152>, nsid=-2, argc=1, argv=0xfffffffff3d8, env=0xfffffffff3e8) at ./elf/dl-open.c:830
| #2  0x0000fffff7f4eca0 in do_dlopen (ptr=ptr@entry=0xffffffffeca8) at ./elf/dl-libc.c:95
| #3  0x0000fffff7f4e8bc in __GI__dl_catch_exception (exception=exception@entry=0xffffffffec30, operate=0xfffff7f4ec54 <do_dlopen>, args=0xffffffffeca8) at ./elf/dl-error-skeleton.c:208
| #4  0x0000fffff7f4e980 in __GI__dl_catch_error (objname=0xffffffffec78, errstring=0xffffffffec80, mallocedp=0xffffffffec77, operate=<optimized out>, args=<optimized out>) at ./elf/dl-error-skeleton.c:227
| #5  0x0000fffff7f4ebf8 in dlerror_run (operate=operate@entry=0xfffff7f4ec54 <do_dlopen>, args=args@entry=0xffffffffeca8) at ./elf/dl-libc.c:45
| #6  0x0000fffff7f4edf4 in __libc_dlopen_mode (name=<optimized out>, mode=<optimized out>) at ./elf/dl-libc.c:162
| #7  0x0000fffff7f32748 in module_load (module=0xfffff7cd1fd0) at ./nss/nss_module.c:191
| #8  0x0000fffff7f32c58 in __nss_module_load (module=0xfffff7cd1fd0) at ./nss/nss_module.c:310
| #9  __nss_module_get_function (module=0xfffff7cd1fd0, name=0xfffff7fd6780 "setpwent") at ./nss/nss_module.c:336
| #10 0x0000fffff7fd2834 in init_nss_interface () at nss_compat/compat-pwd.c:95
| #11 init_nss_interface () at nss_compat/compat-pwd.c:91
| #12 0x0000fffff7fd40b0 in _nss_compat_getpwuid_r (uid=1000, pwd=0x4b6e58 <resbuf>, buffer=0x4b94d0 "", buflen=1024, errnop=0x4b8770) at nss_compat/compat-pwd.c:1063
| #13 0x000000000040a2a4 in getpwuid_r ()
| #14 0x000000000040a098 in getpwuid ()
| #15 0x00000000004006e4 in main ()

I am fully aware of the "warning: Using 'dlopen' in statically linked
applications requires at runtime the shared libraries from the glibc
version used for linking" message displayed when linking statically,
that said if we can't avoid the breakage, we should probably not
backport such changes in the stable branches.


Regards
Aurelien

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                     http://aurel32.net

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] AArch64: Check kernel version for SVE ifuncs
  2024-10-07 18:14 ` Aurelien Jarno
@ 2024-10-07 19:33   ` Florian Weimer
  0 siblings, 0 replies; 9+ messages in thread
From: Florian Weimer @ 2024-10-07 19:33 UTC (permalink / raw)
  To: Wilco Dijkstra
  Cc: 'GNU C Library', Adhemerval Zanella, Szabolcs Nagy, libc-stable

* Aurelien Jarno:

> Hi
>
> On 2024-03-18 14:14, Wilco Dijkstra wrote:
>> 
>> v2: Add __LINUX_KERNEL_VERSION #ifdefs, improve parser, use 8:8:8 format.
>> 
>> Old Linux kernels disable SVE after every system call.  Calling the
>> SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
>> As a result, applications with a high use of syscalls may run slower with
>> the SVE memcpy.  This is true for kernels between 4.15.0 and before 6.2.0,
>> except for 5.14.0 which was patched.  Avoid this by checking the kernel
>> version and selecting the SVE ifunc on modern kernels.
>> 
>> Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
>> value without calling any library functions.  If uname() is not supported or
>> if the version format is not recognized, assume the kernel is modern.
>> 
>> Passes regress, OK for commit?
>> 
>
> This commit breaks getpw* functions when using "compat" in nsswitch.conf
> and static binaries. This has been reported in Debian as bug #1083095
> and involving bash. Here is a simple reproducer:
>
> | #include <pwd.h>
> | #include <sys/types.h>
> | #include <unistd.h>
> |
> | int main()
> | {
> |     return getpwuid(getuid()) != NULL;
> | }
>
> When compiled statically against a libc without this patch and executed
> against a libc with this patch, this leads to a segmentation fault with
> the following backtrace (note that libnss_nis.so.2 is not available on
> the system):
>
> | #0  0x0000000000000000 in ?? ()
> | #1  0x0000fffff7dda718 in _dl_open (file=0xfffff7cd0650 "libnss_nis.so.2", mode=-2147483646, caller_dlopen=0xfffff7f32748 <module_load+152>, nsid=-2, argc=1, argv=0xfffffffff3d8, env=0xfffffffff3e8) at ./elf/dl-open.c:830
> | #2  0x0000fffff7f4eca0 in do_dlopen (ptr=ptr@entry=0xffffffffeca8) at ./elf/dl-libc.c:95
> | #3  0x0000fffff7f4e8bc in __GI__dl_catch_exception (exception=exception@entry=0xffffffffec30, operate=0xfffff7f4ec54 <do_dlopen>, args=0xffffffffeca8) at ./elf/dl-error-skeleton.c:208
> | #4  0x0000fffff7f4e980 in __GI__dl_catch_error (objname=0xffffffffec78, errstring=0xffffffffec80, mallocedp=0xffffffffec77, operate=<optimized out>, args=<optimized out>) at ./elf/dl-error-skeleton.c:227
> | #5  0x0000fffff7f4ebf8 in dlerror_run (operate=operate@entry=0xfffff7f4ec54 <do_dlopen>, args=args@entry=0xffffffffeca8) at ./elf/dl-libc.c:45
> | #6  0x0000fffff7f4edf4 in __libc_dlopen_mode (name=<optimized out>, mode=<optimized out>) at ./elf/dl-libc.c:162
> | #7  0x0000fffff7f32748 in module_load (module=0xfffff7cd1fd0) at ./nss/nss_module.c:191
> | #8  0x0000fffff7f32c58 in __nss_module_load (module=0xfffff7cd1fd0) at ./nss/nss_module.c:310
> | #9  __nss_module_get_function (module=0xfffff7cd1fd0, name=0xfffff7fd6780 "setpwent") at ./nss/nss_module.c:336
> | #10 0x0000fffff7fd2834 in init_nss_interface () at nss_compat/compat-pwd.c:95
> | #11 init_nss_interface () at nss_compat/compat-pwd.c:91
> | #12 0x0000fffff7fd40b0 in _nss_compat_getpwuid_r (uid=1000, pwd=0x4b6e58 <resbuf>, buffer=0x4b94d0 "", buflen=1024, errnop=0x4b8770) at nss_compat/compat-pwd.c:1063
> | #13 0x000000000040a2a4 in getpwuid_r ()
> | #14 0x000000000040a098 in getpwuid ()
> | #15 0x00000000004006e4 in main ()
>
> I am fully aware of the "warning: Using 'dlopen' in statically linked
> applications requires at runtime the shared libraries from the glibc
> version used for linking" message displayed when linking statically,
> that said if we can't avoid the breakage, we should probably not
> backport such changes in the stable branches.

This part:

diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
index 77a782422a..5f2da91ebb 100644
--- a/sysdeps/aarch64/cpu-features.h
+++ b/sysdeps/aarch64/cpu-features.h
@@ -71,6 +71,7 @@ struct cpu_features
   /* Currently, the GLIBC memory tagging tunable only defines 8 bits.  */
   uint8_t mte_state;
   bool sve;
+  bool prefer_sve_ifuncs;
   bool mops;
 };

changes the internal GLIBC_PRIVATE ABI.  As far as I can see, the GLRO
struct size increases, and some fields move their addresses.  The bug
report does not show GLRO, only GL.  I suspect that
GLRO(dl_dlfcn_hook) is now zero (taking the place that was formerly
occupied by GLRO(dl_audit).  That's why the loaded libc.so starts
using the loaded ld.so code instead of the statically linked loader.
The loaded ld.so is expected to be dormant, only very partially
initialized, and the statically linked dynamic linker should be used
instead (and that's what GLRO(dl_dlfcn_hook) is for).

It's many many months on the branch, so I'm not sure if we should work
around it at this point.  We can't realistically remain compatible
with both variants at the same time.  I suggest to rebuild bash-static.

We want to provide a way to do static linking while restricting the
use of foreign NSS modules, which would make static linking more
useful.  In the meantime, your statically linked bash could try to use
__nss_configure_lookup to switch most databases to "files", and the
hosts database to "files dns".

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-10-07 19:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-18 14:14 [PATCH v2] AArch64: Check kernel version for SVE ifuncs Wilco Dijkstra
2024-03-20 15:05 ` Szabolcs Nagy
2024-03-20 15:39   ` Florian Weimer
2024-03-20 16:13     ` Szabolcs Nagy
2024-03-21 11:44       ` Florian Weimer
2024-03-21 15:43         ` Adhemerval Zanella Netto
2024-03-21 16:11           ` Florian Weimer
2024-10-07 18:14 ` Aurelien Jarno
2024-10-07 19:33   ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).