public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] x86: Check minimum/maximum of non_temporal_threshold [BZ #29953]
@ 2023-01-03 21:06 H.J. Lu
  2023-01-03 21:15 ` Noah Goldstein
  0 siblings, 1 reply; 2+ messages in thread
From: H.J. Lu @ 2023-01-03 21:06 UTC (permalink / raw)
  To: libc-alpha; +Cc: Noah Goldstein

The minimum non_temporal_threshold is 0x4040.  non_temporal_threshold may
be set to less than the minimum value when the shared cache size isn't
available (e.g., in an emulator) or by the tunable.  Add checks for
minimum and maximum of non_temporal_threshold.

This fixes BZ #29953.
---
 sysdeps/x86/dl-cacheinfo.h | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index e9f3382108..637b5a022d 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -861,6 +861,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
      share of the cache, it has a substantial risk of negatively
      impacting the performance of other threads running on the chip. */
   unsigned long int non_temporal_threshold = shared * 3 / 4;
+  /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
+     'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
+     if that operation cannot overflow. Minimum of 0x4040 (16448) because the
+     L(large_memset_4x) loops need 64-byte to cache align and enough space for
+     at least 1 iteration of 4x PAGE_SIZE unrolled loop.  Both values are
+     reflected in the manual.  */
+  unsigned long int maximum_non_temporal_threshold = SIZE_MAX >> 4;
+  unsigned long int minimum_non_temporal_threshold = 0x4040;
+  if (non_temporal_threshold < minimum_non_temporal_threshold)
+    non_temporal_threshold = minimum_non_temporal_threshold;
+  else if (non_temporal_threshold > maximum_non_temporal_threshold)
+    non_temporal_threshold = maximum_non_temporal_threshold;
 
 #if HAVE_TUNABLES
   /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8.  */
@@ -915,8 +927,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
     shared = tunable_size;
 
   tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
-  /* NB: Ignore the default value 0.  */
-  if (tunable_size != 0)
+  if (tunable_size > minimum_non_temporal_threshold
+      && tunable_size <= maximum_non_temporal_threshold)
     non_temporal_threshold = tunable_size;
 
   tunable_size = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL);
@@ -931,14 +943,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
 
   TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
-  /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
-     'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
-     if that operation cannot overflow. Minimum of 0x4040 (16448) because the
-     L(large_memset_4x) loops need 64-byte to cache align and enough space for
-     at least 1 iteration of 4x PAGE_SIZE unrolled loop.  Both values are
-     reflected in the manual.  */
   TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold,
-			   0x4040, SIZE_MAX >> 4);
+			   minimum_non_temporal_threshold,
+			   maximum_non_temporal_threshold);
   TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold,
 			   minimum_rep_movsb_threshold, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1,
-- 
2.39.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] x86: Check minimum/maximum of non_temporal_threshold [BZ #29953]
  2023-01-03 21:06 [PATCH v2] x86: Check minimum/maximum of non_temporal_threshold [BZ #29953] H.J. Lu
@ 2023-01-03 21:15 ` Noah Goldstein
  0 siblings, 0 replies; 2+ messages in thread
From: Noah Goldstein @ 2023-01-03 21:15 UTC (permalink / raw)
  To: H.J. Lu; +Cc: libc-alpha

On Tue, Jan 3, 2023 at 1:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> The minimum non_temporal_threshold is 0x4040.  non_temporal_threshold may
> be set to less than the minimum value when the shared cache size isn't
> available (e.g., in an emulator) or by the tunable.  Add checks for
> minimum and maximum of non_temporal_threshold.
>
> This fixes BZ #29953.
> ---
>  sysdeps/x86/dl-cacheinfo.h | 25 ++++++++++++++++---------
>  1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
> index e9f3382108..637b5a022d 100644
> --- a/sysdeps/x86/dl-cacheinfo.h
> +++ b/sysdeps/x86/dl-cacheinfo.h
> @@ -861,6 +861,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
>       share of the cache, it has a substantial risk of negatively
>       impacting the performance of other threads running on the chip. */
>    unsigned long int non_temporal_threshold = shared * 3 / 4;
> +  /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
> +     'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
> +     if that operation cannot overflow. Minimum of 0x4040 (16448) because the
> +     L(large_memset_4x) loops need 64-byte to cache align and enough space for
> +     at least 1 iteration of 4x PAGE_SIZE unrolled loop.  Both values are
> +     reflected in the manual.  */
> +  unsigned long int maximum_non_temporal_threshold = SIZE_MAX >> 4;
> +  unsigned long int minimum_non_temporal_threshold = 0x4040;
> +  if (non_temporal_threshold < minimum_non_temporal_threshold)
> +    non_temporal_threshold = minimum_non_temporal_threshold;
> +  else if (non_temporal_threshold > maximum_non_temporal_threshold)
> +    non_temporal_threshold = maximum_non_temporal_threshold;
>
>  #if HAVE_TUNABLES
>    /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8.  */
> @@ -915,8 +927,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
>      shared = tunable_size;
>
>    tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
> -  /* NB: Ignore the default value 0.  */
> -  if (tunable_size != 0)
> +  if (tunable_size > minimum_non_temporal_threshold
> +      && tunable_size <= maximum_non_temporal_threshold)
>      non_temporal_threshold = tunable_size;
>
>    tunable_size = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL);
> @@ -931,14 +943,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
>
>    TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
>    TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
> -  /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
> -     'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
> -     if that operation cannot overflow. Minimum of 0x4040 (16448) because the
> -     L(large_memset_4x) loops need 64-byte to cache align and enough space for
> -     at least 1 iteration of 4x PAGE_SIZE unrolled loop.  Both values are
> -     reflected in the manual.  */
>    TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold,
> -                          0x4040, SIZE_MAX >> 4);
> +                          minimum_non_temporal_threshold,
> +                          maximum_non_temporal_threshold);
>    TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold,
>                            minimum_rep_movsb_threshold, SIZE_MAX);
>    TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1,
> --
> 2.39.0
>

LGTM.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-01-03 21:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-03 21:06 [PATCH v2] x86: Check minimum/maximum of non_temporal_threshold [BZ #29953] H.J. Lu
2023-01-03 21:15 ` Noah Goldstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).