From bfb716e07b77f0ed8e0c2689d5cd01e2c8251fc5 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 12 May 2017 13:38:04 -0700 Subject: [PATCH] x86: Update __x86_shared_non_temporal_threshold __x86_shared_non_temporal_threshold was set to 6 times of per-core shared cache size, based on the large memcpy micro benchmark in glibc on a 8-core processor. For a processor with more than 8 cores, the threshold is too low. Set __x86_shared_non_temporal_threshold to the 3/4 of the total shared cache size so that it is unchanged on 8-core processors. On processors with less than 8 cores, the threshold is lower. * sysdeps/x86/cacheinfo.c (__x86_shared_non_temporal_threshold): Set to the 3/4 of the total shared cache size. --- sysdeps/x86/cacheinfo.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index 1ccbe41..3434d97 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -766,6 +766,8 @@ intel_bug_no_cache_info: /* The large memcpy micro benchmark in glibc shows that 6 times of shared cache size is the approximate value above which non-temporal - store becomes faster. */ - __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6; + store becomes faster on a 8-core processor. This is the 3/4 of the + total shared cache size. */ + __x86_shared_non_temporal_threshold + = __x86_shared_cache_size * threads * 3 / 4; } -- 2.9.4