From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7837) id E6B8A3858412; Mon, 7 Aug 2023 09:48:21 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E6B8A3858412 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1691401701; bh=RuAilB3+5hntfW2vE4FQqflRtDCPX2SqXOM7wJpOIW0=; h=From:To:Subject:Date:From; b=LW75Zml0vQsP6rbLMQpxEEnKdklf8gQyJyOmC8qHWRlUwQ9Bo8rxuOLHDfQuXI4y/ 7BmbxrF42qgbgm85EUZQrkUJ14uc2raRdqEb39wAkkxeokrvvWefRXhZUocnqYJE0o bTlq5gWcCT6E6SC148DdGVJ0fISBKYTCEpFFYD6c= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Prem Mallappa To: glibc-cvs@sourceware.org Subject: [glibc] x86: Fix for cache computation on AMD legacy cpus. X-Act-Checkin: glibc X-Git-Author: Sajan Karumanchi X-Git-Refname: refs/heads/master X-Git-Oldrev: 53850f044f65dd11efdf67a2ab214d312295d85c X-Git-Newrev: dcad5c8578130dec7f35fd5b0885304b59f9f543 Message-Id: <20230807094821.E6B8A3858412@sourceware.org> Date: Mon, 7 Aug 2023 09:48:21 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=dcad5c8578130dec7f35fd5b0885304b59f9f543 commit dcad5c8578130dec7f35fd5b0885304b59f9f543 Author: Sajan Karumanchi Date: Tue Aug 1 15:20:55 2023 +0000 x86: Fix for cache computation on AMD legacy cpus. Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D' set to Zero, thus resulting in zeroed-out computed cache values. This patch reintroduces the old way of cache computation as a fail-safe option to handle these exceptions. Fixed 'level4_cache_size' value through handle_amd(). Reviewed-by: Premachandra Mallappa Tested-by: Florian Weimer Diff: --- sysdeps/x86/dl-cacheinfo.h | 226 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 199 insertions(+), 27 deletions(-) diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index cd4d0351ae..285773039f 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -315,40 +315,206 @@ handle_amd (int name) { unsigned int eax; unsigned int ebx; - unsigned int ecx; + unsigned int ecx = 0; unsigned int edx; - unsigned int count = 0x1; + unsigned int max_cpuid = 0; + unsigned int fn = 0; /* No level 4 cache (yet). */ if (name > _SC_LEVEL3_CACHE_LINESIZE) return 0; - if (name >= _SC_LEVEL3_CACHE_SIZE) - count = 0x3; - else if (name >= _SC_LEVEL2_CACHE_SIZE) - count = 0x2; - else if (name >= _SC_LEVEL1_DCACHE_SIZE) - count = 0x0; + __cpuid (0x80000000, max_cpuid, ebx, ecx, edx); + + if (max_cpuid >= 0x8000001D) + /* Use __cpuid__ '0x8000_001D' to compute cache details. */ + { + unsigned int count = 0x1; + + if (name >= _SC_LEVEL3_CACHE_SIZE) + count = 0x3; + else if (name >= _SC_LEVEL2_CACHE_SIZE) + count = 0x2; + else if (name >= _SC_LEVEL1_DCACHE_SIZE) + count = 0x0; + + __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); + + if (ecx != 0) + { + switch (name) + { + case _SC_LEVEL1_ICACHE_ASSOC: + case _SC_LEVEL1_DCACHE_ASSOC: + case _SC_LEVEL2_CACHE_ASSOC: + case _SC_LEVEL3_CACHE_ASSOC: + return ((ebx >> 22) & 0x3ff) + 1; + case _SC_LEVEL1_ICACHE_LINESIZE: + case _SC_LEVEL1_DCACHE_LINESIZE: + case _SC_LEVEL2_CACHE_LINESIZE: + case _SC_LEVEL3_CACHE_LINESIZE: + return (ebx & 0xfff) + 1; + case _SC_LEVEL1_ICACHE_SIZE: + case _SC_LEVEL1_DCACHE_SIZE: + case _SC_LEVEL2_CACHE_SIZE: + case _SC_LEVEL3_CACHE_SIZE: + return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); + default: + __builtin_unreachable (); + } + return -1; + } + } + + /* Legacy cache computation for CPUs prior to Bulldozer family. + This is also a fail-safe mechanism for some hypervisors that + accidentally configure __cpuid__ '0x8000_001D' to Zero. */ - __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); + fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); + + if (max_cpuid < fn) + return 0; + + __cpuid (fn, eax, ebx, ecx, edx); + + if (name < _SC_LEVEL1_DCACHE_SIZE) + { + name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; + ecx = edx; + } switch (name) { - case _SC_LEVEL1_ICACHE_ASSOC: - case _SC_LEVEL1_DCACHE_ASSOC: - case _SC_LEVEL2_CACHE_ASSOC: + case _SC_LEVEL1_DCACHE_SIZE: + return (ecx >> 14) & 0x3fc00; + + case _SC_LEVEL1_DCACHE_ASSOC: + ecx >>= 16; + if ((ecx & 0xff) == 0xff) + { + /* Fully associative. */ + return (ecx << 2) & 0x3fc00; + } + return ecx & 0xff; + + case _SC_LEVEL1_DCACHE_LINESIZE: + return ecx & 0xff; + + case _SC_LEVEL2_CACHE_SIZE: + return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; + + case _SC_LEVEL2_CACHE_ASSOC: + switch ((ecx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (ecx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); + default: + return 0; + } + + case _SC_LEVEL2_CACHE_LINESIZE: + return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; + + case _SC_LEVEL3_CACHE_SIZE: + { + long int total_l3_cache = 0, l3_cache_per_thread = 0; + unsigned int threads = 0; + const struct cpu_features *cpu_features; + + if ((edx & 0xf000) == 0) + return 0; + + total_l3_cache = (edx & 0x3ffc0000) << 1; + cpu_features = __get_cpu_features (); + + /* Figure out the number of logical threads that share L3. */ + if (max_cpuid >= 0x80000008) + { + /* Get width of APIC ID. */ + __cpuid (0x80000008, eax, ebx, ecx, edx); + threads = (ecx & 0xff) + 1; + } + + if (threads == 0) + { + /* If APIC ID width is not available, use logical + processor count. */ + __cpuid (0x00000001, eax, ebx, ecx, edx); + if ((edx & (1 << 28)) != 0) + threads = (ebx >> 16) & 0xff; + } + + /* Cap usage of highest cache level to the number of + supported threads. */ + if (threads > 0) + l3_cache_per_thread = total_l3_cache/threads; + + /* Get shared cache per ccx for Zen architectures. */ + if (cpu_features->basic.family >= 0x17) + { + long int l3_cache_per_ccx = 0; + /* Get number of threads share the L3 cache in CCX. */ + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; + l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx; + return l3_cache_per_ccx; + } + else + { + return l3_cache_per_thread; + } + } + case _SC_LEVEL3_CACHE_ASSOC: - return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; - case _SC_LEVEL1_ICACHE_LINESIZE: - case _SC_LEVEL1_DCACHE_LINESIZE: - case _SC_LEVEL2_CACHE_LINESIZE: + switch ((edx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (edx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); + default: + return 0; + } + case _SC_LEVEL3_CACHE_LINESIZE: - return ecx ? (ebx & 0xfff) + 1 : 0; - case _SC_LEVEL1_ICACHE_SIZE: - case _SC_LEVEL1_DCACHE_SIZE: - case _SC_LEVEL2_CACHE_SIZE: - case _SC_LEVEL3_CACHE_SIZE: - return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; + return (edx & 0xf000) == 0 ? 0 : edx & 0xff; + default: __builtin_unreachable (); } @@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); core = handle_amd (_SC_LEVEL2_CACHE_SIZE); shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); - shared_per_thread = shared; level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); @@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) level3_cache_size = shared; level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC); level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); + level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE); if (shared <= 0) - /* No shared L3 cache. All we have is the L2 cache. */ - shared = core; + { + /* No shared L3 cache. All we have is the L2 cache. */ + shared = core; + } + else if (cpu_features->basic.family < 0x17) + { + /* Account for exclusive L2 and L3 caches. */ + shared += core; + } - if (shared_per_thread <= 0) - shared_per_thread = shared; + shared_per_thread = shared; } cpu_features->level1_icache_size = level1_icache_size;