From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-il1-x141.google.com (mail-il1-x141.google.com [IPv6:2607:f8b0:4864:20::141]) by sourceware.org (Postfix) with ESMTPS id 11176385C019 for ; Tue, 7 Apr 2020 12:44:27 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 11176385C019 Received: by mail-il1-x141.google.com with SMTP id i75so2959449ild.13 for ; Tue, 07 Apr 2020 05:44:27 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=vIDTBOzgxszVEwJPSPPposj440UtukB/4/MiKlKRJ60=; b=bxsblh1xIElaWmHfZnSyO3bCFcMqe5ElcOVeYQHlcIYFsB+M/VgYxn5wzys36izYgU 4YqMzZBYFB1pntcrEi2WQKytuKnCoUlpA/S+lUg/cJI4yhAddLHy+OHEn4nCSZHnemBl JRahEJN46DBi/2cjge2g3E/1YaaqaBsBIVi8RX8R0HbC8vhC65KAP89FbV+gAWeSb2yS FxbOUrwS79OwomISUfBRqJ9doPB9+XlCKlE04R1whAinKdkolaN4O+ZTK4QcyM1LP7BV XqNlSk9/2rkaXCoA3gybXmrVehrsjxa5fcBL6wBMiqmMLPgakmU4M9bjCUH1kbJ0JwDs xwaA== X-Gm-Message-State: AGi0Pua7bfil5l8B9BkXpCqx3tk1wLoXXzNTvyNo0VUHy3n3pTpjm1uV QUy32eYk7ES637Lkz7AbFO1t8ASZxiHaXn8E51E= X-Google-Smtp-Source: APiQypIJbLsClnl5s4UQhM3zS4qMlCjOVFXmzc3OcmUOTK9rL9IPYjkfFxywJSFjvxjHkz4Tkw8ZDFgX2csBzeJbiaU= X-Received: by 2002:a92:86da:: with SMTP id l87mr2245883ilh.292.1586263466501; Tue, 07 Apr 2020 05:44:26 -0700 (PDT) MIME-Version: 1.0 References: <1585546430-6167-1-git-send-email-MayShao@zhaoxin.com> <1585546430-6167-3-git-send-email-MayShao@zhaoxin.com> In-Reply-To: <1585546430-6167-3-git-send-email-MayShao@zhaoxin.com> From: "H.J. 
Lu" Date: Tue, 7 Apr 2020 05:43:50 -0700 Message-ID: Subject: Re: [PATCH v2 2/3] x86: Add cache information support for Zhaoxin processors To: MayShao Cc: GNU C Library , CooperYan@zhaoxin.com, HerryYang@zhaoxin.com, QiyuanWang@zhaoxin.com, RickyLi@zhaoxin.com Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-18.9 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 07 Apr 2020 12:44:28 -0000 On Sun, Mar 29, 2020 at 10:35 PM MayShao wrote: > > To obtain Zhaoxin CPU cache information, add a new function > handle_zhaoxin(). > > Add Zhaoxin branch in init_cacheinfo() for initializing variables, > such as __x86_shared_cache_size. 
> > --- > sysdeps/x86/cacheinfo.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 185 insertions(+) > > diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c > index e3e8ef2..e5a3284 100644 > --- a/sysdeps/x86/cacheinfo.c > +++ b/sysdeps/x86/cacheinfo.c > @@ -436,6 +436,57 @@ handle_amd (int name) > } > > > +static long int __attribute__ ((noinline)) > +handle_zhaoxin (int name) > +{ > + unsigned int eax; > + unsigned int ebx; > + unsigned int ecx; > + unsigned int edx; > + > + int folded_rel_name = (M(name) / 3) * 3; > + > + unsigned int round = 0; > + while (1) > + { > + __cpuid_count (4, round, eax, ebx, ecx, edx); > + > + enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f; > + if (type == null) > + break; > + > + unsigned int level = (eax >> 5) & 0x7; > + > + if ((level == 1 && type == data > + && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE)) > + || (level == 1 && type == inst > + && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE)) > + || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE)) > + || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))) > + { > + unsigned int offset = M(name) - folded_rel_name; > + > + if (offset == 0) > + /* Cache size. */ > + return (((ebx >> 22) + 1) > + * (((ebx >> 12) & 0x3ff) + 1) > + * ((ebx & 0xfff) + 1) > + * (ecx + 1)); > + if (offset == 1) > + return (ebx >> 22) + 1; > + > + assert (offset == 2); > + return (ebx & 0xfff) + 1; > + } > + > + ++round; > + } > + > + /* Nothing found. */ > + return 0; > +} > + > + > /* Get the value of the system variable NAME. */ > long int > attribute_hidden > @@ -449,6 +500,9 @@ __cache_sysconf (int name) > if (cpu_features->basic.kind == arch_kind_amd) > return handle_amd (name); > > + if (cpu_features->basic.kind == arch_kind_zhaoxin) > + return handle_zhaoxin (name); > + > // XXX Fill in more vendors. > > /* CPU not known, we have no information. 
*/ > @@ -751,6 +805,137 @@ intel_bug_no_cache_info: > } > #endif > } > + else if (cpu_features->basic.kind == arch_kind_zhaoxin) > + { > + data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE); > + long int core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE); > + shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE); > + > + /* Number of logical processors sharing L2 cache. */ > + int threads_l2; > + > + /* Number of logical processors sharing L3 cache. */ > + int threads_l3; > + > + if (shared <= 0) > + { > + /* No shared L3 cache. All we have is the L2 cache. */ > + level = 2; > + shared = core; > + threads_l2 = 0; > + threads_l3 = -1; > + } > + else > + { > + level = 3; > + threads_l2 = 0; > + threads_l3 = 0; > + } > + > + int i = 0; > + > + /* Query until cache level 2 and 3 are enumerated. */ > + int check = 0x1 | (threads_l3 == 0) << 1; > + do > + { > + __cpuid_count (4, i++, eax, ebx, ecx, edx); > + > + switch ((eax >> 5) & 0x7) > + { > + default: > + break; > + case 2: > + if ((check & 0x1)) > + { > + /* Get maximum number of logical processors > + sharing L2 cache. */ > + threads_l2 = (eax >> 14) & 0x3ff; > + check &= ~0x1; > + } > + break; > + case 3: > + if ((check & (0x1 << 1))) > + { > + /* Get maximum number of logical processors > + sharing L3 cache. */ > + threads_l3 = (eax >> 14) & 0x3ff; > + check &= ~(0x1 << 1); > + } > + break; > + } > + } > + while (check); > + > + /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum > + numbers of addressable IDs for logical processors sharing > + the cache, instead of the maximum number of threads > + sharing the cache. */ > + if (max_cpuid >= 11) > + { > + /* Find the number of logical processors shipped in > + one core and apply count mask. */ > + i = 0; > + > + /* Count SMT only if there is L3 cache. Always count > + core if there is no L3 cache. 
*/ > + int count = ((threads_l2 > 0 && level == 3) > + | ((threads_l3 > 0 > + || (threads_l2 > 0 && level == 2)) << 1)); > + > + while (count) > + { > + __cpuid_count (11, i++, eax, ebx, ecx, edx); > + > + int shipped = ebx & 0xff; > + int type = ecx & 0xff00; > + if (shipped == 0 || type == 0) > + break; > + else if (type == 0x100) > + { > + /* Count SMT. */ > + if ((count & 0x1)) > + { > + int count_mask; > + > + /* Compute count mask. */ > + asm ("bsr %1, %0" > + : "=r" (count_mask) : "g" (threads_l2)); > + count_mask = ~(-1 << (count_mask + 1)); > + threads_l2 = (shipped - 1) & count_mask; > + count &= ~0x1; > + } > + } > + else if (type == 0x200) > + { > + /* Count core. */ > + if ((count & (0x1 << 1))) > + { > + int count_mask; > + int threads_core > + = (level == 2 ? threads_l2 : threads_l3); > + > + /* Compute count mask. */ > + asm ("bsr %1, %0" > + : "=r" (count_mask) : "g" (threads_core)); > + count_mask = ~(-1 << (count_mask + 1)); > + threads_core = (shipped - 1) & count_mask; > + if (level == 2) > + threads_l2 = threads_core; > + else > + threads_l3 = threads_core; > + count &= ~(0x1 << 1); > + } > + } > + } > + } > + if (level == 2 && threads_l2 > 0) > + threads = threads_l2 + 1; > + if (level == 3 && threads_l3 > 0) > + threads = threads_l3 + 1; > + > + if (shared > 0 && threads > 0) > + shared /= threads; > + } This code looks very similar to the Intel code. Can you factor the common part out and reuse it for Zhaoxin? > if (cpu_features->data_cache_size != 0) > data = cpu_features->data_cache_size; -- H.J.