RE: [PATCH v2 2/3] x86: Add cache information support for Zhaoxin processors

public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed

From: Mayshao-oc <Mayshao-oc@zhaoxin.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>,
	"Cooper Yan(BJ-RD)" <CooperYan@zhaoxin.com>,
	"Herry Yang(BJ-RD)" <HerryYang@zhaoxin.com>,
	"Qiyuan Wang(BJ-RD)" <QiyuanWang@zhaoxin.com>,
	"Ricky Li(BJ-RD)" <RickyLi@zhaoxin.com>
Subject: RE: [PATCH v2 2/3] x86: Add cache information support for Zhaoxin processors
Date: Fri, 10 Apr 2020 02:34:05 +0000	[thread overview]
Message-ID: <0894bcfa963547c79bdd2d33e6305ce2@zhaoxin.com> (raw)
In-Reply-To: <CAMe9rOoCMdU6NGobu-S3TT3ucwudScwUHJYaq07QXt8+UwMJeg@mail.gmail.com>


On Tue, April 7, 2020 at 8:44 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> 
> On Sun, Mar 29, 2020 at 10:35 PM MayShao <MayShao@zhaoxin.com> wrote:
> >
> > To obtain Zhaoxin CPU cache information, add a new function
> > handle_zhaoxin().
> >
> > Add Zhaoxin branch in init_cacheinfo() for initializing variables,
> > such as __x86_shared_cache_size.
> >
> > ---
> >  sysdeps/x86/cacheinfo.c | 185
> ++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 185 insertions(+)
> >
> > diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
> > index e3e8ef2..e5a3284 100644
> > --- a/sysdeps/x86/cacheinfo.c
> > +++ b/sysdeps/x86/cacheinfo.c
> > @@ -436,6 +436,57 @@ handle_amd (int name)
> >  }
> >
> >
> > +static long int __attribute__ ((noinline))
> > +handle_zhaoxin (int name)
> > +{
> > +  unsigned int eax;
> > +  unsigned int ebx;
> > +  unsigned int ecx;
> > +  unsigned int edx;
> > +
> > +  int folded_rel_name = (M(name) / 3) * 3;
> > +
> > +  unsigned int round = 0;
> > +  while (1)
> > +    {
> > +      __cpuid_count (4, round, eax, ebx, ecx, edx);
> > +
> > +      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
> > +      if (type == null)
> > +        break;
> > +
> > +      unsigned int level = (eax >> 5) & 0x7;
> > +
> > +      if ((level == 1 && type == data
> > +        && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
> > +        || (level == 1 && type == inst
> > +            && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
> > +        || (level == 2 && folded_rel_name ==
> M(_SC_LEVEL2_CACHE_SIZE))
> > +        || (level == 3 && folded_rel_name ==
> M(_SC_LEVEL3_CACHE_SIZE)))
> > +        {
> > +          unsigned int offset = M(name) - folded_rel_name;
> > +
> > +          if (offset == 0)
> > +            /* Cache size.  */
> > +            return (((ebx >> 22) + 1)
> > +                * (((ebx >> 12) & 0x3ff) + 1)
> > +                * ((ebx & 0xfff) + 1)
> > +                * (ecx + 1));
> > +          if (offset == 1)
> > +            return (ebx >> 22) + 1;
> > +
> > +          assert (offset == 2);
> > +          return (ebx & 0xfff) + 1;
> > +        }
> > +
> > +      ++round;
> > +    }
> > +
> > +  /* Nothing found.  */
> > +  return 0;
> > +}
> > +
> > +
> >  /* Get the value of the system variable NAME.  */
> >  long int
> >  attribute_hidden
> > @@ -449,6 +500,9 @@ __cache_sysconf (int name)
> >    if (cpu_features->basic.kind == arch_kind_amd)
> >      return handle_amd (name);
> >
> > +  if (cpu_features->basic.kind == arch_kind_zhaoxin)
> > +    return handle_zhaoxin (name);
> > +
> >    // XXX Fill in more vendors.
> >
> >    /* CPU not known, we have no information.  */
> > @@ -751,6 +805,137 @@ intel_bug_no_cache_info:
> >         }
> >  #endif
> >      }
> > +  else if (cpu_features->basic.kind == arch_kind_zhaoxin)
> > +    {
> > +      data   = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
> > +      long int core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
> > +      shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
> > +
> > +      /* Number of logical processors sharing L2 cache.  */
> > +      int threads_l2;
> > +
> > +      /* Number of logical processors sharing L3 cache.  */
> > +      int threads_l3;
> > +
> > +      if (shared <= 0)
> > +        {
> > +          /* No shared L3 cache.  All we have is the L2 cache.  */
> > +          level = 2;
> > +          shared = core;
> > +          threads_l2 = 0;
> > +          threads_l3 = -1;
> > +        }
> > +      else
> > +        {
> > +          level = 3;
> > +          threads_l2 = 0;
> > +          threads_l3 = 0;
> > +        }
> > +
> > +      int i = 0;
> > +
> > +      /* Query until cache level 2 and 3 are enumerated.  */
> > +      int check = 0x1 | (threads_l3 == 0) << 1;
> > +      do
> > +        {
> > +          __cpuid_count (4, i++, eax, ebx, ecx, edx);
> > +
> > +          switch ((eax >> 5) & 0x7)
> > +            {
> > +            default:
> > +              break;
> > +            case 2:
> > +              if ((check & 0x1))
> > +                {
> > +                  /* Get maximum number of logical processors
> > +                     sharing L2 cache.  */
> > +                  threads_l2 = (eax >> 14) & 0x3ff;
> > +                  check &= ~0x1;
> > +                }
> > +              break;
> > +            case 3:
> > +              if ((check & (0x1 << 1)))
> > +               {
> > +                  /* Get maximum number of logical processors
> > +                     sharing L3 cache.  */
> > +                  threads_l3 = (eax >> 14) & 0x3ff;
> > +                  check &= ~(0x1 << 1);
> > +                }
> > +              break;
> > +           }
> > +        }
> > +      while (check);
> > +
> > +      /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
> > +         numbers of addressable IDs for logical processors sharing
> > +         the cache, instead of the maximum number of threads
> > +         sharing the cache.  */
> > +      if (max_cpuid >= 11)
> > +        {
> > +          /* Find the number of logical processors shipped in
> > +             one core and apply count mask.  */
> > +          i = 0;
> > +
> > +          /* Count SMT only if there is L3 cache.  Always count
> > +             core if there is no L3 cache.  */
> > +          int count = ((threads_l2 > 0 && level == 3)
> > +                       | ((threads_l3 > 0
> > +                           || (threads_l2 > 0 && level == 2)) << 1));
> > +
> > +          while (count)
> > +            {
> > +              __cpuid_count (11, i++, eax, ebx, ecx, edx);
> > +
> > +              int shipped = ebx & 0xff;
> > +              int type = ecx & 0xff00;
> > +              if (shipped == 0 || type == 0)
> > +                break;
> > +              else if (type == 0x100)
> > +                {
> > +                  /* Count SMT.  */
> > +                  if ((count & 0x1))
> > +                    {
> > +                      int count_mask;
> > +
> > +                      /* Compute count mask.  */
> > +                      asm ("bsr %1, %0"
> > +                           : "=r" (count_mask) : "g" (threads_l2));
> > +                      count_mask = ~(-1 << (count_mask + 1));
> > +                      threads_l2 = (shipped - 1) & count_mask;
> > +                      count &= ~0x1;
> > +                    }
> > +                }
> > +              else if (type == 0x200)
> > +                {
> > +                  /* Count core.  */
> > +                  if ((count & (0x1 << 1)))
> > +                    {
> > +                      int count_mask;
> > +                      int threads_core
> > +                        = (level == 2 ? threads_l2 : threads_l3);
> > +
> > +                      /* Compute count mask.  */
> > +                      asm ("bsr %1, %0"
> > +                           : "=r" (count_mask) : "g" (threads_core));
> > +                      count_mask = ~(-1 << (count_mask + 1));
> > +                      threads_core = (shipped - 1) & count_mask;
> > +                      if (level == 2)
> > +                        threads_l2 = threads_core;
> > +                      else
> > +                        threads_l3 = threads_core;
> > +                      count &= ~(0x1 << 1);
> > +                    }
> > +                }
> > +            }
> > +        }
> > +      if (level == 2 && threads_l2 > 0)
> > +        threads = threads_l2 + 1;
> > +      if (level == 3 && threads_l3 > 0)
> > +        threads = threads_l3 + 1;
> > +
> > +      if (shared > 0 && threads > 0)
> > +        shared /= threads;
> > +    }
> 
> This code looks very similar to Intel code.   Can you factor it out and reuse
> it for you?

I tried to extract this part of the code, but it didn’t look very clean.
For example, the case of max_cpuid < 4 does not exist on Zhaoxin processors.
Zhaoxin processors currently use inclusive caches, and the number of threads
sharing the L2 cache is not affected by the family or model.  Considering possible
changes in CPU design in the future, it may be more convenient to keep separate branches.

I was wondering if you have any concerns.  If you could give some suggestions,
that would be great.


Best Regards,
May Shao

next prev parent reply	other threads:[~2020-04-10  2:34 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-30  5:33 [PATCH v2 0/3] x86: Add " MayShao
2020-03-30  5:33 ` [PATCH v2 1/3] x86: Add CPU Vendor ID detection " MayShao
2020-04-07 12:37   ` H.J. Lu
2020-04-10 11:44     ` Mayshao-oc
2020-03-30  5:33 ` [PATCH v2 2/3] x86: Add cache information " MayShao
2020-04-07 12:43   ` H.J. Lu
2020-04-10  2:34     ` Mayshao-oc [this message]
2020-04-10 11:53       ` H.J. Lu
2020-04-10 12:49         ` Mayshao-oc
2020-04-10 13:01         ` Mayshao-oc
2020-03-30  5:33 ` [PATCH v2 3/3] x86: Add the test case of __get_cpu_features " MayShao
2020-04-07 12:39   ` H.J. Lu
2020-04-08  2:36     ` May Shao(BJ-RD)
2020-04-08 12:52       ` Carlos O'Donell
     [not found] ` <4b7ba83030284e9c9c5b1cd91f1b3012@zhaoxin.com>
2020-04-07 10:25   ` [PATCH v2 0/3] x86: Add " May Shao(BJ-RD)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0894bcfa963547c79bdd2d33e6305ce2@zhaoxin.com \
    --to=mayshao-oc@zhaoxin.com \
    --cc=CooperYan@zhaoxin.com \
    --cc=HerryYang@zhaoxin.com \
    --cc=QiyuanWang@zhaoxin.com \
    --cc=RickyLi@zhaoxin.com \
    --cc=hjl.tools@gmail.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).