public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] x86: Cache computation for AMD architecture.
@ 2022-12-12 11:53 Karumanchi, Sajan
  2023-01-16 12:33 ` Florian Weimer
  0 siblings, 1 reply; 8+ messages in thread
From: Karumanchi, Sajan @ 2022-12-12 11:53 UTC (permalink / raw)
  To: libc-alpha, carlos, fweimer; +Cc: Mallappa, Premachandra, Karumanchi, Sajan

[-- Attachment #1: Type: text/plain, Size: 180 bytes --]

[AMD Official Use Only - General]

Hi Florian,

This is a patch for computing cache information on AMD architectures with CPUID: 0x8000_001D.

Thanks & Regards,
Sajan K.


[-- Attachment #2: 0001-x86-Cache-computation-for-AMD-architecture.patch --]
[-- Type: application/octet-stream, Size: 7889 bytes --]

From c02460ea1521ce13bb079508808436daeb11f60f Mon Sep 17 00:00:00 2001
From: Sajan Karumanchi <sajan.karumanchi@amd.com>
Date: Mon, 5 Dec 2022 15:49:14 +0530
Subject: [PATCH] x86: Cache computation for AMD architecture.
To: libc-alpha@sourceware.org,carlos@redhat.com,fweimer@redhat.com
Cc: premachandra.mallappa@amd.com

All AMD architectures cache details will be computed based on
__cpuid__ 0x8000_001D and the reference to __cpuid__ 0x8000_0006 will be
zeroed out for future architectures.

Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
---
 sysdeps/x86/dl-cacheinfo.h | 198 ++++++++-----------------------------
 1 file changed, 40 insertions(+), 158 deletions(-)

diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index e9f3382108..a082e639b3 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -311,117 +311,48 @@ handle_intel (int name, const struct cpu_features *cpu_features)
 
 
 static long int __attribute__ ((noinline))
-handle_amd (int name)
+handle_amd (int name, const struct cpu_features *cpu_features)
 {
   unsigned int eax;
   unsigned int ebx;
   unsigned int ecx;
   unsigned int edx;
+  unsigned int count = 0x1;
+
   __cpuid (0x80000000, eax, ebx, ecx, edx);
 
   /* No level 4 cache (yet).  */
   if (name > _SC_LEVEL3_CACHE_LINESIZE)
     return 0;
 
-  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
-  if (eax < fn)
-    return 0;
-
-  __cpuid (fn, eax, ebx, ecx, edx);
+  if (name >= _SC_LEVEL3_CACHE_SIZE)
+    count = 0x3;
+  else if (name >= _SC_LEVEL2_CACHE_SIZE)
+    count = 0x2;
+  else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+    count = 0x0;
 
-  if (name < _SC_LEVEL1_DCACHE_SIZE)
-    {
-      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
-      ecx = edx;
-    }
+  __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
 
   switch (name)
     {
-    case _SC_LEVEL1_DCACHE_SIZE:
-      return (ecx >> 14) & 0x3fc00;
-
-    case _SC_LEVEL1_DCACHE_ASSOC:
-      ecx >>= 16;
-      if ((ecx & 0xff) == 0xff)
-	/* Fully associative.  */
-	return (ecx << 2) & 0x3fc00;
-      return ecx & 0xff;
-
-    case _SC_LEVEL1_DCACHE_LINESIZE:
-      return ecx & 0xff;
-
-    case _SC_LEVEL2_CACHE_SIZE:
-      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
-
-    case _SC_LEVEL2_CACHE_ASSOC:
-      switch ((ecx >> 12) & 0xf)
-	{
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	  return (ecx >> 12) & 0xf;
-	case 6:
-	  return 8;
-	case 8:
-	  return 16;
-	case 10:
-	  return 32;
-	case 11:
-	  return 48;
-	case 12:
-	  return 64;
-	case 13:
-	  return 96;
-	case 14:
-	  return 128;
-	case 15:
-	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
-	default:
-	  return 0;
-	}
-      /* NOTREACHED */
-
-    case _SC_LEVEL2_CACHE_LINESIZE:
-      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
-
-    case _SC_LEVEL3_CACHE_SIZE:
-      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
-
-    case _SC_LEVEL3_CACHE_ASSOC:
-      switch ((edx >> 12) & 0xf)
-	{
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	  return (edx >> 12) & 0xf;
-	case 6:
-	  return 8;
-	case 8:
-	  return 16;
-	case 10:
-	  return 32;
-	case 11:
-	  return 48;
-	case 12:
-	  return 64;
-	case 13:
-	  return 96;
-	case 14:
-	  return 128;
-	case 15:
-	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
-	default:
-	  return 0;
-	}
-      /* NOTREACHED */
-
-    case _SC_LEVEL3_CACHE_LINESIZE:
-      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
-
-    default:
-      assert (! "cannot happen");
+       case _SC_LEVEL1_ICACHE_ASSOC:
+       case _SC_LEVEL1_DCACHE_ASSOC:
+       case _SC_LEVEL2_CACHE_ASSOC:
+       case _SC_LEVEL3_CACHE_ASSOC:
+         return ecx?((ebx >> 22) & 0x3ff) + 1 : 0;
+       case _SC_LEVEL1_ICACHE_LINESIZE:
+       case _SC_LEVEL1_DCACHE_LINESIZE:
+       case _SC_LEVEL2_CACHE_LINESIZE:
+       case _SC_LEVEL3_CACHE_LINESIZE:
+         return ecx?(ebx & 0xfff) + 1 : 0;
+       case _SC_LEVEL1_ICACHE_SIZE:
+       case _SC_LEVEL1_DCACHE_SIZE:
+       case _SC_LEVEL2_CACHE_SIZE:
+       case _SC_LEVEL3_CACHE_SIZE:
+         return ecx?(((ebx >> 22) & 0x3ff) + 1)*((ebx & 0xfff) + 1)*(ecx + 1):0;
+       default:
+         assert (! "cannot happen");
     }
   return -1;
 }
@@ -698,10 +629,6 @@ static void
 dl_init_cacheinfo (struct cpu_features *cpu_features)
 {
   /* Find out what brand of processor.  */
-  unsigned int ebx;
-  unsigned int ecx;
-  unsigned int edx;
-  int max_cpuid_ex;
   long int data = -1;
   long int shared = -1;
   long int core = -1;
@@ -771,70 +698,25 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
     }
   else if (cpu_features->basic.kind == arch_kind_amd)
     {
-      data  = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
-      core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
-      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+      data  = handle_amd (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
+      core = handle_amd (_SC_LEVEL2_CACHE_SIZE, cpu_features);
+      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE, cpu_features);
 
-      level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
-      level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
+      level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
+      level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features);
       level1_dcache_size = data;
-      level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
-      level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
+      level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
+      level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
       level2_cache_size = core;
-      level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
-      level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
+      level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
+      level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE, cpu_features);
       level3_cache_size = shared;
-      level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
-      level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
-
-      /* Get maximum extended function. */
-      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
+      level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC, cpu_features);
+      level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE, cpu_features);
 
       if (shared <= 0)
-	/* No shared L3 cache.  All we have is the L2 cache.  */
-	shared = core;
-      else
-	{
-	  /* Figure out the number of logical threads that share L3.  */
-	  if (max_cpuid_ex >= 0x80000008)
-	    {
-	      /* Get width of APIC ID.  */
-	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
-	      threads = 1 << ((ecx >> 12) & 0x0f);
-	    }
-
-	  if (threads == 0 || cpu_features->basic.family >= 0x17)
-	    {
-	      /* If APIC ID width is not available, use logical
-		 processor count.  */
-	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
-
-	      if ((edx & (1 << 28)) != 0)
-		threads = (ebx >> 16) & 0xff;
-	    }
-
-	  /* Cap usage of highest cache level to the number of
-	     supported threads.  */
-	  if (threads > 0)
-	    shared /= threads;
-
-	  /* Get shared cache per ccx for Zen architectures.  */
-	  if (cpu_features->basic.family >= 0x17)
-	    {
-	      unsigned int eax;
-
-	      /* Get number of threads share the L3 cache in CCX.  */
-	      __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
-
-	      unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
-	      shared *= threads_per_ccx;
-	    }
-	  else
-	    {
-	      /* Account for exclusive L2 and L3 caches.  */
-	      shared += core;
-            }
-	}
+        /* No shared L3 cache.  All we have is the L2 cache.  */
+         shared = core;
     }
 
   cpu_features->level1_icache_size = level1_icache_size;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86: Cache computation for AMD architecture.
  2022-12-12 11:53 [PATCH] x86: Cache computation for AMD architecture Karumanchi, Sajan
@ 2023-01-16 12:33 ` Florian Weimer
  2023-01-18  7:39   ` [PATCH v1] " Karumanchi, Sajan
  0 siblings, 1 reply; 8+ messages in thread
From: Florian Weimer @ 2023-01-16 12:33 UTC (permalink / raw)
  To: Karumanchi, Sajan; +Cc: libc-alpha, carlos, Mallappa, Premachandra

* Sajan Karumanchi:

> Hi Florian,
>
> This is a patch for computing cache information on AMD architectures
> with CPUID: 0x8000_001D.

Sorry for the delay in commenting on this.

> Subject: [PATCH] x86: Cache computation for AMD architecture.
> To: libc-alpha@sourceware.org,carlos@redhat.com,fweimer@redhat.com
> Cc: premachandra.mallappa@amd.com
>
> All AMD architectures cache details will be computed based on
> __cpuid__ 0x8000_001D and the reference to __cpuid__ 0x8000_0006 will be
> zeroed out for future architectures.
>
> Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>

As this patch has already been reviewed, should we just install it,
after fixing some style issues?

Do we need to do some testing across various CPUs to make sure the
reported values for other CPUs do not change?

> ---
>  sysdeps/x86/dl-cacheinfo.h | 198 ++++++++-----------------------------
>  1 file changed, 40 insertions(+), 158 deletions(-)
>
> diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
> index e9f3382108..a082e639b3 100644
> --- a/sysdeps/x86/dl-cacheinfo.h
> +++ b/sysdeps/x86/dl-cacheinfo.h
> @@ -311,117 +311,48 @@ handle_intel (int name, const struct cpu_features *cpu_features)
>  
>  
>  static long int __attribute__ ((noinline))
> -handle_amd (int name)
> +handle_amd (int name, const struct cpu_features *cpu_features)
>  {
>    unsigned int eax;
>    unsigned int ebx;
>    unsigned int ecx;
>    unsigned int edx;
> +  unsigned int count = 0x1;
> +
>    __cpuid (0x80000000, eax, ebx, ecx, edx);

It seems that the results of this CPUID instructure are overwritten …

>  
>    /* No level 4 cache (yet).  */
>    if (name > _SC_LEVEL3_CACHE_LINESIZE)
>      return 0;
>  
> +  if (name >= _SC_LEVEL3_CACHE_SIZE)
> +    count = 0x3;
> +  else if (name >= _SC_LEVEL2_CACHE_SIZE)
> +    count = 0x2;
> +  else if (name >= _SC_LEVEL1_DCACHE_SIZE)
> +    count = 0x0;
>  
> +  __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);

… here?

> +      level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features);

> +      level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);

> +      level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE, cpu_features);

> +      level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE, cpu_features);

I think these lines are overly long based on our 79-column rule.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v1] x86: Cache computation for AMD architecture.
  2023-01-16 12:33 ` Florian Weimer
@ 2023-01-18  7:39   ` Karumanchi, Sajan
  2023-01-18 14:54     ` Florian Weimer
                       ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Karumanchi, Sajan @ 2023-01-18  7:39 UTC (permalink / raw)
  To: Florian Weimer, libc-alpha, carlos; +Cc: Mallappa, Premachandra

[-- Attachment #1: Type: text/plain, Size: 3518 bytes --]

[AMD Official Use Only - General]

Hi Florian,

> -----Original Message-----
> From: Florian Weimer <fweimer@redhat.com>
> Sent: Monday, January 16, 2023 6:04 PM
> To: Karumanchi, Sajan <Sajan.Karumanchi@amd.com>
> Cc: libc-alpha@sourceware.org; carlos@redhat.com; Mallappa,
> Premachandra <Premachandra.Mallappa@amd.com>
> Subject: Re: [PATCH] x86: Cache computation for AMD architecture.
>
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> * Sajan Karumanchi:
>
> > Hi Florian,
> >
> > This is a patch for computing cache information on AMD architectures
> > with CPUID: 0x8000_001D.
>
> Sorry for the delay in commenting on this.
>
> > Subject: [PATCH] x86: Cache computation for AMD architecture.
> > To: libc-alpha@sourceware.org,carlos@redhat.com,fweimer@redhat.com
> > Cc: premachandra.mallappa@amd.com
> >
> > All AMD architectures cache details will be computed based on
> > __cpuid__ 0x8000_001D and the reference to __cpuid__ 0x8000_0006 will
> > be zeroed out for future architectures.
> >
> > Reviewed-by: Premachandra Mallappa
> <premachandra.mallappa@amd.com>
>
> As this patch has already been reviewed, should we just install it, after fixing
> some style issues?
 Yes
>
> Do we need to do some testing across various CPUs to make sure the
> reported values for other CPUs do not change?
Though we have done the testing on Zen and pre-Zen architectures, we recommend to carryout the tests from your end too.
>
> > ---
> >  sysdeps/x86/dl-cacheinfo.h | 198
> > ++++++++-----------------------------
> >  1 file changed, 40 insertions(+), 158 deletions(-)
> >
> > diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
> > index e9f3382108..a082e639b3 100644
> > --- a/sysdeps/x86/dl-cacheinfo.h
> > +++ b/sysdeps/x86/dl-cacheinfo.h
> > @@ -311,117 +311,48 @@ handle_intel (int name, const struct
> > cpu_features *cpu_features)
> >
> >
> >  static long int __attribute__ ((noinline)) -handle_amd (int name)
> > +handle_amd (int name, const struct cpu_features *cpu_features)
> >  {
> >    unsigned int eax;
> >    unsigned int ebx;
> >    unsigned int ecx;
> >    unsigned int edx;
> > +  unsigned int count = 0x1;
> > +
> >    __cpuid (0x80000000, eax, ebx, ecx, edx);
>
> It seems that the results of this CPUID instructure are overwritten …
Tanks for pointing it out, this has no significance and removed.
>
> >
> >    /* No level 4 cache (yet).  */
> >    if (name > _SC_LEVEL3_CACHE_LINESIZE)
> >      return 0;
> >
> > +  if (name >= _SC_LEVEL3_CACHE_SIZE)
> > +    count = 0x3;
> > +  else if (name >= _SC_LEVEL2_CACHE_SIZE)
> > +    count = 0x2;
> > +  else if (name >= _SC_LEVEL1_DCACHE_SIZE)
> > +    count = 0x0;
> >
> > +  __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
>
> … here?
>
> > +      level1_icache_linesize = handle_amd
> > + (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features);
>
> > +      level1_dcache_linesize = handle_amd
> > + (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
>
> > +      level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE,
> > + cpu_features);
>
> > +      level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE,
> > + cpu_features);
>
> I think these lines are overly long based on our 79-column rule.
Fixed the styling issues.
>
> Thanks,
> Florian

PFA reworked patch addressing the review comments.

Thanks,
Sajan K.

[-- Attachment #2: v1-0001-x86-Cache-computation-for-AMD-architecture.patch --]
[-- Type: application/octet-stream, Size: 8057 bytes --]

From d5995d6b82f3ae12605c3d64c96b403f0b6b4f16 Mon Sep 17 00:00:00 2001
From: Sajan Karumanchi <sajan.karumanchi@amd.com>
Date: Mon, 5 Dec 2022 15:49:14 +0530
Subject: [PATCH v1] x86: Cache computation for AMD architecture.
To: libc-alpha@sourceware.org,carlos@redhat.com,fweimer@redhat.com
Cc: premachandra.mallappa@amd.com

All AMD architectures cache details will be computed based on
__cpuid__ `0x8000_001D` and the reference to __cpuid__ `0x8000_0006` will be
zeroed out for future architectures.

Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
---
 sysdeps/x86/dl-cacheinfo.h | 205 +++++++++----------------------------
 1 file changed, 46 insertions(+), 159 deletions(-)

diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index e9f3382108..21ac0a2dad 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -311,117 +311,47 @@ handle_intel (int name, const struct cpu_features *cpu_features)
 
 
 static long int __attribute__ ((noinline))
-handle_amd (int name)
+handle_amd (int name, const struct cpu_features *cpu_features)
 {
   unsigned int eax;
   unsigned int ebx;
   unsigned int ecx;
   unsigned int edx;
-  __cpuid (0x80000000, eax, ebx, ecx, edx);
+  unsigned int count = 0x1;
 
   /* No level 4 cache (yet).  */
   if (name > _SC_LEVEL3_CACHE_LINESIZE)
     return 0;
 
-  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
-  if (eax < fn)
-    return 0;
+  if (name >= _SC_LEVEL3_CACHE_SIZE)
+    count = 0x3;
+  else if (name >= _SC_LEVEL2_CACHE_SIZE)
+    count = 0x2;
+  else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+    count = 0x0;
 
-  __cpuid (fn, eax, ebx, ecx, edx);
-
-  if (name < _SC_LEVEL1_DCACHE_SIZE)
-    {
-      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
-      ecx = edx;
-    }
+  __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
 
   switch (name)
     {
-    case _SC_LEVEL1_DCACHE_SIZE:
-      return (ecx >> 14) & 0x3fc00;
-
-    case _SC_LEVEL1_DCACHE_ASSOC:
-      ecx >>= 16;
-      if ((ecx & 0xff) == 0xff)
-	/* Fully associative.  */
-	return (ecx << 2) & 0x3fc00;
-      return ecx & 0xff;
-
-    case _SC_LEVEL1_DCACHE_LINESIZE:
-      return ecx & 0xff;
-
-    case _SC_LEVEL2_CACHE_SIZE:
-      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
-
-    case _SC_LEVEL2_CACHE_ASSOC:
-      switch ((ecx >> 12) & 0xf)
-	{
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	  return (ecx >> 12) & 0xf;
-	case 6:
-	  return 8;
-	case 8:
-	  return 16;
-	case 10:
-	  return 32;
-	case 11:
-	  return 48;
-	case 12:
-	  return 64;
-	case 13:
-	  return 96;
-	case 14:
-	  return 128;
-	case 15:
-	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
-	default:
-	  return 0;
-	}
-      /* NOTREACHED */
-
-    case _SC_LEVEL2_CACHE_LINESIZE:
-      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
-
-    case _SC_LEVEL3_CACHE_SIZE:
-      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
-
-    case _SC_LEVEL3_CACHE_ASSOC:
-      switch ((edx >> 12) & 0xf)
-	{
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	  return (edx >> 12) & 0xf;
-	case 6:
-	  return 8;
-	case 8:
-	  return 16;
-	case 10:
-	  return 32;
-	case 11:
-	  return 48;
-	case 12:
-	  return 64;
-	case 13:
-	  return 96;
-	case 14:
-	  return 128;
-	case 15:
-	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
-	default:
-	  return 0;
-	}
-      /* NOTREACHED */
-
-    case _SC_LEVEL3_CACHE_LINESIZE:
-      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
-
-    default:
-      assert (! "cannot happen");
+       case _SC_LEVEL1_ICACHE_ASSOC:
+       case _SC_LEVEL1_DCACHE_ASSOC:
+       case _SC_LEVEL2_CACHE_ASSOC:
+       case _SC_LEVEL3_CACHE_ASSOC:
+         return ecx?((ebx >> 22) & 0x3ff) + 1 : 0;
+       case _SC_LEVEL1_ICACHE_LINESIZE:
+       case _SC_LEVEL1_DCACHE_LINESIZE:
+       case _SC_LEVEL2_CACHE_LINESIZE:
+       case _SC_LEVEL3_CACHE_LINESIZE:
+         return ecx?(ebx & 0xfff) + 1 : 0;
+       case _SC_LEVEL1_ICACHE_SIZE:
+       case _SC_LEVEL1_DCACHE_SIZE:
+       case _SC_LEVEL2_CACHE_SIZE:
+       case _SC_LEVEL3_CACHE_SIZE:
+         return ecx?(((ebx >> 22) & 0x3ff) + 1)*((ebx & 0xfff) + 1)\
+                                                    *(ecx + 1):0;
+       default:
+         assert (! "cannot happen");
     }
   return -1;
 }
@@ -698,10 +628,6 @@ static void
 dl_init_cacheinfo (struct cpu_features *cpu_features)
 {
   /* Find out what brand of processor.  */
-  unsigned int ebx;
-  unsigned int ecx;
-  unsigned int edx;
-  int max_cpuid_ex;
   long int data = -1;
   long int shared = -1;
   long int core = -1;
@@ -771,70 +697,31 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
     }
   else if (cpu_features->basic.kind == arch_kind_amd)
     {
-      data  = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
-      core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
-      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+      data  = handle_amd (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
+      core = handle_amd (_SC_LEVEL2_CACHE_SIZE, cpu_features);
+      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE, cpu_features);
 
-      level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
-      level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
+      level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
+      level1_icache_linesize = 
+                handle_amd (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features);
       level1_dcache_size = data;
-      level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
-      level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
+      level1_dcache_assoc = 
+                handle_amd (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
+      level1_dcache_linesize = 
+                handle_amd (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
       level2_cache_size = core;
-      level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
-      level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
+      level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
+      level2_cache_linesize = 
+                handle_amd (_SC_LEVEL2_CACHE_LINESIZE, cpu_features);
       level3_cache_size = shared;
-      level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
-      level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
-
-      /* Get maximum extended function. */
-      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
+      level3_cache_assoc = 
+                handle_amd (_SC_LEVEL3_CACHE_ASSOC, cpu_features);
+      level3_cache_linesize = 
+                handle_amd (_SC_LEVEL3_CACHE_LINESIZE, cpu_features);
 
       if (shared <= 0)
-	/* No shared L3 cache.  All we have is the L2 cache.  */
-	shared = core;
-      else
-	{
-	  /* Figure out the number of logical threads that share L3.  */
-	  if (max_cpuid_ex >= 0x80000008)
-	    {
-	      /* Get width of APIC ID.  */
-	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
-	      threads = 1 << ((ecx >> 12) & 0x0f);
-	    }
-
-	  if (threads == 0 || cpu_features->basic.family >= 0x17)
-	    {
-	      /* If APIC ID width is not available, use logical
-		 processor count.  */
-	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
-
-	      if ((edx & (1 << 28)) != 0)
-		threads = (ebx >> 16) & 0xff;
-	    }
-
-	  /* Cap usage of highest cache level to the number of
-	     supported threads.  */
-	  if (threads > 0)
-	    shared /= threads;
-
-	  /* Get shared cache per ccx for Zen architectures.  */
-	  if (cpu_features->basic.family >= 0x17)
-	    {
-	      unsigned int eax;
-
-	      /* Get number of threads share the L3 cache in CCX.  */
-	      __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
-
-	      unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
-	      shared *= threads_per_ccx;
-	    }
-	  else
-	    {
-	      /* Account for exclusive L2 and L3 caches.  */
-	      shared += core;
-            }
-	}
+        /* No shared L3 cache.  All we have is the L2 cache.  */
+         shared = core;
     }
 
   cpu_features->level1_icache_size = level1_icache_size;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1] x86: Cache computation for AMD architecture.
  2023-01-18  7:39   ` [PATCH v1] " Karumanchi, Sajan
@ 2023-01-18 14:54     ` Florian Weimer
  2023-01-18 15:59       ` Carlos O'Donell
  2023-02-09 13:43     ` Andreas Schwab
  2023-05-08 17:06     ` Florian Weimer
  2 siblings, 1 reply; 8+ messages in thread
From: Florian Weimer @ 2023-01-18 14:54 UTC (permalink / raw)
  To: Karumanchi, Sajan; +Cc: libc-alpha, carlos, Mallappa, Premachandra

* Sajan Karumanchi:

>> As this patch has already been reviewed, should we just install it,
>> after fixing some style issues?

>  Yes
>>
>> Do we need to do some testing across various CPUs to make sure the
>> reported values for other CPUs do not change?

> Though we have done the testing on Zen and pre-Zen architectures, we
> recommend to carryout the tests from your end too.

I'll see if we can arrange some more testing with our lab machines.  I
think this can happen afterwards.

> PFA reworked patch addressing the review comments.

Thank you.

> From d5995d6b82f3ae12605c3d64c96b403f0b6b4f16 Mon Sep 17 00:00:00 2001
> From: Sajan Karumanchi <sajan.karumanchi@amd.com>
> Date: Mon, 5 Dec 2022 15:49:14 +0530
> Subject: [PATCH v1] x86: Cache computation for AMD architecture.
> To: libc-alpha@sourceware.org,carlos@redhat.com,fweimer@redhat.com
> Cc: premachandra.mallappa@amd.com
>
> All AMD architectures cache details will be computed based on
> __cpuid__ `0x8000_001D` and the reference to __cpuid__ `0x8000_0006` will be
> zeroed out for future architectures.
>
> Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
> ---
>  sysdeps/x86/dl-cacheinfo.h | 205 +++++++++----------------------------
>  1 file changed, 46 insertions(+), 159 deletions(-)

Carlos, is it still okay to merge this during the freeze?

This looks like something we would backport.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1] x86: Cache computation for AMD architecture.
  2023-01-18 14:54     ` Florian Weimer
@ 2023-01-18 15:59       ` Carlos O'Donell
  0 siblings, 0 replies; 8+ messages in thread
From: Carlos O'Donell @ 2023-01-18 15:59 UTC (permalink / raw)
  To: Florian Weimer, Karumanchi, Sajan; +Cc: libc-alpha, Mallappa, Premachandra

On 1/18/23 09:54, Florian Weimer wrote:
>> From d5995d6b82f3ae12605c3d64c96b403f0b6b4f16 Mon Sep 17 00:00:00 2001
>> From: Sajan Karumanchi <sajan.karumanchi@amd.com>
>> Date: Mon, 5 Dec 2022 15:49:14 +0530
>> Subject: [PATCH v1] x86: Cache computation for AMD architecture.
>> To: libc-alpha@sourceware.org,carlos@redhat.com,fweimer@redhat.com
>> Cc: premachandra.mallappa@amd.com
>>
>> All AMD architectures cache details will be computed based on
>> __cpuid__ `0x8000_001D` and the reference to __cpuid__ `0x8000_0006` will be
>> zeroed out for future architectures.
>>
>> Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
>> ---
>>  sysdeps/x86/dl-cacheinfo.h | 205 +++++++++----------------------------
>>  1 file changed, 46 insertions(+), 159 deletions(-)
> 
> Carlos, is it still okay to merge this during the freeze?
> 
> This looks like something we would backport.

It is still OK to merge and include in 2.37.

Please commit this as soon as your ready.

-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1] x86: Cache computation for AMD architecture.
  2023-01-18  7:39   ` [PATCH v1] " Karumanchi, Sajan
  2023-01-18 14:54     ` Florian Weimer
@ 2023-02-09 13:43     ` Andreas Schwab
  2023-05-08 17:06     ` Florian Weimer
  2 siblings, 0 replies; 8+ messages in thread
From: Andreas Schwab @ 2023-02-09 13:43 UTC (permalink / raw)
  To: Karumanchi, Sajan via Libc-alpha
  Cc: Florian Weimer, carlos, Karumanchi, Sajan, Mallappa, Premachandra

On Jan 18 2023, Karumanchi, Sajan via Libc-alpha wrote:

> diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
> index e9f3382108..21ac0a2dad 100644
> --- a/sysdeps/x86/dl-cacheinfo.h
> +++ b/sysdeps/x86/dl-cacheinfo.h
> @@ -311,117 +311,47 @@ handle_intel (int name, const struct cpu_features *cpu_features)
>  
>  
>  static long int __attribute__ ((noinline))
> -handle_amd (int name)
> +handle_amd (int name, const struct cpu_features *cpu_features)

Nothing uses that argument.

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1] x86: Cache computation for AMD architecture.
  2023-01-18  7:39   ` [PATCH v1] " Karumanchi, Sajan
  2023-01-18 14:54     ` Florian Weimer
  2023-02-09 13:43     ` Andreas Schwab
@ 2023-05-08 17:06     ` Florian Weimer
  2023-05-11  7:15       ` Karumanchi, Sajan
  2 siblings, 1 reply; 8+ messages in thread
From: Florian Weimer @ 2023-05-08 17:06 UTC (permalink / raw)
  To: Karumanchi, Sajan; +Cc: libc-alpha, carlos, Mallappa, Premachandra

I'm sorry to report this new cache size computation approach is not
compatible with older AMD CPUs.  Some existing hypervisors set these
CPUID values to zero while still identifying these CPUs as AMD.  I filed
a new bug to track this regression:

  New AMD cache size computation logic does not work for some CPUs,
  hypervisors
  <https://sourceware.org/bugzilla/show_bug.cgi?id=30428>

It looks like we need to keep the old way of doing things.
Would you be able to work on a new patch?

I'm also going to file a separate bug, suggesting that we deselect
string functions that require cache size information if this information
is not available (mostly due to hypervisor masking).  But that's a
different issue.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v1] x86: Cache computation for AMD architecture.
  2023-05-08 17:06     ` Florian Weimer
@ 2023-05-11  7:15       ` Karumanchi, Sajan
  0 siblings, 0 replies; 8+ messages in thread
From: Karumanchi, Sajan @ 2023-05-11  7:15 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libc-alpha, carlos, Mallappa, Premachandra

[AMD Official Use Only - General]

Hi Florian,

> -----Original Message-----
> From: Florian Weimer <fweimer@redhat.com>
> Sent: Monday, May 8, 2023 10:36 PM
> To: Karumanchi, Sajan <Sajan.Karumanchi@amd.com>
> Cc: libc-alpha@sourceware.org; carlos@redhat.com; Mallappa,
> Premachandra <Premachandra.Mallappa@amd.com>
> Subject: Re: [PATCH v1] x86: Cache computation for AMD architecture.
>
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> I'm sorry to report this new cache size computation approach is not
> compatible with older AMD CPUs.  Some existing hypervisors set these
> CPUID values to zero while still identifying these CPUs as AMD.  I filed a new
> bug to track this regression:
>
>   New AMD cache size computation logic does not work for some CPUs,
>   hypervisors
>   <https://sourceware.org/bugzilla/show_bug.cgi?id=30428>
>
> It looks like we need to keep the old way of doing things.
> Would you be able to work on a new patch?
[Sajan] Yes, I will work on a new patch to handle the zeroed out CPUID scenario with the old way.
>
> I'm also going to file a separate bug, suggesting that we deselect string
> functions that require cache size information if this information is not
> available (mostly due to hypervisor masking).  But that's a different issue.
>
> Thanks,
> Florian

Thanks & Regards,
Sajan K.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-05-11  7:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-12 11:53 [PATCH] x86: Cache computation for AMD architecture Karumanchi, Sajan
2023-01-16 12:33 ` Florian Weimer
2023-01-18  7:39   ` [PATCH v1] " Karumanchi, Sajan
2023-01-18 14:54     ` Florian Weimer
2023-01-18 15:59       ` Carlos O'Donell
2023-02-09 13:43     ` Andreas Schwab
2023-05-08 17:06     ` Florian Weimer
2023-05-11  7:15       ` Karumanchi, Sajan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).