public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v4 0/3] Enhanced CPU diagnostics for ld.so
@ 2024-04-08 11:07 Florian Weimer
  2024-04-08 11:07 ` [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics Florian Weimer
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Florian Weimer @ 2024-04-08 11:07 UTC (permalink / raw)
  To: libc-alpha

Mostly unchanged repost with a Hurd build fix and an aarch64 manual
update.

Florian Weimer (3):
  elf: Add CPU iteration support for future use in ld.so diagnostics
  x86: Add generic CPUID data dumper to ld.so --list-diagnostics
  aarch64: Enhanced CPU diagnostics for ld.so

 elf/dl-iterate_cpu.h                  | 136 +++++++++
 manual/dynlink.texi                   | 119 +++++++-
 sysdeps/aarch64/dl-diagnostics-cpu.c  |  84 ++++++
 sysdeps/generic/dl-affinity.h         |  54 ++++
 sysdeps/unix/sysv/linux/dl-affinity.h |  46 +++
 sysdeps/x86/dl-diagnostics-cpu.c      | 384 ++++++++++++++++++++++++++
 6 files changed, 822 insertions(+), 1 deletion(-)
 create mode 100644 elf/dl-iterate_cpu.h
 create mode 100644 sysdeps/aarch64/dl-diagnostics-cpu.c
 create mode 100644 sysdeps/generic/dl-affinity.h
 create mode 100644 sysdeps/unix/sysv/linux/dl-affinity.h


base-commit: 1f94147a79fcb7211f1421b87383cad93986797f
-- 
2.44.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics
  2024-04-08 11:07 [PATCH v4 0/3] Enhanced CPU diagnostics for ld.so Florian Weimer
@ 2024-04-08 11:07 ` Florian Weimer
  2024-04-08 13:32   ` Szabolcs Nagy
  2024-04-08 11:08 ` [PATCH v4 2/3] x86: Add generic CPUID data dumper to ld.so --list-diagnostics Florian Weimer
  2024-04-08 11:08 ` [PATCH v4 3/3] aarch64: Enhanced CPU diagnostics for ld.so Florian Weimer
  2 siblings, 1 reply; 6+ messages in thread
From: Florian Weimer @ 2024-04-08 11:07 UTC (permalink / raw)
  To: libc-alpha

---
v4: Include <stdbool.h> in the generic version, to fix the Hurd build.

 elf/dl-iterate_cpu.h                  | 136 ++++++++++++++++++++++++++
 sysdeps/generic/dl-affinity.h         |  54 ++++++++++
 sysdeps/unix/sysv/linux/dl-affinity.h |  46 +++++++++
 3 files changed, 236 insertions(+)
 create mode 100644 elf/dl-iterate_cpu.h
 create mode 100644 sysdeps/generic/dl-affinity.h
 create mode 100644 sysdeps/unix/sysv/linux/dl-affinity.h

diff --git a/elf/dl-iterate_cpu.h b/elf/dl-iterate_cpu.h
new file mode 100644
index 0000000000..60db167b13
--- /dev/null
+++ b/elf/dl-iterate_cpu.h
@@ -0,0 +1,136 @@
+/* Iterate over all CPUs, for CPU-specific diagnostics.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef DL_ITERATE_CPU_H
+#define DL_ITERATE_CPU_H
+
+#include <dl-affinity.h>
+#include <stdbool.h>
+
+/* Iteration state for visiting the CPUs in the affinity mask of the
+   current thread.  Set up with _dl_iterate_cpu_init and advanced with
+   _dl_iterate_cpu_next.  The first four fields are the results for the
+   caller; the remaining fields are internal.  */
+struct dl_iterate_cpu
+{
+  /* Sequential iteration count, starting at 0.  */
+  unsigned int processor_index;
+
+  /* Requested CPU.  Can be -1 if affinity could not be set.  */
+  int requested_cpu;
+
+  /* Observed current CPU.  -1 if unavailable.  */
+  int actual_cpu;
+
+  /* Observed node ID for the CPU.  -1 if unavailable.  */
+  int actual_node;
+
+  /* Internal fields to implement the iteration.   */
+
+  /* Affinity as obtained by _dl_iterate_cpu_init, using
+     _dl_getaffinity.  Space for 8,192 CPUs.  */
+  unsigned long int mask_reference[8192 / sizeof (unsigned long int) / 8];
+
+  /* This array is used by _dl_setaffinity calls.  It has the same size
+     as mask_reference.  NOTE(review): _dl_iterate_cpu_next hands all
+     words up to the one containing the requested CPU to
+     _dl_setaffinity, so the preceding words need to be zero.  */
+  unsigned long int mask_request[8192 / sizeof (unsigned long int) / 8];
+
+  /* Return value from the initial _dl_getaffinity call.  Values that
+     are not positive mean that no CPU pinning is attempted.
+     _dl_iterate_cpu_next sets this to -1 after a failing
+     _dl_setaffinity call.  */
+  int length_reference;
+};
+
+/* Prepare *DIC for iteration.  The current affinity mask of the thread
+   is captured as the reference set of CPUs, and the iteration state is
+   reset so that the first _dl_iterate_cpu_next call reports processor
+   index 0 and starts searching at CPU 0.  */
+static void
+_dl_iterate_cpu_init (struct dl_iterate_cpu *dic)
+{
+  dic->length_reference
+    = _dl_getaffinity (dic->mask_reference, sizeof (dic->mask_reference));
+
+  /* _dl_iterate_cpu_next passes every word up to and including the one
+     for the requested CPU to _dl_setaffinity, but it only ever writes
+     the word that contains the requested CPU.  Words without any CPU
+     in the reference mask are never written, so they have to start out
+     as zero.  Otherwise, indeterminate bits would be requested too.  */
+  for (unsigned int i = 0;
+       i < sizeof (dic->mask_request) / sizeof (dic->mask_request[0]); ++i)
+    dic->mask_request[i] = 0;
+
+  /* Prepare for the first _dl_iterate_cpu_next call, which increments
+     both values before using them.  */
+  dic->processor_index = -1;
+  dic->requested_cpu = -1;
+}
+
+/* Advance *DIC to the next CPU.  If an affinity mask is available
+   (DIC->length_reference is positive), search the reference mask for
+   the next set bit and try to pin the current thread to that CPU.
+   Otherwise, iterate exactly once without pinning.  Return true if the
+   caller should probe the CPU it is running on now; in this case,
+   DIC->processor_index, DIC->actual_cpu and DIC->actual_node have been
+   updated (and DIC->requested_cpu if a mask is available).  Return
+   false if the iteration is complete.  */
+static bool
+_dl_iterate_cpu_next (struct dl_iterate_cpu *dic)
+{
+  ++dic->processor_index;
+
+  if (dic->length_reference > 0)
+    {
+      /* Search for the next CPU to switch to.  */
+      while (true)
+        {
+          ++dic->requested_cpu;
+
+          /* Array index and bit number within the array.  */
+          unsigned int long_index
+            = dic->requested_cpu / sizeof (unsigned long int) / 8;
+          unsigned int bit_index
+            = dic->requested_cpu % (sizeof (unsigned long int) * 8);
+
+          /* length_reference is a byte count and positive here, so the
+             comparison against the byte offset of the word is safe.  */
+          if (long_index * sizeof (unsigned long int) >= dic->length_reference)
+            /* All possible CPUs have been covered.  */
+            return false;
+
+          unsigned long int bit = 1UL << bit_index;
+          if (dic->mask_reference[long_index] & bit)
+            {
+              /* The CPU is available.  Try to select it.  Only the
+                 words up to and including the current one are passed
+                 to _dl_setaffinity.  */
+              dic->mask_request[long_index] = bit;
+              if (_dl_setaffinity (dic->mask_request,
+                                   (long_index + 1)
+                                   * sizeof (unsigned long int)) < 0)
+                {
+                  /* Record that we could not perform a CPU request.
+                     This makes the next call take the no-affinity
+                     path below, which ends the iteration.  */
+                  dic->length_reference = -1;
+
+                  if (dic->processor_index > 0)
+                    /* We already reported something.  There is no need to
+                       continue because the new data is probably not useful.  */
+                    return false;
+
+                  /* NOTE(review): on this first-iteration failure,
+                     requested_cpu keeps the CPU number that could not
+                     be selected; it is not reset to -1.  */
+                }
+
+              /* Clear the bit in case the next iteration switches to the
+                 next long value.  */
+              dic->mask_request[long_index] = 0;
+
+              /* We found a CPU to run on.  */
+              break;
+            }
+        }
+    }
+  else
+    {
+      /* No way to set CPU affinity.  Iterate just once.  */
+      if (dic->processor_index > 0)
+        return false;
+    }
+
+  /* Fill in the actual CPU information.  CPU pinning may not actually
+     be effective, depending on the container host.  */
+  unsigned int cpu, node;
+  if (_dl_getcpu (&cpu, &node) < 0)
+    {
+      /* No CPU information available.  */
+      dic->actual_cpu = -1;
+      dic->actual_node = -1;
+    }
+  else
+    {
+      dic->actual_cpu = cpu;
+      dic->actual_node = node;
+    }
+
+  return true;
+}
+
+#endif /* DL_ITERATE_CPU_H */
diff --git a/sysdeps/generic/dl-affinity.h b/sysdeps/generic/dl-affinity.h
new file mode 100644
index 0000000000..d117f737e9
--- /dev/null
+++ b/sysdeps/generic/dl-affinity.h
@@ -0,0 +1,54 @@
+/* CPU affinity handling for the dynamic linker.  Stub version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef DL_AFFINITY_H
+#define DL_AFFINITY_H
+
+#include <errno.h>
+#include <stddef.h>
+
+/* Report the CPU and node the caller currently runs on.  A successful
+   call stores the CPU ID in *CPU and the node ID in *NODE and returns
+   0; a failure is indicated by a negative error code.  This stub
+   cannot obtain the information, so it always fails with -ENOSYS and
+   leaves *CPU and *NODE untouched.  */
+static inline int
+_dl_getcpu (unsigned int *cpu, unsigned int *node)
+{
+  (void) cpu;
+  (void) node;
+  int result = -ENOSYS;
+  return result;
+}
+
+/* On success, write CPU ID affinity bits for the current thread to
+   *BITS, which must be SIZE bytes long, and return the number of
+   bytes updated, a multiple of sizeof (unsigned long int).  On
+   failure, return a negative error code.  This stub always fails with
+   -ENOSYS and does not touch *BITS.  Declared inline, like _dl_getcpu,
+   so that including this header without calling the function does not
+   produce an unused-function warning.  */
+static inline int
+_dl_getaffinity (unsigned long int *bits, size_t size)
+{
+  return -ENOSYS;
+}
+
+/* Set the CPU affinity mask for the current thread to *BITS, using
+   the SIZE bytes from that array, which should be a multiple of
+   sizeof (unsigned long int).  Return 0 on success, and a negative
+   error code on failure.  This stub always fails with -ENOSYS.
+   Declared inline, like _dl_getcpu, so that including this header
+   without calling the function does not produce an unused-function
+   warning.  */
+static inline int
+_dl_setaffinity (const unsigned long int *bits, size_t size)
+{
+  return -ENOSYS;
+}
+
+#endif /* DL_AFFINITY_H */
diff --git a/sysdeps/unix/sysv/linux/dl-affinity.h b/sysdeps/unix/sysv/linux/dl-affinity.h
new file mode 100644
index 0000000000..bbfede7750
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/dl-affinity.h
@@ -0,0 +1,46 @@
+/* CPU affinity handling for the dynamic linker.  Linux version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* See sysdeps/generic/dl-affinity.h for documentation of these interfaces.  */
+
+#ifndef DL_AFFINITY_H
+#define DL_AFFINITY_H
+
+#include <sysdep.h>
+#include <stddef.h>
+#include <unistd.h>
+
+/* Linux implementation: ask the kernel directly through the getcpu
+   system call and hand its result back to the caller unchanged.  */
+static inline int
+_dl_getcpu (unsigned int *cpu, unsigned int *node)
+{
+  int result = INTERNAL_SYSCALL_CALL (getcpu, cpu, node);
+  return result;
+}
+
+/* Linux implementation: issue the sched_getaffinity system call for
+   the current thread (TID 0) and return its result, which follows the
+   contract documented in the generic header.  Declared inline, like
+   _dl_getcpu, so that including this header without calling the
+   function does not produce an unused-function warning.  */
+static inline int
+_dl_getaffinity (unsigned long int *bits, size_t size)
+{
+  return INTERNAL_SYSCALL_CALL (sched_getaffinity, /* TID */ 0, size, bits);
+}
+
+/* Linux implementation: issue the sched_setaffinity system call for
+   the current thread (TID 0) with the SIZE bytes at *BITS and return
+   its result (negative on failure).  Declared inline, like
+   _dl_getcpu, so that including this header without calling the
+   function does not produce an unused-function warning.  */
+static inline int
+_dl_setaffinity (const unsigned long int *bits, size_t size)
+{
+  return INTERNAL_SYSCALL_CALL (sched_setaffinity, /* TID */ 0, size, bits);
+}
+
+#endif /* DL_AFFINITY_H */
-- 
2.44.0



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v4 2/3] x86: Add generic CPUID data dumper to ld.so --list-diagnostics
  2024-04-08 11:07 [PATCH v4 0/3] Enhanced CPU diagnostics for ld.so Florian Weimer
  2024-04-08 11:07 ` [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics Florian Weimer
@ 2024-04-08 11:08 ` Florian Weimer
  2024-04-08 11:08 ` [PATCH v4 3/3] aarch64: Enhanced CPU diagnostics for ld.so Florian Weimer
  2 siblings, 0 replies; 6+ messages in thread
From: Florian Weimer @ 2024-04-08 11:08 UTC (permalink / raw)
  To: libc-alpha

This is surprisingly difficult to implement if the goal is to produce
reasonably sized output.  With the current approaches to output
compression (suppressing zeros and repeated results between CPUs,
folding ranges of identical subleaves, dealing with the %ecx
reflection issue), the output is less than 600 KiB even for systems
with 256 logical CPUs.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
 manual/dynlink.texi              |  85 ++++++-
 sysdeps/x86/dl-diagnostics-cpu.c | 384 +++++++++++++++++++++++++++++++
 2 files changed, 468 insertions(+), 1 deletion(-)

diff --git a/manual/dynlink.texi b/manual/dynlink.texi
index 06a6c15533..f2f2341818 100644
--- a/manual/dynlink.texi
+++ b/manual/dynlink.texi
@@ -228,7 +228,90 @@ reported by the @code{uname} function.  @xref{Platform Type}.
 @item x86.cpu_features.@dots{}
 These items are specific to the i386 and x86-64 architectures.  They
 reflect supported CPU features and information on cache geometry, mostly
-collected using the @code{CPUID} instruction.
+collected using the CPUID instruction.
+
+@item x86.processor[@var{index}].@dots{}
+These are additional items for the i386 and x86-64 architectures, as
+described below.  They mostly contain raw data from the CPUID
+instruction.  The probes are performed for each active CPU for the
+@code{ld.so} process, and data for different probed CPUs receives a
+unique @var{index} value.  Some CPUID data is expected to differ from CPU
+core to CPU core.  In some cases, CPUs are not correctly initialized and
+indicate the presence of different feature sets.
+
+@item x86.processor[@var{index}].requested=@var{kernel-cpu}
+The kernel is told to run the subsequent probing on the CPU numbered
+@var{kernel-cpu}.  The values @var{kernel-cpu} and @var{index} can be
+distinct if there are gaps in the process CPU affinity mask.  This line
+is not included if CPU affinity mask information is not available.
+
+@item x86.processor[@var{index}].observed=@var{kernel-cpu}
+This line reports the kernel CPU number @var{kernel-cpu} on which the
+probing code initially ran.  If the CPU number cannot be obtained,
+this line is not printed.
+
+@item x86.processor[@var{index}].observed_node=@var{node}
+This reports the observed NUMA node number, as reported by the
+@code{getcpu} system call.  If this information cannot be obtained, this
+line is not printed.
+
+@item x86.processor[@var{index}].cpuid_leaves=@var{count}
+This line indicates that @var{count} distinct CPUID leaves were
+encountered.  (This reflects internal @code{ld.so} storage space; it
+does not directly correspond to @code{CPUID} enumeration ranges.)
+
+@item x86.processor[@var{index}].ecx_limit=@var{value}
+The CPUID data extraction code uses a brute-force approach to enumerate
+subleaves (see the @samp{.subleaf_eax} lines below).  The last
+@code{%rcx} value used in a CPUID query on this probed CPU was
+@var{value}.
+
+@item x86.processor[@var{index}].cpuid.eax[@var{query_eax}].eax=@var{eax}
+@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].ebx=@var{ebx}
+@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].ecx=@var{ecx}
+@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].edx=@var{edx}
+These lines report the register contents after executing the CPUID
+instruction with @samp{%rax == @var{query_eax}} and @samp{%rcx == 0} (a
+@dfn{leaf}).  For the first probed CPU (with a zero @var{index}), only
+leaves with non-zero register contents are reported.  For subsequent
+CPUs, only leaves whose register contents differ from the previously
+probed CPU (with @var{index} one less) are reported.
+
+Basic and extended leaves are reported using the same syntax.  This
+means there is a large jump in @var{query_eax} for the first reported
+extended leaf.
+
+@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].eax=@var{eax}
+@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ebx=@var{ebx}
+@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ecx=@var{ecx}
+@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].edx=@var{edx}
+This is similar to the leaves above, but for a @dfn{subleaf}.  For
+subleaves, the CPUID instruction is executed with @samp{%rax ==
+@var{query_eax}} and @samp{%rcx == @var{query_ecx}}, so the result
+depends on both register values.  The same rules about filtering zero
+and identical results apply.
+
+@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].until_ecx=@var{ecx_limit}
+Some CPUID results are the same regardless of the @var{query_ecx} value.
+If this situation is detected, a line with the @samp{.until_ecx}
+selector is included, and this indicates that the CPUID register
+contents are the same for @code{%rcx} values between @var{query_ecx}
+and @var{ecx_limit} (inclusive).
+
+@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ecx_query_mask=0xff
+This line indicates that in an @samp{.until_ecx} range, the CPUID
+instruction preserved the lowest 8 bits of the input @code{%rcx} in
+the output @code{%rcx} register.  Otherwise, the subleaves in the range
+have identical values.  This special treatment is necessary to report
+compact range information in case such copying occurs (because the
+subleaves would otherwise be all different).
+
+@item x86.processor[@var{index}].xgetbv.ecx[@var{query_ecx}]=@var{result}
+This line shows the 64-bit @var{result} value in the @code{%rdx:%rax}
+register pair after executing the XGETBV instruction with @code{%rcx}
+set to @var{query_ecx}.  Zero values and values matching the previously
+probed CPU are omitted.  Nothing is printed if the system does not
+support the XGETBV instruction.
 @end table
 
 @node Dynamic Linker Introspection
diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c
index c76ea3be16..ceafde9481 100644
--- a/sysdeps/x86/dl-diagnostics-cpu.c
+++ b/sysdeps/x86/dl-diagnostics-cpu.c
@@ -17,7 +17,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <dl-diagnostics.h>
+
+#include <array_length.h>
+#include <cpu-features.h>
+#include <cpuid.h>
+#include <dl-iterate_cpu.h>
 #include <ldsodefs.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sysdep.h>
+
+/* The generic CPUID dumping code.  */
+static void _dl_diagnostics_cpuid (void);
 
 static void
 print_cpu_features_value (const char *label, uint64_t value)
@@ -120,4 +131,377 @@ _dl_diagnostics_cpu (void)
 	      + sizeof (cpu_features->cachesize_non_temporal_divisor)
 	  == sizeof (*cpu_features),
       "last cpu_features field has been printed");
+
+  _dl_diagnostics_cpuid ();
+}
+
+/* The following code implements a generic CPUID dumper that tries to
+   gather CPUID data without knowing about CPUID implementation
+   details.  */
+
+/* Register arguments to CPUID.  Consecutive ECX subleaf values that
+   yield the same result are folded into one [ecx_first, ecx_last]
+   range, to shorten the output.  Two kinds of matches are folded:
+   identical results (EAX to EDX are all the same), and results where
+   EAX, EBX, EDX, and ECX are equal except in the lower byte of ECX,
+   which must match the query ECX value.  The latter is needed to
+   compress ranges on CPUs which preserve the lowest byte in ECX if an
+   unknown leaf is queried.  */
+struct cpuid_query
+{
+  unsigned int eax;
+  unsigned int ecx_first;
+  unsigned int ecx_last;
+  bool ecx_preserves_query_byte;
+};
+
+/* Return a single integer key for *Q that can be used for
+   sorting/ordering comparisons: Q->eax occupies the upper 32 bits and
+   Q->ecx_first the lower 32 bits.  Uses Q->eax and Q->ecx_first only
+   because ecx_last is always greater than the previous ecx_first value
+   and less than the subsequent one.  *Q is not modified.  */
+static inline unsigned long long int
+cpuid_query_combined (const struct cpuid_query *q)
+{
+  /* ecx can be -1 (that is, ~0U).  If this happens, this is the only
+     ecx value for this eax value, so the ordering does not matter.
+     The cast keeps the lower half free of sign extension.  */
+  return ((unsigned long long int) q->eax << 32) | (unsigned int) q->ecx_first;
+}
+
+/* Used for differential reporting of zero/non-zero values.  Passed as
+   the result when a query that was stored for the previous CPU has no
+   counterpart on the current CPU.  As a static object without an
+   initializer, all its registers are zero.  */
+static const struct cpuid_registers cpuid_registers_zero;
+
+/* Register arguments to CPUID (Q, possibly covering a range of ECX
+   values) paired with the results that came back (R).  */
+struct cpuid_query_result
+{
+  struct cpuid_query q;
+  struct cpuid_registers r;
+};
+
+/* During a first enumeration pass, we try to collect data for
+   cpuid_initial_subleaf_limit subleaves per leaf/EAX value.  If we run
+   out of space, we try once more, applying the lower limits:
+   cpuid_subleaf_limit subleaves per leaf, and cpuid_main_leaf_limit
+   leaves in each of the basic and extended leaf areas.  */
+enum { cpuid_main_leaf_limit = 128 };
+enum { cpuid_initial_subleaf_limit = 512 };
+enum { cpuid_subleaf_limit = 32 };
+
+/* Offset of the extended leaf area.  NOTE(review): this value does not
+   fit into int, so the enumeration constant relies on compiler support
+   for wider enumeration constants -- confirm this is intended.  */
+enum {cpuid_extended_leaf_offset = 0x80000000 };
+
+/* Collected CPUID data.  Everything is stored in a statically sized
+   array that is sized so that the second pass will collect some data
+   for all leaves, after the limit is applied.  On the second pass,
+   ecx_limit is set to cpuid_subleaf_limit - 1.  */
+struct cpuid_collected_data
+{
+  /* Number of elements of qr that are in use.  */
+  unsigned int used;
+
+  /* Largest ECX value that is queried for each leaf (inclusive).  */
+  unsigned int ecx_limit;
+
+  /* Result of XGETBV with ECX == 0, or 0 if the instruction was not
+     executed.  */
+  uint64_t xgetbv_ecx_0;
+
+  /* Query results, in the order in which the queries were made.  Room
+     for cpuid_main_leaf_limit leaves in both the basic and the
+     extended area, with cpuid_subleaf_limit subleaves each.  */
+  struct cpuid_query_result qr[cpuid_main_leaf_limit
+                               * 2 * cpuid_subleaf_limit];
+};
+
+/* Execute CPUID with the register arguments EAX and ECX and record the
+   result in *CCD.  The result is either merged into the previously
+   stored element (if it extends that element's ECX range), dropped
+   (if all four result registers are zero), or appended as a new
+   element.  Returns true if there is room (including the merged and
+   dropped cases), false if nothing could be stored because the array
+   is full; in the latter case CPUID is not executed.  */
+static bool
+_dl_diagnostics_cpuid_store (struct cpuid_collected_data *ccd,
+                             unsigned eax, int ecx)
+{
+  if (ccd->used >= array_length (ccd->qr))
+    return false;
+
+  /* Tentatively fill in the next value.  The element only becomes
+     part of the collected data if ccd->used is incremented below.  */
+  __cpuid_count (eax, ecx,
+                 ccd->qr[ccd->used].r.eax,
+                 ccd->qr[ccd->used].r.ebx,
+                 ccd->qr[ccd->used].r.ecx,
+                 ccd->qr[ccd->used].r.edx);
+
+  /* If the ECX subleaf is next subleaf after the previous one (for
+     the same leaf), and the values are the same, merge the result
+     with the already-stored one.  Do this before skipping zero
+     leaves, which avoids artifacts for ECX == 256 queries.  */
+  if (ccd->used > 0
+      && ccd->qr[ccd->used - 1].q.eax == eax
+      && ccd->qr[ccd->used - 1].q.ecx_last + 1 == ecx)
+    {
+      /* Exact match of the previous result. Ignore the value of
+         ecx_preserves_query_byte if this is a singleton range so far
+         because we can treat ECX as fixed if the same value repeats.  */
+      if ((!ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte
+           || (ccd->qr[ccd->used - 1].q.ecx_first
+               == ccd->qr[ccd->used - 1].q.ecx_last))
+          && memcmp (&ccd->qr[ccd->used - 1].r, &ccd->qr[ccd->used].r,
+                     sizeof (ccd->qr[ccd->used].r)) == 0)
+        {
+          ccd->qr[ccd->used - 1].q.ecx_last = ecx;
+          /* ECX is now fixed because the same value has been observed
+             twice, even if we had a low-byte match before.  */
+          ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte = false;
+          return true;
+        }
+      /* Match except for the low byte in ECX, which must match the
+         incoming ECX value.  */
+      if (ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte
+          && (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff)
+          && ccd->qr[ccd->used].r.eax == ccd->qr[ccd->used - 1].r.eax
+          && ccd->qr[ccd->used].r.ebx == ccd->qr[ccd->used - 1].r.ebx
+          && ((ccd->qr[ccd->used].r.ecx & 0xffffff00)
+              == (ccd->qr[ccd->used - 1].r.ecx & 0xffffff00))
+          && ccd->qr[ccd->used].r.edx == ccd->qr[ccd->used - 1].r.edx)
+        {
+          ccd->qr[ccd->used - 1].q.ecx_last = ecx;
+          return true;
+        }
+    }
+
+  /* Do not store zero results.  All-zero values usually mean that the
+     subleaf is unsupported.  */
+  if (ccd->qr[ccd->used].r.eax == 0
+      && ccd->qr[ccd->used].r.ebx == 0
+      && ccd->qr[ccd->used].r.ecx == 0
+      && ccd->qr[ccd->used].r.edx == 0)
+    return true;
+
+  /* The result needs to be stored.  Fill in the query parameters and
+     consume the storage.  The new element starts out as a singleton
+     ECX range; ecx_preserves_query_byte records whether the low byte
+     of the ECX result equals the low byte of the ECX argument.  */
+  ccd->qr[ccd->used].q.eax = eax;
+  ccd->qr[ccd->used].q.ecx_first = ecx;
+  ccd->qr[ccd->used].q.ecx_last = ecx;
+  ccd->qr[ccd->used].q.ecx_preserves_query_byte
+    = (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff);
+  ++ccd->used;
+  return true;
+}
+
+/* Collect CPUID data into *CCD, starting over with an empty array.  If
+   LIMIT, apply the per-leaf limits (cpuid_main_leaf_limit leaves per
+   leaf area, cpuid_subleaf_limit subleaves per leaf) to avoid
+   exceeding the pre-allocated space.  Return true if all data could
+   be stored, false if the array ran out of space and retrying with
+   the limit applied is requested.  */
+static bool
+_dl_diagnostics_cpuid_collect_1 (struct cpuid_collected_data *ccd, bool limit)
+{
+  ccd->used = 0;
+  ccd->ecx_limit
+    = (limit ? cpuid_subleaf_limit : cpuid_initial_subleaf_limit) - 1;
+
+  /* Leaf 0 reports the maximum basic leaf in EAX.  */
+  _dl_diagnostics_cpuid_store (ccd, 0x00, 0x00);
+  if (ccd->used == 0)
+    /* CPUID reported all 0.  Should not happen.  */
+    return true;
+  unsigned int maximum_leaf = ccd->qr[0x00].r.eax;
+  if (limit && maximum_leaf >= cpuid_main_leaf_limit)
+    maximum_leaf = cpuid_main_leaf_limit - 1;
+
+  for (unsigned int eax = 1; eax <= maximum_leaf; ++eax)
+    {
+      for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx)
+        if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx))
+          return false;
+    }
+
+  /* Probe the start of the extended leaf area.  An all-zero result is
+     not stored.  In that case, the last array element still belongs
+     to a basic leaf, and its EAX value must not be mistaken for the
+     maximum extended leaf.  */
+  unsigned int used_before = ccd->used;
+  if (!_dl_diagnostics_cpuid_store (ccd, cpuid_extended_leaf_offset, 0x00))
+    return false;
+  if (ccd->used == used_before)
+    /* No extended CPUID information.  */
+    return true;
+  maximum_leaf = ccd->qr[ccd->used - 1].r.eax;
+  if (maximum_leaf < cpuid_extended_leaf_offset)
+    /* No extended CPUID information.  */
+    return true;
+  if (limit
+      && maximum_leaf - cpuid_extended_leaf_offset >= cpuid_main_leaf_limit)
+    maximum_leaf = cpuid_extended_leaf_offset + cpuid_main_leaf_limit - 1;
+  for (unsigned int eax = cpuid_extended_leaf_offset + 1;
+       eax <= maximum_leaf; ++eax)
+    {
+      for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx)
+        if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx))
+          return false;
+    }
+  return true;
+}
+
+/* Gather all data for the current CPU into *CCD.  The CPUID
+   enumeration runs without limits first; only if that pass reports
+   that it ran out of space is it repeated with the limits applied.
+   Afterwards, the XGETBV result for ECX == 0 is recorded, or 0 if
+   OSXSAVE is not set in the CPU features.  */
+static void
+_dl_diagnostics_cpuid_collect (struct cpuid_collected_data *ccd)
+{
+  bool complete = _dl_diagnostics_cpuid_collect_1 (ccd, false);
+  if (!complete)
+    _dl_diagnostics_cpuid_collect_1 (ccd, true);
+
+  /* The official feature probing already knows whether XGETBV can be
+     used, so consult its result instead of probing again.  */
+  uint64_t xgetbv_value = 0;
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
+    {
+      unsigned int low_half;
+      unsigned int high_half;
+      asm ("xgetbv" : "=a" (low_half), "=d" (high_half) : "c" (0));
+      xgetbv_value = ((uint64_t) high_half << 32) | low_half;
+    }
+  ccd->xgetbv_ecx_0 = xgetbv_value;
+}
+
+/* Print one CPUID result register: REG_VALUE, labeled REG_LABEL, for
+   the query *Q.  If SUBLEAF, the subleaf syntax including Q->ecx_first
+   is used; otherwise, the plain leaf syntax.  PROCESSOR_INDEX is the
+   process sequence number (always starting at zero; not a kernel
+   ID).  */
+static void
+_dl_diagnostics_cpuid_print_reg (unsigned int processor_index,
+                                 const struct cpuid_query *q,
+                                 const char *reg_label, unsigned int reg_value,
+                                 bool subleaf)
+{
+  if (!subleaf)
+    {
+      _dl_printf ("x86.processor[0x%x].cpuid.eax[0x%x].%s=0x%x\n",
+                  processor_index, q->eax, reg_label, reg_value);
+      return;
+    }
+
+  _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
+              ".ecx[0x%x].%s=0x%x\n",
+              processor_index, q->eax, q->ecx_first, reg_label, reg_value);
+}
+
+/* Print CPUID result values in *RESULT for the query in
+   CCD->qr[CCD_IDX].  PROCESSOR_INDEX is the process sequence number
+   (always starting at zero; not a kernel ID).  *RESULT does not have
+   to be the result stored in CCD; the caller can pass different
+   register values to be printed for that query.  */
+static void
+_dl_diagnostics_cpuid_print_query (unsigned int processor_index,
+                                   struct cpuid_collected_data *ccd,
+                                   unsigned int ccd_idx,
+                                   const struct cpuid_registers *result)
+{
+  /* Treat this as a subleaf value if ecx isn't zero (maybe within the
+     [ecx_first, ecx_last] range), or if eax matches its neighbors.
+     If the range is [0, ecx_limit], then the subleaves are not
+     distinct (independently of ecx_preserves_query_byte), so do not
+     report them separately.  */
+  struct cpuid_query *q = &ccd->qr[ccd_idx].q;
+  bool subleaf = (q->ecx_first > 0
+                  || (q->ecx_first != q->ecx_last
+                      && !(q->ecx_first == 0 && q->ecx_last == ccd->ecx_limit))
+                  || (ccd_idx > 0 && q->eax == ccd->qr[ccd_idx - 1].q.eax)
+                  || (ccd_idx + 1 < ccd->used
+                      && q->eax == ccd->qr[ccd_idx + 1].q.eax));
+  _dl_diagnostics_cpuid_print_reg (processor_index, q, "eax", result->eax,
+                                   subleaf);
+  _dl_diagnostics_cpuid_print_reg (processor_index, q, "ebx", result->ebx,
+                                   subleaf);
+  _dl_diagnostics_cpuid_print_reg (processor_index, q, "ecx", result->ecx,
+                                   subleaf);
+  _dl_diagnostics_cpuid_print_reg (processor_index, q, "edx", result->edx,
+                                   subleaf);
+
+  /* For a subleaf range, also print the end of the range, and whether
+     the ECX results within the range differ in their low byte.  */
+  if (subleaf && q->ecx_first != q->ecx_last)
+    {
+      _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
+                  ".ecx[0x%x].until_ecx=0x%x\n",
+                  processor_index, q->eax, q->ecx_first, q->ecx_last);
+      if (q->ecx_preserves_query_byte)
+        _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
+                    ".ecx[0x%x].ecx_query_mask=0xff\n",
+                    processor_index, q->eax, q->ecx_first);
+    }
+}
+
+/* Perform differential reporting of the data in *CURRENT against
+   *BASE.  *DCI describes the CPU that was probed: DCI->requested_cpu
+   is the kernel CPU ID the thread was configured to run on (negative
+   if no configuration was possible), DCI->actual_cpu and
+   DCI->actual_node are the observed kernel IDs (negative if
+   unavailable), and DCI->processor_index is the process sequence
+   number (always starting at zero; not a kernel ID).  Queries stored
+   in *BASE that have no counterpart in *CURRENT are reported with
+   all-zero registers.  */
+static void
+_dl_diagnostics_cpuid_report (struct dl_iterate_cpu *dci,
+                              struct cpuid_collected_data *current,
+                              struct cpuid_collected_data *base)
+{
+  if (dci->requested_cpu >= 0)
+    _dl_printf ("x86.processor[0x%x].requested=0x%x\n",
+                dci->processor_index, dci->requested_cpu);
+  if (dci->actual_cpu >= 0)
+    _dl_printf ("x86.processor[0x%x].observed=0x%x\n",
+                dci->processor_index, dci->actual_cpu);
+  if (dci->actual_node >= 0)
+    _dl_printf ("x86.processor[0x%x].observed_node=0x%x\n",
+                dci->processor_index, dci->actual_node);
+
+  _dl_printf ("x86.processor[0x%x].cpuid_leaves=0x%x\n",
+              dci->processor_index, current->used);
+  _dl_printf ("x86.processor[0x%x].ecx_limit=0x%x\n",
+              dci->processor_index, current->ecx_limit);
+
+  /* Both arrays are ordered by cpuid_query_combined, so they can be
+     processed like a merge of two sorted sequences.  */
+  unsigned int base_idx = 0;
+  for (unsigned int current_idx = 0; current_idx < current->used;
+       ++current_idx)
+    {
+      /* Report missing data on the current CPU as 0.  */
+      unsigned long long int current_query
+        = cpuid_query_combined (&current->qr[current_idx].q);
+      while (base_idx < base->used
+             && cpuid_query_combined (&base->qr[base_idx].q) < current_query)
+      {
+        _dl_diagnostics_cpuid_print_query (dci->processor_index,
+                                           base, base_idx,
+                                           &cpuid_registers_zero);
+        ++base_idx;
+      }
+
+      if (base_idx < base->used
+          && cpuid_query_combined (&base->qr[base_idx].q) == current_query)
+        {
+          _Static_assert (sizeof (struct cpuid_registers) == 4 * 4,
+                          "no padding in struct cpuid_registers");
+          if (current->qr[current_idx].q.ecx_last
+              != base->qr[base_idx].q.ecx_last
+              || memcmp (&current->qr[current_idx].r,
+                         &base->qr[base_idx].r,
+                         sizeof (struct cpuid_registers)) != 0)
+              /* The ECX range or the values have changed.  Show the
+                 new values.  */
+            _dl_diagnostics_cpuid_print_query (dci->processor_index,
+                                               current, current_idx,
+                                               &current->qr[current_idx].r);
+          ++base_idx;
+        }
+      else
+        /* Data is absent in the base reference.  Report the new data.  */
+        _dl_diagnostics_cpuid_print_query (dci->processor_index,
+                                           current, current_idx,
+                                           &current->qr[current_idx].r);
+    }
+
+  /* Base queries that sort after the last query of the current CPU
+     are missing on the current CPU as well.  Report them as 0, too,
+     so that they are treated like the ones found in the loop above.  */
+  for (; base_idx < base->used; ++base_idx)
+    _dl_diagnostics_cpuid_print_query (dci->processor_index,
+                                       base, base_idx,
+                                       &cpuid_registers_zero);
+
+  if (current->xgetbv_ecx_0 != base->xgetbv_ecx_0)
+    {
+      /* Re-use the 64-bit printing routine.  */
+      _dl_printf ("x86.processor[0x%x].", dci->processor_index);
+      _dl_diagnostics_print_labeled_value ("xgetbv.ecx[0x0]",
+                                           current->xgetbv_ecx_0);
+    }
+}
+
+/* Entry point of the generic CPUID dumper.  Iterates over the CPUs
+   using _dl_iterate_cpu_init and _dl_iterate_cpu_next, collects the
+   CPUID data on each of them, and prints it as a difference against
+   the data of the previously probed CPU.  */
+static void
+_dl_diagnostics_cpuid (void)
+{
+#if !HAS_CPUID
+  /* If CPUID is not supported, there is nothing to dump.  */
+  if (__get_cpuid_max (0, 0) == 0)
+    return;
+#endif
+
+  struct dl_iterate_cpu dic;
+  _dl_iterate_cpu_init (&dic);
+
+  /* Two copies of the data are used.  Data is written to the index
+     (dic.processor_index & 1).  The previous version against which the
+     data dump is reported is at index !(processor_index & 1).  */
+  struct cpuid_collected_data ccd[2];
+
+  /* The initial data is presumed to be all zero.  Zero results are
+     not recorded.  The first iteration has processor index 0, so it
+     writes to ccd[0] and uses the empty ccd[1] as its base.  */
+  ccd[1].used = 0;
+  ccd[1].xgetbv_ecx_0 = 0;
+
+  /* Run the CPUID probing on a specific CPU.  There are expected
+     differences for encoding core IDs and topology information in
+     CPUID output, but some firmware/kernel bugs also may result in
+     asymmetric data across CPUs in some cases.  */
+  while (_dl_iterate_cpu_next (&dic))
+    {
+      _dl_diagnostics_cpuid_collect (&ccd[dic.processor_index & 1]);
+      _dl_diagnostics_cpuid_report
+        (&dic, &ccd[dic.processor_index & 1],
+         &ccd[!(dic.processor_index & 1)]);
+    }
 }
-- 
2.44.0



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v4 3/3] aarch64: Enhanced CPU diagnostics for ld.so
  2024-04-08 11:07 [PATCH v4 0/3] Enhanced CPU diagnostics for ld.so Florian Weimer
  2024-04-08 11:07 ` [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics Florian Weimer
  2024-04-08 11:08 ` [PATCH v4 2/3] x86: Add generic CPUID data dumper to ld.so --list-diagnostics Florian Weimer
@ 2024-04-08 11:08 ` Florian Weimer
  2024-04-08 13:16   ` Szabolcs Nagy
  2 siblings, 1 reply; 6+ messages in thread
From: Florian Weimer @ 2024-04-08 11:08 UTC (permalink / raw)
  To: libc-alpha

This prints some information from struct cpu_features, and the midr_el1
and dczid_el0 system register contents on every CPU.
---
v4: Manual update.

 manual/dynlink.texi                  | 34 +++++++++++
 sysdeps/aarch64/dl-diagnostics-cpu.c | 84 ++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 sysdeps/aarch64/dl-diagnostics-cpu.c

diff --git a/manual/dynlink.texi b/manual/dynlink.texi
index f2f2341818..d71f7a30d6 100644
--- a/manual/dynlink.texi
+++ b/manual/dynlink.texi
@@ -224,6 +224,40 @@ reflect adjustment by @theglibc{}.
 These Linux-specific items show the values of @code{struct utsname}, as
 reported by the @code{uname} function.  @xref{Platform Type}.
 
+@item aarch64.cpu_features.@dots{}
+These items are specific to the AArch64 architecture.  They report data
+@theglibc{} uses to activate conditionally supported features such as
+BTI and MTE, and to select alternative function implementations.
+
+@item aarch64.processor[@var{index}].@dots{}
+These are additional items for the AArch64 architecture and are
+described below.
+
+@item aarch64.processor[@var{index}].requested=@var{kernel-cpu}
+The kernel is told to run the subsequent probing on the CPU numbered
+@var{kernel-cpu}.  The values @var{kernel-cpu} and @var{index} can be
+distinct if there are gaps in the process CPU affinity mask.  This line
+is not included if CPU affinity mask information is not available.
+
+@item aarch64.processor[@var{index}].observed=@var{kernel-cpu}
+This line reports the kernel CPU number @var{kernel-cpu} on which the
+probing code initially ran.  If the CPU number cannot be obtained,
+this line is not printed.
+
+@item aarch64.processor[@var{index}].observed_node=@var{node}
+This reports the observed NUMA node number, as reported by the
+@code{getcpu} system call.  If this information cannot be obtained, this
+line is not printed.
+
+@item aarch64.processor[@var{index}].midr_el1=@var{value}
+The value of the @code{midr_el1} system register on the processor
+@var{index}.  This line is only printed if the kernel indicates that
+this system register is supported.
+
+@item aarch64.processor[@var{index}].dczid_el0=@var{value}
+The value of the @code{dczid_el0} system register on the processor
+@var{index}.
+
 @cindex CPUID (diagnostics)
 @item x86.cpu_features.@dots{}
 These items are specific to the i386 and x86-64 architectures.  They
diff --git a/sysdeps/aarch64/dl-diagnostics-cpu.c b/sysdeps/aarch64/dl-diagnostics-cpu.c
new file mode 100644
index 0000000000..e037e6ea8c
--- /dev/null
+++ b/sysdeps/aarch64/dl-diagnostics-cpu.c
@@ -0,0 +1,84 @@
+/* Print CPU diagnostics data in ld.so.  AArch64 version.
+   Copyright (C) 2021-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-diagnostics.h>
+
+#include <cpu-features.h>
+#include <dl-iterate_cpu.h>
+#include <ldsodefs.h>
+#include <sys/auxv.h>
+
+static void
+print_cpu_features_value (const char *label, uint64_t value)
+{
+  _dl_printf ("aarch64.cpu_features.");
+  _dl_diagnostics_print_labeled_value (label, value);
+}
+
+static void
+print_per_cpu_value (const struct dl_iterate_cpu *dic,
+                     const char *label, uint64_t value)
+{
+  _dl_printf ("aarch64.processor[0x%x].", dic->processor_index);
+  _dl_diagnostics_print_labeled_value (label, value);
+}
+
+void
+_dl_diagnostics_cpu (void)
+{
+  print_cpu_features_value ("bti", GLRO (dl_aarch64_cpu_features).bti);
+  print_cpu_features_value ("midr_el1",
+                            GLRO (dl_aarch64_cpu_features).midr_el1);
+  print_cpu_features_value ("mops", GLRO (dl_aarch64_cpu_features).mops);
+  print_cpu_features_value ("mte_state",
+                            GLRO (dl_aarch64_cpu_features).mte_state);
+  print_cpu_features_value ("prefer_sve_ifuncs",
+                            GLRO (dl_aarch64_cpu_features).prefer_sve_ifuncs);
+  print_cpu_features_value ("sve", GLRO (dl_aarch64_cpu_features).sve);
+  print_cpu_features_value ("zva_size",
+                            GLRO (dl_aarch64_cpu_features).zva_size);
+
+  struct dl_iterate_cpu dic;
+  _dl_iterate_cpu_init (&dic);
+
+  while (_dl_iterate_cpu_next (&dic))
+    {
+      if (dic.requested_cpu >= 0)
+        _dl_printf ("aarch64.processor[0x%x].requested=0x%x\n",
+                    dic.processor_index, dic.requested_cpu);
+      if (dic.actual_cpu >= 0)
+        _dl_printf ("aarch64.processor[0x%x].observed=0x%x\n",
+                    dic.processor_index, dic.actual_cpu);
+      if (dic.actual_node >= 0)
+        _dl_printf ("aarch64.processor[0x%x].observed_node=0x%x\n",
+                    dic.processor_index, dic.actual_node);
+
+      if (GLRO (dl_hwcap) & HWCAP_CPUID)
+        {
+          uint64_t midr_el1;
+          asm ("mrs %0, midr_el1" : "=r" (midr_el1));
+          print_per_cpu_value (&dic, "midr_el1", midr_el1);
+        }
+
+      {
+        uint64_t dczid_el0;
+        asm ("mrs %0, dczid_el0" : "=r" (dczid_el0));
+        print_per_cpu_value (&dic, "dczid_el0", dczid_el0);
+      }
+    }
+}
-- 
2.44.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 3/3] aarch64: Enhanced CPU diagnostics for ld.so
  2024-04-08 11:08 ` [PATCH v4 3/3] aarch64: Enhanced CPU diagnostics for ld.so Florian Weimer
@ 2024-04-08 13:16   ` Szabolcs Nagy
  0 siblings, 0 replies; 6+ messages in thread
From: Szabolcs Nagy @ 2024-04-08 13:16 UTC (permalink / raw)
  To: Florian Weimer, libc-alpha

The 04/08/2024 13:08, Florian Weimer wrote:
> This prints some information from struct cpu_features, and the midr_el1
> and dczid_el0 system register contents on every CPU.

OK.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>


> ---
> v4: Manual update.
> 
>  manual/dynlink.texi                  | 34 +++++++++++
>  sysdeps/aarch64/dl-diagnostics-cpu.c | 84 ++++++++++++++++++++++++++++
>  2 files changed, 118 insertions(+)
>  create mode 100644 sysdeps/aarch64/dl-diagnostics-cpu.c
> 
> diff --git a/manual/dynlink.texi b/manual/dynlink.texi
> index f2f2341818..d71f7a30d6 100644
> --- a/manual/dynlink.texi
> +++ b/manual/dynlink.texi
> @@ -224,6 +224,40 @@ reflect adjustment by @theglibc{}.
>  These Linux-specific items show the values of @code{struct utsname}, as
>  reported by the @code{uname} function.  @xref{Platform Type}.
>  
> +@item aarch64.cpu_features.@dots{}
> +These items are specific to the AArch64 architecture.  They report data
> +@theglibc{} uses to activate conditionally supported features such as
> +BTI and MTE, and to select alternative function implementations.
> +
> +@item aarch64.processor[@var{index}].@dots{}
> +These are additional items for the AArch64 architecture and are
> +described below.
> +
> +@item aarch64.processor[@var{index}].requested=@var{kernel-cpu}
> +The kernel is told to run the subsequent probing on the CPU numbered
> +@var{kernel-cpu}.  The values @var{kernel-cpu} and @var{index} can be
> +distinct if there are gaps in the process CPU affinity mask.  This line
> +is not included if CPU affinity mask information is not available.
> +
> +@item aarch64.processor[@var{index}].observed=@var{kernel-cpu}
> +This line reports the kernel CPU number @var{kernel-cpu} on which the
> +probing code initially ran.  If the CPU number cannot be obtained,
> +this line is not printed.
> +
> +@item aarch64.processor[@var{index}].observed_node=@var{node}
> +This reports the observed NUMA node number, as reported by the
> +@code{getcpu} system call.  If this information cannot be obtained, this
> +line is not printed.
> +
> +@item aarch64.processor[@var{index}].midr_el1=@var{value}
> +The value of the @code{midr_el1} system register on the processor
> +@var{index}.  This line is only printed if the kernel indicates that
> +this system register is supported.
> +
> +@item aarch64.processor[@var{index}].dczid_el0=@var{value}
> +The value of the @code{dczid_el0} system register on the processor
> +@var{index}.
> +
>  @cindex CPUID (diagnostics)
>  @item x86.cpu_features.@dots{}
>  These items are specific to the i386 and x86-64 architectures.  They
> diff --git a/sysdeps/aarch64/dl-diagnostics-cpu.c b/sysdeps/aarch64/dl-diagnostics-cpu.c
> new file mode 100644
> index 0000000000..e037e6ea8c
> --- /dev/null
> +++ b/sysdeps/aarch64/dl-diagnostics-cpu.c
> @@ -0,0 +1,84 @@
> +/* Print CPU diagnostics data in ld.so.  AArch64 version.
> +   Copyright (C) 2021-2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-diagnostics.h>
> +
> +#include <cpu-features.h>
> +#include <dl-iterate_cpu.h>
> +#include <ldsodefs.h>
> +#include <sys/auxv.h>
> +
> +static void
> +print_cpu_features_value (const char *label, uint64_t value)
> +{
> +  _dl_printf ("aarch64.cpu_features.");
> +  _dl_diagnostics_print_labeled_value (label, value);
> +}
> +
> +static void
> +print_per_cpu_value (const struct dl_iterate_cpu *dic,
> +                     const char *label, uint64_t value)
> +{
> +  _dl_printf ("aarch64.processor[0x%x].", dic->processor_index);
> +  _dl_diagnostics_print_labeled_value (label, value);
> +}
> +
> +void
> +_dl_diagnostics_cpu (void)
> +{
> +  print_cpu_features_value ("bti", GLRO (dl_aarch64_cpu_features).bti);
> +  print_cpu_features_value ("midr_el1",
> +                            GLRO (dl_aarch64_cpu_features).midr_el1);
> +  print_cpu_features_value ("mops", GLRO (dl_aarch64_cpu_features).mops);
> +  print_cpu_features_value ("mte_state",
> +                            GLRO (dl_aarch64_cpu_features).mte_state);
> +  print_cpu_features_value ("prefer_sve_ifuncs",
> +                            GLRO (dl_aarch64_cpu_features).prefer_sve_ifuncs);
> +  print_cpu_features_value ("sve", GLRO (dl_aarch64_cpu_features).sve);
> +  print_cpu_features_value ("zva_size",
> +                            GLRO (dl_aarch64_cpu_features).zva_size);
> +
> +  struct dl_iterate_cpu dic;
> +  _dl_iterate_cpu_init (&dic);
> +
> +  while (_dl_iterate_cpu_next (&dic))
> +    {
> +      if (dic.requested_cpu >= 0)
> +        _dl_printf ("aarch64.processor[0x%x].requested=0x%x\n",
> +                    dic.processor_index, dic.requested_cpu);
> +      if (dic.actual_cpu >= 0)
> +        _dl_printf ("aarch64.processor[0x%x].observed=0x%x\n",
> +                    dic.processor_index, dic.actual_cpu);
> +      if (dic.actual_node >= 0)
> +        _dl_printf ("aarch64.processor[0x%x].observed_node=0x%x\n",
> +                    dic.processor_index, dic.actual_node);
> +
> +      if (GLRO (dl_hwcap) & HWCAP_CPUID)
> +        {
> +          uint64_t midr_el1;
> +          asm ("mrs %0, midr_el1" : "=r" (midr_el1));
> +          print_per_cpu_value (&dic, "midr_el1", midr_el1);
> +        }
> +
> +      {
> +        uint64_t dczid_el0;
> +        asm ("mrs %0, dczid_el0" : "=r" (dczid_el0));
> +        print_per_cpu_value (&dic, "dczid_el0", dczid_el0);
> +      }
> +    }
> +}
> -- 
> 2.44.0
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics
  2024-04-08 11:07 ` [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics Florian Weimer
@ 2024-04-08 13:32   ` Szabolcs Nagy
  0 siblings, 0 replies; 6+ messages in thread
From: Szabolcs Nagy @ 2024-04-08 13:32 UTC (permalink / raw)
  To: Florian Weimer, libc-alpha

The 04/08/2024 13:07, Florian Weimer wrote:
> ---
> v4: Include <stdbool.h> in the generic version, to fix the Hurd build.

looks good.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>

> 
>  elf/dl-iterate_cpu.h                  | 136 ++++++++++++++++++++++++++
>  sysdeps/generic/dl-affinity.h         |  54 ++++++++++
>  sysdeps/unix/sysv/linux/dl-affinity.h |  46 +++++++++
>  3 files changed, 236 insertions(+)
>  create mode 100644 elf/dl-iterate_cpu.h
>  create mode 100644 sysdeps/generic/dl-affinity.h
>  create mode 100644 sysdeps/unix/sysv/linux/dl-affinity.h
> 
> diff --git a/elf/dl-iterate_cpu.h b/elf/dl-iterate_cpu.h
> new file mode 100644
> index 0000000000..60db167b13
> --- /dev/null
> +++ b/elf/dl-iterate_cpu.h
> @@ -0,0 +1,136 @@
> +/* Iterate over all CPUs, for CPU-specific diagnostics.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef DL_ITERATE_CPU_H
> +#define DL_ITERATE_CPU_H
> +
> +#include <dl-affinity.h>
> +#include <stdbool.h>
> +
> +struct dl_iterate_cpu
> +{
> +  /* Sequential iteration count, starting at 0.  */
> +  unsigned int processor_index;
> +
> +  /* Requested CPU.  Can be -1 if affinity could not be set.  */
> +  int requested_cpu;
> +
> +  /* Observed current CPU.  -1 if unavailable.  */
> +  int actual_cpu;
> +
> +  /* Observed node ID for the CPU.  -1 if unavailable.  */
> +  int actual_node;
> +
> +  /* Internal fields to implement the iteration.   */
> +
> +  /* Affinity as obtained by _dl_iterate_cpu_init, using
> +     _dl_getaffinity.  Space for 8,192 CPUs.  */
> +  unsigned long int mask_reference[8192 / sizeof (unsigned long int) / 8];
> +
> +  /* This array is used by _dl_setaffinity calls.  */
> +  unsigned long int mask_request[8192 / sizeof (unsigned long int) / 8];
> +
> +  /* Return value from the initial _dl_getaffinity call.   */
> +  int length_reference;
> +};
> +
> +static void
> +_dl_iterate_cpu_init (struct dl_iterate_cpu *dic)
> +{
> +  dic->length_reference
> +    = _dl_getaffinity (dic->mask_reference, sizeof (dic->mask_reference));
> +  /* Prepare for the first _dl_iterate_cpu_next call.  */
> +  dic->processor_index = -1;
> +  dic->requested_cpu = -1;
> +}
> +
> +static bool
> +_dl_iterate_cpu_next (struct dl_iterate_cpu *dic)
> +{
> +  ++dic->processor_index;
> +
> +  if (dic->length_reference > 0)
> +    {
> +      /* Search for the next CPU to switch to.  */
> +      while (true)
> +        {
> +          ++dic->requested_cpu;
> +
> +          /* Array index and bit number within the array.  */
> +          unsigned int long_index
> +            = dic->requested_cpu / sizeof (unsigned long int) / 8;
> +          unsigned int bit_index
> +            = dic->requested_cpu % (sizeof (unsigned long int) * 8);
> +
> +          if (long_index * sizeof (unsigned long int) >= dic->length_reference)
> +            /* All possible CPUs have been covered.  */
> +            return false;
> +
> +          unsigned long int bit = 1UL << bit_index;
> +          if (dic->mask_reference[long_index] & bit)
> +            {
> +              /* The CPU is available.  Try to select it.  */
> +              dic->mask_request[long_index] = bit;
> +              if (_dl_setaffinity (dic->mask_request,
> +                                   (long_index + 1)
> +                                   * sizeof (unsigned long int)) < 0)
> +                {
> +                  /* Record that we could not perform a CPU request.  */
> +                  dic->length_reference = -1;
> +
> +                  if (dic->processor_index > 0)
> +                    /* We already reported something.  There is no need to
> +                       continue because the new data is probably not useful.  */
> +                    return false;
> +                }
> +
> +              /* Clear the bit in case the next iteration switches to the
> +                 next long value.  */
> +              dic->mask_request[long_index] = 0;
> +
> +              /* We found a CPU to run on.  */
> +              break;
> +            }
> +        }
> +    }
> +  else
> +    {
> +      /* No way to set CPU affinity.  Iterate just once.  */
> +      if (dic->processor_index > 0)
> +        return false;
> +    }
> +
> +  /* Fill in the actual CPU information.  CPU pinning may not actually
> +     be effective, depending on the container host.  */
> +  unsigned int cpu, node;
> +  if (_dl_getcpu (&cpu, &node) < 0)
> +    {
> +      /* No CPU information available.  */
> +      dic->actual_cpu = -1;
> +      dic->actual_node = -1;
> +    }
> +  else
> +    {
> +      dic->actual_cpu = cpu;
> +      dic->actual_node = node;
> +    }
> +
> +  return true;
> +}
> +
> +#endif /* DL_ITERATE_CPU_H */
> diff --git a/sysdeps/generic/dl-affinity.h b/sysdeps/generic/dl-affinity.h
> new file mode 100644
> index 0000000000..d117f737e9
> --- /dev/null
> +++ b/sysdeps/generic/dl-affinity.h
> @@ -0,0 +1,54 @@
> +/* CPU affinity handling for the dynamic linker.  Stub version.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef DL_AFFINITY_H
> +#define DL_AFFINITY_H
> +
> +#include <errno.h>
> +#include <stddef.h>
> +
> +/* On success, write the current CPU ID to *CPU, and the current node
> +   ID to *NODE, and return 0.  Return a negative error code on
> +   failure.  */
> +static inline int
> +_dl_getcpu (unsigned int *cpu, unsigned int *node)
> +{
> +  return -ENOSYS;
> +}
> +
> +/* On success, write CPU ID affinity bits for the current thread to
> +   *BITS, which must be SIZE bytes long, and return the number of
> +   bytes updated, a multiple of sizeof (unsigned long int).  On
> +   failure, return a negative error code.  */
> +static int
> +_dl_getaffinity (unsigned long int *bits, size_t size)
> +{
> +  return -ENOSYS;
> +}
> +
> +/* Set the CPU affinity mask for the current thread to *BITS, using
> +   the SIZE bytes from that array, which should be a multiple of
> +   sizeof (unsigned long int).  Return 0 on success, and a negative
> +   error code on failure.  */
> +static int
> +_dl_setaffinity (const unsigned long int *bits, size_t size)
> +{
> +  return -ENOSYS;
> +}
> +
> +#endif /* DL_AFFINITY_H */
> diff --git a/sysdeps/unix/sysv/linux/dl-affinity.h b/sysdeps/unix/sysv/linux/dl-affinity.h
> new file mode 100644
> index 0000000000..bbfede7750
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/dl-affinity.h
> @@ -0,0 +1,46 @@
> +/* CPU affinity handling for the dynamic linker.  Linux version.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* See sysdeps/generic/dl-affinity.h for documentation of these interfaces.  */
> +
> +#ifndef DL_AFFINITY_H
> +#define DL_AFFINITY_H
> +
> +#include <sysdep.h>
> +#include <stddef.h>
> +#include <unistd.h>
> +
> +static inline int
> +_dl_getcpu (unsigned int *cpu, unsigned int *node)
> +{
> +  return INTERNAL_SYSCALL_CALL (getcpu, cpu, node);
> +}
> +
> +static int
> +_dl_getaffinity (unsigned long int *bits, size_t size)
> +{
> +  return INTERNAL_SYSCALL_CALL (sched_getaffinity, /* TID */ 0, size, bits);
> +}
> +
> +static int
> +_dl_setaffinity (const unsigned long int *bits, size_t size)
> +{
> +  return INTERNAL_SYSCALL_CALL (sched_setaffinity, /* TID */ 0, size, bits);
> +}
> +
> +#endif /* DL_AFFINITY_H */
> -- 
> 2.44.0
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-04-08 13:33 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-08 11:07 [PATCH v4 0/3] Enhanced CPU diagnostics for ld.so Florian Weimer
2024-04-08 11:07 ` [PATCH v4 1/3] elf: Add CPU iteration support for future use in ld.so diagnostics Florian Weimer
2024-04-08 13:32   ` Szabolcs Nagy
2024-04-08 11:08 ` [PATCH v4 2/3] x86: Add generic CPUID data dumper to ld.so --list-diagnostics Florian Weimer
2024-04-08 11:08 ` [PATCH v4 3/3] aarch64: Enhanced CPU diagnostics for ld.so Florian Weimer
2024-04-08 13:16   ` Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).