public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] PowerPC: Influence hwcaps via cpu arch-level GLIBC_TUNABLES.
@ 2023-06-19  8:09 bmahi496
  2023-06-20 16:20 ` Peter Bergner
  0 siblings, 1 reply; 5+ messages in thread
From: bmahi496 @ 2023-06-19  8:09 UTC (permalink / raw)
  To: libc-alpha; +Cc: rajis, Mahesh Bodapati

From: Mahesh Bodapati <mahesh.bodapati@ibm.com>

This patch enables the option to influence hwcaps used by the powerpc.
The user can set a CPU arch-level tunable like power10 instead of single
HWCAP features.

The influenced hwcap are stored in the powerpc-specific cpu_features struct.

Below are the supported cpu arch-level tunables.
-  power10: power10 feature set
-  power9: power9 feature set
-  power8: power8 feature set
-  power7: power7 feature set
-  power6: power6 feature set
-  power5: power5 feature set
-  power4: power4 feature set.
---
 manual/tunables.texi                          |   5 +-
 sysdeps/powerpc/cpu-features.c                | 164 ++++++++++++++++++
 sysdeps/powerpc/cpu-features.h                |   2 +
 sysdeps/powerpc/dl-tunables.list              |   3 +
 sysdeps/powerpc/powerpc32/dl-machine.h        |   2 +
 .../powerpc32/power4/multiarch/init-arch.h    |  10 +-
 .../powerpc64/multiarch/ifunc-impl-list.c     |   7 +-
 7 files changed, 185 insertions(+), 8 deletions(-)

diff --git a/manual/tunables.texi b/manual/tunables.texi
index 4ca0e42a11..c3a657d5d2 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -513,7 +513,10 @@ On s390x, the supported HWCAP and STFLE features can be found in
 @code{sysdeps/s390/cpu-features.c}.  In addition the user can also set
 a CPU arch-level like @code{z13} instead of single HWCAP and STFLE features.
 
-This tunable is specific to i386, x86-64 and s390x.
+On powerpc, the user can set a CPU arch-level like @code{power10}, @code{power9}
+instead of single HWCAP features.
+
+This tunable is specific to i386, x86-64, s390x and powerpc.
 @end deftp
 
 @deftp Tunable glibc.cpu.cached_memopt
diff --git a/sysdeps/powerpc/cpu-features.c b/sysdeps/powerpc/cpu-features.c
index 0ef3cf89d2..35c88e8ebf 100644
--- a/sysdeps/powerpc/cpu-features.c
+++ b/sysdeps/powerpc/cpu-features.c
@@ -19,14 +19,178 @@
 #include <stdint.h>
 #include <cpu-features.h>
 #include <elf/dl-tunables.h>
+#include <unistd.h>
+#include <string.h>
+#define MEMCMP_DEFAULT memcmp
+
+#define POWERPC_COPY_CPU_FEATURES(SRC_PTR, DEST_PTR)      \
+  (DEST_PTR)->hwcap = (SRC_PTR)->hwcap;                    \
+  (DEST_PTR)->hwcap2 = (SRC_PTR)->hwcap2;
+
+static void
+TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
+{
+  /* The current IFUNC selection is always using the most recent
+     features which are available via AT_HWCAP or AT_HWCAP2.  But in
+     some scenarios it is useful to adjust this selection.
+
+     The environment variable:
+
+     GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,....
+
+     Can be used to enable HWCAP/HWCAP2 feature yyy, disable HWCAP/HWCAP2 feature
+     xxx, where the feature name is case-sensitive and has to match the ones
+     used below.
+     power10: enable/disable power10 feature set
+     power9: enable/disable power9 feature set
+     power8: enable/disable power8 feature set
+     power7: enable/disable power7 feature set
+     power6: enable/disable power6 feature set
+     power5: enable/disable power5 feature set
+     power4: enable/disable power4 feature set.  */
+
+  /* Copy the features from dl_powerpc_cpu_features, which contains the features
+     provided by AT_HWCAP and AT_HWCAP2.  */
+  struct cpu_features *cpu_features = &GLRO(dl_powerpc_cpu_features);
+  struct cpu_features cpu_features_curr;
+  bool disable_vsx = 0;
+  POWERPC_COPY_CPU_FEATURES (cpu_features, &cpu_features_curr);
+  const char *token = valp->strval;
+  do
+    {
+      const char *token_end, *feature;
+      bool disable;
+      size_t token_len;
+      size_t feature_len;
+      /* Find token separator or end of string.  */
+      for (token_end = token; *token_end != ','; token_end++)
+        if (*token_end == '\0')
+          break;
+      /* Determine feature.  */
+      token_len = token_end - token;
+      if(*token == '-')
+        {
+          disable = true;
+          feature = token + 1;
+          feature_len = token_len - 1;
+        }
+      else
+        {
+          disable = false;
+          feature = token;
+          feature_len = token_len;
+        }
+      unsigned long int hwcap_mask = 0UL, hwcap2_mask = 0UL;
+      unsigned long int hwcap_disable = 0UL, hwcap2_disable = 0UL;
+      if (*feature == 'p')
+        {
+          if (feature_len == 7 && MEMCMP_DEFAULT (feature, "power10", 7) == 0)
+            {
+              hwcap2_mask = hwcap2_mask | PPC_FEATURE2_ARCH_3_1;
+              disable_vsx = 0;
+            }
+          else if (feature_len == 6
+                   && MEMCMP_DEFAULT (feature, "power9", 6) == 0)
+            {
+              hwcap2_mask = hwcap2_mask | PPC_FEATURE2_ARCH_3_00;
+              hwcap2_disable =  PPC_FEATURE2_ARCH_3_1;
+              disable_vsx = 0;
+            }
+          else if (feature_len == 6
+                   && MEMCMP_DEFAULT (feature, "power8", 6) == 0)
+            {
+              hwcap2_mask = hwcap2_mask | PPC_FEATURE2_ARCH_2_07;
+              hwcap2_disable = PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_ARCH_3_00;
+              disable_vsx = 0;
+            }
+          else if (feature_len == 6
+                   && MEMCMP_DEFAULT (feature, "power7", 6) == 0)
+            {
+              hwcap_mask = hwcap_mask | PPC_FEATURE_ARCH_2_06;
+              hwcap2_disable = PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_ARCH_3_00
+                               | PPC_FEATURE2_ARCH_2_07;
+              disable_vsx = 0;
+            }
+          else if (feature_len == 6
+                   && MEMCMP_DEFAULT (feature, "power6", 6) == 0)
+            {
+              hwcap_mask = hwcap_mask | PPC_FEATURE_ARCH_2_05;
+              hwcap_disable = PPC_FEATURE_ARCH_2_06;
+              hwcap2_disable = PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_ARCH_3_00
+                               | PPC_FEATURE2_ARCH_2_07;
+              if (!disable)
+                disable_vsx = 1;
+            }
+          else if (feature_len == 6
+                   && MEMCMP_DEFAULT (feature, "power5", 6) == 0)
+            {
+              hwcap_mask = hwcap_mask | PPC_FEATURE_POWER5;
+              hwcap_disable = PPC_FEATURE_ARCH_2_06 | PPC_FEATURE_ARCH_2_05;
+              hwcap2_disable = PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_ARCH_3_00
+                               | PPC_FEATURE2_ARCH_2_07;
+              if (!disable)
+                disable_vsx = 1;
+            }
+          else if (feature_len == 6
+                   && MEMCMP_DEFAULT (feature, "power4", 6) == 0)
+            {
+              hwcap_mask = hwcap_mask | PPC_FEATURE_POWER4;
+              hwcap_disable = PPC_FEATURE_ARCH_2_06 | PPC_FEATURE_ARCH_2_05
+                              | PPC_FEATURE_POWER5;
+              hwcap2_disable = PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_ARCH_3_00
+                               | PPC_FEATURE2_ARCH_2_07;
+              if (!disable)
+                disable_vsx = 1;
+            }
+	}
+      /* Perform the actions determined above.  */
+        if (hwcap_mask != 0UL)
+          {
+            /* we don't disable altivec and vsx */
+            if (disable)
+              {
+                cpu_features_curr.hwcap &= ~hwcap_mask;
+              }
+            else
+              cpu_features_curr.hwcap |= hwcap_mask;
+          }
+        if (!disable  && hwcap_disable != 0UL)
+          cpu_features_curr.hwcap &= ~hwcap_disable;
+        if (hwcap2_mask != 0UL)
+          {
+            if (disable)
+              cpu_features_curr.hwcap2 &= ~hwcap2_mask;
+            else
+              cpu_features_curr.hwcap2 |= hwcap2_mask;
+          }
+        if (!disable && hwcap2_disable != 0UL)
+          cpu_features_curr.hwcap2 &= ~hwcap2_disable;
+
+        token += token_len;
+        /* ... and skip token separator for next round.  */
+        if (*token == ',') token++;
+    }
+  while (*token != '\0');
+  if (disable_vsx)
+    cpu_features_curr.hwcap &= ~PPC_FEATURE_HAS_VSX;
+
+  /* Copy back the supported tunable features */
+  cpu_features->hwcap = cpu_features_curr.hwcap;
+  cpu_features->hwcap2 = cpu_features_curr.hwcap2;
+}
 
 static inline void
 init_cpu_features (struct cpu_features *cpu_features)
 {
+  /* Fill cpu_features as passed by kernel and machine.  */
+  cpu_features->hwcap = GLRO(dl_hwcap);
+  cpu_features->hwcap2 = GLRO(dl_hwcap2);
   /* Default is to use aligned memory access on optimized function unless
      tunables is enable, since for this case user can explicit disable
      unaligned optimizations.  */
   int32_t cached_memfunc = TUNABLE_GET (glibc, cpu, cached_memopt, int32_t,
 					NULL);
   cpu_features->use_cached_memopt = (cached_memfunc > 0);
+  TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *,
+               TUNABLE_CALLBACK (set_hwcaps));
 }
diff --git a/sysdeps/powerpc/cpu-features.h b/sysdeps/powerpc/cpu-features.h
index d316dc3d64..928d2e7f74 100644
--- a/sysdeps/powerpc/cpu-features.h
+++ b/sysdeps/powerpc/cpu-features.h
@@ -23,6 +23,8 @@
 struct cpu_features
 {
   bool use_cached_memopt;
+  unsigned long int hwcap;
+  unsigned long int hwcap2;
 };
 
 #endif /* __CPU_FEATURES_H  */
diff --git a/sysdeps/powerpc/dl-tunables.list b/sysdeps/powerpc/dl-tunables.list
index 87d6235c75..807b7f8013 100644
--- a/sysdeps/powerpc/dl-tunables.list
+++ b/sysdeps/powerpc/dl-tunables.list
@@ -24,5 +24,8 @@ glibc {
       maxval: 1
       default: 0
     }
+    hwcaps {
+      type: STRING
+    }
   }
 }
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
index a4cad7583c..43c07205e0 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -24,6 +24,7 @@
 #include <assert.h>
 #include <dl-tls.h>
 #include <dl-irel.h>
+#include <cpu-features.c>
 #include <hwcapinfo.h>
 #include <dl-static-tls.h>
 #include <dl-machine-rel.h>
@@ -157,6 +158,7 @@ static inline void __attribute__ ((unused))
 dl_platform_init (void)
 {
   __tcb_parse_hwcap_and_convert_at_platform ();
+  init_cpu_features (&GLRO(dl_powerpc_cpu_features));
 }
 #endif
 
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h b/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
index 3dd00e02ee..a0bbd12012 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
@@ -16,6 +16,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <ldsodefs.h>
+#include <cpu-features.h>
 
 /* The code checks if _rtld_global_ro was realocated before trying to access
    the dl_hwcap field. The assembly is to make the compiler not optimize the
@@ -32,11 +33,12 @@
 # define __GLRO(value)  GLRO(value)
 #endif
 
-/* dl_hwcap contains only the latest supported ISA, the macro checks which is
-   and fills the previous ones.  */
+/* Get the hardware information post the tunables set , the macro checks
+   it and fills the previous ones.  */
 #define INIT_ARCH() \
-  unsigned long int hwcap = __GLRO(dl_hwcap); 			\
-  unsigned long int __attribute__((unused)) hwcap2 = __GLRO(dl_hwcap2); \
+  const struct cpu_features *features = &GLRO(dl_powerpc_cpu_features);	\
+  unsigned long int hwcap = features->hwcap;				\
+  unsigned long int __attribute__((unused)) hwcap2 = features->hwcap2; \
   bool __attribute__((unused)) use_cached_memopt =		\
     __GLRO(dl_powerpc_cpu_features.use_cached_memopt);		\
   if (hwcap & PPC_FEATURE_ARCH_2_06)				\
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index ebe9434052..fc26dd0e17 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <cpu-features.h>
 #include <string.h>
 #include <wchar.h>
 #include <ldsodefs.h>
@@ -27,9 +28,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			size_t max)
 {
   size_t i = max;
-
-  unsigned long int hwcap = GLRO(dl_hwcap);
-  unsigned long int hwcap2 = GLRO(dl_hwcap2);
+  const struct cpu_features *features = &GLRO(dl_powerpc_cpu_features);
+  unsigned long int hwcap = features->hwcap;
+  unsigned long int hwcap2 = features->hwcap2;
 #ifdef SHARED
   int cacheline_size = GLRO(dl_cache_line_size);
 #endif
-- 
2.39.3


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-06-21  6:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-19  8:09 [PATCH] PowerPC: Influence hwcaps via cpu arch-level GLIBC_TUNABLES bmahi496
2023-06-20 16:20 ` Peter Bergner
2023-06-20 17:45   ` MAHESH BODAPATI
2023-06-21  4:11     ` Peter Bergner
2023-06-21  6:20       ` MAHESH BODAPATI

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).