public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
@ 2021-03-29 18:25 Adhemerval Zanella
  2021-03-29 18:25 ` [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645) Adhemerval Zanella
                   ` (4 more replies)
  0 siblings, 5 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-03-29 18:25 UTC (permalink / raw)
  To: libc-alpha; +Cc: crrodriguez

And replace the generic algorithm with the Brian Kernighan’s one.
GCC optimize it with popcnt if the architecture supports, so there
is no need to add the extra POPCNT define to enable it.

This is really a micro-optimization that only adds complexity:
recent ABIs already support it (x86-64-v2 or power64le) and it
simplifies the code for internal usage, since i686 does not allow an
internal iFUNC call.

Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.
---
 posix/sched_cpucount.c                       | 28 +++------------
 sysdeps/i386/i686/multiarch/sched_cpucount.c |  1 -
 sysdeps/ia64/sched_cpucount.c                | 20 -----------
 sysdeps/powerpc/sched_cpucount.c             | 22 ------------
 sysdeps/x86_64/multiarch/sched_cpucount.c    | 36 --------------------
 sysdeps/x86_64/sched_cpucount.c              | 25 --------------
 6 files changed, 4 insertions(+), 128 deletions(-)
 delete mode 100644 sysdeps/i386/i686/multiarch/sched_cpucount.c
 delete mode 100644 sysdeps/ia64/sched_cpucount.c
 delete mode 100644 sysdeps/powerpc/sched_cpucount.c
 delete mode 100644 sysdeps/x86_64/multiarch/sched_cpucount.c
 delete mode 100644 sysdeps/x86_64/sched_cpucount.c

diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
index b0ca4ea7bc..529286e777 100644
--- a/posix/sched_cpucount.c
+++ b/posix/sched_cpucount.c
@@ -22,31 +22,11 @@ int
 __sched_cpucount (size_t setsize, const cpu_set_t *setp)
 {
   int s = 0;
-  const __cpu_mask *p = setp->__bits;
-  const __cpu_mask *end = &setp->__bits[setsize / sizeof (__cpu_mask)];
-
-  while (p < end)
+  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
     {
-      __cpu_mask l = *p++;
-
-#ifdef POPCNT
-      s += POPCNT (l);
-#else
-      if (l == 0)
-	continue;
-
-      _Static_assert (sizeof (l) == sizeof (unsigned int)
-		      || sizeof (l) == sizeof (unsigned long)
-		      || sizeof (l) == sizeof (unsigned long long),
-		      "sizeof (__cpu_mask");
-      if (sizeof (__cpu_mask) == sizeof (unsigned int))
-	s += __builtin_popcount (l);
-      else if (sizeof (__cpu_mask) == sizeof (unsigned long))
-	s += __builtin_popcountl (l);
-      else
-	s += __builtin_popcountll (l);
-#endif
+      __cpu_mask si = setp->__bits[i];
+      /* Clear the least significant bit set.  */
+      for (; si != 0; si &= si - 1, s++);
     }
-
   return s;
 }
diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c
deleted file mode 100644
index 7db31b02f8..0000000000
--- a/sysdeps/i386/i686/multiarch/sched_cpucount.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
diff --git a/sysdeps/ia64/sched_cpucount.c b/sysdeps/ia64/sched_cpucount.c
deleted file mode 100644
index 8440864b02..0000000000
--- a/sysdeps/ia64/sched_cpucount.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#define POPCNT(l) __builtin_popcountl (l)
-
-#include <posix/sched_cpucount.c>
diff --git a/sysdeps/powerpc/sched_cpucount.c b/sysdeps/powerpc/sched_cpucount.c
deleted file mode 100644
index 8f00e3dbc8..0000000000
--- a/sysdeps/powerpc/sched_cpucount.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifdef _ARCH_PWR5
-# define POPCNT(l) __builtin_popcountl (l)
-#endif
-
-#include <posix/sched_cpucount.c>
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
deleted file mode 100644
index 5180a11434..0000000000
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Count bits in CPU set.  x86-64 multi-arch version.
-   This file is part of the GNU C Library.
-   Copyright (C) 2008-2021 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sched.h>
-#include "init-arch.h"
-
-#define __sched_cpucount static generic_cpucount
-#include <posix/sched_cpucount.c>
-#undef __sched_cpucount
-
-#define POPCNT(l) \
-  ({ __cpu_mask r; \
-     asm ("popcnt %1, %0" : "=r" (r) : "0" (l));\
-     r; })
-#define __sched_cpucount static popcount_cpucount
-#include <posix/sched_cpucount.c>
-#undef __sched_cpucount
-
-libc_ifunc (__sched_cpucount,
-	    CPU_FEATURE_USABLE (POPCNT) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
deleted file mode 100644
index 5a27336d6d..0000000000
--- a/sysdeps/x86_64/sched_cpucount.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifdef __amdfam10
-# define POPCNT(l) \
-  ({ __cpu_mask r;							      \
-     asm ("popcntq %1, %0" : "=r" (r) : "0" (l));			      \
-     r; })
-#endif
-
-#include <posix/sched_cpucount.c>
-- 
2.27.0


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645)
  2021-03-29 18:25 [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
@ 2021-03-29 18:25 ` Adhemerval Zanella
  2021-04-27 15:38   ` Florian Weimer
  2021-03-29 18:25 ` [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf Adhemerval Zanella
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 28+ messages in thread
From: Adhemerval Zanella @ 2021-03-29 18:25 UTC (permalink / raw)
  To: libc-alpha; +Cc: crrodriguez

Both the sysfs and procfs parsing (through GET_NPROCS_PARSER) are
removed in favor the syscall.  The initial scratch buffer should
fit to most of the common usage (1024 bytes with maps to 8192 CPUs).

Checked on x86_64-linux-gnu and aarch64-linux-gnu.
---
 include/bits/cpu-set.h                        |   8 +
 posix/sched_cpucount.c                        |   1 +
 sysdeps/unix/sysv/linux/alpha/getsysstats.c   |  19 --
 sysdeps/unix/sysv/linux/getsysstats.c         | 217 ++----------------
 sysdeps/unix/sysv/linux/m68k/getsysstats.c    |  37 ---
 .../unix/sysv/linux/microblaze/getsysstats.c  |  34 ---
 sysdeps/unix/sysv/linux/mips/getsysstats.c    |  36 ---
 sysdeps/unix/sysv/linux/sparc/getsysstats.c   |  17 --
 8 files changed, 31 insertions(+), 338 deletions(-)
 delete mode 100644 sysdeps/unix/sysv/linux/m68k/getsysstats.c
 delete mode 100644 sysdeps/unix/sysv/linux/microblaze/getsysstats.c
 delete mode 100644 sysdeps/unix/sysv/linux/mips/getsysstats.c

diff --git a/include/bits/cpu-set.h b/include/bits/cpu-set.h
index 388f03cfbd..05aa0a8cf9 100644
--- a/include/bits/cpu-set.h
+++ b/include/bits/cpu-set.h
@@ -1 +1,9 @@
+#ifndef _BITS_CPU_SET_H
 #include <posix/bits/cpu-set.h>
+
+#ifndef _ISOMAC
+int __sched_cpucount (size_t __setsize, const cpu_set_t *__setp);
+libc_hidden_proto (__sched_cpucount)
+#endif
+
+#endif /* _BITS_CPU_SET_H  */
diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
index 529286e777..8d853dcc2f 100644
--- a/posix/sched_cpucount.c
+++ b/posix/sched_cpucount.c
@@ -30,3 +30,4 @@ __sched_cpucount (size_t setsize, const cpu_set_t *setp)
     }
   return s;
 }
+libc_hidden_def (__sched_cpucount)
diff --git a/sysdeps/unix/sysv/linux/alpha/getsysstats.c b/sysdeps/unix/sysv/linux/alpha/getsysstats.c
index bdf794703a..098575faef 100644
--- a/sysdeps/unix/sysv/linux/alpha/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/alpha/getsysstats.c
@@ -18,25 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 
-/* We need to define a special parser for /proc/cpuinfo.  */
-#define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT)	   \
-  do									   \
-    {									   \
-      /* Find the line that contains the information about the number of   \
-	 active cpus.  We don't have to fear extremely long lines since	   \
-	 the kernel will not generate them.  8192 bytes are really enough. \
-	 If there is no "CPUs ..." line then we are on a UP system.  */	   \
-      char *l;								   \
-      (RESULT) = 1;							   \
-      while ((l = next_line (FD, BUFFER, &CP, &RE, BUFFER_END)) != NULL)  \
-	if ((sscanf (BUFFER, "cpus active : %d", &(RESULT)) == 1)	   \
-	    || (sscanf (BUFFER, "CPUs probed %*d active %d",		   \
-			&(RESULT)) == 1))  				   \
-	  break;							   \
-    }									   \
-  while (0)
-
-
 /* On the Alpha we can distinguish between the number of configured and
    active cpus.  */
 #define GET_NPROCS_CONF_PARSER(FP, BUFFER, RESULT)			   \
diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index 2c7f40998a..f8a4a31d1b 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -17,215 +17,42 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <alloca.h>
-#include <assert.h>
-#include <ctype.h>
 #include <dirent.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <mntent.h>
-#include <paths.h>
+#include <not-cancel.h>
+#include <scratch_buffer.h>
 #include <stdio.h>
 #include <stdio_ext.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
 #include <sys/sysinfo.h>
-
-#include <atomic.h>
-#include <not-cancel.h>
-
-
-/* How we can determine the number of available processors depends on
-   the configuration.  There is currently (as of version 2.0.21) no
-   system call to determine the number.  It is planned for the 2.1.x
-   series to add this, though.
-
-   One possibility to implement it for systems using Linux 2.0 is to
-   examine the pseudo file /proc/cpuinfo.  Here we have one entry for
-   each processor.
-
-   But not all systems have support for the /proc filesystem.  If it
-   is not available we simply return 1 since there is no way.  */
-
-
-/* Other architectures use different formats for /proc/cpuinfo.  This
-   provides a hook for alternative parsers.  */
-#ifndef GET_NPROCS_PARSER
-# define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT) \
-  do									\
-    {									\
-      (RESULT) = 0;							\
-      /* Read all lines and count the lines starting with the string	\
-	 "processor".  We don't have to fear extremely long lines since	\
-	 the kernel will not generate them.  8192 bytes are really	\
-	 enough.  */							\
-      char *l;								\
-      while ((l = next_line (FD, BUFFER, &CP, &RE, BUFFER_END)) != NULL) \
-	if (strncmp (l, "processor", 9) == 0)				\
-	  ++(RESULT);							\
-    }									\
-  while (0)
-#endif
-
-
-static char *
-next_line (int fd, char *const buffer, char **cp, char **re,
-	   char *const buffer_end)
-{
-  char *res = *cp;
-  char *nl = memchr (*cp, '\n', *re - *cp);
-  if (nl == NULL)
-    {
-      if (*cp != buffer)
-	{
-	  if (*re == buffer_end)
-	    {
-	      memmove (buffer, *cp, *re - *cp);
-	      *re = buffer + (*re - *cp);
-	      *cp = buffer;
-
-	      ssize_t n = __read_nocancel (fd, *re, buffer_end - *re);
-	      if (n < 0)
-		return NULL;
-
-	      *re += n;
-
-	      nl = memchr (*cp, '\n', *re - *cp);
-	      while (nl == NULL && *re == buffer_end)
-		{
-		  /* Truncate too long lines.  */
-		  *re = buffer + 3 * (buffer_end - buffer) / 4;
-		  n = __read_nocancel (fd, *re, buffer_end - *re);
-		  if (n < 0)
-		    return NULL;
-
-		  nl = memchr (*re, '\n', n);
-		  **re = '\n';
-		  *re += n;
-		}
-	    }
-	  else
-	    nl = memchr (*cp, '\n', *re - *cp);
-
-	  res = *cp;
-	}
-
-      if (nl == NULL)
-	nl = *re - 1;
-    }
-
-  *cp = nl + 1;
-  assert (*cp <= *re);
-
-  return res == *re ? NULL : res;
-}
-
+#include <sysdep.h>
 
 int
 __get_nprocs (void)
 {
-  static int cached_result = -1;
-  static time_t timestamp;
-
-  time_t now = time_now ();
-  time_t prev = timestamp;
-  atomic_read_barrier ();
-  if (now == prev && cached_result > -1)
-    return cached_result;
-
-  /* XXX Here will come a test for the new system call.  */
-
-  const size_t buffer_size = __libc_use_alloca (8192) ? 8192 : 512;
-  char *buffer = alloca (buffer_size);
-  char *buffer_end = buffer + buffer_size;
-  char *cp = buffer_end;
-  char *re = buffer_end;
+  struct scratch_buffer set;
+  scratch_buffer_init (&set);
 
-  const int flags = O_RDONLY | O_CLOEXEC;
-  /* This file contains comma-separated ranges.  */
-  int fd = __open_nocancel ("/sys/devices/system/cpu/online", flags);
-  char *l;
-  int result = 0;
-  if (fd != -1)
+  int r;
+  while (true)
     {
-      l = next_line (fd, buffer, &cp, &re, buffer_end);
-      if (l != NULL)
-	do
-	  {
-	    char *endp;
-	    unsigned long int n = strtoul (l, &endp, 10);
-	    if (l == endp)
-	      {
-		result = 0;
-		break;
-	      }
-
-	    unsigned long int m = n;
-	    if (*endp == '-')
-	      {
-		l = endp + 1;
-		m = strtoul (l, &endp, 10);
-		if (l == endp)
-		  {
-		    result = 0;
-		    break;
-		  }
-	      }
-
-	    result += m - n + 1;
-
-	    l = endp;
-	    if (l < re && *l == ',')
-	      ++l;
-	  }
-	while (l < re && *l != '\n');
-
-      __close_nocancel_nostatus (fd);
-
-      if (result > 0)
-	goto out;
+      /* The possible error are EFAULT for an invalid buffer or ESRCH for
+	 invalid pid, none could happen.  */
+      r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, set.length,
+				 set.data);
+      if (r > 0)
+	break;
+
+      if (!scratch_buffer_grow (&set))
+	/* Default to an SMP system in case we cannot obtain an accurate
+	   number.  */
+	return 2;
     }
 
-  cp = buffer_end;
-  re = buffer_end;
-
-  /* Default to an SMP system in case we cannot obtain an accurate
-     number.  */
-  result = 2;
+  /* The scratch_buffer is aligned to max_align_t.  */
+  r = __sched_cpucount (r, (const cpu_set_t *) set.data);
 
-  /* The /proc/stat format is more uniform, use it by default.  */
-  fd = __open_nocancel ("/proc/stat", flags);
-  if (fd != -1)
-    {
-      result = 0;
-
-      while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
-	/* The current format of /proc/stat has all the cpu* entries
-	   at the front.  We assume here that stays this way.  */
-	if (strncmp (l, "cpu", 3) != 0)
-	  break;
-	else if (isdigit (l[3]))
-	  ++result;
+  scratch_buffer_free (&set);
 
-      __close_nocancel_nostatus (fd);
-    }
-  else
-    {
-      fd = __open_nocancel ("/proc/cpuinfo", flags);
-      if (fd != -1)
-	{
-	  GET_NPROCS_PARSER (fd, buffer, cp, re, buffer_end, result);
-	  __close_nocancel_nostatus (fd);
-	}
-    }
-
- out:
-  cached_result = result;
-  atomic_write_barrier ();
-  timestamp = now;
-
-  return result;
+  return r;
 }
 libc_hidden_def (__get_nprocs)
 weak_alias (__get_nprocs, get_nprocs)
diff --git a/sysdeps/unix/sysv/linux/m68k/getsysstats.c b/sysdeps/unix/sysv/linux/m68k/getsysstats.c
deleted file mode 100644
index 6915f6d721..0000000000
--- a/sysdeps/unix/sysv/linux/m68k/getsysstats.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Determine various system internal values, Linux/m68k version.
-   Copyright (C) 2003-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Andreas Schwab <schwab@suse.de>
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-
-/* We need to define a special parser for /proc/cpuinfo.  */
-#define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT)	  \
-  do									  \
-    {									  \
-      (RESULT) = 0;							  \
-      /* Read all lines and count the lines starting with the string	  \
-	 "CPU:".  We don't have to fear extremely long lines since	  \
-	 the kernel will not generate them.  8192 bytes are really	  \
-	 enough.  */							  \
-      char *l;								  \
-      while ((l = next_line (FD, BUFFER, &CP, &RE, BUFFER_END)) != NULL)  \
-	if (strncmp (l, "CPU:", 4) == 0)	      	     		  \
-	  ++(RESULT);							  \
-    }									  \
-  while (0)
-
-#include <sysdeps/unix/sysv/linux/getsysstats.c>
diff --git a/sysdeps/unix/sysv/linux/microblaze/getsysstats.c b/sysdeps/unix/sysv/linux/microblaze/getsysstats.c
deleted file mode 100644
index 46403e74a8..0000000000
--- a/sysdeps/unix/sysv/linux/microblaze/getsysstats.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Copyright (C) 1997-2021 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-/* We need to define a special parser for /proc/cpuinfo.  */
-#define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT)              \
-  do                                                                           \
-    {                                                                          \
-      (RESULT) = 0;                                                            \
-      /* Read all lines and count the lines starting with the string           \
-         "CPU-Family:".  We don't have to fear extremely long lines since      \
-         the kernel will not generate them.  8192 bytes are really enough.  */ \
-      char *l;                                                                 \
-      while ((l = next_line (FD, BUFFER, &CP, &RE, BUFFER_END)) != NULL)       \
-      if (strncmp (l, "CPU-Family:", 11) == 0)                                 \
-        ++(RESULT);                                                            \
-    }                                                                          \
-  while (0)
-
-#include <sysdeps/unix/sysv/linux/getsysstats.c>
diff --git a/sysdeps/unix/sysv/linux/mips/getsysstats.c b/sysdeps/unix/sysv/linux/mips/getsysstats.c
deleted file mode 100644
index 34f352e963..0000000000
--- a/sysdeps/unix/sysv/linux/mips/getsysstats.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Determine various system internal values, Linux/MIPS version.
-   Copyright (C) 2001-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-
-/* We need to define a special parser for /proc/cpuinfo.  */
-#define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT)	  \
-  do									  \
-    {									  \
-      (RESULT) = 0;							  \
-      /* Read all lines and count the lines starting with the string	  \
-	 "cpu model".  We don't have to fear extremely long lines since	  \
-	 the kernel will not generate them.  8192 bytes are really	  \
-	 enough.  */							  \
-      char *l;								  \
-      while ((l = next_line (FD, BUFFER, &CP, &RE, BUFFER_END)) != NULL)  \
-	if (strncmp (l, "cpu model", 9) == 0)				  \
-	  ++(RESULT);							  \
-    }									  \
-  while (0)
-
-#include <sysdeps/unix/sysv/linux/getsysstats.c>
diff --git a/sysdeps/unix/sysv/linux/sparc/getsysstats.c b/sysdeps/unix/sysv/linux/sparc/getsysstats.c
index 8e1760802f..a8adba8321 100644
--- a/sysdeps/unix/sysv/linux/sparc/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/sparc/getsysstats.c
@@ -19,23 +19,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 
-/* We need to define a special parser for /proc/cpuinfo.  */
-#define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT)	  \
-  do									  \
-    {									  \
-      (RESULT) = 0;							  \
-      /* Find the line that contains the information about the number of  \
-	 active cpus.  We don't have to fear extremely long lines since	  \
-	 the kernel will not generate them.  8192 bytes are really	  \
-	 enough.  */							  \
-      char *l;								  \
-      while ((l = next_line (FD, BUFFER, &CP, &RE, BUFFER_END)) != NULL)  \
-	if (sscanf (l, "ncpus active : %d", &(RESULT)) == 1)		  \
-	  break;							  \
-    }									  \
-  while (0)
-
-
 /* On the Sparc we can distinguish between the number of configured and
    active cpus.  */
 #define GET_NPROCS_CONF_PARSER(FP, BUFFER, RESULT)			 \
-- 
2.27.0


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-03-29 18:25 [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
  2021-03-29 18:25 ` [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645) Adhemerval Zanella
@ 2021-03-29 18:25 ` Adhemerval Zanella
  2021-05-05 16:53   ` Adhemerval Zanella
  2021-05-05 17:54   ` Florian Weimer
  2021-03-29 18:25 ` [PATCH 4/4] linux: Remove /proc/cpuinfo fallback on alpha and sparc Adhemerval Zanella
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-03-29 18:25 UTC (permalink / raw)
  To: libc-alpha; +Cc: crrodriguez

Instead of iterate over all the cpu in the sysfs folder.  It consumes
slight less resources on large system (which might require extra
getdents call) and memory (a limited stack buffer instead a large
malloced one for opendir).

Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.
---
 sysdeps/unix/sysv/linux/getsysstats.c | 51 +++++++++++++++------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index f8a4a31d1b..5069951246 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -17,7 +17,8 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <dirent.h>
+#include <ctype.h>
+#include <intprops.h>
 #include <not-cancel.h>
 #include <scratch_buffer.h>
 #include <stdio.h>
@@ -63,33 +64,39 @@ weak_alias (__get_nprocs, get_nprocs)
 int
 __get_nprocs_conf (void)
 {
-  /* XXX Here will come a test for the new system call.  */
+  int result = 1;
 
   /* Try to use the sysfs filesystem.  It has actual information about
      online processors.  */
-  DIR *dir = __opendir ("/sys/devices/system/cpu");
-  if (dir != NULL)
+  int fd = __open64_nocancel ("/sys/devices/system/cpu/possible", O_RDONLY);
+  if (fd != -1)
     {
-      int count = 0;
-      struct dirent64 *d;
-
-      while ((d = __readdir64 (dir)) != NULL)
-	/* NB: the sysfs has d_type support.  */
-	if (d->d_type == DT_DIR && strncmp (d->d_name, "cpu", 3) == 0)
-	  {
-	    char *endp;
-	    unsigned long int nr = strtoul (d->d_name + 3, &endp, 10);
-	    if (nr != ULONG_MAX && endp != d->d_name + 3 && *endp == '\0')
-	      ++count;
-	  }
-
-      __closedir (dir);
-
-      return count;
+      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
+      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
+
+      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
+      if (n > 0)
+	{
+	  buf[n] = '\0';
+
+	  /* Start on the right, to find highest node number.  */
+	  int m = 1;
+	  while (--n)
+	    {
+	      if ((buf[n] == ',') || (buf[n] == '-'))
+		break;
+	      /* Ignore '\n'  */
+	      if (! isdigit (buf[n]))
+		continue;
+	      result += (buf[n] - '0') * m;
+	      m *= 10;
+	    }
+	}
+
+      __close_nocancel (fd);
+      return result + 1;
     }
 
-  int result = 1;
-
 #ifdef GET_NPROCS_CONF_PARSER
   /* If we haven't found an appropriate entry return 1.  */
   FILE *fp = fopen ("/proc/cpuinfo", "rce");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 4/4] linux: Remove /proc/cpuinfo fallback on alpha and sparc
  2021-03-29 18:25 [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
  2021-03-29 18:25 ` [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645) Adhemerval Zanella
  2021-03-29 18:25 ` [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf Adhemerval Zanella
@ 2021-03-29 18:25 ` Adhemerval Zanella
  2021-05-05 16:53   ` Adhemerval Zanella
  2021-05-05 16:52 ` [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
  2021-05-05 17:28 ` Florian Weimer
  4 siblings, 1 reply; 28+ messages in thread
From: Adhemerval Zanella @ 2021-03-29 18:25 UTC (permalink / raw)
  To: libc-alpha; +Cc: crrodriguez

There is no much gain in fallback to cpuinfo if sysfs is no present,
usually on restricted environment neither will be present.  It also
simplifies the code and make all architecture use the sched_getaffinity
as the sysfs fallback.

Checked on sparc64-linux-gnu.
---
 sysdeps/unix/sysv/linux/alpha/getsysstats.c | 38 ---------------------
 sysdeps/unix/sysv/linux/getsysstats.c       | 22 ++----------
 sysdeps/unix/sysv/linux/sparc/getsysstats.c | 38 ---------------------
 3 files changed, 3 insertions(+), 95 deletions(-)
 delete mode 100644 sysdeps/unix/sysv/linux/alpha/getsysstats.c
 delete mode 100644 sysdeps/unix/sysv/linux/sparc/getsysstats.c

diff --git a/sysdeps/unix/sysv/linux/alpha/getsysstats.c b/sysdeps/unix/sysv/linux/alpha/getsysstats.c
deleted file mode 100644
index 098575faef..0000000000
--- a/sysdeps/unix/sysv/linux/alpha/getsysstats.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Determine various system internal values, Linux/Alpha version.
-   Copyright (C) 1999-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Andreas Schwab <schwab@suse.de>
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-
-/* On the Alpha we can distinguish between the number of configured and
-   active cpus.  */
-#define GET_NPROCS_CONF_PARSER(FP, BUFFER, RESULT)			   \
-  do									   \
-    {									   \
-      /* Find the line that contains the information about the number of   \
-	 probed cpus.  We don't have to fear extremely long lines since	   \
-	 the kernel will not generate them.  8192 bytes are really enough. \
-	 If there is no "CPUs ..." line then we are on a UP system.  */	   \
-      (RESULT) = 1;							   \
-      while (__fgets_unlocked ((BUFFER), sizeof (BUFFER), (FP)) != NULL)   \
-	if ((sscanf (buffer, "cpus detected : %d", &(RESULT)) == 1)	   \
-	    || (sscanf (buffer, "CPUs probed %d", &(RESULT)) == 1))	   \
-	  break;							   \
-    }									   \
-  while (0)
-
-#include <sysdeps/unix/sysv/linux/getsysstats.c>
diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index 5069951246..0791399b8f 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -64,13 +64,13 @@ weak_alias (__get_nprocs, get_nprocs)
 int
 __get_nprocs_conf (void)
 {
-  int result = 1;
-
   /* Try to use the sysfs filesystem.  It has actual information about
      online processors.  */
   int fd = __open64_nocancel ("/sys/devices/system/cpu/possible", O_RDONLY);
   if (fd != -1)
     {
+      int result = 0;
+
       /* The entry is in the form of '[cpuX]-[cpuY]'.  */
       char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
 
@@ -97,23 +97,7 @@ __get_nprocs_conf (void)
       return result + 1;
     }
 
-#ifdef GET_NPROCS_CONF_PARSER
-  /* If we haven't found an appropriate entry return 1.  */
-  FILE *fp = fopen ("/proc/cpuinfo", "rce");
-  if (fp != NULL)
-    {
-      char buffer[8192];
-
-      /* No threads use this stream.  */
-      __fsetlocking (fp, FSETLOCKING_BYCALLER);
-      GET_NPROCS_CONF_PARSER (fp, buffer, result);
-      fclose (fp);
-    }
-#else
-  result = __get_nprocs ();
-#endif
-
-  return result;
+  return __get_nprocs ();
 }
 libc_hidden_def (__get_nprocs_conf)
 weak_alias (__get_nprocs_conf, get_nprocs_conf)
diff --git a/sysdeps/unix/sysv/linux/sparc/getsysstats.c b/sysdeps/unix/sysv/linux/sparc/getsysstats.c
deleted file mode 100644
index a8adba8321..0000000000
--- a/sysdeps/unix/sysv/linux/sparc/getsysstats.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Determine various system internal values, Linux/Sparc version.
-   Copyright (C) 1999-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Andreas Schwab <schwab@suse.de> and
-		  Jakub Jelinek <jj@ultra.linux.cz>
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-
-/* On the Sparc we can distinguish between the number of configured and
-   active cpus.  */
-#define GET_NPROCS_CONF_PARSER(FP, BUFFER, RESULT)			 \
-  do									 \
-    {									 \
-      (RESULT) = 0;							 \
-      /* Find the line that contains the information about the number of \
-	 probed cpus.  We don't have to fear extremely long lines since	 \
-	 the kernel will not generate them.  8192 bytes are really	 \
-	 enough.  */							 \
-      while (__fgets_unlocked ((BUFFER), sizeof (BUFFER), (FP)) != NULL) \
-	if (sscanf (buffer, "ncpus probed : %d", &(RESULT)) == 1)	 \
-	  break;							 \
-    }									 \
-  while (0)
-
-#include <sysdeps/unix/sysv/linux/getsysstats.c>
-- 
2.27.0


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645)
  2021-03-29 18:25 ` [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645) Adhemerval Zanella
@ 2021-04-27 15:38   ` Florian Weimer
  0 siblings, 0 replies; 28+ messages in thread
From: Florian Weimer @ 2021-04-27 15:38 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, crrodriguez

* Adhemerval Zanella via Libc-alpha:

> Both the sysfs and procfs parsing (through GET_NPROCS_PARSER) are
> removed in favor the syscall.  The initial scratch buffer should
> fit to most of the common usage (1024 bytes with maps to 8192 CPUs).

The code changes look okay to me.

I don't know if it is safe to ever return 1, though, because some
container hosts return just one bit in the affinity mask, but will still
schedule threads on different CPUs, so the single-processor
synchronization that is normally implied by a CPU count of 1 isn't
guaranteed.

This isn't an issue with the old approach because containers hosts
generally do not virtualize those /sys and /proc files.

I suppose it's okay to make this change and maybe add a NEWS entry that
recommends using at least 2 CPUs for containers due to this change.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-03-29 18:25 [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
                   ` (2 preceding siblings ...)
  2021-03-29 18:25 ` [PATCH 4/4] linux: Remove /proc/cpuinfo fallback on alpha and sparc Adhemerval Zanella
@ 2021-05-05 16:52 ` Adhemerval Zanella
  2021-05-05 17:28 ` Florian Weimer
  4 siblings, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-05 16:52 UTC (permalink / raw)
  To: libc-alpha

Ping.

On 29/03/2021 15:25, Adhemerval Zanella wrote:
> And replace the generic algorithm with the Brian Kernighan’s one.
> GCC optimize it with popcnt if the architecture supports, so there
> is no need to add the extra POPCNT define to enable it.
> 
> This is really a micro-optimization that only adds complexity:
> recent ABIs already support it (x86-64-v2 or power64le) and it
> simplifies the code for internal usage, since i686 does not allow an
> internal iFUNC call.
> 
> Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
> powerpc64le-linux-gnu.
> ---
>  posix/sched_cpucount.c                       | 28 +++------------
>  sysdeps/i386/i686/multiarch/sched_cpucount.c |  1 -
>  sysdeps/ia64/sched_cpucount.c                | 20 -----------
>  sysdeps/powerpc/sched_cpucount.c             | 22 ------------
>  sysdeps/x86_64/multiarch/sched_cpucount.c    | 36 --------------------
>  sysdeps/x86_64/sched_cpucount.c              | 25 --------------
>  6 files changed, 4 insertions(+), 128 deletions(-)
>  delete mode 100644 sysdeps/i386/i686/multiarch/sched_cpucount.c
>  delete mode 100644 sysdeps/ia64/sched_cpucount.c
>  delete mode 100644 sysdeps/powerpc/sched_cpucount.c
>  delete mode 100644 sysdeps/x86_64/multiarch/sched_cpucount.c
>  delete mode 100644 sysdeps/x86_64/sched_cpucount.c
> 
> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
> index b0ca4ea7bc..529286e777 100644
> --- a/posix/sched_cpucount.c
> +++ b/posix/sched_cpucount.c
> @@ -22,31 +22,11 @@ int
>  __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>  {
>    int s = 0;
> -  const __cpu_mask *p = setp->__bits;
> -  const __cpu_mask *end = &setp->__bits[setsize / sizeof (__cpu_mask)];
> -
> -  while (p < end)
> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>      {
> -      __cpu_mask l = *p++;
> -
> -#ifdef POPCNT
> -      s += POPCNT (l);
> -#else
> -      if (l == 0)
> -	continue;
> -
> -      _Static_assert (sizeof (l) == sizeof (unsigned int)
> -		      || sizeof (l) == sizeof (unsigned long)
> -		      || sizeof (l) == sizeof (unsigned long long),
> -		      "sizeof (__cpu_mask");
> -      if (sizeof (__cpu_mask) == sizeof (unsigned int))
> -	s += __builtin_popcount (l);
> -      else if (sizeof (__cpu_mask) == sizeof (unsigned long))
> -	s += __builtin_popcountl (l);
> -      else
> -	s += __builtin_popcountll (l);
> -#endif
> +      __cpu_mask si = setp->__bits[i];
> +      /* Clear the least significant bit set.  */
> +      for (; si != 0; si &= si - 1, s++);
>      }
> -
>    return s;
>  }
> diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c
> deleted file mode 100644
> index 7db31b02f8..0000000000
> --- a/sysdeps/i386/i686/multiarch/sched_cpucount.c
> +++ /dev/null
> @@ -1 +0,0 @@
> -#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
> diff --git a/sysdeps/ia64/sched_cpucount.c b/sysdeps/ia64/sched_cpucount.c
> deleted file mode 100644
> index 8440864b02..0000000000
> --- a/sysdeps/ia64/sched_cpucount.c
> +++ /dev/null
> @@ -1,20 +0,0 @@
> -/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#define POPCNT(l) __builtin_popcountl (l)
> -
> -#include <posix/sched_cpucount.c>
> diff --git a/sysdeps/powerpc/sched_cpucount.c b/sysdeps/powerpc/sched_cpucount.c
> deleted file mode 100644
> index 8f00e3dbc8..0000000000
> --- a/sysdeps/powerpc/sched_cpucount.c
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#ifdef _ARCH_PWR5
> -# define POPCNT(l) __builtin_popcountl (l)
> -#endif
> -
> -#include <posix/sched_cpucount.c>
> diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
> deleted file mode 100644
> index 5180a11434..0000000000
> --- a/sysdeps/x86_64/multiarch/sched_cpucount.c
> +++ /dev/null
> @@ -1,36 +0,0 @@
> -/* Count bits in CPU set.  x86-64 multi-arch version.
> -   This file is part of the GNU C Library.
> -   Copyright (C) 2008-2021 Free Software Foundation, Inc.
> -   Contributed by Ulrich Drepper <drepper@redhat.com>.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#include <sched.h>
> -#include "init-arch.h"
> -
> -#define __sched_cpucount static generic_cpucount
> -#include <posix/sched_cpucount.c>
> -#undef __sched_cpucount
> -
> -#define POPCNT(l) \
> -  ({ __cpu_mask r; \
> -     asm ("popcnt %1, %0" : "=r" (r) : "0" (l));\
> -     r; })
> -#define __sched_cpucount static popcount_cpucount
> -#include <posix/sched_cpucount.c>
> -#undef __sched_cpucount
> -
> -libc_ifunc (__sched_cpucount,
> -	    CPU_FEATURE_USABLE (POPCNT) ? popcount_cpucount : generic_cpucount);
> diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
> deleted file mode 100644
> index 5a27336d6d..0000000000
> --- a/sysdeps/x86_64/sched_cpucount.c
> +++ /dev/null
> @@ -1,25 +0,0 @@
> -/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#ifdef __amdfam10
> -# define POPCNT(l) \
> -  ({ __cpu_mask r;							      \
> -     asm ("popcntq %1, %0" : "=r" (r) : "0" (l));			      \
> -     r; })
> -#endif
> -
> -#include <posix/sched_cpucount.c>
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-03-29 18:25 ` [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf Adhemerval Zanella
@ 2021-05-05 16:53   ` Adhemerval Zanella
  2021-05-05 17:54   ` Florian Weimer
  1 sibling, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-05 16:53 UTC (permalink / raw)
  To: libc-alpha

Ping.

On 29/03/2021 15:25, Adhemerval Zanella wrote:
> Instead of iterate over all the cpu in the sysfs folder.  It consumes
> slight less resources on large system (which might require extra
> getdents call) and memory (a limited stack buffer instead a large
> malloced one for opendir).
> 
> Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
> powerpc64le-linux-gnu.
> ---
>  sysdeps/unix/sysv/linux/getsysstats.c | 51 +++++++++++++++------------
>  1 file changed, 29 insertions(+), 22 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index f8a4a31d1b..5069951246 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -17,7 +17,8 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> -#include <dirent.h>
> +#include <ctype.h>
> +#include <intprops.h>
>  #include <not-cancel.h>
>  #include <scratch_buffer.h>
>  #include <stdio.h>
> @@ -63,33 +64,39 @@ weak_alias (__get_nprocs, get_nprocs)
>  int
>  __get_nprocs_conf (void)
>  {
> -  /* XXX Here will come a test for the new system call.  */
> +  int result = 1;
>  
>    /* Try to use the sysfs filesystem.  It has actual information about
>       online processors.  */
> -  DIR *dir = __opendir ("/sys/devices/system/cpu");
> -  if (dir != NULL)
> +  int fd = __open64_nocancel ("/sys/devices/system/cpu/possible", O_RDONLY);
> +  if (fd != -1)
>      {
> -      int count = 0;
> -      struct dirent64 *d;
> -
> -      while ((d = __readdir64 (dir)) != NULL)
> -	/* NB: the sysfs has d_type support.  */
> -	if (d->d_type == DT_DIR && strncmp (d->d_name, "cpu", 3) == 0)
> -	  {
> -	    char *endp;
> -	    unsigned long int nr = strtoul (d->d_name + 3, &endp, 10);
> -	    if (nr != ULONG_MAX && endp != d->d_name + 3 && *endp == '\0')
> -	      ++count;
> -	  }
> -
> -      __closedir (dir);
> -
> -      return count;
> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
> +
> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
> +      if (n > 0)
> +	{
> +	  buf[n] = '\0';
> +
> +	  /* Start on the right, to find highest node number.  */
> +	  int m = 1;
> +	  while (--n)
> +	    {
> +	      if ((buf[n] == ',') || (buf[n] == '-'))
> +		break;
> +	      /* Ignore '\n'  */
> +	      if (! isdigit (buf[n]))
> +		continue;
> +	      result += (buf[n] - '0') * m;
> +	      m *= 10;
> +	    }
> +	}
> +
> +      __close_nocancel (fd);
> +      return result + 1;
>      }
>  
> -  int result = 1;
> -
>  #ifdef GET_NPROCS_CONF_PARSER
>    /* If we haven't found an appropriate entry return 1.  */
>    FILE *fp = fopen ("/proc/cpuinfo", "rce");
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] linux: Remove /proc/cpuinfo fallback on alpha and sparc
  2021-03-29 18:25 ` [PATCH 4/4] linux: Remove /proc/cpuinfo fallback on alpha and sparc Adhemerval Zanella
@ 2021-05-05 16:53   ` Adhemerval Zanella
  0 siblings, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-05 16:53 UTC (permalink / raw)
  To: libc-alpha

Ping.

On 29/03/2021 15:25, Adhemerval Zanella wrote:
> There is no much gain in fallback to cpuinfo if sysfs is no present,
> usually on restricted environment neither will be present.  It also
> simplifies the code and make all architecture use the sched_getaffinity
> as the sysfs fallback.
> 
> Checked on sparc64-linux-gnu.
> ---
>  sysdeps/unix/sysv/linux/alpha/getsysstats.c | 38 ---------------------
>  sysdeps/unix/sysv/linux/getsysstats.c       | 22 ++----------
>  sysdeps/unix/sysv/linux/sparc/getsysstats.c | 38 ---------------------
>  3 files changed, 3 insertions(+), 95 deletions(-)
>  delete mode 100644 sysdeps/unix/sysv/linux/alpha/getsysstats.c
>  delete mode 100644 sysdeps/unix/sysv/linux/sparc/getsysstats.c
> 
> diff --git a/sysdeps/unix/sysv/linux/alpha/getsysstats.c b/sysdeps/unix/sysv/linux/alpha/getsysstats.c
> deleted file mode 100644
> index 098575faef..0000000000
> --- a/sysdeps/unix/sysv/linux/alpha/getsysstats.c
> +++ /dev/null
> @@ -1,38 +0,0 @@
> -/* Determine various system internal values, Linux/Alpha version.
> -   Copyright (C) 1999-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Andreas Schwab <schwab@suse.de>
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library.  If not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -
> -/* On the Alpha we can distinguish between the number of configured and
> -   active cpus.  */
> -#define GET_NPROCS_CONF_PARSER(FP, BUFFER, RESULT)			   \
> -  do									   \
> -    {									   \
> -      /* Find the line that contains the information about the number of   \
> -	 probed cpus.  We don't have to fear extremely long lines since	   \
> -	 the kernel will not generate them.  8192 bytes are really enough. \
> -	 If there is no "CPUs ..." line then we are on a UP system.  */	   \
> -      (RESULT) = 1;							   \
> -      while (__fgets_unlocked ((BUFFER), sizeof (BUFFER), (FP)) != NULL)   \
> -	if ((sscanf (buffer, "cpus detected : %d", &(RESULT)) == 1)	   \
> -	    || (sscanf (buffer, "CPUs probed %d", &(RESULT)) == 1))	   \
> -	  break;							   \
> -    }									   \
> -  while (0)
> -
> -#include <sysdeps/unix/sysv/linux/getsysstats.c>
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index 5069951246..0791399b8f 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -64,13 +64,13 @@ weak_alias (__get_nprocs, get_nprocs)
>  int
>  __get_nprocs_conf (void)
>  {
> -  int result = 1;
> -
>    /* Try to use the sysfs filesystem.  It has actual information about
>       online processors.  */
>    int fd = __open64_nocancel ("/sys/devices/system/cpu/possible", O_RDONLY);
>    if (fd != -1)
>      {
> +      int result = 0;
> +
>        /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>        char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>  
> @@ -97,23 +97,7 @@ __get_nprocs_conf (void)
>        return result + 1;
>      }
>  
> -#ifdef GET_NPROCS_CONF_PARSER
> -  /* If we haven't found an appropriate entry return 1.  */
> -  FILE *fp = fopen ("/proc/cpuinfo", "rce");
> -  if (fp != NULL)
> -    {
> -      char buffer[8192];
> -
> -      /* No threads use this stream.  */
> -      __fsetlocking (fp, FSETLOCKING_BYCALLER);
> -      GET_NPROCS_CONF_PARSER (fp, buffer, result);
> -      fclose (fp);
> -    }
> -#else
> -  result = __get_nprocs ();
> -#endif
> -
> -  return result;
> +  return __get_nprocs ();
>  }
>  libc_hidden_def (__get_nprocs_conf)
>  weak_alias (__get_nprocs_conf, get_nprocs_conf)
> diff --git a/sysdeps/unix/sysv/linux/sparc/getsysstats.c b/sysdeps/unix/sysv/linux/sparc/getsysstats.c
> deleted file mode 100644
> index a8adba8321..0000000000
> --- a/sysdeps/unix/sysv/linux/sparc/getsysstats.c
> +++ /dev/null
> @@ -1,38 +0,0 @@
> -/* Determine various system internal values, Linux/Sparc version.
> -   Copyright (C) 1999-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Andreas Schwab <schwab@suse.de> and
> -		  Jakub Jelinek <jj@ultra.linux.cz>
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -
> -/* On the Sparc we can distinguish between the number of configured and
> -   active cpus.  */
> -#define GET_NPROCS_CONF_PARSER(FP, BUFFER, RESULT)			 \
> -  do									 \
> -    {									 \
> -      (RESULT) = 0;							 \
> -      /* Find the line that contains the information about the number of \
> -	 probed cpus.  We don't have to fear extremely long lines since	 \
> -	 the kernel will not generate them.  8192 bytes are really	 \
> -	 enough.  */							 \
> -      while (__fgets_unlocked ((BUFFER), sizeof (BUFFER), (FP)) != NULL) \
> -	if (sscanf (buffer, "ncpus probed : %d", &(RESULT)) == 1)	 \
> -	  break;							 \
> -    }									 \
> -  while (0)
> -
> -#include <sysdeps/unix/sysv/linux/getsysstats.c>
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-03-29 18:25 [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
                   ` (3 preceding siblings ...)
  2021-05-05 16:52 ` [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
@ 2021-05-05 17:28 ` Florian Weimer
  2021-05-05 18:25   ` Paul Eggert
  4 siblings, 1 reply; 28+ messages in thread
From: Florian Weimer @ 2021-05-05 17:28 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, crrodriguez

* Adhemerval Zanella via Libc-alpha:

> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
> index b0ca4ea7bc..529286e777 100644
> --- a/posix/sched_cpucount.c
> +++ b/posix/sched_cpucount.c
> @@ -22,31 +22,11 @@ int
>  __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>  {
>    int s = 0;
> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>      {
> +      __cpu_mask si = setp->__bits[i];
> +      /* Clear the least significant bit set.  */
> +      for (; si != 0; si &= si - 1, s++);
>      }
> -
>    return s;
>  }

Why “si”?  It think si &= si - 1 clears the *most* significant bit in
si.  If you agree, please update the comment.

The rest looks okay to me.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-03-29 18:25 ` [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf Adhemerval Zanella
  2021-05-05 16:53   ` Adhemerval Zanella
@ 2021-05-05 17:54   ` Florian Weimer
  2021-05-05 18:06     ` Florian Weimer
  2021-05-06 13:17     ` Adhemerval Zanella
  1 sibling, 2 replies; 28+ messages in thread
From: Florian Weimer @ 2021-05-05 17:54 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, crrodriguez

* Adhemerval Zanella via Libc-alpha:

> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
> +
> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
> +      if (n > 0)
> +	{
> +	  buf[n] = '\0';
> +
> +	  /* Start on the right, to find highest node number.  */
> +	  int m = 1;
> +	  while (--n)
> +	    {
> +	      if ((buf[n] == ',') || (buf[n] == '-'))
> +		break;
> +	      /* Ignore '\n'  */
> +	      if (! isdigit (buf[n]))
> +		continue;
> +	      result += (buf[n] - '0') * m;
> +	      m *= 10;
> +	    }
> +	}
> +
> +      __close_nocancel (fd);
> +      return result + 1;
>      }

I think the /online and /possible files have the same layout, so you
could use both.

Is there a way to tell whether the two data sources (/possible and the
count of the cpu%d directory entries) actually agree?  I tried to make
sense of the kernel sources but didn't succeed.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-05 17:54   ` Florian Weimer
@ 2021-05-05 18:06     ` Florian Weimer
  2021-05-06 13:03       ` Adhemerval Zanella
  2021-05-06 13:17     ` Adhemerval Zanella
  1 sibling, 1 reply; 28+ messages in thread
From: Florian Weimer @ 2021-05-05 18:06 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, crrodriguez

* Florian Weimer:

> * Adhemerval Zanella via Libc-alpha:
>
>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>> +
>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>> +      if (n > 0)
>> +	{
>> +	  buf[n] = '\0';
>> +
>> +	  /* Start on the right, to find highest node number.  */
>> +	  int m = 1;
>> +	  while (--n)
>> +	    {
>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>> +		break;
>> +	      /* Ignore '\n'  */
>> +	      if (! isdigit (buf[n]))
>> +		continue;
>> +	      result += (buf[n] - '0') * m;
>> +	      m *= 10;
>> +	    }
>> +	}
>> +
>> +      __close_nocancel (fd);
>> +      return result + 1;
>>      }
>
> I think the /online and /possible files have the same layout, so you
> could use both.

Sorry, I meant to write: “so you could use *one parser for* both”

Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-05 17:28 ` Florian Weimer
@ 2021-05-05 18:25   ` Paul Eggert
  2021-05-05 19:52     ` Noah Goldstein
  2021-05-06 13:33     ` Adhemerval Zanella
  0 siblings, 2 replies; 28+ messages in thread
From: Paul Eggert @ 2021-05-05 18:25 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: crrodriguez

On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
>> index b0ca4ea7bc..529286e777 100644
>> --- a/posix/sched_cpucount.c
>> +++ b/posix/sched_cpucount.c
>> @@ -22,31 +22,11 @@ int
>>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>>   {
>>     int s = 0;
>> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>>       {
>> +      __cpu_mask si = setp->__bits[i];
>> +      /* Clear the least significant bit set.  */
>> +      for (; si != 0; si &= si - 1, s++);
>>       }
>> -
>>     return s;
>>   }
> Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
> si.  If you agree, please update the comment.

Better yet, define a static function 'popcount' that uses Kernighan's 
trick and call that function. As things stand it's not obvious what the 
code is doing, regardless of which bit it's clearing. The function's 
comment should explain why it's not using __builtin_popcount.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-05 18:25   ` Paul Eggert
@ 2021-05-05 19:52     ` Noah Goldstein
  2021-05-06 12:22       ` Adhemerval Zanella
  2021-05-06 13:33     ` Adhemerval Zanella
  1 sibling, 1 reply; 28+ messages in thread
From: Noah Goldstein @ 2021-05-05 19:52 UTC (permalink / raw)
  To: Paul Eggert
  Cc: Florian Weimer, Adhemerval Zanella via Libc-alpha, crrodriguez

On Wed, May 5, 2021 at 12:32 PM Paul Eggert <eggert@cs.ucla.edu> wrote:
>
> On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
> >> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
> >> index b0ca4ea7bc..529286e777 100644
> >> --- a/posix/sched_cpucount.c
> >> +++ b/posix/sched_cpucount.c
> >> @@ -22,31 +22,11 @@ int
> >>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
> >>   {
> >>     int s = 0;
> >> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
> >>       {
> >> +      __cpu_mask si = setp->__bits[i];
> >> +      /* Clear the least significant bit set.  */
> >> +      for (; si != 0; si &= si - 1, s++);
> >>       }
> >> -
> >>     return s;
> >>   }
> > Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
> > si.  If you agree, please update the comment.
>
> Better yet, define a static function 'popcount' that uses Kernighan's
> trick and call that function. As things stand it's not obvious what the
> code is doing, regardless of which bit it's clearing. The function's
> comment should explain why it's not using __builtin_popcount.

It looks to me like GCC is still having a bit of trouble with the new
implementation. With skylake as a target it seems to be throwing
in a cmovcc in the outer loop:

https://godbolt.org/z/ocn1KWGPx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-05 19:52     ` Noah Goldstein
@ 2021-05-06 12:22       ` Adhemerval Zanella
  2021-05-06 18:34         ` Noah Goldstein
  0 siblings, 1 reply; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 12:22 UTC (permalink / raw)
  To: Noah Goldstein, Paul Eggert
  Cc: Florian Weimer, crrodriguez, Adhemerval Zanella via Libc-alpha



On 05/05/2021 16:52, Noah Goldstein via Libc-alpha wrote:
> On Wed, May 5, 2021 at 12:32 PM Paul Eggert <eggert@cs.ucla.edu> wrote:
>>
>> On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
>>>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
>>>> index b0ca4ea7bc..529286e777 100644
>>>> --- a/posix/sched_cpucount.c
>>>> +++ b/posix/sched_cpucount.c
>>>> @@ -22,31 +22,11 @@ int
>>>>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>>>>   {
>>>>     int s = 0;
>>>> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>>>>       {
>>>> +      __cpu_mask si = setp->__bits[i];
>>>> +      /* Clear the least significant bit set.  */
>>>> +      for (; si != 0; si &= si - 1, s++);
>>>>       }
>>>> -
>>>>     return s;
>>>>   }
>>> Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
>>> si.  If you agree, please update the comment.
>>
>> Better yet, define a static function 'popcount' that uses Kernighan's
>> trick and call that function. As things stand it's not obvious what the
>> code is doing, regardless of which bit it's clearing. The function's
>> comment should explain why it's not using __builtin_popcount.
> 
> It looks to me like GCC is still having a bit of trouble with the new
> implementation. With skylake as a target it seems to be throwing
> in a cmovcc in the outer loop:
> 
> https://godbolt.org/z/ocn1KWGPx
> 

It seems that come from using a signed int as the counter, I got a slight
better version using unsigned: https://godbolt.org/z/4MrMq1zKb

It is not as better as the builtin, but do we really need that
micro-optimization in this specific implementation? This is exactly what 
added the current complexity and using a open-coded popcount implementation
is slight better on architectures that do not provided such instruction
(where builtin_popcount will issue a call to __popcountsi2 and gcc seems
to aiming for open code such scenarios [1]).

In any case I would go for simplicity (and the builtin requires know the
underlying type size, which requires the ugly size of check to call the
correct one).

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92135

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-05 18:06     ` Florian Weimer
@ 2021-05-06 13:03       ` Adhemerval Zanella
  2021-05-06 13:51         ` Florian Weimer
  0 siblings, 1 reply; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 13:03 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: crrodriguez



On 05/05/2021 15:06, Florian Weimer wrote:
> * Florian Weimer:
> 
>> * Adhemerval Zanella via Libc-alpha:
>>
>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>> +
>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>> +      if (n > 0)
>>> +	{
>>> +	  buf[n] = '\0';
>>> +
>>> +	  /* Start on the right, to find highest node number.  */
>>> +	  int m = 1;
>>> +	  while (--n)
>>> +	    {
>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>> +		break;
>>> +	      /* Ignore '\n'  */
>>> +	      if (! isdigit (buf[n]))
>>> +		continue;
>>> +	      result += (buf[n] - '0') * m;
>>> +	      m *= 10;
>>> +	    }
>>> +	}
>>> +
>>> +      __close_nocancel (fd);
>>> +      return result + 1;
>>>      }
>>
>> I think the /online and /possible files have the same layout, so you
>> could use both.
> 
> Sorry, I meant to write: “so you could use *one parser for* both”

I am not following, the second patch in this set *removed* the 
/sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
So now it only parses /sys/devices/system/cpu/possible and fallbacks
to sched_getaffinity.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-05 17:54   ` Florian Weimer
  2021-05-05 18:06     ` Florian Weimer
@ 2021-05-06 13:17     ` Adhemerval Zanella
  1 sibling, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 13:17 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: crrodriguez



On 05/05/2021 14:54, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>> +
>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>> +      if (n > 0)
>> +	{
>> +	  buf[n] = '\0';
>> +
>> +	  /* Start on the right, to find highest node number.  */
>> +	  int m = 1;
>> +	  while (--n)
>> +	    {
>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>> +		break;
>> +	      /* Ignore '\n'  */
>> +	      if (! isdigit (buf[n]))
>> +		continue;
>> +	      result += (buf[n] - '0') * m;
>> +	      m *= 10;
>> +	    }
>> +	}
>> +
>> +      __close_nocancel (fd);
>> +      return result + 1;
>>      }
> 
> I think the /online and /possible files have the same layout, so you
> could use both.
> 
> Is there a way to tell whether the two data sources (/possible and the
> count of the cpu%d directory entries) actually agree?  I tried to make
> sense of the kernel sources but didn't succeed.

I am not sure, but at least the /possible is properly documented as 
'testing' (which should stable as indicated by own kernel documentation [1])
ABI [2]:

What:		/sys/devices/system/cpu/kernel_max
		/sys/devices/system/cpu/offline
		/sys/devices/system/cpu/online
		/sys/devices/system/cpu/possible
		/sys/devices/system/cpu/present
Date:		December 2008
Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:	CPU topology files that describe kernel limits related to
		hotplug. Briefly:

		kernel_max: the maximum cpu index allowed by the kernel
		configuration.

		offline: cpus that are not online because they have been
		HOTPLUGGED off or exceed the limit of cpus allowed by the
		kernel configuration (kernel_max above).

		online: cpus that are online and being scheduled.

		possible: cpus that have been allocated resources and can be
		brought online if they are present.

So I think we don't need to certify that directories entries do match
/possible values.

[1] https://github.com/torvalds/linux/tree/master/Documentation/ABI
[2] https://github.com/torvalds/linux/blob/master/Documentation/ABI/testing/sysfs-devices-system-cpu

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-05 18:25   ` Paul Eggert
  2021-05-05 19:52     ` Noah Goldstein
@ 2021-05-06 13:33     ` Adhemerval Zanella
  2021-05-06 13:43       ` Florian Weimer
  2021-05-06 17:12       ` Paul Eggert
  1 sibling, 2 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 13:33 UTC (permalink / raw)
  To: Paul Eggert, Florian Weimer, Adhemerval Zanella via Libc-alpha
  Cc: crrodriguez



On 05/05/2021 15:25, Paul Eggert wrote:
> On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
>>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
>>> index b0ca4ea7bc..529286e777 100644
>>> --- a/posix/sched_cpucount.c
>>> +++ b/posix/sched_cpucount.c
>>> @@ -22,31 +22,11 @@ int
>>>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>>>   {
>>>     int s = 0;
>>> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>>>       {
>>> +      __cpu_mask si = setp->__bits[i];
>>> +      /* Clear the least significant bit set.  */
>>> +      for (; si != 0; si &= si - 1, s++);
>>>       }
>>> -
>>>     return s;
>>>   }
>> Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
>> si.  If you agree, please update the comment.
> 
> Better yet, define a static function 'popcount' that uses Kernighan's trick and call that function. As things stand it's not obvious what the code is doing, regardless of which bit it's clearing. The function's comment should explain why it's not using __builtin_popcount.

What about the below:

diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
index b0ca4ea7bc..6cb5c4337e 100644
--- a/posix/sched_cpucount.c
+++ b/posix/sched_cpucount.c
@@ -17,36 +17,21 @@
 
 #include <sched.h>
 
+/* Counting bits set, Brian Kernighan's way  */
+static inline unsigned int
+countbits (__cpu_mask v)
+{
+  unsigned int s = 0;
+  for (; v != 0; s++)
+    v &= v - 1;
+  return s;
+}
 
 int
 __sched_cpucount (size_t setsize, const cpu_set_t *setp)
 {
   int s = 0;
-  const __cpu_mask *p = setp->__bits;
-  const __cpu_mask *end = &setp->__bits[setsize / sizeof (__cpu_mask)];
-
-  while (p < end)
-    {
-      __cpu_mask l = *p++;
-
-#ifdef POPCNT
-      s += POPCNT (l);
-#else
-      if (l == 0)
-	continue;
-
-      _Static_assert (sizeof (l) == sizeof (unsigned int)
-		      || sizeof (l) == sizeof (unsigned long)
-		      || sizeof (l) == sizeof (unsigned long long),
-		      "sizeof (__cpu_mask");
-      if (sizeof (__cpu_mask) == sizeof (unsigned int))
-	s += __builtin_popcount (l);
-      else if (sizeof (__cpu_mask) == sizeof (unsigned long))
-	s += __builtin_popcountl (l);
-      else
-	s += __builtin_popcountll (l);
-#endif
-    }
-
+  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
+    s += countbits (setp->__bits[i]);
   return s;
 }
diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c
deleted file mode 100644
index 7db31b02f8..0000000000
--- a/sysdeps/i386/i686/multiarch/sched_cpucount.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
diff --git a/sysdeps/ia64/sched_cpucount.c b/sysdeps/ia64/sched_cpucount.c
deleted file mode 100644
index 8440864b02..0000000000
--- a/sysdeps/ia64/sched_cpucount.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#define POPCNT(l) __builtin_popcountl (l)
-
-#include <posix/sched_cpucount.c>
diff --git a/sysdeps/powerpc/sched_cpucount.c b/sysdeps/powerpc/sched_cpucount.c
deleted file mode 100644
index 8f00e3dbc8..0000000000
--- a/sysdeps/powerpc/sched_cpucount.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifdef _ARCH_PWR5
-# define POPCNT(l) __builtin_popcountl (l)
-#endif
-
-#include <posix/sched_cpucount.c>
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
deleted file mode 100644
index 5180a11434..0000000000
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Count bits in CPU set.  x86-64 multi-arch version.
-   This file is part of the GNU C Library.
-   Copyright (C) 2008-2021 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sched.h>
-#include "init-arch.h"
-
-#define __sched_cpucount static generic_cpucount
-#include <posix/sched_cpucount.c>
-#undef __sched_cpucount
-
-#define POPCNT(l) \
-  ({ __cpu_mask r; \
-     asm ("popcnt %1, %0" : "=r" (r) : "0" (l));\
-     r; })
-#define __sched_cpucount static popcount_cpucount
-#include <posix/sched_cpucount.c>
-#undef __sched_cpucount
-
-libc_ifunc (__sched_cpucount,
-	    CPU_FEATURE_USABLE (POPCNT) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
deleted file mode 100644
index 5a27336d6d..0000000000
--- a/sysdeps/x86_64/sched_cpucount.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifdef __amdfam10
-# define POPCNT(l) \
-  ({ __cpu_mask r;							      \
-     asm ("popcntq %1, %0" : "=r" (r) : "0" (l));			      \
-     r; })
-#endif
-
-#include <posix/sched_cpucount.c>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 13:33     ` Adhemerval Zanella
@ 2021-05-06 13:43       ` Florian Weimer
  2021-05-06 16:16         ` Adhemerval Zanella
  2021-05-06 17:12       ` Paul Eggert
  1 sibling, 1 reply; 28+ messages in thread
From: Florian Weimer @ 2021-05-06 13:43 UTC (permalink / raw)
  To: Adhemerval Zanella
  Cc: Paul Eggert, Adhemerval Zanella via Libc-alpha, crrodriguez

* Adhemerval Zanella:

> On 05/05/2021 15:25, Paul Eggert wrote:
>> On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
>>>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
>>>> index b0ca4ea7bc..529286e777 100644
>>>> --- a/posix/sched_cpucount.c
>>>> +++ b/posix/sched_cpucount.c
>>>> @@ -22,31 +22,11 @@ int
>>>>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>>>>   {
>>>>     int s = 0;
>>>> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>>>>       {
>>>> +      __cpu_mask si = setp->__bits[i];
>>>> +      /* Clear the least significant bit set.  */
>>>> +      for (; si != 0; si &= si - 1, s++);
>>>>       }
>>>> -
>>>>     return s;
>>>>   }
>>> Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
>>> si.  If you agree, please update the comment.
>> 
>> Better yet, define a static function 'popcount' that uses Kernighan's trick and call that function. As things stand it's not obvious what the code is doing, regardless of which bit it's clearing. The function's comment should explain why it's not using __builtin_popcount.
>
> What about the below:
>
> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
> index b0ca4ea7bc..6cb5c4337e 100644
> --- a/posix/sched_cpucount.c
> +++ b/posix/sched_cpucount.c
> @@ -17,36 +17,21 @@
>  
>  #include <sched.h>
>  
> +/* Counting bits set, Brian Kernighan's way  */
> +static inline unsigned int
> +countbits (__cpu_mask v)
> +{
> +  unsigned int s = 0;
> +  for (; v != 0; s++)
> +    v &= v - 1;
> +  return s;
> +}

I get that choosing the exact matching builtin for the __cpu_mask type
isn't easy, but wouldn't it be safe to use __builtin_popcountll
unconditionally?

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-06 13:03       ` Adhemerval Zanella
@ 2021-05-06 13:51         ` Florian Weimer
  2021-05-06 20:07           ` Adhemerval Zanella
  0 siblings, 1 reply; 28+ messages in thread
From: Florian Weimer @ 2021-05-06 13:51 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, crrodriguez

* Adhemerval Zanella via Libc-alpha:

> On 05/05/2021 15:06, Florian Weimer wrote:
>> * Florian Weimer:
>> 
>>> * Adhemerval Zanella via Libc-alpha:
>>>
>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>> +
>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>> +      if (n > 0)
>>>> +	{
>>>> +	  buf[n] = '\0';
>>>> +
>>>> +	  /* Start on the right, to find highest node number.  */
>>>> +	  int m = 1;
>>>> +	  while (--n)
>>>> +	    {
>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>> +		break;
>>>> +	      /* Ignore '\n'  */
>>>> +	      if (! isdigit (buf[n]))
>>>> +		continue;
>>>> +	      result += (buf[n] - '0') * m;
>>>> +	      m *= 10;
>>>> +	    }
>>>> +	}
>>>> +
>>>> +      __close_nocancel (fd);
>>>> +      return result + 1;
>>>>      }
>>>
>>> I think the /online and /possible files have the same layout, so you
>>> could use both.
>> 
>> Sorry, I meant to write: “so you could use *one parser for* both”
>
> I am not following, the second patch in this set *removed* the 
> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
> So now it only parses /sys/devices/system/cpu/possible and fallbacks
> to sched_getaffinity.

Oh, right.  So you removed the old implementation and bring bug a
slightly different new one.  Got it.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 13:43       ` Florian Weimer
@ 2021-05-06 16:16         ` Adhemerval Zanella
  2021-05-06 16:42           ` Florian Weimer
  0 siblings, 1 reply; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 16:16 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Paul Eggert, Adhemerval Zanella via Libc-alpha, crrodriguez



On 06/05/2021 10:43, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> On 05/05/2021 15:25, Paul Eggert wrote:
>>> On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
>>>>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
>>>>> index b0ca4ea7bc..529286e777 100644
>>>>> --- a/posix/sched_cpucount.c
>>>>> +++ b/posix/sched_cpucount.c
>>>>> @@ -22,31 +22,11 @@ int
>>>>>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
>>>>>   {
>>>>>     int s = 0;
>>>>> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
>>>>>       {
>>>>> +      __cpu_mask si = setp->__bits[i];
>>>>> +      /* Clear the least significant bit set.  */
>>>>> +      for (; si != 0; si &= si - 1, s++);
>>>>>       }
>>>>> -
>>>>>     return s;
>>>>>   }
>>>> Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
>>>> si.  If you agree, please update the comment.
>>>
>>> Better yet, define a static function 'popcount' that uses Kernighan's trick and call that function. As things stand it's not obvious what the code is doing, regardless of which bit it's clearing. The function's comment should explain why it's not using __builtin_popcount.
>>
>> What about the below:
>>
>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
>> index b0ca4ea7bc..6cb5c4337e 100644
>> --- a/posix/sched_cpucount.c
>> +++ b/posix/sched_cpucount.c
>> @@ -17,36 +17,21 @@
>>  
>>  #include <sched.h>
>>  
>> +/* Counting bits set, Brian Kernighan's way  */
>> +static inline unsigned int
>> +countbits (__cpu_mask v)
>> +{
>> +  unsigned int s = 0;
>> +  for (; v != 0; s++)
>> +    v &= v - 1;
>> +  return s;
>> +}
> 
> I get that choosing the exact matching builtin for the __cpu_mask type
> isn't easy, but wouldn't it be safe to use __builtin_popcountll
> unconditionally?

Using a open-coded routine is slight better for architectures that
do not have a popcount instruction, since __builtin_popcountll will
call the libgcc routine).  But I hardly think we need that amount
of micro-optimization for such routine.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 16:16         ` Adhemerval Zanella
@ 2021-05-06 16:42           ` Florian Weimer
  2021-05-06 16:54             ` Adhemerval Zanella
  0 siblings, 1 reply; 28+ messages in thread
From: Florian Weimer @ 2021-05-06 16:42 UTC (permalink / raw)
  To: Adhemerval Zanella
  Cc: Paul Eggert, Adhemerval Zanella via Libc-alpha, crrodriguez

* Adhemerval Zanella:

>> I get that choosing the exact matching builtin for the __cpu_mask type
>> isn't easy, but wouldn't it be safe to use __builtin_popcountll
>> unconditionally?
>
> Using a open-coded routine is slight better for architectures that
> do not have a popcount instruction, since __builtin_popcountll will
> call the libgcc routine).  But I hardly think we need that amount
> of micro-optimization for such routine.

Ahh, and we'll get a check-localplt failure.  Hmm.  So mention that in
the comment?

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 16:42           ` Florian Weimer
@ 2021-05-06 16:54             ` Adhemerval Zanella
  0 siblings, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 16:54 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Paul Eggert, Adhemerval Zanella via Libc-alpha, crrodriguez



On 06/05/2021 13:42, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>> I get that choosing the exact matching builtin for the __cpu_mask type
>>> isn't easy, but wouldn't it be safe to use __builtin_popcountll
>>> unconditionally?
>>
>> Using a open-coded routine is slight better for architectures that
>> do not have a popcount instruction, since __builtin_popcountll will
>> call the libgcc routine).  But I hardly think we need that amount
>> of micro-optimization for such routine.
> 
> Ahh, and we'll get a check-localplt failure.  Hmm.  So mention that in
> the comment?
The check-localplt issue only occurs when using compiler generated symbols 
if the libgcc function itself calls a glibc symbol (for instance arm div
soft-fp might call raise). In this case the call will be local, the
issue is more performance-wise.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 13:33     ` Adhemerval Zanella
  2021-05-06 13:43       ` Florian Weimer
@ 2021-05-06 17:12       ` Paul Eggert
  2021-05-06 17:51         ` Adhemerval Zanella
  1 sibling, 1 reply; 28+ messages in thread
From: Paul Eggert @ 2021-05-06 17:12 UTC (permalink / raw)
  To: Adhemerval Zanella, Florian Weimer, Adhemerval Zanella via Libc-alpha
  Cc: crrodriguez

On 5/6/21 6:33 AM, Adhemerval Zanella wrote:
> +/* Counting bits set, Brian Kernighan's way  */
> +static inline unsigned int
> +countbits (__cpu_mask v)
> +{
> +  unsigned int s = 0;
> +  for (; v != 0; s++)
> +    v &= v - 1;
> +  return s;
> +}

Looks OK, except please use 'int' rather than 'unsigned int' (twice) as 
we should prefer signed to unsigned where either will do and the caller 
wants plain int anyway.

More important, please add a comment saying why we're not using the GCC 
builtin (summarizing Florian's and your followup remarks) since it is a 
little odd to see code that simply replicates a compiler builtin (what's 
next, we'll forgo multiplication in favor of repeated addition? :-).

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 17:12       ` Paul Eggert
@ 2021-05-06 17:51         ` Adhemerval Zanella
  0 siblings, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 17:51 UTC (permalink / raw)
  To: Paul Eggert, Florian Weimer, Adhemerval Zanella via Libc-alpha
  Cc: crrodriguez



On 06/05/2021 14:12, Paul Eggert wrote:
> On 5/6/21 6:33 AM, Adhemerval Zanella wrote:
>> +/* Counting bits set, Brian Kernighan's way  */
>> +static inline unsigned int
>> +countbits (__cpu_mask v)
>> +{
>> +  unsigned int s = 0;
>> +  for (; v != 0; s++)
>> +    v &= v - 1;
>> +  return s;
>> +}
> 
> Looks OK, except please use 'int' rather than 'unsigned int' (twice) as we should prefer signed to unsigned where either will do and the caller wants plain int anyway.

The unsigned int is more to avoid the cmovcc on x86_64
https://godbolt.org/z/4MrMq1zKb.  But I do prefer int
in this case as well.

> 
> More important, please add a comment saying why we're not using the GCC builtin (summarizing Florian's and your followup remarks) since it is a little odd to see code that simply replicates a compiler builtin (what's next, we'll forgo multiplication in favor of repeated addition? :-).

This popcount is trick, even gcc is not really sure what do do
on every architecture [1].

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92135

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] Remove architecture specific sched_cpucount optimizations
  2021-05-06 12:22       ` Adhemerval Zanella
@ 2021-05-06 18:34         ` Noah Goldstein
  0 siblings, 0 replies; 28+ messages in thread
From: Noah Goldstein @ 2021-05-06 18:34 UTC (permalink / raw)
  To: Adhemerval Zanella
  Cc: Paul Eggert, Florian Weimer, crrodriguez,
	Adhemerval Zanella via Libc-alpha

On Thu, May 6, 2021 at 8:22 AM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 05/05/2021 16:52, Noah Goldstein via Libc-alpha wrote:
> > On Wed, May 5, 2021 at 12:32 PM Paul Eggert <eggert@cs.ucla.edu> wrote:
> >>
> >> On 5/5/21 10:28 AM, Florian Weimer via Libc-alpha wrote:
> >>>> diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
> >>>> index b0ca4ea7bc..529286e777 100644
> >>>> --- a/posix/sched_cpucount.c
> >>>> +++ b/posix/sched_cpucount.c
> >>>> @@ -22,31 +22,11 @@ int
> >>>>   __sched_cpucount (size_t setsize, const cpu_set_t *setp)
> >>>>   {
> >>>>     int s = 0;
> >>>> +  for (int i = 0; i < setsize / sizeof (__cpu_mask); i++)
> >>>>       {
> >>>> +      __cpu_mask si = setp->__bits[i];
> >>>> +      /* Clear the least significant bit set.  */
> >>>> +      for (; si != 0; si &= si - 1, s++);
> >>>>       }
> >>>> -
> >>>>     return s;
> >>>>   }
> >>> Why “si”?  It think si &= si - 1 clears the*most*  significant bit in
> >>> si.  If you agree, please update the comment.
> >>
> >> Better yet, define a static function 'popcount' that uses Kernighan's
> >> trick and call that function. As things stand it's not obvious what the
> >> code is doing, regardless of which bit it's clearing. The function's
> >> comment should explain why it's not using __builtin_popcount.
> >
> > It looks to me like GCC is still having a bit of trouble with the new
> > implementation. With skylake as a target it seems to be throwing
> > in a cmovcc in the outer loop:
> >
> > https://godbolt.org/z/ocn1KWGPx
> >
>
> It seems that come from using a signed int as the counter, I got a slight
> better version using unsigned: https://godbolt.org/z/4MrMq1zKb
>
> It is not as better as the builtin, but do we really need that
> micro-optimization in this specific implementation? This is exactly what
> added the current complexity and using a open-coded popcount implementation
> is slight better on architectures that do not provided such instruction
> (where builtin_popcount will issue a call to __popcountsi2 and gcc seems
> to aiming for open code such scenarios [1]).
>
> In any case I would go for simplicity (and the builtin requires know the
> underlying type size, which requires the ugly size of check to call the
> correct one).
>
> [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92135

I agree. Definetly not opposed to the patch.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-06 13:51         ` Florian Weimer
@ 2021-05-06 20:07           ` Adhemerval Zanella
  2021-05-07 11:07             ` Florian Weimer
  0 siblings, 1 reply; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-06 20:07 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: crrodriguez



On 06/05/2021 10:51, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> On 05/05/2021 15:06, Florian Weimer wrote:
>>> * Florian Weimer:
>>>
>>>> * Adhemerval Zanella via Libc-alpha:
>>>>
>>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>>> +
>>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>>> +      if (n > 0)
>>>>> +	{
>>>>> +	  buf[n] = '\0';
>>>>> +
>>>>> +	  /* Start on the right, to find highest node number.  */
>>>>> +	  int m = 1;
>>>>> +	  while (--n)
>>>>> +	    {
>>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>>> +		break;
>>>>> +	      /* Ignore '\n'  */
>>>>> +	      if (! isdigit (buf[n]))
>>>>> +		continue;
>>>>> +	      result += (buf[n] - '0') * m;
>>>>> +	      m *= 10;
>>>>> +	    }
>>>>> +	}
>>>>> +
>>>>> +      __close_nocancel (fd);
>>>>> +      return result + 1;
>>>>>      }
>>>>
>>>> I think the /online and /possible files have the same layout, so you
>>>> could use both.
>>>
>>> Sorry, I meant to write: “so you could use *one parser for* both”
>>
>> I am not following, the second patch in this set *removed* the 
>> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
>> So now it only parses /sys/devices/system/cpu/possible and fallbacks
>> to sched_getaffinity.
> 
> Oh, right.  So you removed the old implementation and bring bug a
> slightly different new one.  Got it.

Right, are you ok with the patch then?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-06 20:07           ` Adhemerval Zanella
@ 2021-05-07 11:07             ` Florian Weimer
  2021-05-07 12:43               ` Adhemerval Zanella
  0 siblings, 1 reply; 28+ messages in thread
From: Florian Weimer @ 2021-05-07 11:07 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, crrodriguez

* Adhemerval Zanella via Libc-alpha:

> On 06/05/2021 10:51, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>> 
>>> On 05/05/2021 15:06, Florian Weimer wrote:
>>>> * Florian Weimer:
>>>>
>>>>> * Adhemerval Zanella via Libc-alpha:
>>>>>
>>>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>>>> +
>>>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>>>> +      if (n > 0)
>>>>>> +	{
>>>>>> +	  buf[n] = '\0';
>>>>>> +
>>>>>> +	  /* Start on the right, to find highest node number.  */
>>>>>> +	  int m = 1;
>>>>>> +	  while (--n)
>>>>>> +	    {
>>>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>>>> +		break;
>>>>>> +	      /* Ignore '\n'  */
>>>>>> +	      if (! isdigit (buf[n]))
>>>>>> +		continue;
>>>>>> +	      result += (buf[n] - '0') * m;
>>>>>> +	      m *= 10;
>>>>>> +	    }
>>>>>> +	}
>>>>>> +
>>>>>> +      __close_nocancel (fd);
>>>>>> +      return result + 1;
>>>>>>      }
>>>>>
>>>>> I think the /online and /possible files have the same layout, so you
>>>>> could use both.
>>>>
>>>> Sorry, I meant to write: “so you could use *one parser for* both”
>>>
>>> I am not following, the second patch in this set *removed* the 
>>> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
>>> So now it only parses /sys/devices/system/cpu/possible and fallbacks
>>> to sched_getaffinity.
>> 
>> Oh, right.  So you removed the old implementation and bring bug a
>> slightly different new one.  Got it.
>
> Right, are you ok with the patch then?

The new parser cannot handle gaps or ranges that do not start at 0, I
think.  The old parser could cope with that.  The kernel data structures
support gaps in the possible CPU mask.  I don't know if they occur in
practice, but firmware quirks in this area aren't exactly rare (e.g.,
single-socket systems which report hundreds of hot-pluggable CPU cores).

Thanks,
Florian


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf
  2021-05-07 11:07             ` Florian Weimer
@ 2021-05-07 12:43               ` Adhemerval Zanella
  0 siblings, 0 replies; 28+ messages in thread
From: Adhemerval Zanella @ 2021-05-07 12:43 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: crrodriguez



On 07/05/2021 08:07, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> On 06/05/2021 10:51, Florian Weimer wrote:
>>> * Adhemerval Zanella via Libc-alpha:
>>>
>>>> On 05/05/2021 15:06, Florian Weimer wrote:
>>>>> * Florian Weimer:
>>>>>
>>>>>> * Adhemerval Zanella via Libc-alpha:
>>>>>>
>>>>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>>>>> +
>>>>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>>>>> +      if (n > 0)
>>>>>>> +	{
>>>>>>> +	  buf[n] = '\0';
>>>>>>> +
>>>>>>> +	  /* Start on the right, to find highest node number.  */
>>>>>>> +	  int m = 1;
>>>>>>> +	  while (--n)
>>>>>>> +	    {
>>>>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>>>>> +		break;
>>>>>>> +	      /* Ignore '\n'  */
>>>>>>> +	      if (! isdigit (buf[n]))
>>>>>>> +		continue;
>>>>>>> +	      result += (buf[n] - '0') * m;
>>>>>>> +	      m *= 10;
>>>>>>> +	    }
>>>>>>> +	}
>>>>>>> +
>>>>>>> +      __close_nocancel (fd);
>>>>>>> +      return result + 1;
>>>>>>>      }
>>>>>>
>>>>>> I think the /online and /possible files have the same layout, so you
>>>>>> could use both.
>>>>>
>>>>> Sorry, I meant to write: “so you could use *one parser for* both”
>>>>
>>>> I am not following, the second patch in this set *removed* the 
>>>> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
>>>> So now it only parses /sys/devices/system/cpu/possible and fallbacks
>>>> to sched_getaffinity.
>>>
>>> Oh, right.  So you removed the old implementation and bring bug a
>>> slightly different new one.  Got it.
>>
>> Right, are you ok with the patch then?
> 
> The new parser cannot handle gaps or ranges that do not start at 0, I
> think.  The old parser could cope with that.  The kernel data structures
> support gaps in the possible CPU mask.  I don't know if they occur in
> practice, but firmware quirks in this area aren't exactly rare (e.g.,
> single-socket systems which report hundreds of hot-pluggable CPU cores).

Indeed I did not take this in consideration and it seems that using
/possible is more complicated than just opendir the directory.  And 
we will need to handle the possible unlikely issue of a truncated
value from kernel, since the buffer is limited to PAGE_SIZE.

I will drop this patch.

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2021-05-07 12:43 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-29 18:25 [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
2021-03-29 18:25 ` [PATCH 2/4] linux: Use sched_getaffinity for __get_nprocs (BZ #27645) Adhemerval Zanella
2021-04-27 15:38   ` Florian Weimer
2021-03-29 18:25 ` [PATCH 3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf Adhemerval Zanella
2021-05-05 16:53   ` Adhemerval Zanella
2021-05-05 17:54   ` Florian Weimer
2021-05-05 18:06     ` Florian Weimer
2021-05-06 13:03       ` Adhemerval Zanella
2021-05-06 13:51         ` Florian Weimer
2021-05-06 20:07           ` Adhemerval Zanella
2021-05-07 11:07             ` Florian Weimer
2021-05-07 12:43               ` Adhemerval Zanella
2021-05-06 13:17     ` Adhemerval Zanella
2021-03-29 18:25 ` [PATCH 4/4] linux: Remove /proc/cpuinfo fallback on alpha and sparc Adhemerval Zanella
2021-05-05 16:53   ` Adhemerval Zanella
2021-05-05 16:52 ` [PATCH 1/4] Remove architecture specific sched_cpucount optimizations Adhemerval Zanella
2021-05-05 17:28 ` Florian Weimer
2021-05-05 18:25   ` Paul Eggert
2021-05-05 19:52     ` Noah Goldstein
2021-05-06 12:22       ` Adhemerval Zanella
2021-05-06 18:34         ` Noah Goldstein
2021-05-06 13:33     ` Adhemerval Zanella
2021-05-06 13:43       ` Florian Weimer
2021-05-06 16:16         ` Adhemerval Zanella
2021-05-06 16:42           ` Florian Weimer
2021-05-06 16:54             ` Adhemerval Zanella
2021-05-06 17:12       ` Paul Eggert
2021-05-06 17:51         ` Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).