public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
@ 2022-02-05 21:24 Dmitry V. Levin
  2022-02-07 11:25 ` Adhemerval Zanella
  2022-02-07 13:57 ` [PATCH v2] " Dmitry V. Levin
  0 siblings, 2 replies; 13+ messages in thread
From: Dmitry V. Levin @ 2022-02-05 21:24 UTC (permalink / raw)
  To: libc-alpha

get_nprocs() and get_nprocs_conf() use various methods to obtain an
accurate number of processors.  Re-introduce __get_nprocs_sched() as
a source of information, and fix the order in which these methods are
used to return the most accurate information.  The primary source of
information used in both functions remains unchanged.

This also changes __get_nprocs_sched() error return value from 2 to 0,
but all its users are already prepared to handle that.

Old behavior:
  get_nprocs:
    /sys/devices/system/cpu/online -> /proc/stat -> 2
  get_nprocs_conf:
    /sys/devices/system/cpu/ -> /proc/stat -> 2

New behavior:
  get_nprocs:
    /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
  get_nprocs_conf:
    /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2

Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
Closes: BZ #28865
---
 sysdeps/unix/sysv/linux/getsysstats.c | 101 ++++++++++++++++++--------
 1 file changed, 70 insertions(+), 31 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index c98c8ce3d4..e1ed96070c 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -50,9 +50,8 @@ __get_nprocs_sched (void)
        is an arbitrary values assuming such systems should be rare and there
        is no offline cpus.  */
     return max_num_cpus;
-  /* Some other error.  2 is conservative (not a uniprocessor system, so
-     atomics are needed). */
-  return 2;
+  /* Some other error.  */
+  return 0;
 }
 
 static char *
@@ -108,22 +107,19 @@ next_line (int fd, char *const buffer, char **cp, char **re,
 }
 
 static int
-get_nproc_stat (char *buffer, size_t buffer_size)
+get_nproc_stat (void)
 {
+  enum { buffer_size = 1024 };
+  char buffer[buffer_size];
   char *buffer_end = buffer + buffer_size;
   char *cp = buffer_end;
   char *re = buffer_end;
-
-  /* Default to an SMP system in case we cannot obtain an accurate
-     number.  */
-  int result = 2;
+  int result = 0;
 
   const int flags = O_RDONLY | O_CLOEXEC;
   int fd = __open_nocancel ("/proc/stat", flags);
   if (fd != -1)
     {
-      result = 0;
-
       char *l;
       while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
 	/* The current format of /proc/stat has all the cpu* entries
@@ -139,8 +135,8 @@ get_nproc_stat (char *buffer, size_t buffer_size)
   return result;
 }
 
-int
-__get_nprocs (void)
+static int
+get_nprocs_cpu_online (void)
 {
   enum { buffer_size = 1024 };
   char buffer[buffer_size];
@@ -179,7 +175,8 @@ __get_nprocs (void)
 		  }
 	      }
 
-	    result += m - n + 1;
+	    if (m >= n)
+	      result += m - n + 1;
 
 	    l = endp;
 	    if (l < re && *l == ',')
@@ -188,28 +185,18 @@ __get_nprocs (void)
 	while (l < re && *l != '\n');
 
       __close_nocancel_nostatus (fd);
-
-      if (result > 0)
-	return result;
     }
 
-  return get_nproc_stat (buffer, buffer_size);
+  return result;
 }
-libc_hidden_def (__get_nprocs)
-weak_alias (__get_nprocs, get_nprocs)
-
 
-/* On some architectures it is possible to distinguish between configured
-   and active cpus.  */
-int
-__get_nprocs_conf (void)
+static int
+get_nprocs_cpu (void)
 {
-  /* Try to use the sysfs filesystem.  It has actual information about
-     online processors.  */
+  int count = 0;
   DIR *dir = __opendir ("/sys/devices/system/cpu");
   if (dir != NULL)
     {
-      int count = 0;
       struct dirent64 *d;
 
       while ((d = __readdir64 (dir)) != NULL)
@@ -224,12 +211,64 @@ __get_nprocs_conf (void)
 
       __closedir (dir);
 
-      return count;
     }
+  return count;
+}
 
-  enum { buffer_size = 1024 };
-  char buffer[buffer_size];
-  return get_nproc_stat (buffer, buffer_size);
+int
+__get_nprocs (void)
+{
+  int result;
+
+  /* Try /sys/devices/system/cpu/online first.  */
+  result = get_nprocs_cpu_online ();
+  if (result)
+    return result;
+
+  /* Try sched_getaffinity(2).  */
+  result = __get_nprocs_sched ();
+  if (result)
+    return result;
+
+  /* Try /proc/stat.  */
+  result = get_nproc_stat ();
+  if (result)
+    return result;
+
+  /* We failed to obtain an accurate number.  Be conservative: return
+     the smallest number meaning that this is not a uniprocessor system,
+     so atomics are needed.  */
+  return 2;
+}
+libc_hidden_def (__get_nprocs)
+weak_alias (__get_nprocs, get_nprocs)
+
+/* On some architectures it is possible to distinguish between configured
+   and active cpus.  */
+int
+__get_nprocs_conf (void)
+{
+  int result;
+
+  /* Try /sys/devices/system/cpu/ first.  */
+  result = get_nprocs_cpu ();
+  if (result)
+    return result;
+
+  /* Try /proc/stat.  */
+  result = get_nproc_stat ();
+  if (result)
+    return result;
+
+  /* Try sched_getaffinity(2).  */
+  result = __get_nprocs_sched ();
+  if (result)
+    return result;
+
+  /* We failed to obtain an accurate number.  Be conservative: return
+     the smallest number meaning that this is not a uniprocessor system,
+     so atomics are needed.  */
+  return 2;
 }
 libc_hidden_def (__get_nprocs_conf)
 weak_alias (__get_nprocs_conf, get_nprocs_conf)

-- 
ldv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-05 21:24 [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865] Dmitry V. Levin
@ 2022-02-07 11:25 ` Adhemerval Zanella
  2022-02-07 11:44   ` Florian Weimer
  2022-02-07 11:51   ` Dmitry V. Levin
  2022-02-07 13:57 ` [PATCH v2] " Dmitry V. Levin
  1 sibling, 2 replies; 13+ messages in thread
From: Adhemerval Zanella @ 2022-02-07 11:25 UTC (permalink / raw)
  To: Dmitry V. Levin, libc-alpha



On 05/02/2022 18:24, Dmitry V. Levin wrote:
> get_nprocs() and get_nprocs_conf() use various methods to obtain an
> accurate number of processors.  Re-introduce __get_nprocs_sched() as
> a source of information, and fix the order in which these methods are
> used to return the most accurate information.  The primary source of
> information used in both functions remains unchanged.
> 
> This also changes __get_nprocs_sched() error return value from 2 to 0,
> but all its users are already prepared to handle that.
> 
> Old behavior:
>   get_nprocs:
>     /sys/devices/system/cpu/online -> /proc/stat -> 2
>   get_nprocs_conf:
>     /sys/devices/system/cpu/ -> /proc/stat -> 2
> 
> New behavior:
>   get_nprocs:
>     /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
>   get_nprocs_conf:
>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
> 
> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
> Closes: BZ #28865

I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs
to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because
it introduced regression on some monitoring tools [3].

In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use 
sched_getaffinity because since some container environment returns a synthetic
mask that might break some programs.  Also, sched_getaffinity returns a 
'per-process' mask instead of system-wide as we discussed in previous threads.
It should be ok to get adjusting internal tuning (as for malloc).

[1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645
[2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310
[3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5
[4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624

> ---
>  sysdeps/unix/sysv/linux/getsysstats.c | 101 ++++++++++++++++++--------
>  1 file changed, 70 insertions(+), 31 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index c98c8ce3d4..e1ed96070c 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -50,9 +50,8 @@ __get_nprocs_sched (void)
>         is an arbitrary values assuming such systems should be rare and there
>         is no offline cpus.  */
>      return max_num_cpus;
> -  /* Some other error.  2 is conservative (not a uniprocessor system, so
> -     atomics are needed). */
> -  return 2;
> +  /* Some other error.  */
> +  return 0;
>  }
>  
>  static char *
> @@ -108,22 +107,19 @@ next_line (int fd, char *const buffer, char **cp, char **re,
>  }
>  
>  static int
> -get_nproc_stat (char *buffer, size_t buffer_size)
> +get_nproc_stat (void)
>  {
> +  enum { buffer_size = 1024 };
> +  char buffer[buffer_size];
>    char *buffer_end = buffer + buffer_size;
>    char *cp = buffer_end;
>    char *re = buffer_end;
> -
> -  /* Default to an SMP system in case we cannot obtain an accurate
> -     number.  */
> -  int result = 2;
> +  int result = 0;
>  
>    const int flags = O_RDONLY | O_CLOEXEC;
>    int fd = __open_nocancel ("/proc/stat", flags);
>    if (fd != -1)
>      {
> -      result = 0;
> -
>        char *l;
>        while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
>  	/* The current format of /proc/stat has all the cpu* entries
> @@ -139,8 +135,8 @@ get_nproc_stat (char *buffer, size_t buffer_size)
>    return result;
>  }
>  
> -int
> -__get_nprocs (void)
> +static int
> +get_nprocs_cpu_online (void)
>  {
>    enum { buffer_size = 1024 };
>    char buffer[buffer_size];
> @@ -179,7 +175,8 @@ __get_nprocs (void)
>  		  }
>  	      }
>  
> -	    result += m - n + 1;
> +	    if (m >= n)
> +	      result += m - n + 1;
>  
>  	    l = endp;
>  	    if (l < re && *l == ',')
> @@ -188,28 +185,18 @@ __get_nprocs (void)
>  	while (l < re && *l != '\n');
>  
>        __close_nocancel_nostatus (fd);
> -
> -      if (result > 0)
> -	return result;
>      }
>  
> -  return get_nproc_stat (buffer, buffer_size);
> +  return result;
>  }
> -libc_hidden_def (__get_nprocs)
> -weak_alias (__get_nprocs, get_nprocs)
> -
>  
> -/* On some architectures it is possible to distinguish between configured
> -   and active cpus.  */
> -int
> -__get_nprocs_conf (void)
> +static int
> +get_nprocs_cpu (void)
>  {
> -  /* Try to use the sysfs filesystem.  It has actual information about
> -     online processors.  */
> +  int count = 0;
>    DIR *dir = __opendir ("/sys/devices/system/cpu");
>    if (dir != NULL)
>      {
> -      int count = 0;
>        struct dirent64 *d;
>  
>        while ((d = __readdir64 (dir)) != NULL)
> @@ -224,12 +211,64 @@ __get_nprocs_conf (void)
>  
>        __closedir (dir);
>  
> -      return count;
>      }
> +  return count;
> +}
>  
> -  enum { buffer_size = 1024 };
> -  char buffer[buffer_size];
> -  return get_nproc_stat (buffer, buffer_size);
> +int
> +__get_nprocs (void)
> +{
> +  int result;
> +
> +  /* Try /sys/devices/system/cpu/online first.  */
> +  result = get_nprocs_cpu_online ();
> +  if (result)
> +    return result;
> +
> +  /* Try sched_getaffinity(2).  */
> +  result = __get_nprocs_sched ();
> +  if (result)
> +    return result;
> +
> +  /* Try /proc/stat.  */
> +  result = get_nproc_stat ();
> +  if (result)
> +    return result;
> +
> +  /* We failed to obtain an accurate number.  Be conservative: return
> +     the smallest number meaning that this is not a uniprocessor system,
> +     so atomics are needed.  */
> +  return 2;
> +}
> +libc_hidden_def (__get_nprocs)
> +weak_alias (__get_nprocs, get_nprocs)
> +
> +/* On some architectures it is possible to distinguish between configured
> +   and active cpus.  */
> +int
> +__get_nprocs_conf (void)
> +{
> +  int result;
> +
> +  /* Try /sys/devices/system/cpu/ first.  */
> +  result = get_nprocs_cpu ();
> +  if (result)
> +    return result;
> +
> +  /* Try /proc/stat.  */
> +  result = get_nproc_stat ();
> +  if (result)
> +    return result;
> +
> +  /* Try sched_getaffinity(2).  */
> +  result = __get_nprocs_sched ();
> +  if (result)
> +    return result;
> +
> +  /* We failed to obtain an accurate number.  Be conservative: return
> +     the smallest number meaning that this is not a uniprocessor system,
> +     so atomics are needed.  */
> +  return 2;
>  }
>  libc_hidden_def (__get_nprocs_conf)
>  weak_alias (__get_nprocs_conf, get_nprocs_conf)
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 11:25 ` Adhemerval Zanella
@ 2022-02-07 11:44   ` Florian Weimer
  2022-02-07 11:57     ` Adhemerval Zanella
  2022-02-07 11:51   ` Dmitry V. Levin
  1 sibling, 1 reply; 13+ messages in thread
From: Florian Weimer @ 2022-02-07 11:44 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Dmitry V. Levin, Adhemerval Zanella

* Adhemerval Zanella via Libc-alpha:

> On 05/02/2022 18:24, Dmitry V. Levin wrote:
>> get_nprocs() and get_nprocs_conf() use various methods to obtain an
>> accurate number of processors.  Re-introduce __get_nprocs_sched() as
>> a source of information, and fix the order in which these methods are
>> used to return the most accurate information.  The primary source of
>> information used in both functions remains unchanged.
>> 
>> This also changes __get_nprocs_sched() error return value from 2 to 0,
>> but all its users are already prepared to handle that.
>> 
>> Old behavior:
>>   get_nprocs:
>>     /sys/devices/system/cpu/online -> /proc/stat -> 2
>>   get_nprocs_conf:
>>     /sys/devices/system/cpu/ -> /proc/stat -> 2
>> 
>> New behavior:
>>   get_nprocs:
>>     /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
>>   get_nprocs_conf:
>>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
>> 
>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
>> Closes: BZ #28865
>
> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs
> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because
> it introduced regression on some monitoring tools [3].

But I think using sched_getaffinity as a fallback when /sys and /proc
are not available makes somse.  It's different form what we did
temporarily (sched_getaffinity first).

Thanks,
Florian


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 11:25 ` Adhemerval Zanella
  2022-02-07 11:44   ` Florian Weimer
@ 2022-02-07 11:51   ` Dmitry V. Levin
  2022-02-07 12:01     ` Adhemerval Zanella
  1 sibling, 1 reply; 13+ messages in thread
From: Dmitry V. Levin @ 2022-02-07 11:51 UTC (permalink / raw)
  To: Adhemerval Zanella; +Cc: libc-alpha

Hi,

On Mon, Feb 07, 2022 at 08:25:11AM -0300, Adhemerval Zanella via Libc-alpha wrote:
> On 05/02/2022 18:24, Dmitry V. Levin wrote:
> > get_nprocs() and get_nprocs_conf() use various methods to obtain an
> > accurate number of processors.  Re-introduce __get_nprocs_sched() as
> > a source of information, and fix the order in which these methods are
> > used to return the most accurate information.  The primary source of
> > information used in both functions remains unchanged.
> > 
> > This also changes __get_nprocs_sched() error return value from 2 to 0,
> > but all its users are already prepared to handle that.
> > 
> > Old behavior:
> >   get_nprocs:
> >     /sys/devices/system/cpu/online -> /proc/stat -> 2
> >   get_nprocs_conf:
> >     /sys/devices/system/cpu/ -> /proc/stat -> 2
> > 
> > New behavior:
> >   get_nprocs:
> >     /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
> >   get_nprocs_conf:
> >     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
> > 
> > Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
> > Closes: BZ #28865
> 
> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs
> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because
> it introduced regression on some monitoring tools [3].
> 
> In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use 
> sched_getaffinity because since some container environment returns a synthetic
> mask that might break some programs.  Also, sched_getaffinity returns a 
> 'per-process' mask instead of system-wide as we discussed in previous threads.
> It should be ok to get adjusting internal tuning (as for malloc).
> 
> [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645
> [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310
> [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5
> [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624

Is there any realistic case when 2 is a more accurate estimation for the
number of processors than sched_getaffinity?  I suppose there are no such
cases.  Also, /sys is consulted first anyway.

I wish I saw commit 342298278e earlier to raise objections before it was
committed.

Please note that BZ #28865 is a real regression we had to patch, this
means glibc must behave properly in that environment without any
additional tuning.

I suggest to install this fix and see what could be done later
in an unlikely case anything else breaks.


-- 
ldv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 11:44   ` Florian Weimer
@ 2022-02-07 11:57     ` Adhemerval Zanella
  2022-02-07 12:01       ` Florian Weimer
  0 siblings, 1 reply; 13+ messages in thread
From: Adhemerval Zanella @ 2022-02-07 11:57 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: Dmitry V. Levin



On 07/02/2022 08:44, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> On 05/02/2022 18:24, Dmitry V. Levin wrote:
>>> get_nprocs() and get_nprocs_conf() use various methods to obtain an
>>> accurate number of processors.  Re-introduce __get_nprocs_sched() as
>>> a source of information, and fix the order in which these methods are
>>> used to return the most accurate information.  The primary source of
>>> information used in both functions remains unchanged.
>>>
>>> This also changes __get_nprocs_sched() error return value from 2 to 0,
>>> but all its users are already prepared to handle that.
>>>
>>> Old behavior:
>>>   get_nprocs:
>>>     /sys/devices/system/cpu/online -> /proc/stat -> 2
>>>   get_nprocs_conf:
>>>     /sys/devices/system/cpu/ -> /proc/stat -> 2
>>>
>>> New behavior:
>>>   get_nprocs:
>>>     /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
>>>   get_nprocs_conf:
>>>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
>>>
>>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
>>> Closes: BZ #28865
>>
>> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs
>> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because
>> it introduced regression on some monitoring tools [3].
> 
> But I think using sched_getaffinity as a fallback when /sys and /proc
> are not available makes somse.  It's different form what we did
> temporarily (sched_getaffinity first).

My concern is we start to see BZ#27645 again on environments that filter
out sysfs and provide a synthetic sched_getaffinity.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 11:57     ` Adhemerval Zanella
@ 2022-02-07 12:01       ` Florian Weimer
  2022-02-07 12:07         ` Adhemerval Zanella
  0 siblings, 1 reply; 13+ messages in thread
From: Florian Weimer @ 2022-02-07 12:01 UTC (permalink / raw)
  To: Adhemerval Zanella; +Cc: Adhemerval Zanella via Libc-alpha, Dmitry V. Levin

* Adhemerval Zanella:

>> But I think using sched_getaffinity as a fallback when /sys and /proc
>> are not available makes somse.  It's different form what we did
>> temporarily (sched_getaffinity first).
>
> My concern is we start to see BZ#27645 again on environments that filter
> out sysfs and provide a synthetic sched_getaffinity.

Should we move the sched_getaffinity fallback after /proc, then?

Thanks,
Florian


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 11:51   ` Dmitry V. Levin
@ 2022-02-07 12:01     ` Adhemerval Zanella
  2022-02-07 13:45       ` Dmitry V. Levin
  0 siblings, 1 reply; 13+ messages in thread
From: Adhemerval Zanella @ 2022-02-07 12:01 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: libc-alpha



On 07/02/2022 08:51, Dmitry V. Levin wrote:
> Hi,
> 
> On Mon, Feb 07, 2022 at 08:25:11AM -0300, Adhemerval Zanella via Libc-alpha wrote:
>> On 05/02/2022 18:24, Dmitry V. Levin wrote:
>>> get_nprocs() and get_nprocs_conf() use various methods to obtain an
>>> accurate number of processors.  Re-introduce __get_nprocs_sched() as
>>> a source of information, and fix the order in which these methods are
>>> used to return the most accurate information.  The primary source of
>>> information used in both functions remains unchanged.
>>>
>>> This also changes __get_nprocs_sched() error return value from 2 to 0,
>>> but all its users are already prepared to handle that.
>>>
>>> Old behavior:
>>>   get_nprocs:
>>>     /sys/devices/system/cpu/online -> /proc/stat -> 2
>>>   get_nprocs_conf:
>>>     /sys/devices/system/cpu/ -> /proc/stat -> 2
>>>
>>> New behavior:
>>>   get_nprocs:
>>>     /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
>>>   get_nprocs_conf:
>>>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
>>>
>>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
>>> Closes: BZ #28865
>>
>> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs
>> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because
>> it introduced regression on some monitoring tools [3].
>>
>> In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use 
>> sched_getaffinity because since some container environment returns a synthetic
>> mask that might break some programs.  Also, sched_getaffinity returns a 
>> 'per-process' mask instead of system-wide as we discussed in previous threads.
>> It should be ok to get adjusting internal tuning (as for malloc).
>>
>> [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645
>> [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310
>> [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5
>> [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624
> 
> Is there any realistic case when 2 is a more accurate estimation for the
> number of processors than sched_getaffinity?  I suppose there are no such
> cases.  Also, /sys is consulted first anyway.

I am not sure, but my impression is on some environments sched_getaffinity
returns a synthetic value that might not represent the correct system
supported CPUs.  At least, it was my impression in the bug reports, where
it does break some programs.

> 
> I wish I saw commit 342298278e earlier to raise objections before it was
> committed.
> 
> Please note that BZ #28865 is a real regression we had to patch, this
> means glibc must behave properly in that environment without any
> additional tuning.
> 
> I suggest to install this fix and see what could be done later
> in an unlikely case anything else breaks.

I think in this case we should use sched_affinity as the last fallback
on get_nprocs as well we, so we first use either sysfs or procfs and
only then fallback to sched_getaffinity.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 12:01       ` Florian Weimer
@ 2022-02-07 12:07         ` Adhemerval Zanella
  0 siblings, 0 replies; 13+ messages in thread
From: Adhemerval Zanella @ 2022-02-07 12:07 UTC (permalink / raw)
  To: Florian Weimer; +Cc: Adhemerval Zanella via Libc-alpha, Dmitry V. Levin



On 07/02/2022 09:01, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>> But I think using sched_getaffinity as a fallback when /sys and /proc
>>> are not available makes somse.  It's different form what we did
>>> temporarily (sched_getaffinity first).
>>
>> My concern is we start to see BZ#27645 again on environments that filter
>> out sysfs and provide a synthetic sched_getaffinity.
> 
> Should we move the sched_getaffinity fallback after /proc, then?

Yeah, that was what I suggested in another reply.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 12:01     ` Adhemerval Zanella
@ 2022-02-07 13:45       ` Dmitry V. Levin
  0 siblings, 0 replies; 13+ messages in thread
From: Dmitry V. Levin @ 2022-02-07 13:45 UTC (permalink / raw)
  To: Adhemerval Zanella; +Cc: libc-alpha

On Mon, Feb 07, 2022 at 09:01:19AM -0300, Adhemerval Zanella via Libc-alpha wrote:
> On 07/02/2022 08:51, Dmitry V. Levin wrote:
> > On Mon, Feb 07, 2022 at 08:25:11AM -0300, Adhemerval Zanella via Libc-alpha wrote:
> >> On 05/02/2022 18:24, Dmitry V. Levin wrote:
> >>> get_nprocs() and get_nprocs_conf() use various methods to obtain an
> >>> accurate number of processors.  Re-introduce __get_nprocs_sched() as
> >>> a source of information, and fix the order in which these methods are
> >>> used to return the most accurate information.  The primary source of
> >>> information used in both functions remains unchanged.
> >>>
> >>> This also changes __get_nprocs_sched() error return value from 2 to 0,
> >>> but all its users are already prepared to handle that.
> >>>
> >>> Old behavior:
> >>>   get_nprocs:
> >>>     /sys/devices/system/cpu/online -> /proc/stat -> 2
> >>>   get_nprocs_conf:
> >>>     /sys/devices/system/cpu/ -> /proc/stat -> 2
> >>>
> >>> New behavior:
> >>>   get_nprocs:
> >>>     /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2
> >>>   get_nprocs_conf:
> >>>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
> >>>
> >>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
> >>> Closes: BZ #28865
> >>
> >> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs
> >> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because
> >> it introduced regression on some monitoring tools [3].
> >>
> >> In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use 
> >> sched_getaffinity because since some container environment returns a synthetic
> >> mask that might break some programs.  Also, sched_getaffinity returns a 
> >> 'per-process' mask instead of system-wide as we discussed in previous threads.
> >> It should be ok to get adjusting internal tuning (as for malloc).
> >>
> >> [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645
> >> [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310
> >> [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5
> >> [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624
> > 
> > Is there any realistic case when 2 is a more accurate estimation for the
> > number of processors than sched_getaffinity?  I suppose there are no such
> > cases.  Also, /sys is consulted first anyway.
> 
> I am not sure, but my impression is on some environments sched_getaffinity
> returns a synthetic value that might not represent the correct system
> supported CPUs.  At least, it was my impression in the bug reports, where
> it does break some programs.
> 
> > I wish I saw commit 342298278e earlier to raise objections before it was
> > committed.
> > 
> > Please note that BZ #28865 is a real regression we had to patch, this
> > means glibc must behave properly in that environment without any
> > additional tuning.
> > 
> > I suggest to install this fix and see what could be done later
> > in an unlikely case anything else breaks.
> 
> I think in this case we should use sched_affinity as the last fallback
> on get_nprocs as well we, so we first use either sysfs or procfs and
> only then fallback to sched_getaffinity.

OK, this should work, too, I'll post a revised patch shortly.


-- 
ldv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-05 21:24 [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865] Dmitry V. Levin
  2022-02-07 11:25 ` Adhemerval Zanella
@ 2022-02-07 13:57 ` Dmitry V. Levin
  2022-02-08 19:34   ` Adhemerval Zanella
  1 sibling, 1 reply; 13+ messages in thread
From: Dmitry V. Levin @ 2022-02-07 13:57 UTC (permalink / raw)
  To: libc-alpha

get_nprocs() and get_nprocs_conf() use various methods to obtain an
accurate number of processors.  Re-introduce __get_nprocs_sched() as
a source of information, and fix the order in which these methods are
used to return the most accurate information.  The primary source of
information used in both functions remains unchanged.

This also changes __get_nprocs_sched() error return value from 2 to 0,
but all its users are already prepared to handle that.

Old behavior:
  get_nprocs:
    /sys/devices/system/cpu/online -> /proc/stat -> 2
  get_nprocs_conf:
    /sys/devices/system/cpu/ -> /proc/stat -> 2

New behavior:
  get_nprocs:
    /sys/devices/system/cpu/online -> /proc/stat -> sched_getaffinity -> 2
  get_nprocs_conf:
    /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2

Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
Closes: BZ #28865
---

v2: prioritize /proc/stat over sched_getaffinity in get_nprocs().

 sysdeps/unix/sysv/linux/getsysstats.c | 94 ++++++++++++++++++---------
 1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index c98c8ce3d4..ef77ed193c 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -50,9 +50,8 @@ __get_nprocs_sched (void)
        is an arbitrary values assuming such systems should be rare and there
        is no offline cpus.  */
     return max_num_cpus;
-  /* Some other error.  2 is conservative (not a uniprocessor system, so
-     atomics are needed). */
-  return 2;
+  /* Some other error.  */
+  return 0;
 }
 
 static char *
@@ -108,22 +107,19 @@ next_line (int fd, char *const buffer, char **cp, char **re,
 }
 
 static int
-get_nproc_stat (char *buffer, size_t buffer_size)
+get_nproc_stat (void)
 {
+  enum { buffer_size = 1024 };
+  char buffer[buffer_size];
   char *buffer_end = buffer + buffer_size;
   char *cp = buffer_end;
   char *re = buffer_end;
-
-  /* Default to an SMP system in case we cannot obtain an accurate
-     number.  */
-  int result = 2;
+  int result = 0;
 
   const int flags = O_RDONLY | O_CLOEXEC;
   int fd = __open_nocancel ("/proc/stat", flags);
   if (fd != -1)
     {
-      result = 0;
-
       char *l;
       while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
 	/* The current format of /proc/stat has all the cpu* entries
@@ -139,8 +135,8 @@ get_nproc_stat (char *buffer, size_t buffer_size)
   return result;
 }
 
-int
-__get_nprocs (void)
+static int
+get_nprocs_cpu_online (void)
 {
   enum { buffer_size = 1024 };
   char buffer[buffer_size];
@@ -179,7 +175,8 @@ __get_nprocs (void)
 		  }
 	      }
 
-	    result += m - n + 1;
+	    if (m >= n)
+	      result += m - n + 1;
 
 	    l = endp;
 	    if (l < re && *l == ',')
@@ -188,28 +185,18 @@ __get_nprocs (void)
 	while (l < re && *l != '\n');
 
       __close_nocancel_nostatus (fd);
-
-      if (result > 0)
-	return result;
     }
 
-  return get_nproc_stat (buffer, buffer_size);
+  return result;
 }
-libc_hidden_def (__get_nprocs)
-weak_alias (__get_nprocs, get_nprocs)
-
 
-/* On some architectures it is possible to distinguish between configured
-   and active cpus.  */
-int
-__get_nprocs_conf (void)
+static int
+get_nprocs_cpu (void)
 {
-  /* Try to use the sysfs filesystem.  It has actual information about
-     online processors.  */
+  int count = 0;
   DIR *dir = __opendir ("/sys/devices/system/cpu");
   if (dir != NULL)
     {
-      int count = 0;
       struct dirent64 *d;
 
       while ((d = __readdir64 (dir)) != NULL)
@@ -224,12 +211,57 @@ __get_nprocs_conf (void)
 
       __closedir (dir);
 
-      return count;
     }
+  return count;
+}
 
-  enum { buffer_size = 1024 };
-  char buffer[buffer_size];
-  return get_nproc_stat (buffer, buffer_size);
+static int
+get_nprocs_fallback (void)
+{
+  int result;
+
+  /* Try /proc/stat first.  */
+  result = get_nproc_stat ();
+  if (result)
+    return result;
+
+  /* Try sched_getaffinity.  */
+  result = __get_nprocs_sched ();
+  if (result)
+    return result;
+
+  /* We failed to obtain an accurate number.  Be conservative: return
+     the smallest number meaning that this is not a uniprocessor system,
+     so atomics are needed.  */
+  return 2;
+}
+
+int
+__get_nprocs (void)
+{
+  /* Try /sys/devices/system/cpu/online first.  */
+  int result = get_nprocs_cpu_online ();
+  if (result)
+    return result;
+
+  /* Fall back to /proc/stat and sched_getaffinity.  */
+  return get_nprocs_fallback ();
+}
+libc_hidden_def (__get_nprocs)
+weak_alias (__get_nprocs, get_nprocs)
+
+/* On some architectures it is possible to distinguish between configured
+   and active cpus.  */
+int
+__get_nprocs_conf (void)
+{
+  /* Try /sys/devices/system/cpu/ first.  */
+  int result = get_nprocs_cpu ();
+  if (result)
+    return result;
+
+  /* Fall back to /proc/stat and sched_getaffinity.  */
+  return get_nprocs_fallback ();
 }
 libc_hidden_def (__get_nprocs_conf)
 weak_alias (__get_nprocs_conf, get_nprocs_conf)

-- 
ldv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-07 13:57 ` [PATCH v2] " Dmitry V. Levin
@ 2022-02-08 19:34   ` Adhemerval Zanella
  2022-02-08 22:40     ` Dmitry V. Levin
  0 siblings, 1 reply; 13+ messages in thread
From: Adhemerval Zanella @ 2022-02-08 19:34 UTC (permalink / raw)
  To: Dmitry V. Levin, libc-alpha



On 07/02/2022 10:57, Dmitry V. Levin wrote:
> get_nprocs() and get_nprocs_conf() use various methods to obtain an
> accurate number of processors.  Re-introduce __get_nprocs_sched() as
> a source of information, and fix the order in which these methods are
> used to return the most accurate information.  The primary source of
> information used in both functions remains unchanged.
> 
> This also changes __get_nprocs_sched() error return value from 2 to 0,
> but all its users are already prepared to handle that.
> 
> Old behavior:
>   get_nprocs:
>     /sys/devices/system/cpu/online -> /proc/stat -> 2
>   get_nprocs_conf:
>     /sys/devices/system/cpu/ -> /proc/stat -> 2
> 
> New behavior:
>   get_nprocs:
>     /sys/devices/system/cpu/online -> /proc/stat -> sched_getaffinity -> 2
>   get_nprocs_conf:
>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
> 
> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
> Closes: BZ #28865

I am still not fully sure if sched_getaffinity is a correct fallback to an 
API that should return a system overview, the issue that lead 
sched_getaffinity removal was that it is subject to per-process filtering
(either by seccomp, cgroup, etc.) and it might trigger some wrong behavior
in some programs  (such as monitoring tools and jvms).

However if the environment does not provide a way to actually obtain such
information I guess sched_getaffinity should not make things worse (it does
not make sense to assume multiprocessor if the process is not allowed more
than one CPU, and monitoring tools should not work if sysfs/procfs are
not present).

LGTM with some nits below.  I think you might use brackets instead of
square bracket in title (to trigger the bugzilla scripts).

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
> 
> v2: prioritize /proc/stat over sched_getaffinity in get_nprocs().
> 
>  sysdeps/unix/sysv/linux/getsysstats.c | 94 ++++++++++++++++++---------
>  1 file changed, 63 insertions(+), 31 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index c98c8ce3d4..ef77ed193c 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -50,9 +50,8 @@ __get_nprocs_sched (void)
>         is an arbitrary values assuming such systems should be rare and there
>         is no offline cpus.  */
>      return max_num_cpus;
> -  /* Some other error.  2 is conservative (not a uniprocessor system, so
> -     atomics are needed). */
> -  return 2;
> +  /* Some other error.  */
> +  return 0;
>  }
>  
>  static char *
> @@ -108,22 +107,19 @@ next_line (int fd, char *const buffer, char **cp, char **re,
>  }
>  
>  static int
> -get_nproc_stat (char *buffer, size_t buffer_size)
> +get_nproc_stat (void)
>  {
> +  enum { buffer_size = 1024 };
> +  char buffer[buffer_size];
>    char *buffer_end = buffer + buffer_size;
>    char *cp = buffer_end;
>    char *re = buffer_end;
> -
> -  /* Default to an SMP system in case we cannot obtain an accurate
> -     number.  */
> -  int result = 2;
> +  int result = 0;
>  
>    const int flags = O_RDONLY | O_CLOEXEC;
>    int fd = __open_nocancel ("/proc/stat", flags);
>    if (fd != -1)
>      {
> -      result = 0;
> -
>        char *l;
>        while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
>  	/* The current format of /proc/stat has all the cpu* entries
> @@ -139,8 +135,8 @@ get_nproc_stat (char *buffer, size_t buffer_size)
>    return result;
>  }
>  
> -int
> -__get_nprocs (void)
> +static int
> +get_nprocs_cpu_online (void)
>  {
>    enum { buffer_size = 1024 };
>    char buffer[buffer_size];
> @@ -179,7 +175,8 @@ __get_nprocs (void)
>  		  }
>  	      }
>  
> -	    result += m - n + 1;
> +	    if (m >= n)
> +	      result += m - n + 1;
>  
>  	    l = endp;
>  	    if (l < re && *l == ',')
> @@ -188,28 +185,18 @@ __get_nprocs (void)
>  	while (l < re && *l != '\n');
>  
>        __close_nocancel_nostatus (fd);
> -
> -      if (result > 0)
> -	return result;
>      }
>  
> -  return get_nproc_stat (buffer, buffer_size);
> +  return result;
>  }
> -libc_hidden_def (__get_nprocs)
> -weak_alias (__get_nprocs, get_nprocs)
> -
>  
> -/* On some architectures it is possible to distinguish between configured
> -   and active cpus.  */
> -int
> -__get_nprocs_conf (void)
> +static int
> +get_nprocs_cpu (void)
>  {
> -  /* Try to use the sysfs filesystem.  It has actual information about
> -     online processors.  */
> +  int count = 0;
>    DIR *dir = __opendir ("/sys/devices/system/cpu");
>    if (dir != NULL)
>      {
> -      int count = 0;
>        struct dirent64 *d;
>  
>        while ((d = __readdir64 (dir)) != NULL)
> @@ -224,12 +211,57 @@ __get_nprocs_conf (void)
>  
>        __closedir (dir);
>  
> -      return count;
>      }
> +  return count;
> +}
>  
> -  enum { buffer_size = 1024 };
> -  char buffer[buffer_size];
> -  return get_nproc_stat (buffer, buffer_size);
> +static int
> +get_nprocs_fallback (void)
> +{
> +  int result;
> +
> +  /* Try /proc/stat first.  */
> +  result = get_nproc_stat ();
> +  if (result)

No implicit check.

> +    return result;
> +
> +  /* Try sched_getaffinity.  */
> +  result = __get_nprocs_sched ();
> +  if (result)

Ditto.

> +    return result;
> +
> +  /* We failed to obtain an accurate number.  Be conservative: return
> +     the smallest number meaning that this is not a uniprocessor system,
> +     so atomics are needed.  */
> +  return 2;
> +}
> +
> +int
> +__get_nprocs (void)
> +{
> +  /* Try /sys/devices/system/cpu/online first.  */
> +  int result = get_nprocs_cpu_online ();
> +  if (result)

Ditto.

> +    return result;
> +
> +  /* Fall back to /proc/stat and sched_getaffinity.  */
> +  return get_nprocs_fallback ();
> +}
> +libc_hidden_def (__get_nprocs)
> +weak_alias (__get_nprocs, get_nprocs)
> +
> +/* On some architectures it is possible to distinguish between configured
> +   and active cpus.  */
> +int
> +__get_nprocs_conf (void)
> +{
> +  /* Try /sys/devices/system/cpu/ first.  */
> +  int result = get_nprocs_cpu ();
> +  if (result)

Ditto.

> +    return result;
> +
> +  /* Fall back to /proc/stat and sched_getaffinity.  */
> +  return get_nprocs_fallback ();
>  }
>  libc_hidden_def (__get_nprocs_conf)
>  weak_alias (__get_nprocs_conf, get_nprocs_conf)
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-08 19:34   ` Adhemerval Zanella
@ 2022-02-08 22:40     ` Dmitry V. Levin
  2022-02-08 22:58       ` Adhemerval Zanella
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry V. Levin @ 2022-02-08 22:40 UTC (permalink / raw)
  To: Adhemerval Zanella; +Cc: libc-alpha

On Tue, Feb 08, 2022 at 04:34:42PM -0300, Adhemerval Zanella wrote:
> On 07/02/2022 10:57, Dmitry V. Levin wrote:
> > get_nprocs() and get_nprocs_conf() use various methods to obtain an
> > accurate number of processors.  Re-introduce __get_nprocs_sched() as
> > a source of information, and fix the order in which these methods are
> > used to return the most accurate information.  The primary source of
> > information used in both functions remains unchanged.
> > 
> > This also changes __get_nprocs_sched() error return value from 2 to 0,
> > but all its users are already prepared to handle that.
> > 
> > Old behavior:
> >   get_nprocs:
> >     /sys/devices/system/cpu/online -> /proc/stat -> 2
> >   get_nprocs_conf:
> >     /sys/devices/system/cpu/ -> /proc/stat -> 2
> > 
> > New behavior:
> >   get_nprocs:
> >     /sys/devices/system/cpu/online -> /proc/stat -> sched_getaffinity -> 2
> >   get_nprocs_conf:
> >     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
> > 
> > Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
> > Closes: BZ #28865
> 
> I am still not fully sure if sched_getaffinity is a correct fallback to an 
> API that should return a system overview, the issue that lead 
> sched_getaffinity removal was that it is subject to per-process filtering
> (either by seccomp, cgroup, etc.) and it might trigger some wrong behavior
> in some programs  (such as monitoring tools and jvms).
> 
> However if the environment does not provide a way to actually obtain such
> information I guess sched_getaffinity should not make things worse (it does
> not make sense to assume multiprocessor if the process is not allowed more
> than one CPU, and monitoring tools should not work if sysfs/procfs are
> not present).
> 
> LGTM with some nits below.  I think you might use brackets instead of
> square bracket in title (to trigger the bugzilla scripts).

I don't think it's the kind of brackets that trigger bugzilla scripts.

> Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

Corrected nits and pushed, thanks.


-- 
ldv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
  2022-02-08 22:40     ` Dmitry V. Levin
@ 2022-02-08 22:58       ` Adhemerval Zanella
  0 siblings, 0 replies; 13+ messages in thread
From: Adhemerval Zanella @ 2022-02-08 22:58 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: libc-alpha



On 08/02/2022 19:40, Dmitry V. Levin wrote:
> On Tue, Feb 08, 2022 at 04:34:42PM -0300, Adhemerval Zanella wrote:
>> On 07/02/2022 10:57, Dmitry V. Levin wrote:
>>> get_nprocs() and get_nprocs_conf() use various methods to obtain an
>>> accurate number of processors.  Re-introduce __get_nprocs_sched() as
>>> a source of information, and fix the order in which these methods are
>>> used to return the most accurate information.  The primary source of
>>> information used in both functions remains unchanged.
>>>
>>> This also changes __get_nprocs_sched() error return value from 2 to 0,
>>> but all its users are already prepared to handle that.
>>>
>>> Old behavior:
>>>   get_nprocs:
>>>     /sys/devices/system/cpu/online -> /proc/stat -> 2
>>>   get_nprocs_conf:
>>>     /sys/devices/system/cpu/ -> /proc/stat -> 2
>>>
>>> New behavior:
>>>   get_nprocs:
>>>     /sys/devices/system/cpu/online -> /proc/stat -> sched_getaffinity -> 2
>>>   get_nprocs_conf:
>>>     /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2
>>>
>>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc")
>>> Closes: BZ #28865
>>
>> I am still not fully sure if sched_getaffinity is a correct fallback to an 
>> API that should return a system overview, the issue that lead 
>> sched_getaffinity removal was that it is subject to per-process filtering
>> (either by seccomp, cgroup, etc.) and it might trigger some wrong behavior
>> in some programs  (such as monitoring tools and jvms).
>>
>> However if the environment does not provide a way to actually obtain such
>> information I guess sched_getaffinity should not make things worse (it does
>> not make sense to assume multiprocessor if the process is not allowed more
>> than one CPU, and monitoring tools should not work if sysfs/procfs are
>> not present).
>>
>> LGTM with some nits below.  I think you might use brackets instead of
>> square bracket in title (to trigger the bugzilla scripts).
> 
> I don't think it's the kind of brackets that trigger bugzilla scripts.
> 
>> Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
> 
> Corrected nits and pushed, thanks.

I forgot to ask you to hold on backports to see if anything breaks, but it
seems your were faster than me.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-02-08 22:59 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-05 21:24 [PATCH] linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865] Dmitry V. Levin
2022-02-07 11:25 ` Adhemerval Zanella
2022-02-07 11:44   ` Florian Weimer
2022-02-07 11:57     ` Adhemerval Zanella
2022-02-07 12:01       ` Florian Weimer
2022-02-07 12:07         ` Adhemerval Zanella
2022-02-07 11:51   ` Dmitry V. Levin
2022-02-07 12:01     ` Adhemerval Zanella
2022-02-07 13:45       ` Dmitry V. Levin
2022-02-07 13:57 ` [PATCH v2] " Dmitry V. Levin
2022-02-08 19:34   ` Adhemerval Zanella
2022-02-08 22:40     ` Dmitry V. Levin
2022-02-08 22:58       ` Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).