public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [patch v5] Allow for unpriviledged nested containers
@ 2022-03-11  3:38 DJ Delorie
  2022-03-28 20:41 ` Carlos O'Donell
  0 siblings, 1 reply; 8+ messages in thread
From: DJ Delorie @ 2022-03-11  3:38 UTC (permalink / raw)
  To: libc-alpha


[resent as v5]

[Both versions were tested, with identical test results.  Rather than
delete my work, I leave it as an option in the source, in case it's
needed later.  I want to have my cake and eat it too ;-)]

If the build itself is run in a container, we may not be able to
fully set up a nested container for test-container testing.
Notably is the mounting of /proc, since it's critical that it
be mounted from within the same PID namespace as its users, and
thus cannot be bind mounted from outside the container like other
mounts.

This patch chooses to use the parent's PID namespace instead of
creating a new one, as this is more likely to be allowed.

Code for using a separate PID namespace is included as well, in case
in the future a test requires it.  In this configuration,
test-container may not be able to mount /proc but will run the test
anyway, since most containerized tests do not require /proc.  The few
that do may predicate that, and support for such is also added.

diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c
index f31f9956faa..e9e99d11e01 100644
--- a/elf/tst-pldd.c
+++ b/elf/tst-pldd.c
@@ -85,6 +85,9 @@ in_str_list (const char *libname, const char *const strlist[])
 static int
 do_test (void)
 {
+  /* needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child */
+  support_need_proc ();
+
   /* Check if our subprocess can be debugged with ptrace.  */
   {
     int ptrace_scope = support_ptrace_scope ();
diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c
index d2ebf308ae7..4a8bf1b4846 100644
--- a/nptl/tst-pthread-getattr.c
+++ b/nptl/tst-pthread-getattr.c
@@ -28,6 +28,8 @@
 #include <unistd.h>
 #include <inttypes.h>
 
+#include <support/support.h>
+
 /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes
    returned as unlimited when it is not, which may cause this test to fail.
    There is also the other case where RLIMIT_STACK is intentionally set as
@@ -153,6 +155,9 @@ check_stack_top (void)
 static int
 do_test (void)
 {
+  /* Reads /proc/self/maps to get stack size.  */
+  support_need_proc ();
+
   pagesize = sysconf (_SC_PAGESIZE);
   return check_stack_top ();
 }
diff --git a/nss/tst-reload2.c b/nss/tst-reload2.c
index fb3b94a1fab..94e2029fd35 100644
--- a/nss/tst-reload2.c
+++ b/nss/tst-reload2.c
@@ -95,6 +95,9 @@ do_test (void)
   char buf1[PATH_MAX];
   char buf2[PATH_MAX];
 
+  /* The xmkdirp below fails if we can't map our uid, which requires /proc.  */
+  support_need_proc ();
+
   sprintf (buf1, "/subdir%s", support_slibdir_prefix);
   xmkdirp (buf1, 0777);
 
diff --git a/support/Makefile b/support/Makefile
index 5ddcb8d1581..f036a813048 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -64,6 +64,7 @@ libsupport-routines = \
   support_format_netent \
   support_isolate_in_subprocess \
   support_mutex_pi_monotonic \
+  support_need_proc \
   support_path_support_time64 \
   support_process_state \
   support_ptrace \
diff --git a/support/support.h b/support/support.h
index 73b9fc48f01..bcf7bc43723 100644
--- a/support/support.h
+++ b/support/support.h
@@ -91,6 +91,10 @@ char *support_quote_string (const char *);
    regular file open for writing, and initially empty.  */
 int support_descriptor_supports_holes (int fd);
 
+/* Predicates that a test requires a working /proc filesystem.  This
+   call will exit with UNSUPPORTED if /proc is not available.  */
+void support_need_proc (void);
+
 /* Error-checking wrapper functions which terminate the process on
    error.  */
 
diff --git a/support/support_need_proc.c b/support/support_need_proc.c
new file mode 100644
index 00000000000..5d94b25ba8b
--- /dev/null
+++ b/support/support_need_proc.c
@@ -0,0 +1,33 @@
+/* Indicate that a test requires a working /proc.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <unistd.h>
+#include <support/check.h>
+#include <support/support.h>
+
+/* We test for /proc/self/maps since that's one of the files that one
+   of our tests actually uses, but the general idea is if Linux's
+   /proc/ (procfs) filesystem is mounted.  If not, the process exits
+   with an UNSUPPORTED result code.  */
+
+void
+support_need_proc (void)
+{
+  if (access ("/proc/self/maps", R_OK))
+    FAIL_UNSUPPORTED ("/proc is not available");
+}
diff --git a/support/test-container.c b/support/test-container.c
index 25e7f142193..d574ee6cd89 100644
--- a/support/test-container.c
+++ b/support/test-container.c
@@ -49,6 +49,20 @@
 #include "check.h"
 #include "test-driver.h"
 
+/* If set to 0, we bind mount the parent's /proc and re-use the
+   paren't pids (i.e. test runs as the user).  If set to 1, we enter a
+   new pid namespace and mount our own /proc (i.e. test runs as root).
+   I was tired of swapping the code back and forth during testing, so
+   you get both ;-) */
+#define SEPARATE_PID_NS 1
+#define SHARED_PID_NS (!SEPARATE_PID_NS)
+
+#if SEPARATE_PID_NS
+#define MAYBE_CLONE_NEWPID CLONE_NEWPID
+#else
+#define MAYBE_CLONE_NEWPID 0
+#endif
+
 #ifndef __linux__
 #define mount(s,t,fs,f,d) no_mount()
 int no_mount (void)
@@ -231,7 +245,7 @@ concat (const char *str, ...)
 static void
 trymount (const char *src, const char *dest)
 {
-  if (mount (src, dest, "", MS_BIND, NULL) < 0)
+  if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0)
     FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest);
 }
 
@@ -1068,7 +1082,7 @@ main (int argc, char **argv)
 
 #ifdef CLONE_NEWNS
   /* The unshare here gives us our own spaces and capabilities.  */
-  if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0)
+  if (unshare (CLONE_NEWUSER | MAYBE_CLONE_NEWPID | CLONE_NEWNS) < 0)
     {
       /* Older kernels may not support all the options, or security
 	 policy may block this call.  */
@@ -1094,6 +1108,15 @@ main (int argc, char **argv)
   trymount (support_srcdir_root, new_srcdir_path);
   trymount (support_objdir_root, new_objdir_path);
 
+#if SHARED_PID_NS
+  /* It may not be possible to mount /proc directly.  */
+  {
+    char *new_proc = concat (new_root_path, "/proc", NULL);
+    xmkdirp (new_proc, 0755);
+    trymount ("/proc", new_proc);
+  }
+#endif
+
   xmkdirp (concat (new_root_path, "/dev", NULL), 0755);
   devmount (new_root_path, "null");
   devmount (new_root_path, "zero");
@@ -1163,42 +1186,57 @@ main (int argc, char **argv)
 
   maybe_xmkdir ("/tmp", 0755);
 
+#if SEPARATE_PID_NS
   /* Now that we're pid 1 (effectively "root") we can mount /proc  */
   maybe_xmkdir ("/proc", 0777);
-  if (mount ("proc", "/proc", "proc", 0, NULL) < 0)
-    FAIL_EXIT1 ("Unable to mount /proc: ");
-
-  /* We map our original UID to the same UID in the container so we
-     can own our own files normally.  */
-  UMAP = open ("/proc/self/uid_map", O_WRONLY);
-  if (UMAP < 0)
-    FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
-
-  sprintf (tmp, "%lld %lld 1\n",
-	   (long long) (be_su ? 0 : original_uid), (long long) original_uid);
-  write (UMAP, tmp, strlen (tmp));
-  xclose (UMAP);
-
-  /* We must disable setgroups () before we can map our groups, else we
-     get EPERM.  */
-  GMAP = open ("/proc/self/setgroups", O_WRONLY);
-  if (GMAP >= 0)
+  if (mount ("proc", "/proc", "proc", 0, NULL) != 0)
     {
-      /* We support kernels old enough to not have this.  */
-      write (GMAP, "deny\n", 5);
-      xclose (GMAP);
+      // This happens if we're trying to create a nested container,
+      // like if the build is running under podman, and we lack
+      // priviledges.
+
+      // Ideally we would WARN here, but that would just add noise to
+      // *every* test-container test, and the ones that care should
+      // have their own relevent diagnostics.
+
+      // FAIL_EXIT1 ("Unable to mount /proc: ");
     }
+  else
+    /* The shared pid namespace case will always run the following block... */
+#endif
+    {
+      /* We map our original UID to the same UID in the container so we
+	 can own our own files normally.  */
+      UMAP = open ("/proc/self/uid_map", O_WRONLY);
+      if (UMAP < 0)
+	FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
+
+      sprintf (tmp, "%lld %lld 1\n",
+	       (long long) (be_su ? 0 : original_uid), (long long) original_uid);
+      write (UMAP, tmp, strlen (tmp));
+      xclose (UMAP);
+
+      /* We must disable setgroups () before we can map our groups, else we
+	 get EPERM.  */
+      GMAP = open ("/proc/self/setgroups", O_WRONLY);
+      if (GMAP >= 0)
+	{
+	  /* We support kernels old enough to not have this.  */
+	  write (GMAP, "deny\n", 5);
+	  xclose (GMAP);
+	}
 
-  /* We map our original GID to the same GID in the container so we
-     can own our own files normally.  */
-  GMAP = open ("/proc/self/gid_map", O_WRONLY);
-  if (GMAP < 0)
-    FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
+      /* We map our original GID to the same GID in the container so we
+	 can own our own files normally.  */
+      GMAP = open ("/proc/self/gid_map", O_WRONLY);
+      if (GMAP < 0)
+	FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
 
-  sprintf (tmp, "%lld %lld 1\n",
-	   (long long) (be_su ? 0 : original_gid), (long long) original_gid);
-  write (GMAP, tmp, strlen (tmp));
-  xclose (GMAP);
+      sprintf (tmp, "%lld %lld 1\n",
+	       (long long) (be_su ? 0 : original_gid), (long long) original_gid);
+      write (GMAP, tmp, strlen (tmp));
+      xclose (GMAP);
+    }
 
   if (change_cwd)
     {


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch v5] Allow for unpriviledged nested containers
  2022-03-11  3:38 [patch v5] Allow for unpriviledged nested containers DJ Delorie
@ 2022-03-28 20:41 ` Carlos O'Donell
  2022-03-29  3:53   ` [patch v6] " DJ Delorie
  0 siblings, 1 reply; 8+ messages in thread
From: Carlos O'Donell @ 2022-03-28 20:41 UTC (permalink / raw)
  To: DJ Delorie, libc-alpha

On 3/10/22 22:38, DJ Delorie via Libc-alpha wrote:
> 
> [resent as v5]
> 
> [Both versions were tested, with identical test results.  Rather than
> delete my work, I leave it as an option in the source, in case it's
> needed later.  I want to have my cake and eat it too ;-)]
> 
> If the build itself is run in a container, we may not be able to
> fully set up a nested container for test-container testing.
> Notably is the mounting of /proc, since it's critical that it
> be mounted from within the same PID namespace as its users, and
> thus cannot be bind mounted from outside the container like other
> mounts.
> 
> This patch chooses to use the parent's PID namespace instead of
> creating a new one, as this is more likely to be allowed.
> 
> Code for using a separate PID namespace is included as well, in case
> in the future a test requires it.  In this configuration,
> test-container may not be able to mount /proc but will run the test
> anyway, since most containerized tests do not require /proc.  The few
> that do may predicate that, and support for such is also added.

I dislike what we're doing here, but I don't see better solution.

Let me lay out my thinking here:

(a) The implementation has runtime requirements on the host OS and we don't
    express those requirements anywhere on a per-interface definition e.g.
    the need for proc to compute an answer for pthread_getattr_np for
    ATTR_FLAG_STACKADDR. When creating tests we look to specifically test
    such paths, and so in the test we now encode, via support_need_proc()
    to say we expect proc mounted to satisfy the implementation requirement.
    If the impl requirements change then the test can be out of date and
    need updating.

(b) If we don't express the requirements at the test level then the test will
    just fail when proc is not mounted and developers can no longer reason well
    about what is expected to fail and what is not expected to fail. This is not
    a good solution to this problem and it is where we are today.

Your solution is (a), and the only better solutions I can imagine (which doesn't
require root) are as follows:

(1) On a per-impl level provide a specification of the host requirements.
    Have tests gather those requirements in a general way and use them to
    derive UNSUPPORTED behaviour. This is way too much work.

(2) Provide a synthetic /proc that does what we want based on the impl.
    This is also too much work for what we want to achieve.

In the end I think your solution is an acceptable middle ground:

* Markup the tests.
* Attempt to mount /proc in the container, and if we fail then using
  the test markup (API calls) we can mark tests as UNSUPPORTED.

Please review comments below.

Looking forward to a v6.
 
> diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c
> index f31f9956faa..e9e99d11e01 100644
> --- a/elf/tst-pldd.c
> +++ b/elf/tst-pldd.c
> @@ -85,6 +85,9 @@ in_str_list (const char *libname, const char *const strlist[])
>  static int
>  do_test (void)
>  {
> +  /* needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child */
> +  support_need_proc ();

OK.

> +
>    /* Check if our subprocess can be debugged with ptrace.  */
>    {
>      int ptrace_scope = support_ptrace_scope ();
> diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c
> index d2ebf308ae7..4a8bf1b4846 100644
> --- a/nptl/tst-pthread-getattr.c
> +++ b/nptl/tst-pthread-getattr.c
> @@ -28,6 +28,8 @@
>  #include <unistd.h>
>  #include <inttypes.h>
>  
> +#include <support/support.h>

OK.

> +
>  /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes
>     returned as unlimited when it is not, which may cause this test to fail.
>     There is also the other case where RLIMIT_STACK is intentionally set as
> @@ -153,6 +155,9 @@ check_stack_top (void)
>  static int
>  do_test (void)
>  {
> +  /* Reads /proc/self/maps to get stack size.  */
> +  support_need_proc ();

OK.

> +
>    pagesize = sysconf (_SC_PAGESIZE);
>    return check_stack_top ();
>  }
> diff --git a/nss/tst-reload2.c b/nss/tst-reload2.c
> index fb3b94a1fab..94e2029fd35 100644
> --- a/nss/tst-reload2.c
> +++ b/nss/tst-reload2.c
> @@ -95,6 +95,9 @@ do_test (void)
>    char buf1[PATH_MAX];
>    char buf2[PATH_MAX];
>  
> +  /* The xmkdirp below fails if we can't map our uid, which requires /proc.  */
> +  support_need_proc ();

OK.

> +
>    sprintf (buf1, "/subdir%s", support_slibdir_prefix);
>    xmkdirp (buf1, 0777);
>  
> diff --git a/support/Makefile b/support/Makefile
> index 5ddcb8d1581..f036a813048 100644
> --- a/support/Makefile
> +++ b/support/Makefile
> @@ -64,6 +64,7 @@ libsupport-routines = \
>    support_format_netent \
>    support_isolate_in_subprocess \
>    support_mutex_pi_monotonic \
> +  support_need_proc \

OK.

>    support_path_support_time64 \
>    support_process_state \
>    support_ptrace \
> diff --git a/support/support.h b/support/support.h
> index 73b9fc48f01..bcf7bc43723 100644
> --- a/support/support.h
> +++ b/support/support.h
> @@ -91,6 +91,10 @@ char *support_quote_string (const char *);
>     regular file open for writing, and initially empty.  */
>  int support_descriptor_supports_holes (int fd);
>  
> +/* Predicates that a test requires a working /proc filesystem.  This
> +   call will exit with UNSUPPORTED if /proc is not available.  */
> +void support_need_proc (void);

OK.

> +
>  /* Error-checking wrapper functions which terminate the process on
>     error.  */
>  
> diff --git a/support/support_need_proc.c b/support/support_need_proc.c
> new file mode 100644
> index 00000000000..5d94b25ba8b
> --- /dev/null
> +++ b/support/support_need_proc.c
> @@ -0,0 +1,33 @@
> +/* Indicate that a test requires a working /proc.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <unistd.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +
> +/* We test for /proc/self/maps since that's one of the files that one
> +   of our tests actually uses, but the general idea is if Linux's
> +   /proc/ (procfs) filesystem is mounted.  If not, the process exits
> +   with an UNSUPPORTED result code.  */
> +
> +void
> +support_need_proc (void)
> +{
> +  if (access ("/proc/self/maps", R_OK))
> +    FAIL_UNSUPPORTED ("/proc is not available");

This needs wrapping in #ifdef __linux__ for Hurd support.

Please see the various uses of this in support/*.

So on Hurd it should do nothing, but on Linux it should fail unsupported.

> +}
> diff --git a/support/test-container.c b/support/test-container.c
> index 25e7f142193..d574ee6cd89 100644
> --- a/support/test-container.c
> +++ b/support/test-container.c
> @@ -49,6 +49,20 @@
>  #include "check.h"
>  #include "test-driver.h"
>  
> +/* If set to 0, we bind mount the parent's /proc and re-use the
> +   paren't pids (i.e. test runs as the user).  If set to 1, we enter a
> +   new pid namespace and mount our own /proc (i.e. test runs as root).
> +   I was tired of swapping the code back and forth during testing, so
> +   you get both ;-) */
> +#define SEPARATE_PID_NS 1
> +#define SHARED_PID_NS (!SEPARATE_PID_NS)

This kind of conditional leads to bitrot on the side that is not enabled.

Please remove this and use a variable to ensure both paths are always compiled
and therefore we see warnings and error.

> +
> +#if SEPARATE_PID_NS
> +#define MAYBE_CLONE_NEWPID CLONE_NEWPID
> +#else
> +#define MAYBE_CLONE_NEWPID 0
> +#endif

Should be a variable also.

> +
>  #ifndef __linux__
>  #define mount(s,t,fs,f,d) no_mount()
>  int no_mount (void)
> @@ -231,7 +245,7 @@ concat (const char *str, ...)
>  static void
>  trymount (const char *src, const char *dest)
>  {
> -  if (mount (src, dest, "", MS_BIND, NULL) < 0)
> +  if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0)
>      FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest);
>  }
>  
> @@ -1068,7 +1082,7 @@ main (int argc, char **argv)
>  
>  #ifdef CLONE_NEWNS
>    /* The unshare here gives us our own spaces and capabilities.  */
> -  if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0)
> +  if (unshare (CLONE_NEWUSER | MAYBE_CLONE_NEWPID | CLONE_NEWNS) < 0)
>      {
>        /* Older kernels may not support all the options, or security
>  	 policy may block this call.  */
> @@ -1094,6 +1108,15 @@ main (int argc, char **argv)
>    trymount (support_srcdir_root, new_srcdir_path);
>    trymount (support_objdir_root, new_objdir_path);
>  
> +#if SHARED_PID_NS
> +  /* It may not be possible to mount /proc directly.  */
> +  {
> +    char *new_proc = concat (new_root_path, "/proc", NULL);
> +    xmkdirp (new_proc, 0755);
> +    trymount ("/proc", new_proc);
> +  }
> +#endif

Conditional on variable indicating shared pid ns.

> +
>    xmkdirp (concat (new_root_path, "/dev", NULL), 0755);
>    devmount (new_root_path, "null");
>    devmount (new_root_path, "zero");
> @@ -1163,42 +1186,57 @@ main (int argc, char **argv)
>  
>    maybe_xmkdir ("/tmp", 0755);
>  
> +#if SEPARATE_PID_NS

Likewise for separate pid ns.

>    /* Now that we're pid 1 (effectively "root") we can mount /proc  */
>    maybe_xmkdir ("/proc", 0777);
> -  if (mount ("proc", "/proc", "proc", 0, NULL) < 0)
> -    FAIL_EXIT1 ("Unable to mount /proc: ");
> -
> -  /* We map our original UID to the same UID in the container so we
> -     can own our own files normally.  */
> -  UMAP = open ("/proc/self/uid_map", O_WRONLY);
> -  if (UMAP < 0)
> -    FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
> -
> -  sprintf (tmp, "%lld %lld 1\n",
> -	   (long long) (be_su ? 0 : original_uid), (long long) original_uid);
> -  write (UMAP, tmp, strlen (tmp));
> -  xclose (UMAP);
> -
> -  /* We must disable setgroups () before we can map our groups, else we
> -     get EPERM.  */
> -  GMAP = open ("/proc/self/setgroups", O_WRONLY);
> -  if (GMAP >= 0)
> +  if (mount ("proc", "/proc", "proc", 0, NULL) != 0)
>      {
> -      /* We support kernels old enough to not have this.  */
> -      write (GMAP, "deny\n", 5);
> -      xclose (GMAP);
> +      // This happens if we're trying to create a nested container,
> +      // like if the build is running under podman, and we lack
> +      // priviledges.
> +
> +      // Ideally we would WARN here, but that would just add noise to
> +      // *every* test-container test, and the ones that care should
> +      // have their own relevent diagnostics.
> +
> +      // FAIL_EXIT1 ("Unable to mount /proc: ");

Please use C-style comments.

>      }
> +  else
> +    /* The shared pid namespace case will always run the following block... */
> +#endif
> +    {
> +      /* We map our original UID to the same UID in the container so we
> +	 can own our own files normally.  */
> +      UMAP = open ("/proc/self/uid_map", O_WRONLY);
> +      if (UMAP < 0)
> +	FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
> +
> +      sprintf (tmp, "%lld %lld 1\n",
> +	       (long long) (be_su ? 0 : original_uid), (long long) original_uid);
> +      write (UMAP, tmp, strlen (tmp));
> +      xclose (UMAP);
> +
> +      /* We must disable setgroups () before we can map our groups, else we
> +	 get EPERM.  */
> +      GMAP = open ("/proc/self/setgroups", O_WRONLY);
> +      if (GMAP >= 0)
> +	{
> +	  /* We support kernels old enough to not have this.  */
> +	  write (GMAP, "deny\n", 5);
> +	  xclose (GMAP);
> +	}
>  
> -  /* We map our original GID to the same GID in the container so we
> -     can own our own files normally.  */
> -  GMAP = open ("/proc/self/gid_map", O_WRONLY);
> -  if (GMAP < 0)
> -    FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
> +      /* We map our original GID to the same GID in the container so we
> +	 can own our own files normally.  */
> +      GMAP = open ("/proc/self/gid_map", O_WRONLY);
> +      if (GMAP < 0)
> +	FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
>  
> -  sprintf (tmp, "%lld %lld 1\n",
> -	   (long long) (be_su ? 0 : original_gid), (long long) original_gid);
> -  write (GMAP, tmp, strlen (tmp));
> -  xclose (GMAP);
> +      sprintf (tmp, "%lld %lld 1\n",
> +	       (long long) (be_su ? 0 : original_gid), (long long) original_gid);
> +      write (GMAP, tmp, strlen (tmp));
> +      xclose (GMAP);
> +    }
>  
>    if (change_cwd)
>      {
> 


-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch v6] Allow for unpriviledged nested containers
  2022-03-28 20:41 ` Carlos O'Donell
@ 2022-03-29  3:53   ` DJ Delorie
  2022-04-04 14:17     ` Carlos O'Donell
  0 siblings, 1 reply; 8+ messages in thread
From: DJ Delorie @ 2022-03-29  3:53 UTC (permalink / raw)
  To: libc-alpha


[changes since v5: whether or not to isolate the pid namespace is now a
test-specific "pidns" option, support_needs_proc() takes a "why" message
for future readers]

If the build itself is run in a container, we may not be able to
fully set up a nested container for test-container testing.
Notably is the mounting of /proc, since it's critical that it
be mounted from within the same PID namespace as its users, and
thus cannot be bind mounted from outside the container like other
mounts.

This patch defaults to using the parent's PID namespace instead of
creating a new one, as this is more likely to be allowed.

If the test needs an isolated PID namespace, it should add the "pidns"
command to its init script.

diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c
index f31f9956faa..378262504a5 100644
--- a/elf/tst-pldd.c
+++ b/elf/tst-pldd.c
@@ -85,6 +85,8 @@ in_str_list (const char *libname, const char *const strlist[])
 static int
 do_test (void)
 {
+  support_need_proc ("needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child");
+
   /* Check if our subprocess can be debugged with ptrace.  */
   {
     int ptrace_scope = support_ptrace_scope ();
diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c
index d2ebf308ae7..3f6f76ea83b 100644
--- a/nptl/tst-pthread-getattr.c
+++ b/nptl/tst-pthread-getattr.c
@@ -28,6 +28,8 @@
 #include <unistd.h>
 #include <inttypes.h>
 
+#include <support/support.h>
+
 /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes
    returned as unlimited when it is not, which may cause this test to fail.
    There is also the other case where RLIMIT_STACK is intentionally set as
@@ -153,6 +155,8 @@ check_stack_top (void)
 static int
 do_test (void)
 {
+  support_need_proc ("Reads /proc/self/maps to get stack size.");
+
   pagesize = sysconf (_SC_PAGESIZE);
   return check_stack_top ();
 }
diff --git a/nss/tst-reload2.c b/nss/tst-reload2.c
index fb3b94a1fab..7df0ca740b6 100644
--- a/nss/tst-reload2.c
+++ b/nss/tst-reload2.c
@@ -95,6 +95,8 @@ do_test (void)
   char buf1[PATH_MAX];
   char buf2[PATH_MAX];
 
+  support_need_proc ("Our xmkdirp fails if we can't map our uid, which requires /proc.");
+
   sprintf (buf1, "/subdir%s", support_slibdir_prefix);
   xmkdirp (buf1, 0777);
 
diff --git a/support/Makefile b/support/Makefile
index 5ddcb8d1581..f036a813048 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -64,6 +64,7 @@ libsupport-routines = \
   support_format_netent \
   support_isolate_in_subprocess \
   support_mutex_pi_monotonic \
+  support_need_proc \
   support_path_support_time64 \
   support_process_state \
   support_ptrace \
diff --git a/support/support.h b/support/support.h
index 73b9fc48f01..d20051da4d4 100644
--- a/support/support.h
+++ b/support/support.h
@@ -91,6 +91,11 @@ char *support_quote_string (const char *);
    regular file open for writing, and initially empty.  */
 int support_descriptor_supports_holes (int fd);
 
+/* Predicates that a test requires a working /proc filesystem.  This
+   call will exit with UNSUPPORTED if /proc is not available, printing
+   WHY_MSG as part of the diagnostic.  */
+void support_need_proc (const char *why_msg);
+
 /* Error-checking wrapper functions which terminate the process on
    error.  */
 
diff --git a/support/support_need_proc.c b/support/support_need_proc.c
new file mode 100644
index 00000000000..9b4eab7539b
--- /dev/null
+++ b/support/support_need_proc.c
@@ -0,0 +1,35 @@
+/* Indicate that a test requires a working /proc.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <unistd.h>
+#include <support/check.h>
+#include <support/support.h>
+
+/* We test for /proc/self/maps since that's one of the files that one
+   of our tests actually uses, but the general idea is if Linux's
+   /proc/ (procfs) filesystem is mounted.  If not, the process exits
+   with an UNSUPPORTED result code.  */
+
+void
+support_need_proc (const char *why_msg)
+{
+#ifdef __linux__
+  if (access ("/proc/self/maps", R_OK))
+    FAIL_UNSUPPORTED ("/proc is not available, %s", why_msg);
+#endif
+}
diff --git a/support/test-container.c b/support/test-container.c
index 25e7f142193..c837c4d7580 100644
--- a/support/test-container.c
+++ b/support/test-container.c
@@ -97,6 +97,7 @@ int verbose = 0;
    * mytest.root/mytest.script has a list of "commands" to run:
        syntax:
          # comment
+	 pidns <comment>
          su
          mv FILE FILE
 	 cp FILE FILE
@@ -122,6 +123,8 @@ int verbose = 0;
 
        details:
          - '#': A comment.
+	 - 'pidns': Require a separate PID namespace, prints comment if it can't
+	    (default is a shared pid namespace)
          - 'su': Enables running test as root in the container.
          - 'mv': A minimal move files command.
          - 'cp': A minimal copy files command.
@@ -148,7 +151,7 @@ int verbose = 0;
    * Simple, easy to review code (i.e. prefer simple naive code over
      complex efficient code)
 
-   * The current implementation ist parallel-make-safe, but only in
+   * The current implementation is parallel-make-safe, but only in
      that it uses a lock to prevent parallel access to the testroot.  */
 
 \f
@@ -227,11 +230,37 @@ concat (const char *str, ...)
   return bufs[n];
 }
 
+/* Like the above, but put spaces between words.  Caller frees.  */
+static char *
+concat_words (char **words, int num_words)
+{
+  int len = 0;
+  int i;
+  char *rv, *p;
+
+  for (i = 0; i < num_words; i ++)
+    {
+      len += strlen (words[i]);
+      len ++;
+    }
+
+  p = rv = (char *) xmalloc (len);
+
+  for (i = 0; i < num_words; i ++)
+    {
+      if (i > 0)
+	p = stpcpy (p, " ");
+      p = stpcpy (p, words[i]);
+    }
+
+  return rv;
+}
+
 /* Try to mount SRC onto DEST.  */
 static void
 trymount (const char *src, const char *dest)
 {
-  if (mount (src, dest, "", MS_BIND, NULL) < 0)
+  if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0)
     FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest);
 }
 
@@ -726,6 +755,9 @@ main (int argc, char **argv)
   gid_t original_gid;
   /* If set, the test runs as root instead of the user running the testsuite.  */
   int be_su = 0;
+  int require_pidns = 0;
+  const char *pidns_comment = NULL;
+  int do_proc_mounts = 0;
   int UMAP;
   int GMAP;
   /* Used for "%lld %lld 1" so need not be large.  */
@@ -1011,6 +1043,12 @@ main (int argc, char **argv)
 	      {
 		be_su = 1;
 	      }
+	    else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0)
+	      {
+		require_pidns = 1;
+		if (nt > 1)
+		  pidns_comment = concat_words (the_words + 1, nt - 1);
+	      }
 	    else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0)
 	      {
 		long int m;
@@ -1068,7 +1106,8 @@ main (int argc, char **argv)
 
 #ifdef CLONE_NEWNS
   /* The unshare here gives us our own spaces and capabilities.  */
-  if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0)
+  if (unshare (CLONE_NEWUSER | CLONE_NEWNS
+	       | (require_pidns ? CLONE_NEWPID : 0)) < 0)
     {
       /* Older kernels may not support all the options, or security
 	 policy may block this call.  */
@@ -1079,6 +1118,11 @@ main (int argc, char **argv)
 	    check_for_unshare_hints ();
 	  FAIL_UNSUPPORTED ("unable to unshare user/fs: %s", strerror (saved_errno));
 	}
+      /* We're about to exit anyway, it's "safe" to call unshare again
+	 just to see if the CLONE_NEWPID caused the error.  */
+      else if (require_pidns && unshare (CLONE_NEWUSER | CLONE_NEWNS) >= 0)
+	FAIL_EXIT1 ("unable to unshare pid ns: %s : %s", strerror (errno),
+		    pidns_comment ? pidns_comment : "required by test");
       else
 	FAIL_EXIT1 ("unable to unshare user/fs: %s", strerror (errno));
     }
@@ -1094,6 +1138,15 @@ main (int argc, char **argv)
   trymount (support_srcdir_root, new_srcdir_path);
   trymount (support_objdir_root, new_objdir_path);
 
+  /* It may not be possible to mount /proc directly.  */
+  if (! require_pidns)
+  {
+    char *new_proc = concat (new_root_path, "/proc", NULL);
+    xmkdirp (new_proc, 0755);
+    trymount ("/proc", new_proc);
+    do_proc_mounts = 1;
+  }
+
   xmkdirp (concat (new_root_path, "/dev", NULL), 0755);
   devmount (new_root_path, "null");
   devmount (new_root_path, "zero");
@@ -1163,42 +1216,60 @@ main (int argc, char **argv)
 
   maybe_xmkdir ("/tmp", 0755);
 
-  /* Now that we're pid 1 (effectively "root") we can mount /proc  */
-  maybe_xmkdir ("/proc", 0777);
-  if (mount ("proc", "/proc", "proc", 0, NULL) < 0)
-    FAIL_EXIT1 ("Unable to mount /proc: ");
-
-  /* We map our original UID to the same UID in the container so we
-     can own our own files normally.  */
-  UMAP = open ("/proc/self/uid_map", O_WRONLY);
-  if (UMAP < 0)
-    FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
-
-  sprintf (tmp, "%lld %lld 1\n",
-	   (long long) (be_su ? 0 : original_uid), (long long) original_uid);
-  write (UMAP, tmp, strlen (tmp));
-  xclose (UMAP);
-
-  /* We must disable setgroups () before we can map our groups, else we
-     get EPERM.  */
-  GMAP = open ("/proc/self/setgroups", O_WRONLY);
-  if (GMAP >= 0)
+  if (require_pidns)
     {
-      /* We support kernels old enough to not have this.  */
-      write (GMAP, "deny\n", 5);
-      xclose (GMAP);
+      /* Now that we're pid 1 (effectively "root") we can mount /proc  */
+      maybe_xmkdir ("/proc", 0777);
+      if (mount ("proc", "/proc", "proc", 0, NULL) != 0)
+	{
+	  /* This happens if we're trying to create a nested container,
+	     like if the build is running under podman, and we lack
+	     priviledges.
+
+	     Ideally we would WARN here, but that would just add noise to
+	     *every* test-container test, and the ones that care should
+	     have their own relevent diagnostics.
+
+	     FAIL_EXIT1 ("Unable to mount /proc: ");  */
+	}
+      else
+	do_proc_mounts = 1;
     }
 
-  /* We map our original GID to the same GID in the container so we
-     can own our own files normally.  */
-  GMAP = open ("/proc/self/gid_map", O_WRONLY);
-  if (GMAP < 0)
-    FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
+  if (do_proc_mounts)
+    {
+      /* We map our original UID to the same UID in the container so we
+	 can own our own files normally.  */
+      UMAP = open ("/proc/self/uid_map", O_WRONLY);
+      if (UMAP < 0)
+	FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
+
+      sprintf (tmp, "%lld %lld 1\n",
+	       (long long) (be_su ? 0 : original_uid), (long long) original_uid);
+      write (UMAP, tmp, strlen (tmp));
+      xclose (UMAP);
+
+      /* We must disable setgroups () before we can map our groups, else we
+	 get EPERM.  */
+      GMAP = open ("/proc/self/setgroups", O_WRONLY);
+      if (GMAP >= 0)
+	{
+	  /* We support kernels old enough to not have this.  */
+	  write (GMAP, "deny\n", 5);
+	  xclose (GMAP);
+	}
 
-  sprintf (tmp, "%lld %lld 1\n",
-	   (long long) (be_su ? 0 : original_gid), (long long) original_gid);
-  write (GMAP, tmp, strlen (tmp));
-  xclose (GMAP);
+      /* We map our original GID to the same GID in the container so we
+	 can own our own files normally.  */
+      GMAP = open ("/proc/self/gid_map", O_WRONLY);
+      if (GMAP < 0)
+	FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
+
+      sprintf (tmp, "%lld %lld 1\n",
+	       (long long) (be_su ? 0 : original_gid), (long long) original_gid);
+      write (GMAP, tmp, strlen (tmp));
+      xclose (GMAP);
+    }
 
   if (change_cwd)
     {


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch v6] Allow for unpriviledged nested containers
  2022-03-29  3:53   ` [patch v6] " DJ Delorie
@ 2022-04-04 14:17     ` Carlos O'Donell
  2022-04-04 18:34       ` DJ Delorie
  2022-04-05 15:57       ` Joseph Myers
  0 siblings, 2 replies; 8+ messages in thread
From: Carlos O'Donell @ 2022-04-04 14:17 UTC (permalink / raw)
  To: DJ Delorie, libc-alpha

On 3/28/22 23:53, DJ Delorie wrote:
> 
> [changes since v5: whether or not to isolate the pid namespace is now a
> test-specific "pidns" option, support_needs_proc() takes a "why" message
> for future readers]
> 
> If the build itself is run in a container, we may not be able to
> fully set up a nested container for test-container testing.
> Notably is the mounting of /proc, since it's critical that it
> be mounted from within the same PID namespace as its users, and
> thus cannot be bind mounted from outside the container like other
> mounts.
> 
> This patch defaults to using the parent's PID namespace instead of
> creating a new one, as this is more likely to be allowed.
> 
> If the test needs an isolated PID namespace, it should add the "pidns"
> command to its init script.

LGTM.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
 
> diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c
> index f31f9956faa..378262504a5 100644
> --- a/elf/tst-pldd.c
> +++ b/elf/tst-pldd.c
> @@ -85,6 +85,8 @@ in_str_list (const char *libname, const char *const strlist[])
>  static int
>  do_test (void)
>  {
> +  support_need_proc ("needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child");
> +

OK. Nice use of a message to print.

>    /* Check if our subprocess can be debugged with ptrace.  */
>    {
>      int ptrace_scope = support_ptrace_scope ();
> diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c
> index d2ebf308ae7..3f6f76ea83b 100644
> --- a/nptl/tst-pthread-getattr.c
> +++ b/nptl/tst-pthread-getattr.c
> @@ -28,6 +28,8 @@
>  #include <unistd.h>
>  #include <inttypes.h>
>  
> +#include <support/support.h>
> +
>  /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes
>     returned as unlimited when it is not, which may cause this test to fail.
>     There is also the other case where RLIMIT_STACK is intentionally set as
> @@ -153,6 +155,8 @@ check_stack_top (void)
>  static int
>  do_test (void)
>  {
> +  support_need_proc ("Reads /proc/self/maps to get stack size.");

OK.

> +
>    pagesize = sysconf (_SC_PAGESIZE);
>    return check_stack_top ();
>  }
> diff --git a/nss/tst-reload2.c b/nss/tst-reload2.c
> index fb3b94a1fab..7df0ca740b6 100644
> --- a/nss/tst-reload2.c
> +++ b/nss/tst-reload2.c
> @@ -95,6 +95,8 @@ do_test (void)
>    char buf1[PATH_MAX];
>    char buf2[PATH_MAX];
>  
> +  support_need_proc ("Our xmkdirp fails if we can't map our uid, which requires /proc.");
> +

OK.

>    sprintf (buf1, "/subdir%s", support_slibdir_prefix);
>    xmkdirp (buf1, 0777);
>  
> diff --git a/support/Makefile b/support/Makefile
> index 5ddcb8d1581..f036a813048 100644
> --- a/support/Makefile
> +++ b/support/Makefile
> @@ -64,6 +64,7 @@ libsupport-routines = \
>    support_format_netent \
>    support_isolate_in_subprocess \
>    support_mutex_pi_monotonic \
> +  support_need_proc \

OK.

>    support_path_support_time64 \
>    support_process_state \
>    support_ptrace \
> diff --git a/support/support.h b/support/support.h
> index 73b9fc48f01..d20051da4d4 100644
> --- a/support/support.h
> +++ b/support/support.h
> @@ -91,6 +91,11 @@ char *support_quote_string (const char *);
>     regular file open for writing, and initially empty.  */
>  int support_descriptor_supports_holes (int fd);
>  
> +/* Predicates that a test requires a working /proc filesystem.  This
> +   call will exit with UNSUPPORTED if /proc is not available, printing
> +   WHY_MSG as part of the diagnostic.  */
> +void support_need_proc (const char *why_msg);

OK.

> +
>  /* Error-checking wrapper functions which terminate the process on
>     error.  */
>  
> diff --git a/support/support_need_proc.c b/support/support_need_proc.c
> new file mode 100644
> index 00000000000..9b4eab7539b
> --- /dev/null
> +++ b/support/support_need_proc.c
> @@ -0,0 +1,35 @@
> +/* Indicate that a test requires a working /proc.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <unistd.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +
> +/* We test for /proc/self/maps since that's one of the files that one
> +   of our tests actually uses, but the general idea is if Linux's
> +   /proc/ (procfs) filesystem is mounted.  If not, the process exits
> +   with an UNSUPPORTED result code.  */
> +
> +void
> +support_need_proc (const char *why_msg)
> +{
> +#ifdef __linux__
> +  if (access ("/proc/self/maps", R_OK))
> +    FAIL_UNSUPPORTED ("/proc is not available, %s", why_msg);
> +#endif

OK. Only does anything on linux (fixed from v5).

> +}
> diff --git a/support/test-container.c b/support/test-container.c
> index 25e7f142193..c837c4d7580 100644
> --- a/support/test-container.c
> +++ b/support/test-container.c
> @@ -97,6 +97,7 @@ int verbose = 0;
>     * mytest.root/mytest.script has a list of "commands" to run:
>         syntax:
>           # comment
> +	 pidns <comment>

Perfect.

This is a great way to avoid bitrot of this code while still making it functional.

>           su
>           mv FILE FILE
>  	 cp FILE FILE
> @@ -122,6 +123,8 @@ int verbose = 0;
>  
>         details:
>           - '#': A comment.
> +	 - 'pidns': Require a separate PID namespace, prints comment if it can't
> +	    (default is a shared pid namespace)

OK.

>           - 'su': Enables running test as root in the container.
>           - 'mv': A minimal move files command.
>           - 'cp': A minimal copy files command.
> @@ -148,7 +151,7 @@ int verbose = 0;
>     * Simple, easy to review code (i.e. prefer simple naive code over
>       complex efficient code)
>  
> -   * The current implementation ist parallel-make-safe, but only in
> +   * The current implementation is parallel-make-safe, but only in
>       that it uses a lock to prevent parallel access to the testroot.  */
>  
>  \f
> @@ -227,11 +230,37 @@ concat (const char *str, ...)
>    return bufs[n];
>  }
>  
> +/* Like the above, but put spaces between words.  Caller frees.  */
> +static char *
> +concat_words (char **words, int num_words)
> +{
> +  int len = 0;
> +  int i;
> +  char *rv, *p;
> +
> +  for (i = 0; i < num_words; i ++)
> +    {
> +      len += strlen (words[i]);
> +      len ++;
> +    }
> +
> +  p = rv = (char *) xmalloc (len);
> +
> +  for (i = 0; i < num_words; i ++)
> +    {
> +      if (i > 0)
> +	p = stpcpy (p, " ");
> +      p = stpcpy (p, words[i]);
> +    }
> +
> +  return rv;
> +}
> +
>  /* Try to mount SRC onto DEST.  */
>  static void
>  trymount (const char *src, const char *dest)
>  {
> -  if (mount (src, dest, "", MS_BIND, NULL) < 0)
> +  if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0)
>      FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest);
>  }
>  
> @@ -726,6 +755,9 @@ main (int argc, char **argv)
>    gid_t original_gid;
>    /* If set, the test runs as root instead of the user running the testsuite.  */
>    int be_su = 0;
> +  int require_pidns = 0;
> +  const char *pidns_comment = NULL;
> +  int do_proc_mounts = 0;
>    int UMAP;
>    int GMAP;
>    /* Used for "%lld %lld 1" so need not be large.  */
> @@ -1011,6 +1043,12 @@ main (int argc, char **argv)
>  	      {
>  		be_su = 1;
>  	      }
> +	    else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0)
> +	      {
> +		require_pidns = 1;
> +		if (nt > 1)
> +		  pidns_comment = concat_words (the_words + 1, nt - 1);

OK.

> +	      }
>  	    else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0)
>  	      {
>  		long int m;
> @@ -1068,7 +1106,8 @@ main (int argc, char **argv)
>  
>  #ifdef CLONE_NEWNS
>    /* The unshare here gives us our own spaces and capabilities.  */
> -  if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0)
> +  if (unshare (CLONE_NEWUSER | CLONE_NEWNS
> +	       | (require_pidns ? CLONE_NEWPID : 0)) < 0)
>      {
>        /* Older kernels may not support all the options, or security
>  	 policy may block this call.  */
> @@ -1079,6 +1118,11 @@ main (int argc, char **argv)
>  	    check_for_unshare_hints ();
>  	  FAIL_UNSUPPORTED ("unable to unshare user/fs: %s", strerror (saved_errno));
>  	}
> +      /* We're about to exit anyway, it's "safe" to call unshare again
> +	 just to see if the CLONE_NEWPID caused the error.  */
> +      else if (require_pidns && unshare (CLONE_NEWUSER | CLONE_NEWNS) >= 0)
> +	FAIL_EXIT1 ("unable to unshare pid ns: %s : %s", strerror (errno),
> +		    pidns_comment ? pidns_comment : "required by test");
>        else
>  	FAIL_EXIT1 ("unable to unshare user/fs: %s", strerror (errno));
>      }
> @@ -1094,6 +1138,15 @@ main (int argc, char **argv)
>    trymount (support_srcdir_root, new_srcdir_path);
>    trymount (support_objdir_root, new_objdir_path);
>  
> +  /* It may not be possible to mount /proc directly.  */
> +  if (! require_pidns)
> +  {
> +    char *new_proc = concat (new_root_path, "/proc", NULL);
> +    xmkdirp (new_proc, 0755);
> +    trymount ("/proc", new_proc);
> +    do_proc_mounts = 1;
> +  }
> +
>    xmkdirp (concat (new_root_path, "/dev", NULL), 0755);
>    devmount (new_root_path, "null");
>    devmount (new_root_path, "zero");
> @@ -1163,42 +1216,60 @@ main (int argc, char **argv)
>  
>    maybe_xmkdir ("/tmp", 0755);
>  
> -  /* Now that we're pid 1 (effectively "root") we can mount /proc  */
> -  maybe_xmkdir ("/proc", 0777);
> -  if (mount ("proc", "/proc", "proc", 0, NULL) < 0)
> -    FAIL_EXIT1 ("Unable to mount /proc: ");
> -
> -  /* We map our original UID to the same UID in the container so we
> -     can own our own files normally.  */
> -  UMAP = open ("/proc/self/uid_map", O_WRONLY);
> -  if (UMAP < 0)
> -    FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
> -
> -  sprintf (tmp, "%lld %lld 1\n",
> -	   (long long) (be_su ? 0 : original_uid), (long long) original_uid);
> -  write (UMAP, tmp, strlen (tmp));
> -  xclose (UMAP);
> -
> -  /* We must disable setgroups () before we can map our groups, else we
> -     get EPERM.  */
> -  GMAP = open ("/proc/self/setgroups", O_WRONLY);
> -  if (GMAP >= 0)
> +  if (require_pidns)
>      {
> -      /* We support kernels old enough to not have this.  */
> -      write (GMAP, "deny\n", 5);
> -      xclose (GMAP);
> +      /* Now that we're pid 1 (effectively "root") we can mount /proc  */
> +      maybe_xmkdir ("/proc", 0777);
> +      if (mount ("proc", "/proc", "proc", 0, NULL) != 0)
> +	{
> +	  /* This happens if we're trying to create a nested container,
> +	     like if the build is running under podman, and we lack
> +	     priviledges.
> +
> +	     Ideally we would WARN here, but that would just add noise to
> +	     *every* test-container test, and the ones that care should
> +	     have their own relevent diagnostics.
> +
> +	     FAIL_EXIT1 ("Unable to mount /proc: ");  */
> +	}
> +      else
> +	do_proc_mounts = 1;
>      }
>  
> -  /* We map our original GID to the same GID in the container so we
> -     can own our own files normally.  */
> -  GMAP = open ("/proc/self/gid_map", O_WRONLY);
> -  if (GMAP < 0)
> -    FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
> +  if (do_proc_mounts)
> +    {
> +      /* We map our original UID to the same UID in the container so we
> +	 can own our own files normally.  */
> +      UMAP = open ("/proc/self/uid_map", O_WRONLY);
> +      if (UMAP < 0)
> +	FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
> +
> +      sprintf (tmp, "%lld %lld 1\n",
> +	       (long long) (be_su ? 0 : original_uid), (long long) original_uid);
> +      write (UMAP, tmp, strlen (tmp));
> +      xclose (UMAP);
> +
> +      /* We must disable setgroups () before we can map our groups, else we
> +	 get EPERM.  */
> +      GMAP = open ("/proc/self/setgroups", O_WRONLY);
> +      if (GMAP >= 0)
> +	{
> +	  /* We support kernels old enough to not have this.  */
> +	  write (GMAP, "deny\n", 5);
> +	  xclose (GMAP);
> +	}
>  
> -  sprintf (tmp, "%lld %lld 1\n",
> -	   (long long) (be_su ? 0 : original_gid), (long long) original_gid);
> -  write (GMAP, tmp, strlen (tmp));
> -  xclose (GMAP);
> +      /* We map our original GID to the same GID in the container so we
> +	 can own our own files normally.  */
> +      GMAP = open ("/proc/self/gid_map", O_WRONLY);
> +      if (GMAP < 0)
> +	FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
> +
> +      sprintf (tmp, "%lld %lld 1\n",
> +	       (long long) (be_su ? 0 : original_gid), (long long) original_gid);
> +      write (GMAP, tmp, strlen (tmp));
> +      xclose (GMAP);
> +    }
>  
>    if (change_cwd)
>      {
> 


-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch v6] Allow for unpriviledged nested containers
  2022-04-04 14:17     ` Carlos O'Donell
@ 2022-04-04 18:34       ` DJ Delorie
  2022-04-05 15:57       ` Joseph Myers
  1 sibling, 0 replies; 8+ messages in thread
From: DJ Delorie @ 2022-04-04 18:34 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: libc-alpha


Thanks!  Pushed.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch v6] Allow for unpriviledged nested containers
  2022-04-04 14:17     ` Carlos O'Donell
  2022-04-04 18:34       ` DJ Delorie
@ 2022-04-05 15:57       ` Joseph Myers
  2022-04-05 18:20         ` DJ Delorie
  1 sibling, 1 reply; 8+ messages in thread
From: Joseph Myers @ 2022-04-05 15:57 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: DJ Delorie, libc-alpha

On Mon, 4 Apr 2022, Carlos O'Donell via Libc-alpha wrote:

> > @@ -726,6 +755,9 @@ main (int argc, char **argv)
> >    gid_t original_gid;
> >    /* If set, the test runs as root instead of the user running the testsuite.  */
> >    int be_su = 0;
> > +  int require_pidns = 0;
> > +  const char *pidns_comment = NULL;
> > +  int do_proc_mounts = 0;

I'm seeing build failures for i686-gnu:

test-container.c: In function 'main':
test-container.c:759:15: error: variable 'pidns_comment' set but not used [-Werror=unused-but-set-variable]
  759 |   const char *pidns_comment = NULL;
      |               ^~~~~~~~~~~~~
cc1: all warnings being treated as errors

https://sourceware.org/pipermail/libc-testresults/2022q2/009529.html

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch v6] Allow for unpriviledged nested containers
  2022-04-05 15:57       ` Joseph Myers
@ 2022-04-05 18:20         ` DJ Delorie
  2022-04-11 14:33           ` Florian Weimer
  0 siblings, 1 reply; 8+ messages in thread
From: DJ Delorie @ 2022-04-05 18:20 UTC (permalink / raw)
  To: Joseph Myers; +Cc: carlos, libc-alpha


Maybe we need a Hurd trybot?  ;-)

I see two options: #ifdef the variable, or mark it attribute-unused.
Preferences?

diff --git a/support/test-container.c b/support/test-container.c
index c837c4d7580..79d0e052d1c 100644
--- a/support/test-container.c
+++ b/support/test-container.c
@@ -756,7 +756,9 @@ main (int argc, char **argv)
   /* If set, the test runs as root instead of the user running the testsuite.  */
   int be_su = 0;
   int require_pidns = 0;
+#ifdef CLONE_NEWNS
   const char *pidns_comment = NULL;
+#endif
   int do_proc_mounts = 0;
   int UMAP;
   int GMAP;
@@ -1046,8 +1048,10 @@ main (int argc, char **argv)
 	    else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0)
 	      {
 		require_pidns = 1;
+#ifdef CLONE_NEWNS
 		if (nt > 1)
 		  pidns_comment = concat_words (the_words + 1, nt - 1);
+#endif
 	      }
 	    else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0)
 	      {


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch v6] Allow for unpriviledged nested containers
  2022-04-05 18:20         ` DJ Delorie
@ 2022-04-11 14:33           ` Florian Weimer
  0 siblings, 0 replies; 8+ messages in thread
From: Florian Weimer @ 2022-04-11 14:33 UTC (permalink / raw)
  To: DJ Delorie via Libc-alpha; +Cc: Joseph Myers, DJ Delorie

* DJ Delorie via Libc-alpha:

> Maybe we need a Hurd trybot?  ;-)
>
> I see two options: #ifdef the variable, or mark it attribute-unused.
> Preferences?
>
> diff --git a/support/test-container.c b/support/test-container.c
> index c837c4d7580..79d0e052d1c 100644
> --- a/support/test-container.c
> +++ b/support/test-container.c
> @@ -756,7 +756,9 @@ main (int argc, char **argv)
>    /* If set, the test runs as root instead of the user running the testsuite.  */
>    int be_su = 0;
>    int require_pidns = 0;
> +#ifdef CLONE_NEWNS
>    const char *pidns_comment = NULL;
> +#endif
>    int do_proc_mounts = 0;
>    int UMAP;
>    int GMAP;
> @@ -1046,8 +1048,10 @@ main (int argc, char **argv)
>  	    else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0)
>  	      {
>  		require_pidns = 1;
> +#ifdef CLONE_NEWNS
>  		if (nt > 1)
>  		  pidns_comment = concat_words (the_words + 1, nt - 1);
> +#endif
>  	      }
>  	    else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0)
>  	      {

This also needs:

diff --git a/support/test-container.c b/support/test-container.c
index 79d0e052d1..9dde784bf0 100644
--- a/support/test-container.c
+++ b/support/test-container.c
@@ -231,7 +231,7 @@ concat (const char *str, ...)
 }
 
 /* Like the above, but put spaces between words.  Caller frees.  */
-static char *
+static inline char *
 concat_words (char **words, int num_words)
 {
   int len = 0;

Or something like it.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-04-11 14:33 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-11  3:38 [patch v5] Allow for unpriviledged nested containers DJ Delorie
2022-03-28 20:41 ` Carlos O'Donell
2022-03-29  3:53   ` [patch v6] " DJ Delorie
2022-04-04 14:17     ` Carlos O'Donell
2022-04-04 18:34       ` DJ Delorie
2022-04-05 15:57       ` Joseph Myers
2022-04-05 18:20         ` DJ Delorie
2022-04-11 14:33           ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).