* [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) @ 2020-08-10 20:48 Adhemerval Zanella 2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella ` (3 more replies) 0 siblings, 4 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-10 20:48 UTC (permalink / raw) To: libc-alpha; +Cc: Xiaoming Ni, Paul Eggert It uses both a fixed internal buffer with PATH_MAX size to read and copy the results of the readlink call. Also, if PATH_MAX is not defined it uses a default value of 1024 as for other stdlib implementations. The expected stack usage is about 8k on Linux where PATH_MAX is define as 4096 (plus some internal function usage for local variable). Checked on x86_64-linux-gnu and i686-linux-gnu. --- stdlib/Makefile | 3 +- stdlib/canonicalize.c | 38 +++--- stdlib/tst-canon-bz26341.c | 108 ++++++++++++++++++ support/support_set_small_thread_stack_size.c | 12 +- support/xthread.h | 2 + 5 files changed, 138 insertions(+), 25 deletions(-) create mode 100644 stdlib/tst-canon-bz26341.c diff --git a/stdlib/Makefile b/stdlib/Makefile index 4615f6dfe7..7093b8a584 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ tst-makecontext-align test-bz22786 tst-strtod-nan-sign \ tst-swapcontext1 tst-setcontext4 tst-setcontext5 \ tst-setcontext6 tst-setcontext7 tst-setcontext8 \ - tst-setcontext9 tst-bz20544 + tst-setcontext9 tst-bz20544 tst-canon-bz26341 tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ tst-tls-atexit tst-tls-atexit-nodelete @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library) LDLIBS-test-at_quick_exit-race = $(shared-thread-library) LDLIBS-test-cxa_atexit-race = $(shared-thread-library) LDLIBS-test-on_exit-race = $(shared-thread-library) +LDLIBS-tst-canon-bz26341 = $(shared-thread-library) LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl) LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic) diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c index cbd885a3c5..554ba221e4 100644 --- a/stdlib/canonicalize.c +++ b/stdlib/canonicalize.c @@ -28,6 +28,14 @@ #include <eloop-threshold.h> #include <shlib-compat.h> +#ifndef PATH_MAX +# ifdef MAXPATHLEN +# define PATH_MAX MAXPATHLEN +# else +# define PATH_MAX 1024 +# endif +#endif + /* Return the canonical absolute name of file NAME. A canonical name does not contain any `.', `..' components nor any repeated path separators ('/') or symlinks. All path components must exist. If @@ -42,9 +50,8 @@ char * __realpath (const char *name, char *resolved) { - char *rpath, *dest, *extra_buf = NULL; + char *rpath, *dest, extra_buf[PATH_MAX]; const char *start, *end, *rpath_limit; - long int path_max; int num_links = 0; if (name == NULL) @@ -65,27 +72,19 @@ __realpath (const char *name, char *resolved) return NULL; } -#ifdef PATH_MAX - path_max = PATH_MAX; -#else - path_max = __pathconf (name, _PC_PATH_MAX); - if (path_max <= 0) - path_max = 1024; -#endif - if (resolved == NULL) { - rpath = malloc (path_max); + rpath = malloc (PATH_MAX); if (rpath == NULL) return NULL; } else rpath = resolved; - rpath_limit = rpath + path_max; + rpath_limit = rpath + PATH_MAX; if (name[0] != '/') { - if (!__getcwd (rpath, path_max)) + if (!__getcwd (rpath, PATH_MAX)) { rpath[0] = '\0'; goto error; @@ -142,10 +141,10 @@ __realpath (const char *name, char *resolved) goto error; } new_size = rpath_limit - rpath; - if (end - start + 1 > path_max) + if (end - start + 1 > PATH_MAX) new_size += end - start + 1; else - new_size += path_max; + new_size += PATH_MAX; new_rpath = (char *) realloc (rpath, new_size); if (new_rpath == NULL) goto error; @@ -163,7 +162,7 @@ __realpath (const char *name, char *resolved) if (S_ISLNK (st.st_mode)) { - char *buf = __alloca (path_max); + char buf[PATH_MAX]; size_t len; if (++num_links > __eloop_threshold ()) @@ -172,16 +171,13 @@ __realpath (const char *name, char *resolved) goto error; } - n = __readlink (rpath, buf, path_max - 1); + n = __readlink (rpath, buf, sizeof (buf) - 1); if (n < 0) goto error; buf[n] = '\0'; - if (!extra_buf) - extra_buf = __alloca (path_max); - len = strlen (end); - if (path_max - n <= len) + if (PATH_MAX - n <= len) { __set_errno (ENAMETOOLONG); goto error; diff --git a/stdlib/tst-canon-bz26341.c b/stdlib/tst-canon-bz26341.c new file mode 100644 index 0000000000..63474bddaa --- /dev/null +++ b/stdlib/tst-canon-bz26341.c @@ -0,0 +1,108 @@ +/* Check if realpath does not consume extra stack space based on symlink + existance in the path (BZ #26341) + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> + +#include <support/check.h> +#include <support/support.h> +#include <support/temp_file.h> +#include <support/xunistd.h> +#include <support/xthread.h> + +static char *filename; +static size_t filenamelen; +static char *linkname; + +static int +maxsymlinks (void) +{ +#ifdef MAXSYMLINKS + return MAXSYMLINKS; +#else + long int sysconf_symloop_max = sysconf (_SC_SYMLOOP_MAX); + return sysconf_symloop_max <= 0 + ? _POSIX_SYMLOOP_MAX + : sysconf_symloop_max; +#endif +} + +#ifndef PATH_MAX +# define PATH_MAX 1024 +#endif + +static void +create_link (void) +{ + int fd = create_temp_file ("tst-canon-bz26341", &filename); + TEST_VERIFY_EXIT (fd != -1); + xclose (fd); + + char *prevlink = filename; + int maxlinks = maxsymlinks (); + for (int i = 0; i < maxlinks; i++) + { + linkname = xasprintf ("%s%d", filename, i); + xsymlink (prevlink, linkname); + add_temp_file (linkname); + prevlink = linkname; + } + + filenamelen = strlen (filename); +} + +static void * +do_realpath (void *arg) +{ + /* Old implementation of realpath allocates a PATH_MAX using alloca + for each symlink in the path, leading to MAXSYMLINKS times PATH_MAX + maximum stack usage. + This stack allocations tries fill the thread allocated stack minus + both the thread (plus some slack) and the realpath (plus some slack). + If realpath uses more than 2 * PATH_MAX plus some slack it will trigger + a stackoverflow. */ + + const size_t realpath_usage = 2 * PATH_MAX + 1024; + const size_t thread_usage = 1 * PATH_MAX + 1024; + size_t stack_size = support_small_thread_stack_size () + - realpath_usage - thread_usage; + char stack[stack_size]; + char *resolved = stack + stack_size - thread_usage + 1024; + + char *p = realpath (linkname, resolved); + TEST_VERIFY (p != NULL); + TEST_COMPARE_BLOB (resolved, filenamelen, filename, filenamelen); + + return NULL; +} + +static int +do_test (void) +{ + create_link (); + + pthread_t th = xpthread_create (support_small_stack_thread_attribute (), + do_realpath, NULL); + xpthread_join (th); + + return 0; +} + +#include <support/test-driver.c> diff --git a/support/support_set_small_thread_stack_size.c b/support/support_set_small_thread_stack_size.c index 69d66e97db..74a0e38a72 100644 --- a/support/support_set_small_thread_stack_size.c +++ b/support/support_set_small_thread_stack_size.c @@ -20,8 +20,8 @@ #include <pthread.h> #include <support/xthread.h> -void -support_set_small_thread_stack_size (pthread_attr_t *attr) +size_t +support_small_thread_stack_size (void) { /* Some architectures have too small values for PTHREAD_STACK_MIN which cannot be used for creating threads. Ensure that the stack @@ -31,5 +31,11 @@ support_set_small_thread_stack_size (pthread_attr_t *attr) if (stack_size < PTHREAD_STACK_MIN) stack_size = PTHREAD_STACK_MIN; #endif - xpthread_attr_setstacksize (attr, stack_size); + return stack_size; +} + +void +support_set_small_thread_stack_size (pthread_attr_t *attr) +{ + xpthread_attr_setstacksize (attr, support_small_thread_stack_size ()); } diff --git a/support/xthread.h b/support/xthread.h index 05f8d4a7d9..6ba2f5a18b 100644 --- a/support/xthread.h +++ b/support/xthread.h @@ -78,6 +78,8 @@ void xpthread_attr_setguardsize (pthread_attr_t *attr, /* Set the stack size in ATTR to a small value, but still large enough to cover most internal glibc stack usage. */ void support_set_small_thread_stack_size (pthread_attr_t *attr); +/* Return the stack size used on support_set_small_thread_stack_size. */ +size_t support_small_thread_stack_size (void); /* Return a pointer to a thread attribute which requests a small stack. The caller must not free this pointer. */ -- 2.25.1 ^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella @ 2020-08-10 20:48 ` Adhemerval Zanella 2020-08-11 8:26 ` Florian Weimer 2020-08-11 9:48 ` Andreas Schwab 2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella ` (2 subsequent siblings) 3 siblings, 2 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-10 20:48 UTC (permalink / raw) To: libc-alpha; +Cc: Xiaoming Ni, Paul Eggert It removes the buffer resize for sizes larger than PATH_MAX in the case where the 'resolved' buffer is not specified. This allow assume realpath limit is PATH_MAX for all cases. Checked on x86_64-linux-gnu and i686-linux-gnu. --- stdlib/canonicalize.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c index 554ba221e4..91c30e38be 100644 --- a/stdlib/canonicalize.c +++ b/stdlib/canonicalize.c @@ -122,36 +122,16 @@ __realpath (const char *name, char *resolved) } else { - size_t new_size; - if (dest[-1] != '/') *dest++ = '/'; if (dest + (end - start) >= rpath_limit) { - ptrdiff_t dest_offset = dest - rpath; - char *new_rpath; - - if (resolved) - { - __set_errno (ENAMETOOLONG); - if (dest > rpath + 1) - dest--; - *dest = '\0'; - goto error; - } - new_size = rpath_limit - rpath; - if (end - start + 1 > PATH_MAX) - new_size += end - start + 1; - else - new_size += PATH_MAX; - new_rpath = (char *) realloc (rpath, new_size); - if (new_rpath == NULL) - goto error; - rpath = new_rpath; - rpath_limit = rpath + new_size; - - dest = rpath + dest_offset; + __set_errno (ENAMETOOLONG); + if (dest > rpath + 1) + dest--; + *dest = '\0'; + goto error; } dest = __mempcpy (dest, start, end - start); -- 2.25.1 ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella @ 2020-08-11 8:26 ` Florian Weimer 2020-08-11 9:54 ` Andreas Schwab 2020-08-11 9:48 ` Andreas Schwab 1 sibling, 1 reply; 26+ messages in thread From: Florian Weimer @ 2020-08-11 8:26 UTC (permalink / raw) To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Xiaoming Ni * Adhemerval Zanella via Libc-alpha: > It removes the buffer resize for sizes larger than PATH_MAX in the > case where the 'resolved' buffer is not specified. This allow assume > realpath limit is PATH_MAX for all cases. Is that actually true, in the sense that the full path cannot be longer than PATH_MAX? I don't think Linux has such a restriction. One cannot open such files directly, but they can exist. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-11 8:26 ` Florian Weimer @ 2020-08-11 9:54 ` Andreas Schwab 2020-08-11 10:24 ` Florian Weimer 0 siblings, 1 reply; 26+ messages in thread From: Andreas Schwab @ 2020-08-11 9:54 UTC (permalink / raw) To: Florian Weimer; +Cc: Adhemerval Zanella via Libc-alpha, Xiaoming Ni On Aug 11 2020, Florian Weimer wrote: > I don't think Linux has such a restriction. One cannot open such > files directly, but they can exist. You can open them with a relative name as long as cwd is nearer than PATH_MAX (or use openat with such a directory handle). Andreas. -- Andreas Schwab, schwab@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different." ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-11 9:54 ` Andreas Schwab @ 2020-08-11 10:24 ` Florian Weimer 2020-08-11 15:05 ` Adhemerval Zanella 0 siblings, 1 reply; 26+ messages in thread From: Florian Weimer @ 2020-08-11 10:24 UTC (permalink / raw) To: Andreas Schwab; +Cc: Adhemerval Zanella via Libc-alpha, Xiaoming Ni * Andreas Schwab: > On Aug 11 2020, Florian Weimer wrote: > >> I don't think Linux has such a restriction. One cannot open such >> files directly, but they can exist. > > You can open them with a relative name as long as cwd is nearer than > PATH_MAX (or use openat with such a directory handle). Yes, that's what I meant: one cannot use the full path directly. It has to be split up in some way. I still think realpath should report the full path for them, and not give up with an error. Similar for getcwd. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-11 10:24 ` Florian Weimer @ 2020-08-11 15:05 ` Adhemerval Zanella 2020-08-11 15:37 ` Paul Eggert 2020-08-11 18:29 ` Florian Weimer 0 siblings, 2 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-11 15:05 UTC (permalink / raw) To: libc-alpha On 11/08/2020 07:24, Florian Weimer wrote: > * Andreas Schwab: > >> On Aug 11 2020, Florian Weimer wrote: >> >>> I don't think Linux has such a restriction. One cannot open such >>> files directly, but they can exist. >> >> You can open them with a relative name as long as cwd is nearer than >> PATH_MAX (or use openat with such a directory handle). > > Yes, that's what I meant: one cannot use the full path directly. It > has to be split up in some way. > > I still think realpath should report the full path for them, and not > give up with an error. Similar for getcwd. > The only issue I have with this approach is realpath has different semantic regarding maximum pathname returned whether you pass a 'resolved' buffer (which assume PATH_MAX). ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-11 15:05 ` Adhemerval Zanella @ 2020-08-11 15:37 ` Paul Eggert 2020-08-11 18:29 ` Florian Weimer 1 sibling, 0 replies; 26+ messages in thread From: Paul Eggert @ 2020-08-11 15:37 UTC (permalink / raw) To: libc-alpha On 8/11/20 8:05 AM, Adhemerval Zanella via Libc-alpha wrote: > The only issue I have with this approach is realpath has different semantic > regarding maximum pathname returned whether you pass a 'resolved' buffer > (which assume PATH_MAX). If realpath can successfully return a file name longer than PATH_MAX now, that suggests that it should continue to do so. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-11 15:05 ` Adhemerval Zanella 2020-08-11 15:37 ` Paul Eggert @ 2020-08-11 18:29 ` Florian Weimer 1 sibling, 0 replies; 26+ messages in thread From: Florian Weimer @ 2020-08-11 18:29 UTC (permalink / raw) To: Adhemerval Zanella via Libc-alpha * Adhemerval Zanella via Libc-alpha: > On 11/08/2020 07:24, Florian Weimer wrote: >> * Andreas Schwab: >> >>> On Aug 11 2020, Florian Weimer wrote: >>> >>>> I don't think Linux has such a restriction. One cannot open such >>>> files directly, but they can exist. >>> >>> You can open them with a relative name as long as cwd is nearer than >>> PATH_MAX (or use openat with such a directory handle). >> >> Yes, that's what I meant: one cannot use the full path directly. It >> has to be split up in some way. >> >> I still think realpath should report the full path for them, and not >> give up with an error. Similar for getcwd. >> > > The only issue I have with this approach is realpath has different semantic > regarding maximum pathname returned whether you pass a 'resolved' buffer > (which assume PATH_MAX). This is just our attempt to avoid the inherent buffer overflows in this interface. We had to do something similar for readdir_r. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer 2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella 2020-08-11 8:26 ` Florian Weimer @ 2020-08-11 9:48 ` Andreas Schwab 1 sibling, 0 replies; 26+ messages in thread From: Andreas Schwab @ 2020-08-11 9:48 UTC (permalink / raw) To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Xiaoming Ni On Aug 10 2020, Adhemerval Zanella via Libc-alpha wrote: > It removes the buffer resize for sizes larger than PATH_MAX in the > case where the 'resolved' buffer is not specified. This allow assume > realpath limit is PATH_MAX for all cases. "allows to assume" or "assumes". Andreas. -- Andreas Schwab, schwab@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different." ^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella 2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella @ 2020-08-10 20:48 ` Adhemerval Zanella 2020-08-10 21:25 ` Paul Eggert 2020-08-11 9:46 ` Andreas Schwab 2020-08-11 0:32 ` [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Matt Turner 2020-08-11 3:00 ` Xiaoming Ni 3 siblings, 2 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-10 20:48 UTC (permalink / raw) To: libc-alpha; +Cc: Xiaoming Ni, Paul Eggert This optimizes the stack usage for success case (from ~8K to ~4k) and where 'resolved' input buffer is not provided. For ithe failure case when the 'resolved' buffer is provided, it requires use the generic strategy to find the path when EACESS or ENOENT is returned (this is a GNU extension not defined in the standard). Regarding syscalls usage, for a sucessful path without symlinks it trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat, and close). Its is slighter better if the input contains multiple symlinks (where Linux kernel tricks allows replace multiple lstats by only one readlink). For failure it depends whether the 'resolved' buffer is provided, which will call the old strategy (and thus requiring more syscalls in general). Checked on x86_64-linux-gnu and i686-linux-gnu. --- include/stdlib.h | 13 +++ stdlib/Makefile | 2 +- stdlib/canonicalize-internal.c | 136 ++++++++++++++++++++++++++++ stdlib/canonicalize.c | 141 +---------------------------- stdlib/realpath.c | 42 +++++++++ sysdeps/unix/sysv/linux/realpath.c | 65 +++++++++++++ 6 files changed, 258 insertions(+), 141 deletions(-) create mode 100644 stdlib/canonicalize-internal.c create mode 100644 stdlib/realpath.c create mode 100644 sysdeps/unix/sysv/linux/realpath.c diff --git a/include/stdlib.h b/include/stdlib.h index ffcefd7b85..dd51f66b26 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -20,6 +20,14 @@ # include <rtld-malloc.h> +# ifndef PATH_MAX +# ifdef MAXPATHLEN +# define PATH_MAX MAXPATHLEN +# else +# define PATH_MAX 1024 +# endif +# endif + extern __typeof (strtol_l) __strtol_l; extern __typeof (strtoul_l) __strtoul_l; extern __typeof (strtoll_l) __strtoll_l; @@ -92,6 +100,11 @@ extern int __unsetenv (const char *__name) attribute_hidden; extern int __clearenv (void) attribute_hidden; extern char *__mktemp (char *__template) __THROW __nonnull ((1)); extern char *__canonicalize_file_name (const char *__name); +extern _Bool __resolve_path (const char *name, char *resolved, + size_t path_max) + attribute_hidden; +extern char *__realpath_system (const char *name, char *resolved) + attribute_hidden; extern char *__realpath (const char *__name, char *__resolved); libc_hidden_proto (__realpath) extern int __ptsname_r (int __fd, char *__buf, size_t __buflen) diff --git a/stdlib/Makefile b/stdlib/Makefile index 7093b8a584..35ca04541f 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -53,7 +53,7 @@ routines := \ strtof strtod strtold \ strtof_l strtod_l strtold_l \ strtof_nan strtod_nan strtold_nan \ - system canonicalize \ + system realpath canonicalize canonicalize-internal \ a64l l64a \ rpmatch strfmon strfmon_l getsubopt xpg_basename fmtmsg \ strtoimax strtoumax wcstoimax wcstoumax \ diff --git a/stdlib/canonicalize-internal.c b/stdlib/canonicalize-internal.c new file mode 100644 index 0000000000..1b5f73a1cc --- /dev/null +++ b/stdlib/canonicalize-internal.c @@ -0,0 +1,136 @@ +/* Internal function for canonicalize absolute name of a given file. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdbool.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <eloop-threshold.h> + +_Bool +__resolve_path (const char *name, char *resolved, size_t path_max) +{ + const char *start, *end; + char *rpath = resolved; + char *rpath_limit = rpath + path_max; + char *dest = resolved; + char extra_buf[PATH_MAX]; + int num_links = 0; + + if (name[0] != '/') + { + if (__getcwd (rpath, path_max) == NULL) + { + rpath[0] = '\0'; + return false; + } + dest = __rawmemchr (rpath, '\0'); + } + else + { + rpath[0] = '/'; + dest = rpath + 1; + } + + for (start = end = name; *start; start = end) + { + /* Skip sequence of multiple path-separators. */ + while (*start == '/') + ++start; + + /* Find end of path component. */ + for (end = start; *end && *end != '/'; ++end) + /* Nothing. */; + + if (end - start == 0) + break; + else if (end - start == 1 && start[0] == '.') + /* nothing */; + else if (end - start == 2 && start[0] == '.' && start[1] == '.') + { + /* Back up to previous component, ignore if at root already. */ + if (dest > rpath + 1) + while ((--dest)[-1] != '/'); + } + else + { + struct stat64 st; + + if (dest[-1] != '/') + *dest++ = '/'; + if (dest + (end - start) >= rpath_limit) + { + __set_errno (ENAMETOOLONG); + if (dest > rpath + 1) + dest--; + *dest = '\0'; + return false; + } + + dest = __mempcpy (dest, start, end - start); + *dest = '\0'; + + if (__lstat64 (rpath, &st) < 0) + return false; + + if (S_ISLNK (st.st_mode)) + { + if (++num_links > __eloop_threshold ()) + { + __set_errno (ELOOP); + return false; + } + + char buf[PATH_MAX]; + ssize_t n = __readlink (rpath, buf, sizeof (buf) - 1); + if (n < 0) + return false; + buf[n] = '\0'; + + size_t len = strlen (end); + if (path_max - n <= len) + { + __set_errno (ENAMETOOLONG); + return false; + } + + memmove (&extra_buf[n], end, len + 1); + name = end = memcpy (extra_buf, buf, n); + + if (buf[0] == '/') + dest = rpath + 1; /* It's an absolute symlink */ + else + /* Back up to previous component, ignore if at root already: */ + if (dest > rpath + 1) + while ((--dest)[-1] != '/'); + } + else if (!S_ISDIR (st.st_mode) && *end != '\0') + { + __set_errno (ENOTDIR); + return false; + } + } + } + + if (dest > rpath + 1 && dest[-1] == '/') + --dest; + *dest = '\0'; + + return true; +} diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c index 91c30e38be..f4ab528a15 100644 --- a/stdlib/canonicalize.c +++ b/stdlib/canonicalize.c @@ -16,26 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <assert.h> #include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <limits.h> -#include <sys/stat.h> #include <errno.h> -#include <stddef.h> -#include <eloop-threshold.h> #include <shlib-compat.h> -#ifndef PATH_MAX -# ifdef MAXPATHLEN -# define PATH_MAX MAXPATHLEN -# else -# define PATH_MAX 1024 -# endif -#endif - /* Return the canonical absolute name of file NAME. A canonical name does not contain any `.', `..' components nor any repeated path separators ('/') or symlinks. All path components must exist. If @@ -50,10 +35,6 @@ char * __realpath (const char *name, char *resolved) { - char *rpath, *dest, extra_buf[PATH_MAX]; - const char *start, *end, *rpath_limit; - int num_links = 0; - if (name == NULL) { /* As per Single Unix Specification V2 we must return an error if @@ -72,127 +53,7 @@ __realpath (const char *name, char *resolved) return NULL; } - if (resolved == NULL) - { - rpath = malloc (PATH_MAX); - if (rpath == NULL) - return NULL; - } - else - rpath = resolved; - rpath_limit = rpath + PATH_MAX; - - if (name[0] != '/') - { - if (!__getcwd (rpath, PATH_MAX)) - { - rpath[0] = '\0'; - goto error; - } - dest = __rawmemchr (rpath, '\0'); - } - else - { - rpath[0] = '/'; - dest = rpath + 1; - } - - for (start = end = name; *start; start = end) - { - struct stat64 st; - int n; - - /* Skip sequence of multiple path-separators. */ - while (*start == '/') - ++start; - - /* Find end of path component. */ - for (end = start; *end && *end != '/'; ++end) - /* Nothing. */; - - if (end - start == 0) - break; - else if (end - start == 1 && start[0] == '.') - /* nothing */; - else if (end - start == 2 && start[0] == '.' && start[1] == '.') - { - /* Back up to previous component, ignore if at root already. */ - if (dest > rpath + 1) - while ((--dest)[-1] != '/'); - } - else - { - if (dest[-1] != '/') - *dest++ = '/'; - - if (dest + (end - start) >= rpath_limit) - { - __set_errno (ENAMETOOLONG); - if (dest > rpath + 1) - dest--; - *dest = '\0'; - goto error; - } - - dest = __mempcpy (dest, start, end - start); - *dest = '\0'; - - if (__lxstat64 (_STAT_VER, rpath, &st) < 0) - goto error; - - if (S_ISLNK (st.st_mode)) - { - char buf[PATH_MAX]; - size_t len; - - if (++num_links > __eloop_threshold ()) - { - __set_errno (ELOOP); - goto error; - } - - n = __readlink (rpath, buf, sizeof (buf) - 1); - if (n < 0) - goto error; - buf[n] = '\0'; - - len = strlen (end); - if (PATH_MAX - n <= len) - { - __set_errno (ENAMETOOLONG); - goto error; - } - - /* Careful here, end may be a pointer into extra_buf... */ - memmove (&extra_buf[n], end, len + 1); - name = end = memcpy (extra_buf, buf, n); - - if (buf[0] == '/') - dest = rpath + 1; /* It's an absolute symlink */ - else - /* Back up to previous component, ignore if at root already: */ - if (dest > rpath + 1) - while ((--dest)[-1] != '/'); - } - else if (!S_ISDIR (st.st_mode) && *end != '\0') - { - __set_errno (ENOTDIR); - goto error; - } - } - } - if (dest > rpath + 1 && dest[-1] == '/') - --dest; - *dest = '\0'; - - assert (resolved == NULL || resolved == rpath); - return rpath; - -error: - assert (resolved == NULL || resolved == rpath); - if (resolved == NULL) - free (rpath); - return NULL; + return __realpath_system (name, resolved); } libc_hidden_def (__realpath) versioned_symbol (libc, __realpath, realpath, GLIBC_2_3); diff --git a/stdlib/realpath.c b/stdlib/realpath.c new file mode 100644 index 0000000000..1a70c658b7 --- /dev/null +++ b/stdlib/realpath.c @@ -0,0 +1,42 @@ +/* Return the canonical absolute name of a given file. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <errno.h> +#include <shlib-compat.h> + +char * +__realpath_system (const char *name, char *resolved) +{ + bool resolved_malloc = false; + if (resolved == NULL) + { + resolved = malloc (PATH_MAX); + if (resolved == NULL) + return NULL; + resolved_malloc = true; + } + + if (! __resolve_path (name, resolved, PATH_MAX)) + { + if (resolved_malloc) + free (resolved); + return NULL; + } + return resolved; +} diff --git a/sysdeps/unix/sysv/linux/realpath.c b/sysdeps/unix/sysv/linux/realpath.c new file mode 100644 index 0000000000..4c69011f92 --- /dev/null +++ b/sysdeps/unix/sysv/linux/realpath.c @@ -0,0 +1,65 @@ +/* Return the canonical absolute name of a given file. Linux version. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <errno.h> +#include <not-cancel.h> +#include <shlib-compat.h> + +char * +__realpath_system (const char *name, char *resolved) +{ + int fd = __open64_nocancel (name, O_PATH | O_NONBLOCK | O_CLOEXEC); + if (fd == -1) + { + /* If the call fails with either EACCES or ENOENT and resolved_path is + not NULL, then the prefix of path that is not readable or does not + exist is returned in resolved_path. This is a GNU extension. */ + if (resolved != NULL) + __resolve_path (name, resolved, PATH_MAX); + return NULL; + } + + char procname[sizeof ("/proc/self/fd/") + 3 * sizeof (int)]; + *_fitoa_word (fd, __stpcpy (procname, "/proc/self/fd/"), 10, 0) = '\0'; + + char path[PATH_MAX]; + ssize_t len = __readlink (procname, path, sizeof (path) - 1); + if (len < 0) + { + __close_nocancel_nostatus (fd); + return NULL; + } + path[len] = '\0'; + + struct stat64 st; + fstat64 (fd, &st); + dev_t st_dev = st.st_dev; + ino_t st_ino = st.st_ino; + int r = stat64 (path, &st); + if (r == -1 || st.st_dev != st_dev || st.st_ino != st_ino) + { + if (r == 0) + __set_errno (ELOOP); + __close_nocancel_nostatus (fd); + return NULL; + } + + __close_nocancel_nostatus (fd); + return resolved != NULL ? strcpy (resolved, path) : __strdup (path); +} -- 2.25.1 ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella @ 2020-08-10 21:25 ` Paul Eggert 2020-08-11 14:14 ` Adhemerval Zanella 2020-08-11 16:46 ` Andreas Schwab 2020-08-11 9:46 ` Andreas Schwab 1 sibling, 2 replies; 26+ messages in thread From: Paul Eggert @ 2020-08-10 21:25 UTC (permalink / raw) To: Adhemerval Zanella, libc-alpha; +Cc: Xiaoming Ni On 8/10/20 1:48 PM, Adhemerval Zanella wrote: > Regarding syscalls usage, for a sucessful path without symlinks it > trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat, > and close). Thanks for looking into this. There's no need for the new __resolve_path function to call __lstat64. It does that only to determine whether the file is a symlink or a directory, and it can do the former with __readlink (which it's gonna do already) and the latter by going into the next iteration and seeing whether it gets NOTDIR. Avoiding __lstat64 means we don't have to worry about irrelevant EOVERFLOW hassles. In the Linux __realpath_system, the code should be prepared for the /proc syscall to fail because /proc is not mounted. It can fall back on __resolve_path in that case. The code should be prepared for the fstat64 to fail due to EOVERFLOW. And I'm not quite getting why the code is comparing fstat64's result to stat64's result -- perhaps explain this in a comment and how EOVERFLOW is dealt with? ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-10 21:25 ` Paul Eggert @ 2020-08-11 14:14 ` Adhemerval Zanella 2020-08-11 15:18 ` Adhemerval Zanella 2020-08-11 15:52 ` Paul Eggert 2020-08-11 16:46 ` Andreas Schwab 1 sibling, 2 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-11 14:14 UTC (permalink / raw) To: Paul Eggert, libc-alpha; +Cc: Xiaoming Ni On 10/08/2020 18:25, Paul Eggert wrote: > On 8/10/20 1:48 PM, Adhemerval Zanella wrote: > >> Regarding syscalls usage, for a sucessful path without symlinks it >> trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat, >> and close). > > Thanks for looking into this. > > There's no need for the new __resolve_path function to call __lstat64. It does that only to determine whether the file is a symlink or a directory, and it can do the former with __readlink (which it's gonna do already) and the latter by going into the next iteration and seeing whether it gets NOTDIR. Avoiding __lstat64 means we don't have to worry about irrelevant EOVERFLOW hassles. I am not sure we can skip the __lstat64 because we can't check if a subpath passed on __readlink is a directory or not (so we can move to next iteration in some cases). For instance the test 30 from stdlib/test-canon.c which issues: realpath ("./doesExist/someFile/", ...) On the directory with: mkdir ("doesExist", 0777) creat ("doesExist/someFile", 0777) By just issuing the readlink on (strace output): readlink(".../doesExist", 0x7fff32598060, 4095) = -1 EINVAL We can't see that is a S_ISDIR to go for next iteration (and then fail on next iteration since "../doesExist/someFile' is not a directory end *end != '\0'). > > In the Linux __realpath_system, the code should be prepared for the /proc syscall to fail because /proc is not mounted. It can fall back on __resolve_path in that case. The code should be prepared for the fstat64 to fail due to EOVERFLOW. And I'm not quite getting why the code is comparing fstat64's result to stat64's result -- perhaps explain this in a comment and how EOVERFLOW is dealt with? Right, I though about the '/proc' failure and decided to fail based on on other implementations (fexecve fallback, fchmodat), but the __resolve_path should be ok to use in such case. For fstat64, afaik EOVERFLOW is returned only when called non-LFS interface which is not this case. Do you think re really should handle EOVERFLOW ? The fstat64/stat64 comparison is just a small check to see if the file was removed between calls. Thinking twice I am not sure how effective this really is, maybe remove it? ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-11 14:14 ` Adhemerval Zanella @ 2020-08-11 15:18 ` Adhemerval Zanella 2020-08-11 15:52 ` Paul Eggert 1 sibling, 0 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-11 15:18 UTC (permalink / raw) To: Paul Eggert, libc-alpha; +Cc: Xiaoming Ni On 11/08/2020 11:14, Adhemerval Zanella wrote: > > > On 10/08/2020 18:25, Paul Eggert wrote: >> On 8/10/20 1:48 PM, Adhemerval Zanella wrote: >> >>> Regarding syscalls usage, for a sucessful path without symlinks it >>> trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat, >>> and close). >> >> Thanks for looking into this. >> >> There's no need for the new __resolve_path function to call __lstat64. It does that only to determine whether the file is a symlink or a directory, and it can do the former with __readlink (which it's gonna do already) and the latter by going into the next iteration and seeing whether it gets NOTDIR. Avoiding __lstat64 means we don't have to worry about irrelevant EOVERFLOW hassles. > > I am not sure we can skip the __lstat64 because we can't check if a subpath > passed on __readlink is a directory or not (so we can move to next iteration > in some cases). > > For instance the test 30 from stdlib/test-canon.c which issues: > > realpath ("./doesExist/someFile/", ...) And I just checked your patch on BZ#24970 and it shows the issues: $ grep ^FAIL stdlib/subdir-tests.sum FAIL: stdlib/test-canon $ cat stdlib/test-canon.out /home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/test-canon: flunked test 30 (expected `NULL', got `/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist/someFile') /home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/test-canon: flunked test 31 (expected `NULL', got `/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist') 2 errors. I couldn't figure out a way to have the expected realpath semantic by just using readlink. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-11 14:14 ` Adhemerval Zanella 2020-08-11 15:18 ` Adhemerval Zanella @ 2020-08-11 15:52 ` Paul Eggert 2020-08-11 19:01 ` Adhemerval Zanella 1 sibling, 1 reply; 26+ messages in thread From: Paul Eggert @ 2020-08-11 15:52 UTC (permalink / raw) To: Adhemerval Zanella, libc-alpha; +Cc: Xiaoming Ni On 8/11/20 7:14 AM, Adhemerval Zanella wrote: > For instance the test 30 from stdlib/test-canon.c which issues: > > realpath ("./doesExist/someFile/", ...) > > On the directory with: > > mkdir ("doesExist", 0777) > creat ("doesExist/someFile", 0777) > > By just issuing the readlink on (strace output): > > readlink(".../doesExist", 0x7fff32598060, 4095) = -1 EINVAL > > We can't see that is a S_ISDIR to go for next iteration (and then fail on next > iteration since "../doesExist/someFile' is not a directory end *end != '\0'). We don't need to test whether "doesExist" is a directory. The EINVAL tells us that it exists and is not a symlink. Therefore it is either a directory or a non-(symlink-or-directory). To discover whether it is a directory or a non-(symlink-or-directory), we go ahead with the next iteration: readlink ("doesExist/someFile", ...) If this fails with ENOTDIR, then "doesExist" was a non-(symlink-or-directory); if it succeeds (or fails with EINVAL) doesExist was a directory; otherwise can simply fail with whatever errno it fails with. > For fstat64, afaik EOVERFLOW is returned only when called non-LFS interface > which is not this case. Do you think re really should handle EOVERFLOW ? Ah, I hadn't thought of that. If EOVERFLOW is indeed impossible with fstat64 then we needn't worry about EOVERFLOW. > The fstat64/stat64 comparison is just a small check to see if the > file was removed between calls. Thinking twice I am not sure how > effective this really is, maybe remove it? Yes, let's remove that. realpath cannot possibly be atomic when it is issuing multiple syscalls, so there's no point to it trying to be "atomic" here. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-11 15:52 ` Paul Eggert @ 2020-08-11 19:01 ` Adhemerval Zanella 0 siblings, 0 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-11 19:01 UTC (permalink / raw) To: Paul Eggert, libc-alpha; +Cc: Xiaoming Ni On 11/08/2020 12:52, Paul Eggert wrote: > On 8/11/20 7:14 AM, Adhemerval Zanella wrote: > >> For instance the test 30 from stdlib/test-canon.c which issues: >> >> realpath ("./doesExist/someFile/", ...) >> >> On the directory with: >> >> mkdir ("doesExist", 0777) >> creat ("doesExist/someFile", 0777) >> >> By just issuing the readlink on (strace output): >> >> readlink(".../doesExist", 0x7fff32598060, 4095) = -1 EINVAL >> >> We can't see that is a S_ISDIR to go for next iteration (and then fail on next >> iteration since "../doesExist/someFile' is not a directory end *end != '\0'). > > We don't need to test whether "doesExist" is a directory. The EINVAL tells us that it exists and is not a symlink. Therefore it is either a directory or a non-(symlink-or-directory). To discover whether it is a directory or a non-(symlink-or-directory), we go ahead with the next iteration: > > readlink ("doesExist/someFile", ...) > > If this fails with ENOTDIR, then "doesExist" was a non-(symlink-or-directory); if it succeeds (or fails with EINVAL) doesExist was a directory; otherwise can simply fail with whatever errno it fails with. The issue seems that readlink does not fail with ENOTDIR if 'doesExist/someFile' is indeed a file: readlink("/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist/someFile", 0x7ffe2d3efb70, 4096) = -1 EINVAL (Invalid argument) We need to check with a final '/': readlink("/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist/someFile/", 0x7ffee1893aa0, 4096) = -1 ENOTDIR (Not a directory) This is turn requires to change the loop to make this extra check with readlink with expected path (by appending the '/' when required). There is another issue which we also need to handle "./doesExist/someFile/..". But I have spent too much time for an small optimization not directly related to the Linux optimization. If you could make either current algorithm or the one on stdlib/canonicalize-internal.c no call lstat I can add on series (I don't have plan to spend more time on this readlink optimization). > >> For fstat64, afaik EOVERFLOW is returned only when called non-LFS interface >> which is not this case. Do you think re really should handle EOVERFLOW ? > > Ah, I hadn't thought of that. If EOVERFLOW is indeed impossible with fstat64 then we needn't worry about EOVERFLOW. > >> The fstat64/stat64 comparison is just a small check to see if the >> file was removed between calls. Thinking twice I am not sure how >> effective this really is, maybe remove it? > > Yes, let's remove that. realpath cannot possibly be atomic when it is issuing multiple syscalls, so there's no point to it trying to be "atomic" here. Ack. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-10 21:25 ` Paul Eggert 2020-08-11 14:14 ` Adhemerval Zanella @ 2020-08-11 16:46 ` Andreas Schwab 2020-08-17 14:00 ` Dmitry V. Levin 1 sibling, 1 reply; 26+ messages in thread From: Andreas Schwab @ 2020-08-11 16:46 UTC (permalink / raw) To: Paul Eggert; +Cc: Adhemerval Zanella, libc-alpha, Xiaoming Ni On Aug 10 2020, Paul Eggert wrote: > In the Linux __realpath_system, the code should be prepared for the /proc > syscall to fail because /proc is not mounted. We already depend very much on /proc to be mounted, so this can be ignored. Andreas. -- Andreas Schwab, schwab@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different." ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-11 16:46 ` Andreas Schwab @ 2020-08-17 14:00 ` Dmitry V. Levin 2020-08-17 15:13 ` Andreas Schwab 0 siblings, 1 reply; 26+ messages in thread From: Dmitry V. Levin @ 2020-08-17 14:00 UTC (permalink / raw) To: Andreas Schwab; +Cc: Paul Eggert, Xiaoming Ni, libc-alpha On Tue, Aug 11, 2020 at 06:46:24PM +0200, Andreas Schwab wrote: > On Aug 10 2020, Paul Eggert wrote: > > > In the Linux __realpath_system, the code should be prepared for the /proc > > syscall to fail because /proc is not mounted. > > We already depend very much on /proc to be mounted, so this can be > ignored. Do we? -- ldv ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-17 14:00 ` Dmitry V. Levin @ 2020-08-17 15:13 ` Andreas Schwab 2020-08-17 16:17 ` Dmitry V. Levin 0 siblings, 1 reply; 26+ messages in thread From: Andreas Schwab @ 2020-08-17 15:13 UTC (permalink / raw) To: Dmitry V. Levin; +Cc: Paul Eggert, Xiaoming Ni, libc-alpha On Aug 17 2020, Dmitry V. Levin wrote: > On Tue, Aug 11, 2020 at 06:46:24PM +0200, Andreas Schwab wrote: >> On Aug 10 2020, Paul Eggert wrote: >> >> > In the Linux __realpath_system, the code should be prepared for the /proc >> > syscall to fail because /proc is not mounted. >> >> We already depend very much on /proc to be mounted, so this can be >> ignored. > > Do we? Yes, see <87blk9ry5d.fsf@oldenburg2.str.redhat.com> for example. Andreas. -- Andreas Schwab, schwab@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different." ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-17 15:13 ` Andreas Schwab @ 2020-08-17 16:17 ` Dmitry V. Levin 0 siblings, 0 replies; 26+ messages in thread From: Dmitry V. Levin @ 2020-08-17 16:17 UTC (permalink / raw) To: Andreas Schwab; +Cc: Paul Eggert, Xiaoming Ni, libc-alpha On Mon, Aug 17, 2020 at 05:13:44PM +0200, Andreas Schwab wrote: > On Aug 17 2020, Dmitry V. Levin wrote: > > On Tue, Aug 11, 2020 at 06:46:24PM +0200, Andreas Schwab wrote: > >> On Aug 10 2020, Paul Eggert wrote: > >> > >> > In the Linux __realpath_system, the code should be prepared for the /proc > >> > syscall to fail because /proc is not mounted. > >> > >> We already depend very much on /proc to be mounted, so this can be > >> ignored. > > > > Do we? > > Yes, see <87blk9ry5d.fsf@oldenburg2.str.redhat.com> for example. That's a regression, I filed a bug at https://sourceware.org/bugzilla/show_bug.cgi?id=26401 Thanks, -- ldv ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] linux: Optimize realpath stack usage 2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella 2020-08-10 21:25 ` Paul Eggert @ 2020-08-11 9:46 ` Andreas Schwab 1 sibling, 0 replies; 26+ messages in thread From: Andreas Schwab @ 2020-08-11 9:46 UTC (permalink / raw) To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Xiaoming Ni On Aug 10 2020, Adhemerval Zanella via Libc-alpha wrote: > This optimizes the stack usage for success case (from ~8K to ~4k) and > where 'resolved' input buffer is not provided. For ithe failure case s/ithe/the/ Andreas. -- Andreas Schwab, schwab@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different." ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) 2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella 2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella 2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella @ 2020-08-11 0:32 ` Matt Turner 2020-08-11 3:00 ` Xiaoming Ni 3 siblings, 0 replies; 26+ messages in thread From: Matt Turner @ 2020-08-11 0:32 UTC (permalink / raw) To: Adhemerval Zanella; +Cc: GNU C Library, Xiaoming Ni > [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) The correct bug number is #26341. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) 2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella ` (2 preceding siblings ...) 2020-08-11 0:32 ` [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Matt Turner @ 2020-08-11 3:00 ` Xiaoming Ni 2020-08-11 14:57 ` Adhemerval Zanella 3 siblings, 1 reply; 26+ messages in thread From: Xiaoming Ni @ 2020-08-11 3:00 UTC (permalink / raw) To: Adhemerval Zanella, libc-alpha On 2020/8/11 4:48, Adhemerval Zanella wrote: > It uses both a fixed internal buffer with PATH_MAX size to read and > copy the results of the readlink call. > > Also, if PATH_MAX is not defined it uses a default value of 1024 > as for other stdlib implementations. > > The expected stack usage is about 8k on Linux where PATH_MAX is > define as 4096 (plus some internal function usage for local > variable). > > Checked on x86_64-linux-gnu and i686-linux-gnu. > --- > stdlib/Makefile | 3 +- > stdlib/canonicalize.c | 38 +++--- > stdlib/tst-canon-bz26341.c | 108 ++++++++++++++++++ > support/support_set_small_thread_stack_size.c | 12 +- > support/xthread.h | 2 + > 5 files changed, 138 insertions(+), 25 deletions(-) > create mode 100644 stdlib/tst-canon-bz26341.c > > diff --git a/stdlib/Makefile b/stdlib/Makefile > index 4615f6dfe7..7093b8a584 100644 > --- a/stdlib/Makefile > +++ b/stdlib/Makefile > @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ > tst-makecontext-align test-bz22786 tst-strtod-nan-sign \ > tst-swapcontext1 tst-setcontext4 tst-setcontext5 \ > tst-setcontext6 tst-setcontext7 tst-setcontext8 \ > - tst-setcontext9 tst-bz20544 > + tst-setcontext9 tst-bz20544 tst-canon-bz26341 > > tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ > tst-tls-atexit tst-tls-atexit-nodelete > @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library) > LDLIBS-test-at_quick_exit-race = $(shared-thread-library) > LDLIBS-test-cxa_atexit-race = $(shared-thread-library) > LDLIBS-test-on_exit-race = $(shared-thread-library) > +LDLIBS-tst-canon-bz26341 = $(shared-thread-library) > > LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl) > LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic) > diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c > index cbd885a3c5..554ba221e4 100644 > --- a/stdlib/canonicalize.c > +++ b/stdlib/canonicalize.c > @@ -28,6 +28,14 @@ > #include <eloop-threshold.h> > #include <shlib-compat.h> > > +#ifndef PATH_MAX > +# ifdef MAXPATHLEN > +# define PATH_MAX MAXPATHLEN > +# else > +# define PATH_MAX 1024 > +# endif > +#endif > + > /* Return the canonical absolute name of file NAME. A canonical name > does not contain any `.', `..' components nor any repeated path > separators ('/') or symlinks. All path components must exist. If > @@ -42,9 +50,8 @@ > char * > __realpath (const char *name, char *resolved) > { > - char *rpath, *dest, *extra_buf = NULL; > + char *rpath, *dest, extra_buf[PATH_MAX]; Why does the 4 KB stack space need to be occupied? Even if there are no linked files ? ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) 2020-08-11 3:00 ` Xiaoming Ni @ 2020-08-11 14:57 ` Adhemerval Zanella 2020-08-12 1:38 ` Xiaoming Ni 0 siblings, 1 reply; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-11 14:57 UTC (permalink / raw) To: Xiaoming Ni, libc-alpha On 11/08/2020 00:00, Xiaoming Ni wrote: > On 2020/8/11 4:48, Adhemerval Zanella wrote: >> It uses both a fixed internal buffer with PATH_MAX size to read and >> copy the results of the readlink call. >> >> Also, if PATH_MAX is not defined it uses a default value of 1024 >> as for other stdlib implementations. >> >> The expected stack usage is about 8k on Linux where PATH_MAX is >> define as 4096 (plus some internal function usage for local >> variable). >> >> Checked on x86_64-linux-gnu and i686-linux-gnu. >> --- >> stdlib/Makefile | 3 +- >> stdlib/canonicalize.c | 38 +++--- >> stdlib/tst-canon-bz26341.c | 108 ++++++++++++++++++ >> support/support_set_small_thread_stack_size.c | 12 +- >> support/xthread.h | 2 + >> 5 files changed, 138 insertions(+), 25 deletions(-) >> create mode 100644 stdlib/tst-canon-bz26341.c >> >> diff --git a/stdlib/Makefile b/stdlib/Makefile >> index 4615f6dfe7..7093b8a584 100644 >> --- a/stdlib/Makefile >> +++ b/stdlib/Makefile >> @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ >> tst-makecontext-align test-bz22786 tst-strtod-nan-sign \ >> tst-swapcontext1 tst-setcontext4 tst-setcontext5 \ >> tst-setcontext6 tst-setcontext7 tst-setcontext8 \ >> - tst-setcontext9 tst-bz20544 >> + tst-setcontext9 tst-bz20544 tst-canon-bz26341 >> tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ >> tst-tls-atexit tst-tls-atexit-nodelete >> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library) >> LDLIBS-test-at_quick_exit-race = $(shared-thread-library) >> LDLIBS-test-cxa_atexit-race = $(shared-thread-library) >> LDLIBS-test-on_exit-race = $(shared-thread-library) >> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library) >> LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl) >> LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic) >> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c >> index cbd885a3c5..554ba221e4 100644 >> --- a/stdlib/canonicalize.c >> +++ b/stdlib/canonicalize.c >> @@ -28,6 +28,14 @@ >> #include <eloop-threshold.h> >> #include <shlib-compat.h> >> +#ifndef PATH_MAX >> +# ifdef MAXPATHLEN >> +# define PATH_MAX MAXPATHLEN >> +# else >> +# define PATH_MAX 1024 >> +# endif >> +#endif >> + >> /* Return the canonical absolute name of file NAME. A canonical name >> does not contain any `.', `..' components nor any repeated path >> separators ('/') or symlinks. All path components must exist. If >> @@ -42,9 +50,8 @@ >> char * >> __realpath (const char *name, char *resolved) >> { >> - char *rpath, *dest, *extra_buf = NULL; >> + char *rpath, *dest, extra_buf[PATH_MAX]; > Why does the 4 KB stack space need to be occupied? Even if there are no linked files ? It does not, it is a simplification to avoid to decompose the function and handle symlinks in a special case. To avoid the stack allocation for common case would need to either to use dynamic allocation or adjust the function that once it founds a symlink, it calls another function to handle the loop with a stack allocated provided buffer. I don't think this extra code complexity really pays off. ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) 2020-08-11 14:57 ` Adhemerval Zanella @ 2020-08-12 1:38 ` Xiaoming Ni 2020-08-12 23:04 ` Adhemerval Zanella 0 siblings, 1 reply; 26+ messages in thread From: Xiaoming Ni @ 2020-08-12 1:38 UTC (permalink / raw) To: Adhemerval Zanella, libc-alpha On 2020/8/11 22:57, Adhemerval Zanella wrote: > > > On 11/08/2020 00:00, Xiaoming Ni wrote: >> On 2020/8/11 4:48, Adhemerval Zanella wrote: >>> It uses both a fixed internal buffer with PATH_MAX size to read and >>> copy the results of the readlink call. >>> >>> Also, if PATH_MAX is not defined it uses a default value of 1024 >>> as for other stdlib implementations. >>> >>> The expected stack usage is about 8k on Linux where PATH_MAX is >>> define as 4096 (plus some internal function usage for local >>> variable). >>> >>> Checked on x86_64-linux-gnu and i686-linux-gnu. >>> --- >>> stdlib/Makefile | 3 +- >>> stdlib/canonicalize.c | 38 +++--- >>> stdlib/tst-canon-bz26341.c | 108 ++++++++++++++++++ >>> support/support_set_small_thread_stack_size.c | 12 +- >>> support/xthread.h | 2 + >>> 5 files changed, 138 insertions(+), 25 deletions(-) >>> create mode 100644 stdlib/tst-canon-bz26341.c >>> >>> diff --git a/stdlib/Makefile b/stdlib/Makefile >>> index 4615f6dfe7..7093b8a584 100644 >>> --- a/stdlib/Makefile >>> +++ b/stdlib/Makefile >>> @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ >>> tst-makecontext-align test-bz22786 tst-strtod-nan-sign \ >>> tst-swapcontext1 tst-setcontext4 tst-setcontext5 \ >>> tst-setcontext6 tst-setcontext7 tst-setcontext8 \ >>> - tst-setcontext9 tst-bz20544 >>> + tst-setcontext9 tst-bz20544 tst-canon-bz26341 >>> tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ >>> tst-tls-atexit tst-tls-atexit-nodelete >>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library) >>> LDLIBS-test-at_quick_exit-race = $(shared-thread-library) >>> LDLIBS-test-cxa_atexit-race = $(shared-thread-library) >>> LDLIBS-test-on_exit-race = $(shared-thread-library) >>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library) >>> LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl) >>> LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic) >>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c >>> index cbd885a3c5..554ba221e4 100644 >>> --- a/stdlib/canonicalize.c >>> +++ b/stdlib/canonicalize.c >>> @@ -28,6 +28,14 @@ >>> #include <eloop-threshold.h> >>> #include <shlib-compat.h> >>> +#ifndef PATH_MAX >>> +# ifdef MAXPATHLEN >>> +# define PATH_MAX MAXPATHLEN >>> +# else >>> +# define PATH_MAX 1024 >>> +# endif >>> +#endif >>> + >>> /* Return the canonical absolute name of file NAME. A canonical name >>> does not contain any `.', `..' components nor any repeated path >>> separators ('/') or symlinks. All path components must exist. If >>> @@ -42,9 +50,8 @@ >>> char * >>> __realpath (const char *name, char *resolved) >>> { >>> - char *rpath, *dest, *extra_buf = NULL; >>> + char *rpath, *dest, extra_buf[PATH_MAX]; >> Why does the 4 KB stack space need to be occupied? Even if there are no linked files ? > > It does not, it is a simplification to avoid to decompose the function > and handle symlinks in a special case. To avoid the stack allocation > for common case would need to either to use dynamic allocation or > adjust the function that once it founds a symlink, it calls another > function to handle the loop with a stack allocated provided buffer. > I don't think this extra code complexity really pays off. Extract the symlinks processing as an independent function and move extra_buf and buf to the new independent function to avoid wasting 8 KB stack space when the realpath is used to process unlinked files. Is this better? Thanks Xiaoming Ni ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) 2020-08-12 1:38 ` Xiaoming Ni @ 2020-08-12 23:04 ` Adhemerval Zanella 2020-08-13 20:29 ` Adhemerval Zanella 0 siblings, 1 reply; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-12 23:04 UTC (permalink / raw) To: Xiaoming Ni, libc-alpha On 11/08/2020 22:38, Xiaoming Ni wrote: > On 2020/8/11 22:57, Adhemerval Zanella wrote: >> >> >> On 11/08/2020 00:00, Xiaoming Ni wrote: >>> On 2020/8/11 4:48, Adhemerval Zanella wrote: >>>> It uses both a fixed internal buffer with PATH_MAX size to read and >>>> copy the results of the readlink call. >>>> >>>> Also, if PATH_MAX is not defined it uses a default value of 1024 >>>> as for other stdlib implementations. >>>> >>>> The expected stack usage is about 8k on Linux where PATH_MAX is >>>> define as 4096 (plus some internal function usage for local >>>> variable). >>>> >>>> Checked on x86_64-linux-gnu and i686-linux-gnu. >>>> --- >>>> stdlib/Makefile | 3 +- >>>> stdlib/canonicalize.c | 38 +++--- >>>> stdlib/tst-canon-bz26341.c | 108 ++++++++++++++++++ >>>> support/support_set_small_thread_stack_size.c | 12 +- >>>> support/xthread.h | 2 + >>>> 5 files changed, 138 insertions(+), 25 deletions(-) >>>> create mode 100644 stdlib/tst-canon-bz26341.c >>>> >>>> diff --git a/stdlib/Makefile b/stdlib/Makefile >>>> index 4615f6dfe7..7093b8a584 100644 >>>> --- a/stdlib/Makefile >>>> +++ b/stdlib/Makefile >>>> @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ >>>> tst-makecontext-align test-bz22786 tst-strtod-nan-sign \ >>>> tst-swapcontext1 tst-setcontext4 tst-setcontext5 \ >>>> tst-setcontext6 tst-setcontext7 tst-setcontext8 \ >>>> - tst-setcontext9 tst-bz20544 >>>> + tst-setcontext9 tst-bz20544 tst-canon-bz26341 >>>> tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ >>>> tst-tls-atexit tst-tls-atexit-nodelete >>>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library) >>>> LDLIBS-test-at_quick_exit-race = $(shared-thread-library) >>>> LDLIBS-test-cxa_atexit-race = $(shared-thread-library) >>>> LDLIBS-test-on_exit-race = $(shared-thread-library) >>>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library) >>>> LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl) >>>> LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic) >>>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c >>>> index cbd885a3c5..554ba221e4 100644 >>>> --- a/stdlib/canonicalize.c >>>> +++ b/stdlib/canonicalize.c >>>> @@ -28,6 +28,14 @@ >>>> #include <eloop-threshold.h> >>>> #include <shlib-compat.h> >>>> +#ifndef PATH_MAX >>>> +# ifdef MAXPATHLEN >>>> +# define PATH_MAX MAXPATHLEN >>>> +# else >>>> +# define PATH_MAX 1024 >>>> +# endif >>>> +#endif >>>> + >>>> /* Return the canonical absolute name of file NAME. A canonical name >>>> does not contain any `.', `..' components nor any repeated path >>>> separators ('/') or symlinks. All path components must exist. If >>>> @@ -42,9 +50,8 @@ >>>> char * >>>> __realpath (const char *name, char *resolved) >>>> { >>>> - char *rpath, *dest, *extra_buf = NULL; >>>> + char *rpath, *dest, extra_buf[PATH_MAX]; >>> Why does the 4 KB stack space need to be occupied? Even if there are no linked files ? >> >> It does not, it is a simplification to avoid to decompose the function >> and handle symlinks in a special case. To avoid the stack allocation >> for common case would need to either to use dynamic allocation or >> adjust the function that once it founds a symlink, it calls another >> function to handle the loop with a stack allocated provided buffer. >> I don't think this extra code complexity really pays off. > > > Extract the symlinks processing as an independent function and move extra_buf and buf to the new independent function to avoid wasting 8 KB stack space when the realpath is used to process unlinked files. > Is this better? Yes, my only reservation is the complexity and possible code duplication to handle it. I can't see no easy way to accomplish it without duplicate the loop code (minus the 'extra_buf' alloca) and make the default loop calling it with the stack allocated extra_buf (and I would like to avoid this approach). Another possibility which I think it would be better it to use a scratch buffer and make some compromise with stack usage and heap allocation. The default 1024 bytes of the scratch buffer should hit mostly of the common calls (it is 1/4 of PATH_MAX), so malloc would be called only for large paths (which should be uncommon). We can also use a scratch buffer for the readlink as well, since we might infer the required size from the previous lstat call. With something like below we can make the realpath uses a stack of about ~1024 and ~2048 if the path contains symbolic link: --- diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c index cbd885a3c5..dca160f523 100644 --- a/stdlib/canonicalize.c +++ b/stdlib/canonicalize.c @@ -25,9 +25,56 @@ #include <errno.h> #include <stddef.h> +#include <scratch_buffer.h> #include <eloop-threshold.h> #include <shlib-compat.h> +#ifndef PATH_MAX +# ifdef MAXPATHLEN +# define PATH_MAX MAXPATHLEN +# else +# define PATH_MAX 1024 +# endif +#endif + +static bool +realpath_readlink (const char *rpath, const char *end, size_t path_max, + size_t st_size, struct scratch_buffer *extra_buf) +{ + bool r = false; + + struct scratch_buffer buf; + scratch_buffer_init (&buf); + /* Add the terminating null byte. */ + if (!scratch_buffer_set_array_size (&buf, st_size + 1, sizeof (char))) + return false; + + ssize_t n = __readlink (rpath, buf.data, buf.length - 1); + if (n < 0) + goto out; + ((char *) buf.data)[n] = '\0'; + + size_t len = strlen (end); + if (path_max - n <= len) + { + __set_errno (ENAMETOOLONG); + goto out; + } + + if (!scratch_buffer_set_array_size (extra_buf, n + len + 1, sizeof (char))) + goto out; + + /* Careful here, end may be a pointer into extra_buf... */ + memmove ((char *) extra_buf->data + n, end, len + 1); + memcpy (extra_buf->data, buf.data, n); + + r = true; + +out: + scratch_buffer_free (&buf); + return r; +} + /* Return the canonical absolute name of file NAME. A canonical name does not contain any `.', `..' components nor any repeated path separators ('/') or symlinks. All path components must exist. If @@ -42,10 +89,13 @@ char * __realpath (const char *name, char *resolved) { - char *rpath, *dest, *extra_buf = NULL; + char *rpath, *dest; const char *start, *end, *rpath_limit; - long int path_max; + const size_t path_max = PATH_MAX; int num_links = 0; + struct scratch_buffer extra_buf; + + scratch_buffer_init (&extra_buf); if (name == NULL) { @@ -65,14 +115,6 @@ __realpath (const char *name, char *resolved) return NULL; } -#ifdef PATH_MAX - path_max = PATH_MAX; -#else - path_max = __pathconf (name, _PC_PATH_MAX); - if (path_max <= 0) - path_max = 1024; -#endif - if (resolved == NULL) { rpath = malloc (path_max); @@ -101,7 +143,6 @@ __realpath (const char *name, char *resolved) for (start = end = name; *start; start = end) { struct stat64 st; - int n; /* Skip sequence of multiple path-separators. */ while (*start == '/') @@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved) if (S_ISLNK (st.st_mode)) { - char *buf = __alloca (path_max); - size_t len; - if (++num_links > __eloop_threshold ()) { __set_errno (ELOOP); goto error; } - n = __readlink (rpath, buf, path_max - 1); - if (n < 0) + if (! realpath_readlink (rpath, end, path_max, st.st_size, + &extra_buf)) goto error; - buf[n] = '\0'; - - if (!extra_buf) - extra_buf = __alloca (path_max); - len = strlen (end); - if (path_max - n <= len) - { - __set_errno (ENAMETOOLONG); - goto error; - } - - /* Careful here, end may be a pointer into extra_buf... */ - memmove (&extra_buf[n], end, len + 1); - name = end = memcpy (extra_buf, buf, n); + name = end = extra_buf.data; - if (buf[0] == '/') + if (((char *)extra_buf.data)[0] == '/') dest = rpath + 1; /* It's an absolute symlink */ else /* Back up to previous component, ignore if at root already: */ @@ -209,6 +234,8 @@ __realpath (const char *name, char *resolved) --dest; *dest = '\0'; + scratch_buffer_free (&extra_buf); + assert (resolved == NULL || resolved == rpath); return rpath; @@ -216,6 +243,7 @@ error: assert (resolved == NULL || resolved == rpath); if (resolved == NULL) free (rpath); + scratch_buffer_free (&extra_buf); return NULL; } libc_hidden_def (__realpath) ^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) 2020-08-12 23:04 ` Adhemerval Zanella @ 2020-08-13 20:29 ` Adhemerval Zanella 0 siblings, 0 replies; 26+ messages in thread From: Adhemerval Zanella @ 2020-08-13 20:29 UTC (permalink / raw) To: Xiaoming Ni, libc-alpha On 12/08/2020 20:04, Adhemerval Zanella wrote: > > > On 11/08/2020 22:38, Xiaoming Ni wrote: > With something like below we can make the realpath uses a stack of about > ~1024 and ~2048 if the path contains symbolic link: > > @@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved) > > if (S_ISLNK (st.st_mode)) > { > - char *buf = __alloca (path_max); > - size_t len; > - > if (++num_links > __eloop_threshold ()) > { > __set_errno (ELOOP); > goto error; > } > > - n = __readlink (rpath, buf, path_max - 1); > - if (n < 0) > + if (! realpath_readlink (rpath, end, path_max, st.st_size, > + &extra_buf)) Scratch that, unfortunately some Linux filesystems do not return the symlink target file size with a lstat call (procfs and sysfs for instance). So we need to either issue multiple readlink with different increasing buffers or assume PATH_MAX (as current algorithms does). I will send a v3 of my patch to use a scratch buffer. What I am not sure is if the Linux optimization is worth if the idea is use a small stack as possible for common cases since it trades some syscall by a higher stack usage. ^ permalink raw reply [flat|nested] 26+ messages in thread
end of thread, other threads:[~2020-08-17 16:17 UTC | newest] Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella 2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella 2020-08-11 8:26 ` Florian Weimer 2020-08-11 9:54 ` Andreas Schwab 2020-08-11 10:24 ` Florian Weimer 2020-08-11 15:05 ` Adhemerval Zanella 2020-08-11 15:37 ` Paul Eggert 2020-08-11 18:29 ` Florian Weimer 2020-08-11 9:48 ` Andreas Schwab 2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella 2020-08-10 21:25 ` Paul Eggert 2020-08-11 14:14 ` Adhemerval Zanella 2020-08-11 15:18 ` Adhemerval Zanella 2020-08-11 15:52 ` Paul Eggert 2020-08-11 19:01 ` Adhemerval Zanella 2020-08-11 16:46 ` Andreas Schwab 2020-08-17 14:00 ` Dmitry V. Levin 2020-08-17 15:13 ` Andreas Schwab 2020-08-17 16:17 ` Dmitry V. Levin 2020-08-11 9:46 ` Andreas Schwab 2020-08-11 0:32 ` [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Matt Turner 2020-08-11 3:00 ` Xiaoming Ni 2020-08-11 14:57 ` Adhemerval Zanella 2020-08-12 1:38 ` Xiaoming Ni 2020-08-12 23:04 ` Adhemerval Zanella 2020-08-13 20:29 ` Adhemerval Zanella
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).