public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
@ 2020-08-10 20:48 Adhemerval Zanella
  2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella
                   ` (3 more replies)
  0 siblings, 4 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-10 20:48 UTC (permalink / raw)
  To: libc-alpha; +Cc: Xiaoming Ni, Paul Eggert

It uses both a fixed internal buffer with PATH_MAX size to read and
copy the results of the readlink call.

Also, if PATH_MAX is not defined it uses a default value of 1024
as for other stdlib implementations.

The expected stack usage is about 8k on Linux where PATH_MAX is
define as 4096 (plus some internal function usage for local
variable).

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 stdlib/Makefile                               |   3 +-
 stdlib/canonicalize.c                         |  38 +++---
 stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
 support/support_set_small_thread_stack_size.c |  12 +-
 support/xthread.h                             |   2 +
 5 files changed, 138 insertions(+), 25 deletions(-)
 create mode 100644 stdlib/tst-canon-bz26341.c

diff --git a/stdlib/Makefile b/stdlib/Makefile
index 4615f6dfe7..7093b8a584 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
 		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
 		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
-		   tst-setcontext9 tst-bz20544
+		   tst-setcontext9 tst-bz20544 tst-canon-bz26341
 
 tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
 		   tst-tls-atexit tst-tls-atexit-nodelete
@@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
 LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
 LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
 LDLIBS-test-on_exit-race = $(shared-thread-library)
+LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
 
 LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
 LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index cbd885a3c5..554ba221e4 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -28,6 +28,14 @@
 #include <eloop-threshold.h>
 #include <shlib-compat.h>
 
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+#  define PATH_MAX MAXPATHLEN
+# else
+#  define PATH_MAX 1024
+# endif
+#endif
+
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any `.', `..' components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -42,9 +50,8 @@
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, *extra_buf = NULL;
+  char *rpath, *dest, extra_buf[PATH_MAX];
   const char *start, *end, *rpath_limit;
-  long int path_max;
   int num_links = 0;
 
   if (name == NULL)
@@ -65,27 +72,19 @@ __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-#ifdef PATH_MAX
-  path_max = PATH_MAX;
-#else
-  path_max = __pathconf (name, _PC_PATH_MAX);
-  if (path_max <= 0)
-    path_max = 1024;
-#endif
-
   if (resolved == NULL)
     {
-      rpath = malloc (path_max);
+      rpath = malloc (PATH_MAX);
       if (rpath == NULL)
 	return NULL;
     }
   else
     rpath = resolved;
-  rpath_limit = rpath + path_max;
+  rpath_limit = rpath + PATH_MAX;
 
   if (name[0] != '/')
     {
-      if (!__getcwd (rpath, path_max))
+      if (!__getcwd (rpath, PATH_MAX))
 	{
 	  rpath[0] = '\0';
 	  goto error;
@@ -142,10 +141,10 @@ __realpath (const char *name, char *resolved)
 		  goto error;
 		}
 	      new_size = rpath_limit - rpath;
-	      if (end - start + 1 > path_max)
+	      if (end - start + 1 > PATH_MAX)
 		new_size += end - start + 1;
 	      else
-		new_size += path_max;
+		new_size += PATH_MAX;
 	      new_rpath = (char *) realloc (rpath, new_size);
 	      if (new_rpath == NULL)
 		goto error;
@@ -163,7 +162,7 @@ __realpath (const char *name, char *resolved)
 
 	  if (S_ISLNK (st.st_mode))
 	    {
-	      char *buf = __alloca (path_max);
+	      char buf[PATH_MAX];
 	      size_t len;
 
 	      if (++num_links > __eloop_threshold ())
@@ -172,16 +171,13 @@ __realpath (const char *name, char *resolved)
 		  goto error;
 		}
 
-	      n = __readlink (rpath, buf, path_max - 1);
+	      n = __readlink (rpath, buf, sizeof (buf) - 1);
 	      if (n < 0)
 		goto error;
 	      buf[n] = '\0';
 
-	      if (!extra_buf)
-		extra_buf = __alloca (path_max);
-
 	      len = strlen (end);
-	      if (path_max - n <= len)
+	      if (PATH_MAX - n <= len)
 		{
 		  __set_errno (ENAMETOOLONG);
 		  goto error;
diff --git a/stdlib/tst-canon-bz26341.c b/stdlib/tst-canon-bz26341.c
new file mode 100644
index 0000000000..63474bddaa
--- /dev/null
+++ b/stdlib/tst-canon-bz26341.c
@@ -0,0 +1,108 @@
+/* Check if realpath does not consume extra stack space based on symlink
+   existance in the path (BZ #26341)
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/xunistd.h>
+#include <support/xthread.h>
+
+static char *filename;
+static size_t filenamelen;
+static char *linkname;
+
+static int
+maxsymlinks (void)
+{
+#ifdef MAXSYMLINKS
+  return MAXSYMLINKS;
+#else
+  long int sysconf_symloop_max = sysconf (_SC_SYMLOOP_MAX);
+  return sysconf_symloop_max <= 0
+	 ? _POSIX_SYMLOOP_MAX
+	 : sysconf_symloop_max;
+#endif
+}
+
+#ifndef PATH_MAX
+# define PATH_MAX 1024
+#endif
+
+static void
+create_link (void)
+{
+  int fd = create_temp_file ("tst-canon-bz26341", &filename);
+  TEST_VERIFY_EXIT (fd != -1);
+  xclose (fd);
+
+  char *prevlink = filename;
+  int maxlinks = maxsymlinks ();
+  for (int i = 0; i < maxlinks; i++)
+    {
+      linkname = xasprintf ("%s%d", filename, i);
+      xsymlink (prevlink, linkname);
+      add_temp_file (linkname);
+      prevlink = linkname;
+    }
+
+  filenamelen = strlen (filename);
+}
+
+static void *
+do_realpath (void *arg)
+{
+  /* Old implementation of realpath allocates a PATH_MAX using alloca
+     for each symlink in the path, leading to MAXSYMLINKS times PATH_MAX
+     maximum stack usage.
+     This stack allocations tries fill the thread allocated stack minus
+     both the thread (plus some slack) and the realpath (plus some slack).
+     If realpath uses more than 2 * PATH_MAX plus some slack it will trigger
+     a stackoverflow.  */
+
+  const size_t realpath_usage = 2 * PATH_MAX + 1024;
+  const size_t thread_usage = 1 * PATH_MAX + 1024;
+  size_t stack_size = support_small_thread_stack_size ()
+		      - realpath_usage - thread_usage;
+  char stack[stack_size];
+  char *resolved = stack + stack_size - thread_usage + 1024;
+
+  char *p = realpath (linkname, resolved);
+  TEST_VERIFY (p != NULL);
+  TEST_COMPARE_BLOB (resolved, filenamelen, filename, filenamelen);
+
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  create_link ();
+
+  pthread_t th = xpthread_create (support_small_stack_thread_attribute (),
+				  do_realpath, NULL);
+  xpthread_join (th);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/support/support_set_small_thread_stack_size.c b/support/support_set_small_thread_stack_size.c
index 69d66e97db..74a0e38a72 100644
--- a/support/support_set_small_thread_stack_size.c
+++ b/support/support_set_small_thread_stack_size.c
@@ -20,8 +20,8 @@
 #include <pthread.h>
 #include <support/xthread.h>
 
-void
-support_set_small_thread_stack_size (pthread_attr_t *attr)
+size_t
+support_small_thread_stack_size (void)
 {
   /* Some architectures have too small values for PTHREAD_STACK_MIN
      which cannot be used for creating threads.  Ensure that the stack
@@ -31,5 +31,11 @@ support_set_small_thread_stack_size (pthread_attr_t *attr)
   if (stack_size < PTHREAD_STACK_MIN)
     stack_size = PTHREAD_STACK_MIN;
 #endif
-  xpthread_attr_setstacksize (attr, stack_size);
+  return stack_size;
+}
+
+void
+support_set_small_thread_stack_size (pthread_attr_t *attr)
+{
+  xpthread_attr_setstacksize (attr, support_small_thread_stack_size ());
 }
diff --git a/support/xthread.h b/support/xthread.h
index 05f8d4a7d9..6ba2f5a18b 100644
--- a/support/xthread.h
+++ b/support/xthread.h
@@ -78,6 +78,8 @@ void xpthread_attr_setguardsize (pthread_attr_t *attr,
 /* Set the stack size in ATTR to a small value, but still large enough
    to cover most internal glibc stack usage.  */
 void support_set_small_thread_stack_size (pthread_attr_t *attr);
+/* Return the stack size used on support_set_small_thread_stack_size.  */
+size_t support_small_thread_stack_size (void);
 
 /* Return a pointer to a thread attribute which requests a small
    stack.  The caller must not free this pointer.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella
@ 2020-08-10 20:48 ` Adhemerval Zanella
  2020-08-11  8:26   ` Florian Weimer
  2020-08-11  9:48   ` Andreas Schwab
  2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-10 20:48 UTC (permalink / raw)
  To: libc-alpha; +Cc: Xiaoming Ni, Paul Eggert

It removes the buffer resize for sizes larger than PATH_MAX in the
case where the 'resolved' buffer is not specified.  This allow assume
realpath limit is PATH_MAX for all cases.

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 stdlib/canonicalize.c | 30 +++++-------------------------
 1 file changed, 5 insertions(+), 25 deletions(-)

diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 554ba221e4..91c30e38be 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -122,36 +122,16 @@ __realpath (const char *name, char *resolved)
 	}
       else
 	{
-	  size_t new_size;
-
 	  if (dest[-1] != '/')
 	    *dest++ = '/';
 
 	  if (dest + (end - start) >= rpath_limit)
 	    {
-	      ptrdiff_t dest_offset = dest - rpath;
-	      char *new_rpath;
-
-	      if (resolved)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  if (dest > rpath + 1)
-		    dest--;
-		  *dest = '\0';
-		  goto error;
-		}
-	      new_size = rpath_limit - rpath;
-	      if (end - start + 1 > PATH_MAX)
-		new_size += end - start + 1;
-	      else
-		new_size += PATH_MAX;
-	      new_rpath = (char *) realloc (rpath, new_size);
-	      if (new_rpath == NULL)
-		goto error;
-	      rpath = new_rpath;
-	      rpath_limit = rpath + new_size;
-
-	      dest = rpath + dest_offset;
+	      __set_errno (ENAMETOOLONG);
+	      if (dest > rpath + 1)
+		dest--;
+	      *dest = '\0';
+	      goto error;
 	    }
 
 	  dest = __mempcpy (dest, start, end - start);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella
  2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella
@ 2020-08-10 20:48 ` Adhemerval Zanella
  2020-08-10 21:25   ` Paul Eggert
  2020-08-11  9:46   ` Andreas Schwab
  2020-08-11  0:32 ` [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Matt Turner
  2020-08-11  3:00 ` Xiaoming Ni
  3 siblings, 2 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-10 20:48 UTC (permalink / raw)
  To: libc-alpha; +Cc: Xiaoming Ni, Paul Eggert

This optimizes the stack usage for success case (from ~8K to ~4k) and
where 'resolved' input buffer is not provided.  For ithe failure case
when the 'resolved' buffer is provided, it requires use the generic
strategy to find the path when EACESS or ENOENT is returned (this is
a GNU extension not defined in the standard).

Regarding syscalls usage, for a sucessful path without symlinks it
trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat,
 and close).  Its is slighter better if the input contains multiple
symlinks (where Linux kernel tricks allows replace multiple lstats by
only one readlink).  For failure it depends whether the 'resolved' buffer
is provided, which will call the old strategy (and thus requiring more
syscalls in general).

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 include/stdlib.h                   |  13 +++
 stdlib/Makefile                    |   2 +-
 stdlib/canonicalize-internal.c     | 136 ++++++++++++++++++++++++++++
 stdlib/canonicalize.c              | 141 +----------------------------
 stdlib/realpath.c                  |  42 +++++++++
 sysdeps/unix/sysv/linux/realpath.c |  65 +++++++++++++
 6 files changed, 258 insertions(+), 141 deletions(-)
 create mode 100644 stdlib/canonicalize-internal.c
 create mode 100644 stdlib/realpath.c
 create mode 100644 sysdeps/unix/sysv/linux/realpath.c

diff --git a/include/stdlib.h b/include/stdlib.h
index ffcefd7b85..dd51f66b26 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -20,6 +20,14 @@
 
 # include <rtld-malloc.h>
 
+# ifndef PATH_MAX
+#  ifdef MAXPATHLEN
+#   define PATH_MAX MAXPATHLEN
+#  else
+#   define PATH_MAX 1024
+#  endif
+# endif
+
 extern __typeof (strtol_l) __strtol_l;
 extern __typeof (strtoul_l) __strtoul_l;
 extern __typeof (strtoll_l) __strtoll_l;
@@ -92,6 +100,11 @@ extern int __unsetenv (const char *__name) attribute_hidden;
 extern int __clearenv (void) attribute_hidden;
 extern char *__mktemp (char *__template) __THROW __nonnull ((1));
 extern char *__canonicalize_file_name (const char *__name);
+extern _Bool __resolve_path (const char *name, char *resolved,
+			     size_t path_max)
+     attribute_hidden;
+extern char *__realpath_system (const char *name, char *resolved)
+     attribute_hidden;
 extern char *__realpath (const char *__name, char *__resolved);
 libc_hidden_proto (__realpath)
 extern int __ptsname_r (int __fd, char *__buf, size_t __buflen)
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 7093b8a584..35ca04541f 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -53,7 +53,7 @@ routines	:=							      \
 	strtof strtod strtold						      \
 	strtof_l strtod_l strtold_l					      \
 	strtof_nan strtod_nan strtold_nan				      \
-	system canonicalize						      \
+	system realpath canonicalize canonicalize-internal		      \
 	a64l l64a							      \
 	rpmatch strfmon strfmon_l getsubopt xpg_basename fmtmsg		      \
 	strtoimax strtoumax wcstoimax wcstoumax				      \
diff --git a/stdlib/canonicalize-internal.c b/stdlib/canonicalize-internal.c
new file mode 100644
index 0000000000..1b5f73a1cc
--- /dev/null
+++ b/stdlib/canonicalize-internal.c
@@ -0,0 +1,136 @@
+/* Internal function for canonicalize absolute name of a given file.
+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <eloop-threshold.h>
+
+_Bool
+__resolve_path (const char *name, char *resolved, size_t path_max)
+{
+  const char *start, *end;
+  char *rpath = resolved;
+  char *rpath_limit = rpath + path_max;
+  char *dest = resolved;
+  char extra_buf[PATH_MAX];
+  int num_links = 0;
+
+  if (name[0] != '/')
+    {
+      if (__getcwd (rpath, path_max) == NULL)
+	{
+	  rpath[0] = '\0';
+	  return false;
+	}
+      dest = __rawmemchr (rpath, '\0');
+    }
+  else
+    {
+      rpath[0] = '/';
+      dest = rpath + 1;
+    }
+
+  for (start = end = name; *start; start = end)
+    {
+      /* Skip sequence of multiple path-separators.  */
+      while (*start == '/')
+	++start;
+
+      /* Find end of path component.  */
+      for (end = start; *end && *end != '/'; ++end)
+	/* Nothing.  */;
+
+      if (end - start == 0)
+	break;
+      else if (end - start == 1 && start[0] == '.')
+	/* nothing */;
+      else if (end - start == 2 && start[0] == '.' && start[1] == '.')
+	{
+	  /* Back up to previous component, ignore if at root already.  */
+	  if (dest > rpath + 1)
+	    while ((--dest)[-1] != '/');
+	}
+      else
+	{
+	  struct stat64 st;
+
+	  if (dest[-1] != '/')
+	    *dest++ = '/';
+	  if (dest + (end - start) >= rpath_limit)
+	    {
+	      __set_errno (ENAMETOOLONG);
+	      if (dest > rpath + 1)
+		dest--;
+	      *dest = '\0';
+	      return false;
+	    }
+
+	  dest = __mempcpy (dest, start, end - start);
+	  *dest = '\0';
+
+	  if (__lstat64 (rpath, &st) < 0)
+	    return false;
+
+	  if (S_ISLNK (st.st_mode))
+	    {
+	      if (++num_links > __eloop_threshold ())
+		{
+		  __set_errno (ELOOP);
+		  return false;
+		}
+
+	      char buf[PATH_MAX];
+	      ssize_t n = __readlink (rpath, buf, sizeof (buf) - 1);
+	      if (n < 0)
+		return false;
+	      buf[n] = '\0';
+
+	      size_t len = strlen (end);
+	      if (path_max - n <= len)
+		{
+		  __set_errno (ENAMETOOLONG);
+		  return false;
+		}
+
+	      memmove (&extra_buf[n], end, len + 1);
+	      name = end = memcpy (extra_buf, buf, n);
+
+	      if (buf[0] == '/')
+		dest = rpath + 1;	/* It's an absolute symlink */
+	      else
+		/* Back up to previous component, ignore if at root already: */
+		if (dest > rpath + 1)
+		  while ((--dest)[-1] != '/');
+	    }
+	  else if (!S_ISDIR (st.st_mode) && *end != '\0')
+	    {
+	      __set_errno (ENOTDIR);
+	      return false;
+	    }
+	}
+    }
+
+  if (dest > rpath + 1 && dest[-1] == '/')
+    --dest;
+  *dest = '\0';
+
+  return true;
+}
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 91c30e38be..f4ab528a15 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -16,26 +16,11 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <assert.h>
 #include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/stat.h>
 #include <errno.h>
-#include <stddef.h>
 
-#include <eloop-threshold.h>
 #include <shlib-compat.h>
 
-#ifndef PATH_MAX
-# ifdef MAXPATHLEN
-#  define PATH_MAX MAXPATHLEN
-# else
-#  define PATH_MAX 1024
-# endif
-#endif
-
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any `.', `..' components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -50,10 +35,6 @@
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, extra_buf[PATH_MAX];
-  const char *start, *end, *rpath_limit;
-  int num_links = 0;
-
   if (name == NULL)
     {
       /* As per Single Unix Specification V2 we must return an error if
@@ -72,127 +53,7 @@ __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-  if (resolved == NULL)
-    {
-      rpath = malloc (PATH_MAX);
-      if (rpath == NULL)
-	return NULL;
-    }
-  else
-    rpath = resolved;
-  rpath_limit = rpath + PATH_MAX;
-
-  if (name[0] != '/')
-    {
-      if (!__getcwd (rpath, PATH_MAX))
-	{
-	  rpath[0] = '\0';
-	  goto error;
-	}
-      dest = __rawmemchr (rpath, '\0');
-    }
-  else
-    {
-      rpath[0] = '/';
-      dest = rpath + 1;
-    }
-
-  for (start = end = name; *start; start = end)
-    {
-      struct stat64 st;
-      int n;
-
-      /* Skip sequence of multiple path-separators.  */
-      while (*start == '/')
-	++start;
-
-      /* Find end of path component.  */
-      for (end = start; *end && *end != '/'; ++end)
-	/* Nothing.  */;
-
-      if (end - start == 0)
-	break;
-      else if (end - start == 1 && start[0] == '.')
-	/* nothing */;
-      else if (end - start == 2 && start[0] == '.' && start[1] == '.')
-	{
-	  /* Back up to previous component, ignore if at root already.  */
-	  if (dest > rpath + 1)
-	    while ((--dest)[-1] != '/');
-	}
-      else
-	{
-	  if (dest[-1] != '/')
-	    *dest++ = '/';
-
-	  if (dest + (end - start) >= rpath_limit)
-	    {
-	      __set_errno (ENAMETOOLONG);
-	      if (dest > rpath + 1)
-		dest--;
-	      *dest = '\0';
-	      goto error;
-	    }
-
-	  dest = __mempcpy (dest, start, end - start);
-	  *dest = '\0';
-
-	  if (__lxstat64 (_STAT_VER, rpath, &st) < 0)
-	    goto error;
-
-	  if (S_ISLNK (st.st_mode))
-	    {
-	      char buf[PATH_MAX];
-	      size_t len;
-
-	      if (++num_links > __eloop_threshold ())
-		{
-		  __set_errno (ELOOP);
-		  goto error;
-		}
-
-	      n = __readlink (rpath, buf, sizeof (buf) - 1);
-	      if (n < 0)
-		goto error;
-	      buf[n] = '\0';
-
-	      len = strlen (end);
-	      if (PATH_MAX - n <= len)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  goto error;
-		}
-
-	      /* Careful here, end may be a pointer into extra_buf... */
-	      memmove (&extra_buf[n], end, len + 1);
-	      name = end = memcpy (extra_buf, buf, n);
-
-	      if (buf[0] == '/')
-		dest = rpath + 1;	/* It's an absolute symlink */
-	      else
-		/* Back up to previous component, ignore if at root already: */
-		if (dest > rpath + 1)
-		  while ((--dest)[-1] != '/');
-	    }
-	  else if (!S_ISDIR (st.st_mode) && *end != '\0')
-	    {
-	      __set_errno (ENOTDIR);
-	      goto error;
-	    }
-	}
-    }
-  if (dest > rpath + 1 && dest[-1] == '/')
-    --dest;
-  *dest = '\0';
-
-  assert (resolved == NULL || resolved == rpath);
-  return rpath;
-
-error:
-  assert (resolved == NULL || resolved == rpath);
-  if (resolved == NULL)
-    free (rpath);
-  return NULL;
+  return __realpath_system (name, resolved);
 }
 libc_hidden_def (__realpath)
 versioned_symbol (libc, __realpath, realpath, GLIBC_2_3);
diff --git a/stdlib/realpath.c b/stdlib/realpath.c
new file mode 100644
index 0000000000..1a70c658b7
--- /dev/null
+++ b/stdlib/realpath.c
@@ -0,0 +1,42 @@
+/* Return the canonical absolute name of a given file.
+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <shlib-compat.h>
+
+char *
+__realpath_system (const char *name, char *resolved)
+{
+  bool resolved_malloc = false;
+  if (resolved == NULL)
+    {
+      resolved = malloc (PATH_MAX);
+      if (resolved == NULL)
+	return NULL;
+      resolved_malloc = true;
+    }
+
+  if (! __resolve_path (name, resolved, PATH_MAX))
+    {
+      if (resolved_malloc)
+      	free (resolved);
+      return NULL;
+    }
+  return resolved;
+}
diff --git a/sysdeps/unix/sysv/linux/realpath.c b/sysdeps/unix/sysv/linux/realpath.c
new file mode 100644
index 0000000000..4c69011f92
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/realpath.c
@@ -0,0 +1,65 @@
+/* Return the canonical absolute name of a given file.  Linux version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <not-cancel.h>
+#include <shlib-compat.h>
+
+char *
+__realpath_system (const char *name, char *resolved)
+{
+  int fd = __open64_nocancel (name, O_PATH | O_NONBLOCK | O_CLOEXEC);
+  if (fd == -1)
+    {
+      /* If the call fails with either EACCES or ENOENT and resolved_path is
+	 not NULL, then the prefix of path that is not readable or does not
+	 exist is returned in resolved_path.  This is a GNU extension.  */
+      if (resolved != NULL)
+	__resolve_path (name, resolved, PATH_MAX);
+      return NULL;
+    }
+
+  char procname[sizeof ("/proc/self/fd/") + 3 * sizeof (int)];
+  *_fitoa_word (fd, __stpcpy (procname, "/proc/self/fd/"), 10, 0) = '\0';
+
+  char path[PATH_MAX];
+  ssize_t len = __readlink (procname, path, sizeof (path) - 1);
+  if (len < 0)
+    {
+      __close_nocancel_nostatus (fd);
+      return NULL;
+    }
+  path[len] = '\0';
+
+  struct stat64 st;
+  fstat64 (fd, &st);
+  dev_t st_dev = st.st_dev;
+  ino_t st_ino = st.st_ino;
+  int r = stat64 (path, &st);
+  if (r == -1 || st.st_dev != st_dev || st.st_ino != st_ino)
+    {
+      if (r == 0)
+	__set_errno (ELOOP);
+      __close_nocancel_nostatus (fd);
+      return NULL;
+    }
+
+  __close_nocancel_nostatus (fd);
+  return resolved != NULL ? strcpy (resolved, path) : __strdup (path);
+}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella
@ 2020-08-10 21:25   ` Paul Eggert
  2020-08-11 14:14     ` Adhemerval Zanella
  2020-08-11 16:46     ` Andreas Schwab
  2020-08-11  9:46   ` Andreas Schwab
  1 sibling, 2 replies; 26+ messages in thread
From: Paul Eggert @ 2020-08-10 21:25 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha; +Cc: Xiaoming Ni

On 8/10/20 1:48 PM, Adhemerval Zanella wrote:

> Regarding syscalls usage, for a sucessful path without symlinks it
> trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat,
>   and close). 

Thanks for looking into this.

There's no need for the new __resolve_path function to call __lstat64. It does 
that only to determine whether the file is a symlink or a directory, and it can 
do the former with __readlink (which it's gonna do already) and the latter by 
going into the next iteration and seeing whether it gets NOTDIR. Avoiding 
__lstat64 means we don't have to worry about irrelevant EOVERFLOW hassles.

In the Linux __realpath_system, the code should be prepared for the /proc 
syscall to fail because /proc is not mounted. It can fall back on __resolve_path 
in that case. The code should be prepared for the fstat64 to fail due to 
EOVERFLOW. And I'm not quite getting why the code is comparing fstat64's result 
to stat64's result -- perhaps explain this in a comment and how EOVERFLOW is 
dealt with?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
  2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella
  2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella
  2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella
@ 2020-08-11  0:32 ` Matt Turner
  2020-08-11  3:00 ` Xiaoming Ni
  3 siblings, 0 replies; 26+ messages in thread
From: Matt Turner @ 2020-08-11  0:32 UTC (permalink / raw)
  To: Adhemerval Zanella; +Cc: GNU C Library, Xiaoming Ni

> [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)

The correct bug number is #26341.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
  2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella
                   ` (2 preceding siblings ...)
  2020-08-11  0:32 ` [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Matt Turner
@ 2020-08-11  3:00 ` Xiaoming Ni
  2020-08-11 14:57   ` Adhemerval Zanella
  3 siblings, 1 reply; 26+ messages in thread
From: Xiaoming Ni @ 2020-08-11  3:00 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha

On 2020/8/11 4:48, Adhemerval Zanella wrote:
> It uses both a fixed internal buffer with PATH_MAX size to read and
> copy the results of the readlink call.
> 
> Also, if PATH_MAX is not defined it uses a default value of 1024
> as for other stdlib implementations.
> 
> The expected stack usage is about 8k on Linux where PATH_MAX is
> define as 4096 (plus some internal function usage for local
> variable).
> 
> Checked on x86_64-linux-gnu and i686-linux-gnu.
> ---
>   stdlib/Makefile                               |   3 +-
>   stdlib/canonicalize.c                         |  38 +++---
>   stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>   support/support_set_small_thread_stack_size.c |  12 +-
>   support/xthread.h                             |   2 +
>   5 files changed, 138 insertions(+), 25 deletions(-)
>   create mode 100644 stdlib/tst-canon-bz26341.c
> 
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index 4615f6dfe7..7093b8a584 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
>   		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>   		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>   		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
> -		   tst-setcontext9 tst-bz20544
> +		   tst-setcontext9 tst-bz20544 tst-canon-bz26341
>   
>   tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>   		   tst-tls-atexit tst-tls-atexit-nodelete
> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>   LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>   LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>   LDLIBS-test-on_exit-race = $(shared-thread-library)
> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>   
>   LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>   LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
> index cbd885a3c5..554ba221e4 100644
> --- a/stdlib/canonicalize.c
> +++ b/stdlib/canonicalize.c
> @@ -28,6 +28,14 @@
>   #include <eloop-threshold.h>
>   #include <shlib-compat.h>
>   
> +#ifndef PATH_MAX
> +# ifdef MAXPATHLEN
> +#  define PATH_MAX MAXPATHLEN
> +# else
> +#  define PATH_MAX 1024
> +# endif
> +#endif
> +
>   /* Return the canonical absolute name of file NAME.  A canonical name
>      does not contain any `.', `..' components nor any repeated path
>      separators ('/') or symlinks.  All path components must exist.  If
> @@ -42,9 +50,8 @@
>   char *
>   __realpath (const char *name, char *resolved)
>   {
> -  char *rpath, *dest, *extra_buf = NULL;
> +  char *rpath, *dest, extra_buf[PATH_MAX];
Why does the 4 KB stack space need to be occupied?  Even if there are no 
linked files ?





^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella
@ 2020-08-11  8:26   ` Florian Weimer
  2020-08-11  9:54     ` Andreas Schwab
  2020-08-11  9:48   ` Andreas Schwab
  1 sibling, 1 reply; 26+ messages in thread
From: Florian Weimer @ 2020-08-11  8:26 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Xiaoming Ni

* Adhemerval Zanella via Libc-alpha:

> It removes the buffer resize for sizes larger than PATH_MAX in the
> case where the 'resolved' buffer is not specified.  This allow assume
> realpath limit is PATH_MAX for all cases.

Is that actually true, in the sense that the full path cannot be
longer than PATH_MAX?

I don't think Linux has such a restriction.  One cannot open such
files directly, but they can exist.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella
  2020-08-10 21:25   ` Paul Eggert
@ 2020-08-11  9:46   ` Andreas Schwab
  1 sibling, 0 replies; 26+ messages in thread
From: Andreas Schwab @ 2020-08-11  9:46 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Xiaoming Ni

On Aug 10 2020, Adhemerval Zanella via Libc-alpha wrote:

> This optimizes the stack usage for success case (from ~8K to ~4k) and
> where 'resolved' input buffer is not provided.  For ithe failure case

s/ithe/the/

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella
  2020-08-11  8:26   ` Florian Weimer
@ 2020-08-11  9:48   ` Andreas Schwab
  1 sibling, 0 replies; 26+ messages in thread
From: Andreas Schwab @ 2020-08-11  9:48 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Xiaoming Ni

On Aug 10 2020, Adhemerval Zanella via Libc-alpha wrote:

> It removes the buffer resize for sizes larger than PATH_MAX in the
> case where the 'resolved' buffer is not specified.  This allow assume
> realpath limit is PATH_MAX for all cases.

"allows to assume" or "assumes".

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-11  8:26   ` Florian Weimer
@ 2020-08-11  9:54     ` Andreas Schwab
  2020-08-11 10:24       ` Florian Weimer
  0 siblings, 1 reply; 26+ messages in thread
From: Andreas Schwab @ 2020-08-11  9:54 UTC (permalink / raw)
  To: Florian Weimer; +Cc: Adhemerval Zanella via Libc-alpha, Xiaoming Ni

On Aug 11 2020, Florian Weimer wrote:

> I don't think Linux has such a restriction.  One cannot open such
> files directly, but they can exist.

You can open them with a relative name as long as cwd is nearer than
PATH_MAX (or use openat with such a directory handle).

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-11  9:54     ` Andreas Schwab
@ 2020-08-11 10:24       ` Florian Weimer
  2020-08-11 15:05         ` Adhemerval Zanella
  0 siblings, 1 reply; 26+ messages in thread
From: Florian Weimer @ 2020-08-11 10:24 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Adhemerval Zanella via Libc-alpha, Xiaoming Ni

* Andreas Schwab:

> On Aug 11 2020, Florian Weimer wrote:
>
>> I don't think Linux has such a restriction.  One cannot open such
>> files directly, but they can exist.
>
> You can open them with a relative name as long as cwd is nearer than
> PATH_MAX (or use openat with such a directory handle).

Yes, that's what I meant: one cannot use the full path directly.  It
has to be split up in some way.

I still think realpath should report the full path for them, and not
give up with an error.  Similar for getcwd.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-10 21:25   ` Paul Eggert
@ 2020-08-11 14:14     ` Adhemerval Zanella
  2020-08-11 15:18       ` Adhemerval Zanella
  2020-08-11 15:52       ` Paul Eggert
  2020-08-11 16:46     ` Andreas Schwab
  1 sibling, 2 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-11 14:14 UTC (permalink / raw)
  To: Paul Eggert, libc-alpha; +Cc: Xiaoming Ni



On 10/08/2020 18:25, Paul Eggert wrote:
> On 8/10/20 1:48 PM, Adhemerval Zanella wrote:
> 
>> Regarding syscalls usage, for a sucessful path without symlinks it
>> trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat,
>>   and close). 
> 
> Thanks for looking into this.
> 
> There's no need for the new __resolve_path function to call __lstat64. It does that only to determine whether the file is a symlink or a directory, and it can do the former with __readlink (which it's gonna do already) and the latter by going into the next iteration and seeing whether it gets NOTDIR. Avoiding __lstat64 means we don't have to worry about irrelevant EOVERFLOW hassles.

I am not sure we can skip the __lstat64 because we can't check if a subpath
passed on __readlink is a directory or not (so we can move to next iteration
in some cases).

For instance the test 30 from stdlib/test-canon.c which issues:

  realpath ("./doesExist/someFile/", ...)

On the directory with:

  mkdir ("doesExist", 0777)
  creat ("doesExist/someFile", 0777)

By just issuing the readlink on (strace output):

  readlink(".../doesExist", 0x7fff32598060, 4095) = -1 EINVAL

We can't see that is a S_ISDIR to go for next iteration (and then fail on next
iteration since "../doesExist/someFile' is not a directory end *end != '\0').

> 
> In the Linux __realpath_system, the code should be prepared for the /proc syscall to fail because /proc is not mounted. It can fall back on __resolve_path in that case. The code should be prepared for the fstat64 to fail due to EOVERFLOW. And I'm not quite getting why the code is comparing fstat64's result to stat64's result -- perhaps explain this in a comment and how EOVERFLOW is dealt with?

Right, I though about the '/proc' failure and decided to fail based on 
on other implementations (fexecve fallback, fchmodat), but the 
__resolve_path should be ok to use in such case.

For fstat64, afaik EOVERFLOW is returned only when called non-LFS interface
which is not this case. Do you think re really should handle EOVERFLOW ?

The fstat64/stat64 comparison is just a small check to see if the
file was removed between calls.  Thinking twice I am not sure how
effective this really is, maybe remove it?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
  2020-08-11  3:00 ` Xiaoming Ni
@ 2020-08-11 14:57   ` Adhemerval Zanella
  2020-08-12  1:38     ` Xiaoming Ni
  0 siblings, 1 reply; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-11 14:57 UTC (permalink / raw)
  To: Xiaoming Ni, libc-alpha



On 11/08/2020 00:00, Xiaoming Ni wrote:
> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>> It uses both a fixed internal buffer with PATH_MAX size to read and
>> copy the results of the readlink call.
>>
>> Also, if PATH_MAX is not defined it uses a default value of 1024
>> as for other stdlib implementations.
>>
>> The expected stack usage is about 8k on Linux where PATH_MAX is
>> define as 4096 (plus some internal function usage for local
>> variable).
>>
>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>> ---
>>   stdlib/Makefile                               |   3 +-
>>   stdlib/canonicalize.c                         |  38 +++---
>>   stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>>   support/support_set_small_thread_stack_size.c |  12 +-
>>   support/xthread.h                             |   2 +
>>   5 files changed, 138 insertions(+), 25 deletions(-)
>>   create mode 100644 stdlib/tst-canon-bz26341.c
>>
>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>> index 4615f6dfe7..7093b8a584 100644
>> --- a/stdlib/Makefile
>> +++ b/stdlib/Makefile
>> @@ -87,7 +87,7 @@ tests        := tst-strtol tst-strtod testmb testrand testsort testdiv   \
>>              tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>              tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>              tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>> -           tst-setcontext9 tst-bz20544
>> +           tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>     tests-internal    := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>              tst-tls-atexit tst-tls-atexit-nodelete
>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>   LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>   LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>   LDLIBS-test-on_exit-race = $(shared-thread-library)
>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>     LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>   LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>> index cbd885a3c5..554ba221e4 100644
>> --- a/stdlib/canonicalize.c
>> +++ b/stdlib/canonicalize.c
>> @@ -28,6 +28,14 @@
>>   #include <eloop-threshold.h>
>>   #include <shlib-compat.h>
>>   +#ifndef PATH_MAX
>> +# ifdef MAXPATHLEN
>> +#  define PATH_MAX MAXPATHLEN
>> +# else
>> +#  define PATH_MAX 1024
>> +# endif
>> +#endif
>> +
>>   /* Return the canonical absolute name of file NAME.  A canonical name
>>      does not contain any `.', `..' components nor any repeated path
>>      separators ('/') or symlinks.  All path components must exist.  If
>> @@ -42,9 +50,8 @@
>>   char *
>>   __realpath (const char *name, char *resolved)
>>   {
>> -  char *rpath, *dest, *extra_buf = NULL;
>> +  char *rpath, *dest, extra_buf[PATH_MAX];
> Why does the 4 KB stack space need to be occupied?  Even if there are no linked files ?

It does not, it is a simplification to avoid to decompose the function
and handle symlinks in a special case.  To avoid the stack allocation
for common case would need to either to use dynamic allocation or
adjust the function that once it founds a symlink, it calls another
function to handle the loop with a stack allocated provided buffer.  
I don't think this extra code complexity really pays off. 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-11 10:24       ` Florian Weimer
@ 2020-08-11 15:05         ` Adhemerval Zanella
  2020-08-11 15:37           ` Paul Eggert
  2020-08-11 18:29           ` Florian Weimer
  0 siblings, 2 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-11 15:05 UTC (permalink / raw)
  To: libc-alpha



On 11/08/2020 07:24, Florian Weimer wrote:
> * Andreas Schwab:
> 
>> On Aug 11 2020, Florian Weimer wrote:
>>
>>> I don't think Linux has such a restriction.  One cannot open such
>>> files directly, but they can exist.
>>
>> You can open them with a relative name as long as cwd is nearer than
>> PATH_MAX (or use openat with such a directory handle).
> 
> Yes, that's what I meant: one cannot use the full path directly.  It
> has to be split up in some way.
> 
> I still think realpath should report the full path for them, and not
> give up with an error.  Similar for getcwd.
> 

The only issue I have with this approach is realpath has different semantic
regarding maximum pathname returned whether you pass a 'resolved' buffer
(which assume PATH_MAX).

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-11 14:14     ` Adhemerval Zanella
@ 2020-08-11 15:18       ` Adhemerval Zanella
  2020-08-11 15:52       ` Paul Eggert
  1 sibling, 0 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-11 15:18 UTC (permalink / raw)
  To: Paul Eggert, libc-alpha; +Cc: Xiaoming Ni



On 11/08/2020 11:14, Adhemerval Zanella wrote:
> 
> 
> On 10/08/2020 18:25, Paul Eggert wrote:
>> On 8/10/20 1:48 PM, Adhemerval Zanella wrote:
>>
>>> Regarding syscalls usage, for a sucessful path without symlinks it
>>> trades 2 syscalls (getcwd/lstat) for 5 (openat, readlink, fstat, stat,
>>>   and close). 
>>
>> Thanks for looking into this.
>>
>> There's no need for the new __resolve_path function to call __lstat64. It does that only to determine whether the file is a symlink or a directory, and it can do the former with __readlink (which it's gonna do already) and the latter by going into the next iteration and seeing whether it gets NOTDIR. Avoiding __lstat64 means we don't have to worry about irrelevant EOVERFLOW hassles.
> 
> I am not sure we can skip the __lstat64 because we can't check if a subpath
> passed on __readlink is a directory or not (so we can move to next iteration
> in some cases).
> 
> For instance the test 30 from stdlib/test-canon.c which issues:
> 
>   realpath ("./doesExist/someFile/", ...)

And I just checked your patch on BZ#24970 and it shows the issues:

$ grep ^FAIL stdlib/subdir-tests.sum 
FAIL: stdlib/test-canon
$ cat stdlib/test-canon.out 
/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/test-canon: flunked test 30 (expected `NULL', got `/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist/someFile')
/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/test-canon: flunked test 31 (expected `NULL', got `/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist')
2 errors.

I couldn't figure out a way to have the expected realpath semantic by
just using readlink.




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-11 15:05         ` Adhemerval Zanella
@ 2020-08-11 15:37           ` Paul Eggert
  2020-08-11 18:29           ` Florian Weimer
  1 sibling, 0 replies; 26+ messages in thread
From: Paul Eggert @ 2020-08-11 15:37 UTC (permalink / raw)
  To: libc-alpha

On 8/11/20 8:05 AM, Adhemerval Zanella via Libc-alpha wrote:
> The only issue I have with this approach is realpath has different semantic
> regarding maximum pathname returned whether you pass a 'resolved' buffer
> (which assume PATH_MAX).

If realpath can successfully return a file name longer than PATH_MAX now, that 
suggests that it should continue to do so.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-11 14:14     ` Adhemerval Zanella
  2020-08-11 15:18       ` Adhemerval Zanella
@ 2020-08-11 15:52       ` Paul Eggert
  2020-08-11 19:01         ` Adhemerval Zanella
  1 sibling, 1 reply; 26+ messages in thread
From: Paul Eggert @ 2020-08-11 15:52 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha; +Cc: Xiaoming Ni

On 8/11/20 7:14 AM, Adhemerval Zanella wrote:

> For instance the test 30 from stdlib/test-canon.c which issues:
> 
>    realpath ("./doesExist/someFile/", ...)
> 
> On the directory with:
> 
>    mkdir ("doesExist", 0777)
>    creat ("doesExist/someFile", 0777)
> 
> By just issuing the readlink on (strace output):
> 
>    readlink(".../doesExist", 0x7fff32598060, 4095) = -1 EINVAL
> 
> We can't see that is a S_ISDIR to go for next iteration (and then fail on next
> iteration since "../doesExist/someFile' is not a directory end *end != '\0').

We don't need to test whether "doesExist" is a directory. The EINVAL tells us 
that it exists and is not a symlink. Therefore it is either a directory or a 
non-(symlink-or-directory). To discover whether it is a directory or a 
non-(symlink-or-directory), we go ahead with the next iteration:

     readlink ("doesExist/someFile", ...)

If this fails with ENOTDIR, then "doesExist" was a non-(symlink-or-directory); 
if it succeeds (or fails with EINVAL) doesExist was a directory; otherwise can 
simply fail with whatever errno it fails with.

> For fstat64, afaik EOVERFLOW is returned only when called non-LFS interface
> which is not this case. Do you think re really should handle EOVERFLOW ?

Ah, I hadn't thought of that. If EOVERFLOW is indeed impossible with fstat64 
then we needn't worry about EOVERFLOW.

> The fstat64/stat64 comparison is just a small check to see if the
> file was removed between calls.  Thinking twice I am not sure how
> effective this really is, maybe remove it?

Yes, let's remove that. realpath cannot possibly be atomic when it is issuing 
multiple syscalls, so there's no point to it trying to be "atomic" here.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-10 21:25   ` Paul Eggert
  2020-08-11 14:14     ` Adhemerval Zanella
@ 2020-08-11 16:46     ` Andreas Schwab
  2020-08-17 14:00       ` Dmitry V. Levin
  1 sibling, 1 reply; 26+ messages in thread
From: Andreas Schwab @ 2020-08-11 16:46 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Adhemerval Zanella, libc-alpha, Xiaoming Ni

On Aug 10 2020, Paul Eggert wrote:

> In the Linux __realpath_system, the code should be prepared for the /proc
> syscall to fail because /proc is not mounted.

We already depend very much on /proc to be mounted, so this can be
ignored.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer
  2020-08-11 15:05         ` Adhemerval Zanella
  2020-08-11 15:37           ` Paul Eggert
@ 2020-08-11 18:29           ` Florian Weimer
  1 sibling, 0 replies; 26+ messages in thread
From: Florian Weimer @ 2020-08-11 18:29 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha

* Adhemerval Zanella via Libc-alpha:

> On 11/08/2020 07:24, Florian Weimer wrote:
>> * Andreas Schwab:
>> 
>>> On Aug 11 2020, Florian Weimer wrote:
>>>
>>>> I don't think Linux has such a restriction.  One cannot open such
>>>> files directly, but they can exist.
>>>
>>> You can open them with a relative name as long as cwd is nearer than
>>> PATH_MAX (or use openat with such a directory handle).
>> 
>> Yes, that's what I meant: one cannot use the full path directly.  It
>> has to be split up in some way.
>> 
>> I still think realpath should report the full path for them, and not
>> give up with an error.  Similar for getcwd.
>> 
>
> The only issue I have with this approach is realpath has different semantic
> regarding maximum pathname returned whether you pass a 'resolved' buffer
> (which assume PATH_MAX).

This is just our attempt to avoid the inherent buffer overflows in
this interface.  We had to do something similar for readdir_r.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-11 15:52       ` Paul Eggert
@ 2020-08-11 19:01         ` Adhemerval Zanella
  0 siblings, 0 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-11 19:01 UTC (permalink / raw)
  To: Paul Eggert, libc-alpha; +Cc: Xiaoming Ni



On 11/08/2020 12:52, Paul Eggert wrote:
> On 8/11/20 7:14 AM, Adhemerval Zanella wrote:
> 
>> For instance the test 30 from stdlib/test-canon.c which issues:
>>
>>    realpath ("./doesExist/someFile/", ...)
>>
>> On the directory with:
>>
>>    mkdir ("doesExist", 0777)
>>    creat ("doesExist/someFile", 0777)
>>
>> By just issuing the readlink on (strace output):
>>
>>    readlink(".../doesExist", 0x7fff32598060, 4095) = -1 EINVAL
>>
>> We can't see that is a S_ISDIR to go for next iteration (and then fail on next
>> iteration since "../doesExist/someFile' is not a directory end *end != '\0').
> 
> We don't need to test whether "doesExist" is a directory. The EINVAL tells us that it exists and is not a symlink. Therefore it is either a directory or a non-(symlink-or-directory). To discover whether it is a directory or a non-(symlink-or-directory), we go ahead with the next iteration:
> 
>     readlink ("doesExist/someFile", ...)
> 
> If this fails with ENOTDIR, then "doesExist" was a non-(symlink-or-directory); if it succeeds (or fails with EINVAL) doesExist was a directory; otherwise can simply fail with whatever errno it fails with.

The issue seems that readlink does not fail with ENOTDIR if 'doesExist/someFile'
is indeed a file:

  readlink("/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist/someFile", 0x7ffe2d3efb70, 4096) = -1 EINVAL (Invalid argument)

We need to check with a final '/':

  readlink("/home/azanella/Projects/glibc/build/x86_64-linux-gnu/stdlib/doesExist/someFile/", 0x7ffee1893aa0, 4096) = -1 ENOTDIR (Not a directory)

This is turn requires to change the loop to make this extra check with
readlink with expected path (by appending the '/' when required). There
is another issue which we also need to handle "./doesExist/someFile/..".

But I have spent too much time for an small optimization not directly
related to the Linux optimization.  If you could make either current
algorithm or the one on stdlib/canonicalize-internal.c no call lstat
I can add on series (I don't have plan to spend more time on this
readlink optimization).

> 
>> For fstat64, afaik EOVERFLOW is returned only when called non-LFS interface
>> which is not this case. Do you think re really should handle EOVERFLOW ?
> 
> Ah, I hadn't thought of that. If EOVERFLOW is indeed impossible with fstat64 then we needn't worry about EOVERFLOW.
> 
>> The fstat64/stat64 comparison is just a small check to see if the
>> file was removed between calls.  Thinking twice I am not sure how
>> effective this really is, maybe remove it?
> 
> Yes, let's remove that. realpath cannot possibly be atomic when it is issuing multiple syscalls, so there's no point to it trying to be "atomic" here.

Ack.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
  2020-08-11 14:57   ` Adhemerval Zanella
@ 2020-08-12  1:38     ` Xiaoming Ni
  2020-08-12 23:04       ` Adhemerval Zanella
  0 siblings, 1 reply; 26+ messages in thread
From: Xiaoming Ni @ 2020-08-12  1:38 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha

On 2020/8/11 22:57, Adhemerval Zanella wrote:
> 
> 
> On 11/08/2020 00:00, Xiaoming Ni wrote:
>> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>>> It uses both a fixed internal buffer with PATH_MAX size to read and
>>> copy the results of the readlink call.
>>>
>>> Also, if PATH_MAX is not defined it uses a default value of 1024
>>> as for other stdlib implementations.
>>>
>>> The expected stack usage is about 8k on Linux where PATH_MAX is
>>> define as 4096 (plus some internal function usage for local
>>> variable).
>>>
>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>> ---
>>>    stdlib/Makefile                               |   3 +-
>>>    stdlib/canonicalize.c                         |  38 +++---
>>>    stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>>>    support/support_set_small_thread_stack_size.c |  12 +-
>>>    support/xthread.h                             |   2 +
>>>    5 files changed, 138 insertions(+), 25 deletions(-)
>>>    create mode 100644 stdlib/tst-canon-bz26341.c
>>>
>>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>>> index 4615f6dfe7..7093b8a584 100644
>>> --- a/stdlib/Makefile
>>> +++ b/stdlib/Makefile
>>> @@ -87,7 +87,7 @@ tests        := tst-strtol tst-strtod testmb testrand testsort testdiv   \
>>>               tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>>               tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>>               tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>>> -           tst-setcontext9 tst-bz20544
>>> +           tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>>      tests-internal    := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>>               tst-tls-atexit tst-tls-atexit-nodelete
>>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>>    LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>>    LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>>    LDLIBS-test-on_exit-race = $(shared-thread-library)
>>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>>      LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>>    LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>>> index cbd885a3c5..554ba221e4 100644
>>> --- a/stdlib/canonicalize.c
>>> +++ b/stdlib/canonicalize.c
>>> @@ -28,6 +28,14 @@
>>>    #include <eloop-threshold.h>
>>>    #include <shlib-compat.h>
>>>    +#ifndef PATH_MAX
>>> +# ifdef MAXPATHLEN
>>> +#  define PATH_MAX MAXPATHLEN
>>> +# else
>>> +#  define PATH_MAX 1024
>>> +# endif
>>> +#endif
>>> +
>>>    /* Return the canonical absolute name of file NAME.  A canonical name
>>>       does not contain any `.', `..' components nor any repeated path
>>>       separators ('/') or symlinks.  All path components must exist.  If
>>> @@ -42,9 +50,8 @@
>>>    char *
>>>    __realpath (const char *name, char *resolved)
>>>    {
>>> -  char *rpath, *dest, *extra_buf = NULL;
>>> +  char *rpath, *dest, extra_buf[PATH_MAX];
>> Why does the 4 KB stack space need to be occupied?  Even if there are no linked files ?
> 
> It does not, it is a simplification to avoid to decompose the function
> and handle symlinks in a special case.  To avoid the stack allocation
> for common case would need to either to use dynamic allocation or
> adjust the function that once it founds a symlink, it calls another
> function to handle the loop with a stack allocated provided buffer.
> I don't think this extra code complexity really pays off.


Extract the symlinks processing as an independent function and move 
extra_buf and buf to the new independent function to avoid wasting 8 KB 
stack space when the realpath is used to process unlinked files.
Is this better?

Thanks
Xiaoming Ni


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
  2020-08-12  1:38     ` Xiaoming Ni
@ 2020-08-12 23:04       ` Adhemerval Zanella
  2020-08-13 20:29         ` Adhemerval Zanella
  0 siblings, 1 reply; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-12 23:04 UTC (permalink / raw)
  To: Xiaoming Ni, libc-alpha



On 11/08/2020 22:38, Xiaoming Ni wrote:
> On 2020/8/11 22:57, Adhemerval Zanella wrote:
>>
>>
>> On 11/08/2020 00:00, Xiaoming Ni wrote:
>>> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>>>> It uses both a fixed internal buffer with PATH_MAX size to read and
>>>> copy the results of the readlink call.
>>>>
>>>> Also, if PATH_MAX is not defined it uses a default value of 1024
>>>> as for other stdlib implementations.
>>>>
>>>> The expected stack usage is about 8k on Linux where PATH_MAX is
>>>> define as 4096 (plus some internal function usage for local
>>>> variable).
>>>>
>>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>>> ---
>>>>    stdlib/Makefile                               |   3 +-
>>>>    stdlib/canonicalize.c                         |  38 +++---
>>>>    stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>>>>    support/support_set_small_thread_stack_size.c |  12 +-
>>>>    support/xthread.h                             |   2 +
>>>>    5 files changed, 138 insertions(+), 25 deletions(-)
>>>>    create mode 100644 stdlib/tst-canon-bz26341.c
>>>>
>>>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>>>> index 4615f6dfe7..7093b8a584 100644
>>>> --- a/stdlib/Makefile
>>>> +++ b/stdlib/Makefile
>>>> @@ -87,7 +87,7 @@ tests        := tst-strtol tst-strtod testmb testrand testsort testdiv   \
>>>>               tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>>>               tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>>>               tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>>>> -           tst-setcontext9 tst-bz20544
>>>> +           tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>>>      tests-internal    := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>>>               tst-tls-atexit tst-tls-atexit-nodelete
>>>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>>>    LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>>>    LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>>>    LDLIBS-test-on_exit-race = $(shared-thread-library)
>>>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>>>      LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>>>    LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>>>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>>>> index cbd885a3c5..554ba221e4 100644
>>>> --- a/stdlib/canonicalize.c
>>>> +++ b/stdlib/canonicalize.c
>>>> @@ -28,6 +28,14 @@
>>>>    #include <eloop-threshold.h>
>>>>    #include <shlib-compat.h>
>>>>    +#ifndef PATH_MAX
>>>> +# ifdef MAXPATHLEN
>>>> +#  define PATH_MAX MAXPATHLEN
>>>> +# else
>>>> +#  define PATH_MAX 1024
>>>> +# endif
>>>> +#endif
>>>> +
>>>>    /* Return the canonical absolute name of file NAME.  A canonical name
>>>>       does not contain any `.', `..' components nor any repeated path
>>>>       separators ('/') or symlinks.  All path components must exist.  If
>>>> @@ -42,9 +50,8 @@
>>>>    char *
>>>>    __realpath (const char *name, char *resolved)
>>>>    {
>>>> -  char *rpath, *dest, *extra_buf = NULL;
>>>> +  char *rpath, *dest, extra_buf[PATH_MAX];
>>> Why does the 4 KB stack space need to be occupied?  Even if there are no linked files ?
>>
>> It does not, it is a simplification to avoid to decompose the function
>> and handle symlinks in a special case.  To avoid the stack allocation
>> for common case would need to either to use dynamic allocation or
>> adjust the function that once it founds a symlink, it calls another
>> function to handle the loop with a stack allocated provided buffer.
>> I don't think this extra code complexity really pays off.
> 
> 
> Extract the symlinks processing as an independent function and move extra_buf and buf to the new independent function to avoid wasting 8 KB stack space when the realpath is used to process unlinked files.
> Is this better?

Yes, my only reservation is the complexity and possible code duplication
to handle it.  I can't see no easy way to accomplish it without duplicate
the loop code (minus the 'extra_buf' alloca) and make the default loop
calling it with the stack allocated extra_buf (and I would like to avoid
this approach).

Another possibility which I think it would be better it to use a scratch
buffer and make some compromise with stack usage and heap allocation.
The default 1024 bytes of the scratch buffer should hit mostly of the
common calls (it is 1/4 of PATH_MAX), so malloc would be called only for
large paths (which should be uncommon).  We can also use a scratch buffer
for the readlink as well, since we might infer the required size from
the previous lstat call.

With something like below we can make the realpath uses a stack of about
~1024 and ~2048 if the path contains symbolic link:

---

diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index cbd885a3c5..dca160f523 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -25,9 +25,56 @@
 #include <errno.h>
 #include <stddef.h>
 
+#include <scratch_buffer.h>
 #include <eloop-threshold.h>
 #include <shlib-compat.h>
 
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+#  define PATH_MAX MAXPATHLEN
+# else
+#  define PATH_MAX 1024
+# endif
+#endif
+
+static bool
+realpath_readlink (const char *rpath, const char *end, size_t path_max,
+		   size_t st_size, struct scratch_buffer *extra_buf)
+{
+  bool r = false;
+
+  struct scratch_buffer buf;
+  scratch_buffer_init (&buf);
+  /* Add the terminating null byte.  */
+  if (!scratch_buffer_set_array_size (&buf, st_size + 1,  sizeof (char)))
+    return false;
+
+  ssize_t n = __readlink (rpath, buf.data, buf.length - 1);
+  if (n < 0)
+    goto out;
+  ((char *) buf.data)[n] = '\0';
+
+  size_t len = strlen (end);
+  if (path_max - n <= len)
+    {
+      __set_errno (ENAMETOOLONG);
+      goto out;
+    }
+
+  if (!scratch_buffer_set_array_size (extra_buf, n + len + 1, sizeof (char)))
+    goto out;
+
+  /* Careful here, end may be a pointer into extra_buf... */
+  memmove ((char *) extra_buf->data + n, end, len + 1);
+  memcpy (extra_buf->data, buf.data, n);
+
+  r = true;
+
+out:
+  scratch_buffer_free (&buf);
+  return r;
+}
+
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any `.', `..' components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -42,10 +89,13 @@
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, *extra_buf = NULL;
+  char *rpath, *dest;
   const char *start, *end, *rpath_limit;
-  long int path_max;
+  const size_t path_max = PATH_MAX;
   int num_links = 0;
+  struct scratch_buffer extra_buf;
+
+  scratch_buffer_init (&extra_buf);
 
   if (name == NULL)
     {
@@ -65,14 +115,6 @@ __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-#ifdef PATH_MAX
-  path_max = PATH_MAX;
-#else
-  path_max = __pathconf (name, _PC_PATH_MAX);
-  if (path_max <= 0)
-    path_max = 1024;
-#endif
-
   if (resolved == NULL)
     {
       rpath = malloc (path_max);
@@ -101,7 +143,6 @@ __realpath (const char *name, char *resolved)
   for (start = end = name; *start; start = end)
     {
       struct stat64 st;
-      int n;
 
       /* Skip sequence of multiple path-separators.  */
       while (*start == '/')
@@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved)
 
 	  if (S_ISLNK (st.st_mode))
 	    {
-	      char *buf = __alloca (path_max);
-	      size_t len;
-
 	      if (++num_links > __eloop_threshold ())
 		{
 		  __set_errno (ELOOP);
 		  goto error;
 		}
 
-	      n = __readlink (rpath, buf, path_max - 1);
-	      if (n < 0)
+	      if (! realpath_readlink (rpath, end, path_max, st.st_size,
+				       &extra_buf))
 		goto error;
-	      buf[n] = '\0';
-
-	      if (!extra_buf)
-		extra_buf = __alloca (path_max);
 
-	      len = strlen (end);
-	      if (path_max - n <= len)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  goto error;
-		}
-
-	      /* Careful here, end may be a pointer into extra_buf... */
-	      memmove (&extra_buf[n], end, len + 1);
-	      name = end = memcpy (extra_buf, buf, n);
+	      name = end = extra_buf.data;
 
-	      if (buf[0] == '/')
+	      if (((char *)extra_buf.data)[0] == '/')
 		dest = rpath + 1;	/* It's an absolute symlink */
 	      else
 		/* Back up to previous component, ignore if at root already: */
@@ -209,6 +234,8 @@ __realpath (const char *name, char *resolved)
     --dest;
   *dest = '\0';
 
+  scratch_buffer_free (&extra_buf);
+
   assert (resolved == NULL || resolved == rpath);
   return rpath;
 
@@ -216,6 +243,7 @@ error:
   assert (resolved == NULL || resolved == rpath);
   if (resolved == NULL)
     free (rpath);
+  scratch_buffer_free (&extra_buf);
   return NULL;
 }
 libc_hidden_def (__realpath)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)
  2020-08-12 23:04       ` Adhemerval Zanella
@ 2020-08-13 20:29         ` Adhemerval Zanella
  0 siblings, 0 replies; 26+ messages in thread
From: Adhemerval Zanella @ 2020-08-13 20:29 UTC (permalink / raw)
  To: Xiaoming Ni, libc-alpha



On 12/08/2020 20:04, Adhemerval Zanella wrote:
> 
> 
> On 11/08/2020 22:38, Xiaoming Ni wrote:

> With something like below we can make the realpath uses a stack of about
> ~1024 and ~2048 if the path contains symbolic link:
> 


> @@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved)
>  
>  	  if (S_ISLNK (st.st_mode))
>  	    {
> -	      char *buf = __alloca (path_max);
> -	      size_t len;
> -
>  	      if (++num_links > __eloop_threshold ())
>  		{
>  		  __set_errno (ELOOP);
>  		  goto error;
>  		}
>  
> -	      n = __readlink (rpath, buf, path_max - 1);
> -	      if (n < 0)
> +	      if (! realpath_readlink (rpath, end, path_max, st.st_size,
> +				       &extra_buf))

Scratch that, unfortunately some Linux filesystems do not return the symlink
target file size with a lstat call (procfs and sysfs for instance).  So
we need to either issue multiple readlink with different increasing buffers
or assume PATH_MAX (as current algorithms does).

I will send a v3 of my patch to use a scratch buffer.  What I am not sure is
if the Linux optimization is worth if the idea is use a small stack as possible
for common cases since it trades some syscall by a higher stack usage.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-11 16:46     ` Andreas Schwab
@ 2020-08-17 14:00       ` Dmitry V. Levin
  2020-08-17 15:13         ` Andreas Schwab
  0 siblings, 1 reply; 26+ messages in thread
From: Dmitry V. Levin @ 2020-08-17 14:00 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Paul Eggert, Xiaoming Ni, libc-alpha

On Tue, Aug 11, 2020 at 06:46:24PM +0200, Andreas Schwab wrote:
> On Aug 10 2020, Paul Eggert wrote:
> 
> > In the Linux __realpath_system, the code should be prepared for the /proc
> > syscall to fail because /proc is not mounted.
> 
> We already depend very much on /proc to be mounted, so this can be
> ignored.

Do we?


-- 
ldv

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-17 14:00       ` Dmitry V. Levin
@ 2020-08-17 15:13         ` Andreas Schwab
  2020-08-17 16:17           ` Dmitry V. Levin
  0 siblings, 1 reply; 26+ messages in thread
From: Andreas Schwab @ 2020-08-17 15:13 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: Paul Eggert, Xiaoming Ni, libc-alpha

On Aug 17 2020, Dmitry V. Levin wrote:

> On Tue, Aug 11, 2020 at 06:46:24PM +0200, Andreas Schwab wrote:
>> On Aug 10 2020, Paul Eggert wrote:
>> 
>> > In the Linux __realpath_system, the code should be prepared for the /proc
>> > syscall to fail because /proc is not mounted.
>> 
>> We already depend very much on /proc to be mounted, so this can be
>> ignored.
>
> Do we?

Yes, see <87blk9ry5d.fsf@oldenburg2.str.redhat.com> for example.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] linux: Optimize realpath stack usage
  2020-08-17 15:13         ` Andreas Schwab
@ 2020-08-17 16:17           ` Dmitry V. Levin
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry V. Levin @ 2020-08-17 16:17 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Paul Eggert, Xiaoming Ni, libc-alpha

On Mon, Aug 17, 2020 at 05:13:44PM +0200, Andreas Schwab wrote:
> On Aug 17 2020, Dmitry V. Levin wrote:
> > On Tue, Aug 11, 2020 at 06:46:24PM +0200, Andreas Schwab wrote:
> >> On Aug 10 2020, Paul Eggert wrote:
> >> 
> >> > In the Linux __realpath_system, the code should be prepared for the /proc
> >> > syscall to fail because /proc is not mounted.
> >> 
> >> We already depend very much on /proc to be mounted, so this can be
> >> ignored.
> >
> > Do we?
> 
> Yes, see <87blk9ry5d.fsf@oldenburg2.str.redhat.com> for example.

That's a regression, I filed a bug at
https://sourceware.org/bugzilla/show_bug.cgi?id=26401

Thanks,


-- 
ldv

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2020-08-17 16:17 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-10 20:48 [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Adhemerval Zanella
2020-08-10 20:48 ` [PATCH 2/3] stdlib: Enforce PATH_MAX on allocated realpath buffer Adhemerval Zanella
2020-08-11  8:26   ` Florian Weimer
2020-08-11  9:54     ` Andreas Schwab
2020-08-11 10:24       ` Florian Weimer
2020-08-11 15:05         ` Adhemerval Zanella
2020-08-11 15:37           ` Paul Eggert
2020-08-11 18:29           ` Florian Weimer
2020-08-11  9:48   ` Andreas Schwab
2020-08-10 20:48 ` [PATCH 3/3] linux: Optimize realpath stack usage Adhemerval Zanella
2020-08-10 21:25   ` Paul Eggert
2020-08-11 14:14     ` Adhemerval Zanella
2020-08-11 15:18       ` Adhemerval Zanella
2020-08-11 15:52       ` Paul Eggert
2020-08-11 19:01         ` Adhemerval Zanella
2020-08-11 16:46     ` Andreas Schwab
2020-08-17 14:00       ` Dmitry V. Levin
2020-08-17 15:13         ` Andreas Schwab
2020-08-17 16:17           ` Dmitry V. Levin
2020-08-11  9:46   ` Andreas Schwab
2020-08-11  0:32 ` [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) Matt Turner
2020-08-11  3:00 ` Xiaoming Ni
2020-08-11 14:57   ` Adhemerval Zanella
2020-08-12  1:38     ` Xiaoming Ni
2020-08-12 23:04       ` Adhemerval Zanella
2020-08-13 20:29         ` Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).