public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2 1/4] Sync canonicalize with gnulib [BZ #10635]
@ 2020-10-27 14:35 Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 2/4] stdlib: Use fixed buffer size for realpath [BZ #26241] Adhemerval Zanella
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Adhemerval Zanella @ 2020-10-27 14:35 UTC (permalink / raw)
  To: libc-alpha; +Cc: Andreas Schwab

It sync with gnulib version 44358d4d165b with the following exceptions:

  - All the logic to which __getcwd use.

  - MAXSYMLINKS define (glibc already handles it on eloop-threshold.h).

  - The pathmax.h logic to handle //path (neither Linux nor Hurd makes
    this distinction).

  - Don't explict set ENOMEM on malloc failure (it is already handled
    by malloc itself).

  - The usage of malloca.h routines.

Checked on x86_64-linux-gnu
---
 stdlib/canonicalize.c | 244 +++++++++++++++++++++++-------------------
 1 file changed, 131 insertions(+), 113 deletions(-)

diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 3fcb399a5d..2770f4ae44 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -16,8 +16,9 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <assert.h>
 #include <stdlib.h>
+
+#include <alloca.h>
 #include <string.h>
 #include <unistd.h>
 #include <limits.h>
@@ -29,10 +30,10 @@
 #include <shlib-compat.h>
 
 /* Return the canonical absolute name of file NAME.  A canonical name
-   does not contain any `.', `..' components nor any repeated path
+   does not contain any ".", ".." components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
    RESOLVED is null, the result is malloc'd; otherwise, if the
-   canonical name is PATH_MAX chars or more, returns null with `errno'
+   canonical name is PATH_MAX chars or more, returns null with 'errno'
    set to ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars,
    returns the name in RESOLVED.  If the name cannot be resolved and
    RESOLVED is non-NULL, it contains the path of the first component
@@ -50,9 +51,9 @@ __realpath (const char *name, char *resolved)
   if (name == NULL)
     {
       /* As per Single Unix Specification V2 we must return an error if
-	 either parameter is a null pointer.  We extend this to allow
-	 the RESOLVED parameter to be NULL in case the we are expected to
-	 allocate the room for the return value.  */
+         either parameter is a null pointer.  We extend this to allow
+         the RESOLVED parameter to be NULL in case the we are expected to
+         allocate the room for the return value.  */
       __set_errno (EINVAL);
       return NULL;
     }
@@ -60,7 +61,7 @@ __realpath (const char *name, char *resolved)
   if (name[0] == '\0')
     {
       /* As per Single Unix Specification V2 we must return an error if
-	 the name argument points to an empty string.  */
+         the name argument points to an empty string.  */
       __set_errno (ENOENT);
       return NULL;
     }
@@ -70,152 +71,169 @@ __realpath (const char *name, char *resolved)
 #else
   path_max = __pathconf (name, _PC_PATH_MAX);
   if (path_max <= 0)
-    path_max = 1024;
+    path_max = 8192;
 #endif
 
   if (resolved == NULL)
     {
       rpath = malloc (path_max);
       if (rpath == NULL)
-	return NULL;
+        return NULL;
     }
   else
     rpath = resolved;
   rpath_limit = rpath + path_max;
 
+
   if (name[0] != '/')
     {
       if (!__getcwd (rpath, path_max))
-	{
-	  rpath[0] = '\0';
-	  goto error;
-	}
-      dest = __rawmemchr (rpath, '\0');
+        {
+          rpath[0] = '\0';
+          goto error;
+        }
+      dest = strchr (rpath, '\0');
     }
   else
     {
-      rpath[0] = '/';
-      dest = rpath + 1;
+      dest = rpath;
+      *dest++ = '/';
     }
+  start = name;
 
-  for (start = end = name; *start; start = end)
+  for (end = start; *start; start = end)
     {
+#ifdef _LIBC
       struct stat64 st;
-      int n;
+#else
+      struct stat st;
+#endif
 
       /* Skip sequence of multiple path-separators.  */
       while (*start == '/')
-	++start;
+        ++start;
 
       /* Find end of path component.  */
       for (end = start; *end && *end != '/'; ++end)
-	/* Nothing.  */;
+        /* Nothing.  */;
 
       if (end - start == 0)
-	break;
+        break;
       else if (end - start == 1 && start[0] == '.')
-	/* nothing */;
+        /* nothing */;
       else if (end - start == 2 && start[0] == '.' && start[1] == '.')
-	{
-	  /* Back up to previous component, ignore if at root already.  */
-	  if (dest > rpath + 1)
-	    while ((--dest)[-1] != '/');
-	}
+        {
+          /* Back up to previous component, ignore if at root already.  */
+          if (dest > rpath + 1)
+            for (--dest; dest > rpath && dest[-1] != '/'; --dest)
+              continue;
+        }
       else
-	{
-	  size_t new_size;
-
-	  if (dest[-1] != '/')
-	    *dest++ = '/';
-
-	  if (dest + (end - start) >= rpath_limit)
-	    {
-	      ptrdiff_t dest_offset = dest - rpath;
-	      char *new_rpath;
-
-	      if (resolved)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  if (dest > rpath + 1)
-		    dest--;
-		  *dest = '\0';
-		  goto error;
-		}
-	      new_size = rpath_limit - rpath;
-	      if (end - start + 1 > path_max)
-		new_size += end - start + 1;
-	      else
-		new_size += path_max;
-	      new_rpath = (char *) realloc (rpath, new_size);
-	      if (new_rpath == NULL)
-		goto error;
-	      rpath = new_rpath;
-	      rpath_limit = rpath + new_size;
-
-	      dest = rpath + dest_offset;
-	    }
-
-	  dest = __mempcpy (dest, start, end - start);
-	  *dest = '\0';
-
-	  if (__lstat64 (rpath, &st) < 0)
-	    goto error;
-
-	  if (S_ISLNK (st.st_mode))
-	    {
-	      char *buf = __alloca (path_max);
-	      size_t len;
-
-	      if (++num_links > __eloop_threshold ())
-		{
-		  __set_errno (ELOOP);
-		  goto error;
-		}
-
-	      n = __readlink (rpath, buf, path_max - 1);
-	      if (n < 0)
-		goto error;
-	      buf[n] = '\0';
-
-	      if (!extra_buf)
-		extra_buf = __alloca (path_max);
-
-	      len = strlen (end);
-	      if (path_max - n <= len)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  goto error;
-		}
-
-	      /* Careful here, end may be a pointer into extra_buf... */
-	      memmove (&extra_buf[n], end, len + 1);
-	      name = end = memcpy (extra_buf, buf, n);
-
-	      if (buf[0] == '/')
-		dest = rpath + 1;	/* It's an absolute symlink */
-	      else
-		/* Back up to previous component, ignore if at root already: */
-		if (dest > rpath + 1)
-		  while ((--dest)[-1] != '/');
-	    }
-	  else if (!S_ISDIR (st.st_mode) && *end != '\0')
-	    {
-	      __set_errno (ENOTDIR);
-	      goto error;
-	    }
-	}
+        {
+          size_t new_size;
+
+          if (dest[-1] != '/')
+            *dest++ = '/';
+
+          if (dest + (end - start) >= rpath_limit)
+            {
+              ptrdiff_t dest_offset = dest - rpath;
+              char *new_rpath;
+
+              if (resolved)
+                {
+                  __set_errno (ENAMETOOLONG);
+                  if (dest > rpath + 1)
+                    dest--;
+                  *dest = '\0';
+                  goto error;
+                }
+              new_size = rpath_limit - rpath;
+              if (end - start + 1 > path_max)
+                new_size += end - start + 1;
+              else
+                new_size += path_max;
+              new_rpath = (char *) realloc (rpath, new_size);
+              if (new_rpath == NULL)
+                goto error;
+              rpath = new_rpath;
+              rpath_limit = rpath + new_size;
+
+              dest = rpath + dest_offset;
+            }
+
+          dest = __mempcpy (dest, start, end - start);
+          *dest = '\0';
+
+          if (__lstat64 (rpath, &st) < 0)
+            goto error;
+
+          if (S_ISLNK (st.st_mode))
+            {
+              char *buf = __alloca (path_max);
+              size_t len;
+              ssize_t n;
+
+              if (++num_links > __eloop_threshold ())
+                {
+                  __set_errno (ELOOP);
+                  goto error;
+                }
+
+              n = __readlink (rpath, buf, path_max - 1);
+              if (n < 0)
+                goto error;
+              buf[n] = '\0';
+
+              if (!extra_buf)
+                extra_buf = __alloca (path_max);
+
+              len = strlen (end);
+              /* Check that n + len + 1 doesn't overflow and is <= path_max. */
+              if (n >= SIZE_MAX - len || n + len >= path_max)
+                {
+                  __set_errno (ENAMETOOLONG);
+                  goto error;
+                }
+
+              /* Careful here, end may be a pointer into extra_buf... */
+              memmove (&extra_buf[n], end, len + 1);
+              name = end = memcpy (extra_buf, buf, n);
+
+              if (buf[0] == '/')
+                {
+                  dest = rpath;
+                  *dest++ = '/'; /* It's an absolute symlink */
+                }
+              else
+                {
+                  /* Back up to previous component, ignore if at root
+                     already: */
+                  if (dest > rpath + 1)
+                    for (--dest; dest > rpath && dest[-1] != '/'; --dest)
+                      continue;
+                }
+            }
+          else if (!S_ISDIR (st.st_mode) && *end != '\0')
+            {
+              __set_errno (ENOTDIR);
+              goto error;
+            }
+        }
     }
   if (dest > rpath + 1 && dest[-1] == '/')
     --dest;
   *dest = '\0';
 
-  assert (resolved == NULL || resolved == rpath);
   return rpath;
 
 error:
-  assert (resolved == NULL || resolved == rpath);
-  if (resolved == NULL)
-    free (rpath);
+  {
+    int saved_errno = errno;
+    if (resolved == NULL)
+      free (rpath);
+    __set_errno (saved_errno);
+  }
   return NULL;
 }
 libc_hidden_def (__realpath)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 2/4] stdlib: Use fixed buffer size for realpath [BZ #26241]
  2020-10-27 14:35 [PATCH v2 1/4] Sync canonicalize with gnulib [BZ #10635] Adhemerval Zanella
@ 2020-10-27 14:35 ` Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 3/4] stdlib: Fix arithmetic overflows in realpath [BZ #26592] Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970] Adhemerval Zanella
  2 siblings, 0 replies; 7+ messages in thread
From: Adhemerval Zanella @ 2020-10-27 14:35 UTC (permalink / raw)
  To: libc-alpha; +Cc: Andreas Schwab

It uses both a fixed internal buffer with PATH_MAX size to read and
copy the results of the readlink call.

Also, if PATH_MAX is not defined it uses a default value of 1024
as for other stdlib implementations.

The expected stack usage is about 8k on Linux where PATH_MAX is
define as 4096 (plus some internal function usage for local
variable).

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 stdlib/Makefile                               |   3 +-
 stdlib/canonicalize.c                         |  29 +++--
 stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
 support/support_set_small_thread_stack_size.c |  12 +-
 support/xthread.h                             |   2 +
 5 files changed, 134 insertions(+), 20 deletions(-)
 create mode 100644 stdlib/tst-canon-bz26341.c

diff --git a/stdlib/Makefile b/stdlib/Makefile
index f8a1660186..adfdb0b1fb 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
 		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
 		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
-		   tst-setcontext9 tst-bz20544
+		   tst-setcontext9 tst-bz20544 tst-canon-bz26341
 
 tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
 		   tst-tls-atexit tst-tls-atexit-nodelete
@@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
 LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
 LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
 LDLIBS-test-on_exit-race = $(shared-thread-library)
+LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
 
 LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
 LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 2770f4ae44..50244d0f67 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -29,6 +29,14 @@
 #include <eloop-threshold.h>
 #include <shlib-compat.h>
 
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+#  define PATH_MAX MAXPATHLEN
+# else
+#  define PATH_MAX 1024
+# endif
+#endif
+
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any ".", ".." components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -43,10 +51,11 @@
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, *extra_buf = NULL;
+  char *rpath, *dest;
   const char *start, *end, *rpath_limit;
-  long int path_max;
+  const size_t path_max = PATH_MAX;
   int num_links = 0;
+  char extra_buf[PATH_MAX];
 
   if (name == NULL)
     {
@@ -66,14 +75,6 @@ __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-#ifdef PATH_MAX
-  path_max = PATH_MAX;
-#else
-  path_max = __pathconf (name, _PC_PATH_MAX);
-  if (path_max <= 0)
-    path_max = 8192;
-#endif
-
   if (resolved == NULL)
     {
       rpath = malloc (path_max);
@@ -170,24 +171,20 @@ __realpath (const char *name, char *resolved)
 
           if (S_ISLNK (st.st_mode))
             {
-              char *buf = __alloca (path_max);
+              char buf[PATH_MAX];
               size_t len;
               ssize_t n;
 
               if (++num_links > __eloop_threshold ())
                 {
                   __set_errno (ELOOP);
-                  goto error;
-                }
+                  goto error;  }
 
               n = __readlink (rpath, buf, path_max - 1);
               if (n < 0)
                 goto error;
               buf[n] = '\0';
 
-              if (!extra_buf)
-                extra_buf = __alloca (path_max);
-
               len = strlen (end);
               /* Check that n + len + 1 doesn't overflow and is <= path_max. */
               if (n >= SIZE_MAX - len || n + len >= path_max)
diff --git a/stdlib/tst-canon-bz26341.c b/stdlib/tst-canon-bz26341.c
new file mode 100644
index 0000000000..63474bddaa
--- /dev/null
+++ b/stdlib/tst-canon-bz26341.c
@@ -0,0 +1,108 @@
+/* Check if realpath does not consume extra stack space based on symlink
+   existance in the path (BZ #26341)
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/xunistd.h>
+#include <support/xthread.h>
+
+static char *filename;
+static size_t filenamelen;
+static char *linkname;
+
+static int
+maxsymlinks (void)
+{
+#ifdef MAXSYMLINKS
+  return MAXSYMLINKS;
+#else
+  long int sysconf_symloop_max = sysconf (_SC_SYMLOOP_MAX);
+  return sysconf_symloop_max <= 0
+	 ? _POSIX_SYMLOOP_MAX
+	 : sysconf_symloop_max;
+#endif
+}
+
+#ifndef PATH_MAX
+# define PATH_MAX 1024
+#endif
+
+static void
+create_link (void)
+{
+  int fd = create_temp_file ("tst-canon-bz26341", &filename);
+  TEST_VERIFY_EXIT (fd != -1);
+  xclose (fd);
+
+  char *prevlink = filename;
+  int maxlinks = maxsymlinks ();
+  for (int i = 0; i < maxlinks; i++)
+    {
+      linkname = xasprintf ("%s%d", filename, i);
+      xsymlink (prevlink, linkname);
+      add_temp_file (linkname);
+      prevlink = linkname;
+    }
+
+  filenamelen = strlen (filename);
+}
+
+static void *
+do_realpath (void *arg)
+{
+  /* Old implementation of realpath allocates a PATH_MAX using alloca
+     for each symlink in the path, leading to MAXSYMLINKS times PATH_MAX
+     maximum stack usage.
+     This stack allocations tries fill the thread allocated stack minus
+     both the thread (plus some slack) and the realpath (plus some slack).
+     If realpath uses more than 2 * PATH_MAX plus some slack it will trigger
+     a stackoverflow.  */
+
+  const size_t realpath_usage = 2 * PATH_MAX + 1024;
+  const size_t thread_usage = 1 * PATH_MAX + 1024;
+  size_t stack_size = support_small_thread_stack_size ()
+		      - realpath_usage - thread_usage;
+  char stack[stack_size];
+  char *resolved = stack + stack_size - thread_usage + 1024;
+
+  char *p = realpath (linkname, resolved);
+  TEST_VERIFY (p != NULL);
+  TEST_COMPARE_BLOB (resolved, filenamelen, filename, filenamelen);
+
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  create_link ();
+
+  pthread_t th = xpthread_create (support_small_stack_thread_attribute (),
+				  do_realpath, NULL);
+  xpthread_join (th);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/support/support_set_small_thread_stack_size.c b/support/support_set_small_thread_stack_size.c
index 69d66e97db..74a0e38a72 100644
--- a/support/support_set_small_thread_stack_size.c
+++ b/support/support_set_small_thread_stack_size.c
@@ -20,8 +20,8 @@
 #include <pthread.h>
 #include <support/xthread.h>
 
-void
-support_set_small_thread_stack_size (pthread_attr_t *attr)
+size_t
+support_small_thread_stack_size (void)
 {
   /* Some architectures have too small values for PTHREAD_STACK_MIN
      which cannot be used for creating threads.  Ensure that the stack
@@ -31,5 +31,11 @@ support_set_small_thread_stack_size (pthread_attr_t *attr)
   if (stack_size < PTHREAD_STACK_MIN)
     stack_size = PTHREAD_STACK_MIN;
 #endif
-  xpthread_attr_setstacksize (attr, stack_size);
+  return stack_size;
+}
+
+void
+support_set_small_thread_stack_size (pthread_attr_t *attr)
+{
+  xpthread_attr_setstacksize (attr, support_small_thread_stack_size ());
 }
diff --git a/support/xthread.h b/support/xthread.h
index 05f8d4a7d9..6ba2f5a18b 100644
--- a/support/xthread.h
+++ b/support/xthread.h
@@ -78,6 +78,8 @@ void xpthread_attr_setguardsize (pthread_attr_t *attr,
 /* Set the stack size in ATTR to a small value, but still large enough
    to cover most internal glibc stack usage.  */
 void support_set_small_thread_stack_size (pthread_attr_t *attr);
+/* Return the stack size used on support_set_small_thread_stack_size.  */
+size_t support_small_thread_stack_size (void);
 
 /* Return a pointer to a thread attribute which requests a small
    stack.  The caller must not free this pointer.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 3/4] stdlib: Fix arithmetic overflows in realpath [BZ #26592]
  2020-10-27 14:35 [PATCH v2 1/4] Sync canonicalize with gnulib [BZ #10635] Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 2/4] stdlib: Use fixed buffer size for realpath [BZ #26241] Adhemerval Zanella
@ 2020-10-27 14:35 ` Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970] Adhemerval Zanella
  2 siblings, 0 replies; 7+ messages in thread
From: Adhemerval Zanella @ 2020-10-27 14:35 UTC (permalink / raw)
  To: libc-alpha; +Cc: Andreas Schwab

The realpath uses an end-of-array pointer 'rpath_limit', and makes
invalid (overflowing) comparisons against it to catch overflow:

  117       /* Find end of path component.  */
  118       if (dest + (end-start) >= rpath_limit)

I could not see a easy way to stress this issue since it rely on how
the input argument is layout in memory along with a large filename
name that trigger the overflow comparison.  However, the fix is
simple enough where it simple reorganize arithmetic in the comparison.

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 stdlib/canonicalize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 50244d0f67..9aa69676e4 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -136,7 +136,7 @@ __realpath (const char *name, char *resolved)
           if (dest[-1] != '/')
             *dest++ = '/';
 
-          if (dest + (end - start) >= rpath_limit)
+          if (end - start >= rpath_limit - dest)
             {
               ptrdiff_t dest_offset = dest - rpath;
               char *new_rpath;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970]
  2020-10-27 14:35 [PATCH v2 1/4] Sync canonicalize with gnulib [BZ #10635] Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 2/4] stdlib: Use fixed buffer size for realpath [BZ #26241] Adhemerval Zanella
  2020-10-27 14:35 ` [PATCH v2 3/4] stdlib: Fix arithmetic overflows in realpath [BZ #26592] Adhemerval Zanella
@ 2020-10-27 14:35 ` Adhemerval Zanella
  2020-10-27 15:36   ` Florian Weimer
  2 siblings, 1 reply; 7+ messages in thread
From: Adhemerval Zanella @ 2020-10-27 14:35 UTC (permalink / raw)
  To: libc-alpha; +Cc: Andreas Schwab

The readlink already tells whether the file is a symlink, so there is
no need to call lstat to check it.  However for '..' it requires an
extra readlink check if the previous component can be really accessed,
otherwise the next iteration will check a possible valid path and end
early.  It should performance-wise acceptable and a gain over lstat,
afaik symlink should not update any inode information.

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 stdlib/canonicalize.c | 52 ++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 9aa69676e4..952f4dca41 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -55,6 +55,7 @@ __realpath (const char *name, char *resolved)
   const char *start, *end, *rpath_limit;
   const size_t path_max = PATH_MAX;
   int num_links = 0;
+  char buf[PATH_MAX];
   char extra_buf[PATH_MAX];
 
   if (name == NULL)
@@ -104,12 +105,6 @@ __realpath (const char *name, char *resolved)
 
   for (end = start; *start; start = end)
     {
-#ifdef _LIBC
-      struct stat64 st;
-#else
-      struct stat st;
-#endif
-
       /* Skip sequence of multiple path-separators.  */
       while (*start == '/')
         ++start;
@@ -118,12 +113,25 @@ __realpath (const char *name, char *resolved)
       for (end = start; *end && *end != '/'; ++end)
         /* Nothing.  */;
 
-      if (end - start == 0)
-        break;
-      else if (end - start == 1 && start[0] == '.')
+      if (end - start == 1 && start[0] == '.')
         /* nothing */;
       else if (end - start == 2 && start[0] == '.' && start[1] == '.')
         {
+          ssize_t n;
+
+          if (dest[-1] != '/')
+            *dest++ = '/';
+          *dest = '\0';
+
+          n = __readlink (rpath, buf, path_max - 1);
+          if (n == -1)
+            {
+              if (errno == ENOTDIR && dest[-1] == '/')
+                dest[-1] = '\0';
+              if (errno != EINVAL)
+                goto error;
+            }
+
           /* Back up to previous component, ignore if at root already.  */
           if (dest > rpath + 1)
             for (--dest; dest > rpath && dest[-1] != '/'; --dest)
@@ -132,6 +140,7 @@ __realpath (const char *name, char *resolved)
       else
         {
           size_t new_size;
+          ssize_t n;
 
           if (dest[-1] != '/')
             *dest++ = '/';
@@ -166,25 +175,23 @@ __realpath (const char *name, char *resolved)
           dest = __mempcpy (dest, start, end - start);
           *dest = '\0';
 
-          if (__lstat64 (rpath, &st) < 0)
-            goto error;
-
-          if (S_ISLNK (st.st_mode))
+          n = __readlink (rpath, buf, path_max - 1);
+          if (n < 0)
+            {
+              if (errno == ENOTDIR && dest[-1] == '/')
+                dest[-1] = '\0';
+              if (errno != EINVAL)
+                goto error;
+            }
+          else
             {
-              char buf[PATH_MAX];
               size_t len;
-              ssize_t n;
 
               if (++num_links > __eloop_threshold ())
                 {
                   __set_errno (ELOOP);
                   goto error;  }
 
-              n = __readlink (rpath, buf, path_max - 1);
-              if (n < 0)
-                goto error;
-              buf[n] = '\0';
-
               len = strlen (end);
               /* Check that n + len + 1 doesn't overflow and is <= path_max. */
               if (n >= SIZE_MAX - len || n + len >= path_max)
@@ -211,11 +218,6 @@ __realpath (const char *name, char *resolved)
                       continue;
                 }
             }
-          else if (!S_ISDIR (st.st_mode) && *end != '\0')
-            {
-              __set_errno (ENOTDIR);
-              goto error;
-            }
         }
     }
   if (dest > rpath + 1 && dest[-1] == '/')
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970]
  2020-10-27 14:35 ` [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970] Adhemerval Zanella
@ 2020-10-27 15:36   ` Florian Weimer
  2020-10-27 17:30     ` Adhemerval Zanella
  0 siblings, 1 reply; 7+ messages in thread
From: Florian Weimer @ 2020-10-27 15:36 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha; +Cc: Adhemerval Zanella, Andreas Schwab

* Adhemerval Zanella via Libc-alpha:

> The readlink already tells whether the file is a symlink, so there is
> no need to call lstat to check it.  However for '..' it requires an
> extra readlink check if the previous component can be really accessed,
> otherwise the next iteration will check a possible valid path and end
> early.  It should performance-wise acceptable and a gain over lstat,
> afaik symlink should not update any inode information.

Should this go into gnulib first?

Thanks,
Florian
-- 
Red Hat GmbH, https://de.redhat.com/ , Registered seat: Grasbrunn,
Commercial register: Amtsgericht Muenchen, HRB 153243,
Managing Directors: Charles Cachera, Brian Klemm, Laurie Krebs, Michael O'Neill


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970]
  2020-10-27 15:36   ` Florian Weimer
@ 2020-10-27 17:30     ` Adhemerval Zanella
  2020-10-27 19:11       ` Paul Eggert
  0 siblings, 1 reply; 7+ messages in thread
From: Adhemerval Zanella @ 2020-10-27 17:30 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha
  Cc: Andreas Schwab, Paul Eggert



On 27/10/2020 12:36, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> The readlink already tells whether the file is a symlink, so there is
>> no need to call lstat to check it.  However for '..' it requires an
>> extra readlink check if the previous component can be really accessed,
>> otherwise the next iteration will check a possible valid path and end
>> early.  It should performance-wise acceptable and a gain over lstat,
>> afaik symlink should not update any inode information.
> 
> Should this go into gnulib first?

I can send to gnulib, although glibc implementation is not really in
sync with gnulib one's.  Also on first patch of this set I explicit
did not sync some gnulib features that I think only clutter the
implementation with code that either won't be used by any system glibc
supports or will be removed in subsequent patches.

Paul, do you think this could be included in gnulib? 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970]
  2020-10-27 17:30     ` Adhemerval Zanella
@ 2020-10-27 19:11       ` Paul Eggert
  0 siblings, 0 replies; 7+ messages in thread
From: Paul Eggert @ 2020-10-27 19:11 UTC (permalink / raw)
  To: Adhemerval Zanella, Florian Weimer, Adhemerval Zanella via Libc-alpha
  Cc: Andreas Schwab

On 10/27/20 10:30 AM, Adhemerval Zanella wrote:
> Paul, do you think this could be included in gnulib?

I'd rather keep the two in sync. This should be doable by putting the 
Gnulib-specific stuff in "#ifndef _LIBC" areas that shouldn't bother Glibc 
maintenance significantly. We've had a reasonable amount of luck doing that in 
other library code. I could give a shot at doing this, install the result into 
Gnulib, and having you look at the result (we could think of this as being part 
of the glibc review process :-).

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-10-27 19:11 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-27 14:35 [PATCH v2 1/4] Sync canonicalize with gnulib [BZ #10635] Adhemerval Zanella
2020-10-27 14:35 ` [PATCH v2 2/4] stdlib: Use fixed buffer size for realpath [BZ #26241] Adhemerval Zanella
2020-10-27 14:35 ` [PATCH v2 3/4] stdlib: Fix arithmetic overflows in realpath [BZ #26592] Adhemerval Zanella
2020-10-27 14:35 ` [PATCH v2 4/4] stdlib: Remove lstat usage from realpath [BZ #24970] Adhemerval Zanella
2020-10-27 15:36   ` Florian Weimer
2020-10-27 17:30     ` Adhemerval Zanella
2020-10-27 19:11       ` Paul Eggert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).