public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: libc-alpha@sourceware.org, Paul Eggert <eggert@cs.ucla.edu>
Cc: bug-gnulib@gnu.org
Subject: [PATCH 5/5] stdlib: Remove lstat usage from realpath [BZ #24970]
Date: Thu, 24 Dec 2020 12:17:01 -0300	[thread overview]
Message-ID: <20201224151701.1751008-6-adhemerval.zanella@linaro.org> (raw)
In-Reply-To: <20201224151701.1751008-1-adhemerval.zanella@linaro.org>

The readlink already tells whether the file is a symlink, so there is
no need to call lstat to check it.  However for '..' it requires an
extra readlink check if the previous component can be really accessed,
otherwise the next iteration will check a possible valid path and end
early.  It should performance-wise acceptable and a gain over lstat,
afaik symlink should not update any inode information.

It also fixes the stdlib/test-canon issued from the gnulib sync.

Checked on x86_64-linux-gnu.
---
 include/scratch_buffer.h |  21 ++++++
 stdlib/canonicalize.c    | 139 +++++++++++++++++++++++----------------
 2 files changed, 102 insertions(+), 58 deletions(-)

diff --git a/include/scratch_buffer.h b/include/scratch_buffer.h
index c39da78629..7ae77e5160 100644
--- a/include/scratch_buffer.h
+++ b/include/scratch_buffer.h
@@ -132,4 +132,25 @@ scratch_buffer_set_array_size (struct scratch_buffer *buffer,
 			 (buffer, nelem, size));
 }
 
+/* Check if BUFFER is using the internal buffer.  */
+static __always_inline bool
+scratch_buffer_using_internal (struct scratch_buffer *buffer)
+{
+  return buffer->data == buffer->__space.__c;
+}
+
+/* Return the internal buffer from BUFFER if it is dynamic allocated,
+   otherwise returns NULL.  Initializes the BUFFER if the internal
+   dynamic buffer is returned.  */
+static __always_inline void *
+scratch_buffer_take_buffer (struct scratch_buffer *buffer)
+{
+  if (scratch_buffer_using_internal (buffer))
+    return NULL;
+
+  void *r = buffer->data;
+  scratch_buffer_init (buffer);
+  return r;
+}
+
 #endif /* _SCRATCH_BUFFER_H */
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 9111d92a4c..78a06227d2 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -88,6 +88,21 @@
 
 #if !FUNC_REALPATH_WORKS || defined _LIBC
 static idx_t
+readlink_scratch_buffer (const char *path, struct scratch_buffer *buf)
+{
+  ssize_t r;
+  while (true)
+    {
+      ptrdiff_t bufsize = buf->length;
+      r = __readlink (path, buf->data, bufsize - 1);
+      if (r < bufsize - 1)
+	break;
+      if (!scratch_buffer_grow (buf))
+	return -1;
+    }
+  return r;
+}
+static idx_t
 get_path_max (void)
 {
 # ifdef PATH_MAX
@@ -144,12 +159,10 @@ __realpath (const char *name, char *resolved)
 
   struct scratch_buffer extra_buffer, link_buffer;
   struct scratch_buffer rname_buffer;
-  struct scratch_buffer *rname_buf = &rname_buffer;
   scratch_buffer_init (&extra_buffer);
   scratch_buffer_init (&link_buffer);
-  scratch_buffer_init (rname_buf);
-  char *rname_on_stack = rname_buf->data;
-  char *rname = rname_on_stack;
+  scratch_buffer_init (&rname_buffer);
+  char *rname = rname_buffer.data;
   bool end_in_extra_buffer = false;
   bool failed = true;
 
@@ -159,16 +172,16 @@ __realpath (const char *name, char *resolved)
 
   if (!IS_ABSOLUTE_FILE_NAME (name))
     {
-      while (!__getcwd (rname, rname_buf->length))
+      while (!__getcwd (rname, rname_buffer.length))
         {
           if (errno != ERANGE)
             {
               dest = rname;
               goto error;
             }
-          if (!scratch_buffer_grow (rname_buf))
+          if (!scratch_buffer_grow (&rname_buffer))
             goto error_nomem;
-          rname = rname_buf->data;
+	  rname = rname_buffer.data;
         }
       dest = __rawmemchr (rname, '\0');
       start = name;
@@ -188,7 +201,7 @@ __realpath (const char *name, char *resolved)
       start = name + prefix_len;
     }
 
-  for ( ; *start; start = end)
+  for (end = start ; *start; start = end)
     {
       /* Skip sequence of multiple file name separators.  */
       while (ISSLASH (*start))
@@ -206,6 +219,20 @@ __realpath (const char *name, char *resolved)
         /* nothing */;
       else if (startlen == 2 && start[0] == '.' && start[1] == '.')
         {
+          if (!ISSLASH (dest[-1]))
+            *dest++ = '/';
+          *dest = '\0';
+
+	  ssize_t n = readlink_scratch_buffer (rname, &link_buffer);
+          if (n < 0)
+            {
+              if (errno == ENOTDIR && dest[-1] == '/')
+                dest[-1] = '\0';
+	      if (errno == ENOMEM)
+		goto error_nomem;
+              if (errno != EINVAL)
+                goto error;
+            }
           /* Back up to previous component, ignore if at root already.  */
           if (dest > rname + prefix_len + 1)
             for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest)
@@ -220,46 +247,36 @@ __realpath (const char *name, char *resolved)
           if (!ISSLASH (dest[-1]))
             *dest++ = '/';
 
-          while (rname + rname_buf->length - dest <= startlen)
+          while (rname + rname_buffer.length - dest <= startlen)
             {
               idx_t dest_offset = dest - rname;
-              if (!scratch_buffer_grow_preserve (rname_buf))
+              if (!scratch_buffer_grow_preserve (&rname_buffer))
                 goto error_nomem;
-              rname = rname_buf->data;
+              rname = rname_buffer.data;
               dest = rname + dest_offset;
             }
 
           dest = __mempcpy (dest, start, startlen);
           *dest = '\0';
 
-          /* If STARTLEN == 0, RNAME ends in '/'; use stat rather than
-             readlink, because readlink might fail with EINVAL without
-             checking whether RNAME sans '/' is valid.  */
-          struct stat st;
-          char *buf = NULL;
-          ssize_t n;
-          if (startlen != 0)
+          ssize_t n = readlink_scratch_buffer (rname, &link_buffer);
+          if (n < 0)
             {
-              while (true)
-                {
-                  buf = link_buffer.data;
-                  idx_t bufsize = link_buffer.length;
-                  n = __readlink (rname, buf, bufsize - 1);
-                  if (n < bufsize - 1)
-                    break;
-                  if (!scratch_buffer_grow (&link_buffer))
-                    goto error_nomem;
-                }
-              if (n < 0)
-                buf = NULL;
+              if (errno == ENOTDIR && dest[-1] == '/')
+                dest[-1] = '\0';
+              if (errno == ENOMEM)
+                goto error_nomem;
+              if (errno != EINVAL)
+                goto error;
             }
-          if (buf)
+          else
             {
               if (++num_links > __eloop_threshold ())
                 {
                   __set_errno (ELOOP);
                   goto error;
                 }
+	      char *buf = (char*) link_buffer.data;
 
               buf[n] = '\0';
 
@@ -279,7 +296,7 @@ __realpath (const char *name, char *resolved)
 
               /* Careful here, end may be a pointer into extra_buf... */
               memmove (&extra_buf[n], end, len + 1);
-              name = end = memcpy (extra_buf, buf, n);
+              name = end = memcpy (extra_buf, link_buffer.data, n);
               end_in_extra_buffer = true;
 
               if (IS_ABSOLUTE_FILE_NAME (buf))
@@ -309,10 +326,6 @@ __realpath (const char *name, char *resolved)
                     dest++;
                 }
             }
-          else if (! (startlen == 0
-                      ? stat (rname, &st) == 0 || errno == EOVERFLOW
-                      : errno == EINVAL))
-            goto error;
         }
     }
   if (dest > rname + prefix_len + 1 && ISSLASH (dest[-1]))
@@ -320,34 +333,44 @@ __realpath (const char *name, char *resolved)
   if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len
       && ISSLASH (*dest) && !ISSLASH (dest[1]))
     dest++;
+  *dest = '\0';
   failed = false;
 
 error:
-  *dest++ = '\0';
-  if (resolved != NULL && dest - rname <= get_path_max ())
-    rname = strcpy (resolved, rname);
+  if (resolved != NULL)
+    {
+      if (dest - rname <= get_path_max ())
+        rname = strcpy (resolved, rname);
+    }
+  else
+    {
+      if (rname == resolved)
+	return rname;
+
+      idx_t rname_size = dest - rname;
+      if (scratch_buffer_using_internal (&rname_buffer))
+        {
+          rname = malloc (rname_size + 1);
+	  if (rname != NULL)
+	    {
+              memcpy (rname, rname_buffer.data, rname_size);
+	      rname[rname_size] = '\0';
+	    }
+        }
+      else
+        {
+	  rname = scratch_buffer_take_buffer (&rname_buffer);
+	  char *result = realloc (rname, rname_size);
+	  if (result != NULL)
+	    rname = result;
+        }
+    }
 
 error_nomem:
-  scratch_buffer_free (&extra_buffer);
   scratch_buffer_free (&link_buffer);
-  if (failed || rname == resolved)
-    scratch_buffer_free (rname_buf);
-
-  if (failed)
-    return NULL;
-
-  if (rname == resolved)
-    return rname;
-  idx_t rname_size = dest - rname;
-  if (rname == rname_on_stack)
-    {
-      rname = malloc (rname_size);
-      if (rname == NULL)
-        return NULL;
-      return memcpy (rname, rname_on_stack, rname_size);
-    }
-  char *result = realloc (rname, rname_size);
-  return result != NULL ? result : rname;
+  scratch_buffer_free (&extra_buffer);
+  scratch_buffer_free (&rname_buffer);
+  return failed ? NULL : rname;
 }
 libc_hidden_def (__realpath)
 versioned_symbol (libc, __realpath, realpath, GLIBC_2_3);
-- 
2.25.1


  parent reply	other threads:[~2020-12-24 15:17 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-24 15:16 [PATCH 0/5] Fix multiple realpath issues Adhemerval Zanella
2020-12-24 15:16 ` [PATCH 1/5] stdlib: Sync canonicalize with gnulib [BZ #10635] [BZ #26592] [BZ #26241] Adhemerval Zanella
2020-12-24 22:45   ` Paul Eggert
2020-12-25  0:44     ` [PATCH 1/5] warnings in canonicalize.c Bruno Haible
2020-12-25  5:56       ` Paul Eggert
2020-12-28 12:53     ` [PATCH 1/5] stdlib: Sync canonicalize with gnulib [BZ #10635] [BZ #26592] [BZ #26241] Adhemerval Zanella
2020-12-24 15:16 ` [PATCH 2/5] Import idx.h from gnulib Adhemerval Zanella
2020-12-24 22:53   ` Paul Eggert
2020-12-25 20:34   ` Florian Weimer
2020-12-25 21:38     ` Bruno Haible
2020-12-26  1:29     ` Paul Eggert
2020-12-31 23:12   ` Joseph Myers
2021-01-01  3:24     ` Paul Eggert
2020-12-24 15:16 ` [PATCH 3/5] Import filename.h " Adhemerval Zanella
2020-12-31 23:13   ` Joseph Myers
2021-01-01  3:31     ` Paul Eggert
2020-12-24 15:17 ` [PATCH 4/5] stdlib: Add testcase fro BZ #26241 Adhemerval Zanella
2020-12-24 15:17 ` Adhemerval Zanella [this message]
2020-12-25  0:27   ` [PATCH 5/5] stdlib: Remove lstat usage from realpath [BZ #24970] Paul Eggert
2020-12-28 11:42     ` Adhemerval Zanella
2020-12-28 13:32       ` Adhemerval Zanella
2020-12-28 21:04       ` Paul Eggert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201224151701.1751008-6-adhemerval.zanella@linaro.org \
    --to=adhemerval.zanella@linaro.org \
    --cc=bug-gnulib@gnu.org \
    --cc=eggert@cs.ucla.edu \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).