public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/azanella/bz23960] linux: Use getdents64 on non-LFS readdir
@ 2020-10-02 13:53 Adhemerval Zanella
  0 siblings, 0 replies; 5+ messages in thread
From: Adhemerval Zanella @ 2020-10-02 13:53 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=46baff8103680b3125b2d01826c7d57632775e57

commit 46baff8103680b3125b2d01826c7d57632775e57
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Mon Apr 13 08:06:57 2020 -0300

    linux: Use getdents64 on non-LFS readdir
    
    It reserves some space on the allocated internal buffer to be
    used as the returned dirent struct.  The dirent64 structs obtained
    from the kernel are copied to the temporary buffer on each readdir call.
    
    The overflow test is moved to after the dirent64 entry is copied
    to the temporary buffer, and a subsequent readdir will obtain the
    next entry.  The idea is that an overflow makes readdir fail to
    return the affected entry, but a later readdir might still obtain
    the next one (for filesystems that do not have the concept of
    sequential d_off, such as ext4).
    
    Checked on x86_64-linux-gnu and i686-linux-gnu.

Diff:
---
 sysdeps/unix/sysv/linux/opendir.c |   6 +-
 sysdeps/unix/sysv/linux/readdir.c |  25 ++++----
 sysdeps/unix/sysv/linux/readdir.h | 117 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/opendir.c b/sysdeps/unix/sysv/linux/opendir.c
index 4e8d108821..901451361e 100644
--- a/sysdeps/unix/sysv/linux/opendir.c
+++ b/sysdeps/unix/sysv/linux/opendir.c
@@ -22,6 +22,7 @@
 #include <sys/param.h>	/* For MIN and MAX.  */
 
 #include <not-cancel.h>
+#include <readdir.h>    /* For return_buffer_size.  */
 
 enum {
   opendir_oflags = O_RDONLY|O_NDELAY|O_DIRECTORY|O_LARGEFILE|O_CLOEXEC
@@ -103,8 +104,9 @@ __alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp)
   enum { max_buffer_size = 1U << 20 };
 
   const size_t allocation_size = 4 * BUFSIZ;
-  _Static_assert (allocation_size >= sizeof (struct dirent64),
-		  "allocation_size < sizeof (struct dirent64)");
+  _Static_assert (allocation_size >= sizeof (struct dirent64)
+				     + return_buffer_size,
+		  "opendir buffer size smaller than required");
 
   /* Increase allocation if requested, but not if the value appears to
      be bogus.  It will be between 32Kb (for blocksizes smaller than BUFSIZ)
diff --git a/sysdeps/unix/sysv/linux/readdir.c b/sysdeps/unix/sysv/linux/readdir.c
index ca2a8964e9..8eab0f4c9b 100644
--- a/sysdeps/unix/sysv/linux/readdir.c
+++ b/sysdeps/unix/sysv/linux/readdir.c
@@ -19,7 +19,7 @@
 #include <dirent.h>
 
 #if !_DIRENT_MATCHES_DIRENT64
-#include <dirstream.h>
+#include <readdir.h>
 
 /* Read a directory entry from DIRP.  */
 struct dirent *
@@ -30,16 +30,12 @@ __readdir_unlocked (DIR *dirp)
 
   do
     {
-      size_t reclen;
-
       if (dirp->offset >= dirp->size)
 	{
 	  /* We've emptied out our buffer.  Refill it.  */
 
-	  size_t maxread = dirp->allocation;
-	  ssize_t bytes;
-
-	  bytes = __getdents (dirp->fd, dirp->data, maxread);
+	  ssize_t bytes = __getdents64 (dirp->fd, dirstream_data (dirp),
+					dirstream_alloc_size (dirp));
 	  if (bytes <= 0)
 	    {
 	      /* On some systems getdents fails with ENOENT when the
@@ -54,19 +50,18 @@ __readdir_unlocked (DIR *dirp)
 	      dp = NULL;
 	      break;
 	    }
-	  dirp->size = (size_t) bytes;
+	  dirp->size = bytes;
 
 	  /* Reset the offset into the buffer.  */
 	  dirp->offset = 0;
 	}
 
-      dp = (struct dirent *) &dirp->data[dirp->offset];
-
-      reclen = dp->d_reclen;
-
-      dirp->offset += reclen;
-
-      dirp->filepos = dp->d_off;
+      dp = dirstream_ret_entry (dirp);
+      if (dp == NULL)
+	{
+	  __set_errno (EOVERFLOW);
+	  break;
+	}
 
       /* Skip deleted files.  */
     } while (dp->d_ino == 0);
diff --git a/sysdeps/unix/sysv/linux/readdir.h b/sysdeps/unix/sysv/linux/readdir.h
new file mode 100644
index 0000000000..4dc219e220
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/readdir.h
@@ -0,0 +1,117 @@
+/* Linux readdir internal implementation details.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DIRSTREAM_NOLFS_H
+#define _DIRSTREAM_NOLFS_H
+
+#if !_DIRENT_MATCHES_DIRENT64
+# include <dirstream.h>
+
+/* getdents64 is used internally for both LFS and non-LFS implementations.
+   The non-LFS interface reserves part of the allocated buffer to return the
+   non-LFS 'struct dirent' entry.  */
+
+/* This defines the size of the space reserved in the DIR internal buffer
+   to hold the 'struct dirent' returned from a 'readdir' call.
+
+   The largest practical length of the d_name member is 255 Unicode
+   characters in UTF-8 encoding, so d_name is 766 bytes long, plus
+   10 bytes from the header, for a total of 776 bytes.
+
+   It should also take into consideration the alignment requirement of
+   the getdents64 call.  */
+enum { return_buffer_size = 1024
+			    + sizeof (off64_t)
+			    - _Alignof (((struct __dirstream) {0}).data) };
+
+_Static_assert ((_Alignof (((struct __dirstream) {0}).data)
+		 + return_buffer_size) % sizeof (off64_t) == 0,
+		"return_buffer_size does not align the buffer properly");
+
+/* Return the available buffer size to use with getdents64 calls.  */
+static inline size_t
+dirstream_alloc_size (struct __dirstream *ds)
+{
+  return ds->allocation - return_buffer_size;
+}
+
+/* Return the start of the allocated buffer past the part reserved for the
+   entry returned by the non-LFS readdir call.  */
+static inline void *
+dirstream_data (struct __dirstream *ds)
+{
+  return (char *) ds->data + return_buffer_size;
+}
+
+/* Return the allocated buffer used on non-LFS readdir call.  */
+static inline struct dirent *
+dirstream_ret (struct __dirstream *ds)
+{
+  return (struct dirent *) ds->data;
+}
+
+/* Return the current dirent64 entry from the buffer area filled by
+   getdents64.  */
+static inline struct dirent64 *
+dirstream_entry (struct __dirstream *ds)
+{
+  size_t offset = return_buffer_size + ds->offset;
+  return (struct dirent64 *) ((char *) ds->data + offset);
+}
+
+/* Copy one entry obtained from a 'getdents64' call to the reserved space
+   on the DS allocated buffer and update its internal state.  */
+static inline struct dirent *
+dirstream_ret_entry (struct __dirstream *ds)
+{
+  struct dirent64 *dp64 = dirstream_entry (ds);
+  struct dirent *dp = dirstream_ret (ds);
+
+  dp->d_ino = dp64->d_ino;
+
+  dp->d_off = dp64->d_off;
+  if (dp->d_off != dp64->d_off)
+    /* Overflow.  */
+    return NULL;
+
+  const size_t size_diff = (offsetof (struct dirent64, d_name)
+			    - offsetof (struct dirent, d_name));
+  const size_t alignment = _Alignof (struct dirent);
+  size_t new_reclen  = (dp64->d_reclen - size_diff + alignment - 1)
+			& ~(alignment - 1);
+  if (new_reclen > return_buffer_size)
+    /* Overflow.  */
+    return NULL;
+  dp->d_reclen = new_reclen;
+
+  dp->d_type = dp64->d_type;
+
+  memcpy (dp->d_name, dp64->d_name,
+	  dp64->d_reclen - offsetof (struct dirent64, d_name));
+
+  ds->offset += dp64->d_reclen;
+  ds->filepos = dp64->d_off;
+
+  return dp;
+}
+#else
+/* No need to reserve buffer space if dirent already has LFS support.  */
+enum { return_buffer_size = 0 };
+#endif /* _DIRENT_MATCHES_DIRENT64  */
+
+#endif


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [glibc/azanella/bz23960] linux: Use getdents64 on non-LFS readdir
@ 2020-04-17 13:23 Adhemerval Zanella
  0 siblings, 0 replies; 5+ messages in thread
From: Adhemerval Zanella @ 2020-04-17 13:23 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=cfd1ac0f6c608908b8e55a72916ec31778f3a86b

commit cfd1ac0f6c608908b8e55a72916ec31778f3a86b
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Mon Apr 13 08:06:57 2020 -0300

    linux: Use getdents64 on non-LFS readdir
    
    It reserves some space on the allocated internal buffer to be
    used as the returned dirent struct.  The dirent64 structs obtained
    from the kernel are copied to the temporary buffer on each readdir call.
    
    The overflow test is moved to after the dirent64 entry is copied
    to the temporary buffer, and a subsequent readdir will obtain the
    next entry.  The idea is that an overflow makes readdir fail to
    return the affected entry, but a later readdir might still obtain
    the next one (for filesystems that do not have the concept of
    sequential d_off, such as ext4).
    
    Checked on x86_64-linux-gnu and i686-linux-gnu.

Diff:
---
 sysdeps/unix/sysv/linux/opendir.c |   6 +-
 sysdeps/unix/sysv/linux/readdir.c |  25 ++++----
 sysdeps/unix/sysv/linux/readdir.h | 117 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/opendir.c b/sysdeps/unix/sysv/linux/opendir.c
index 765c8104b3..d4a0885bd3 100644
--- a/sysdeps/unix/sysv/linux/opendir.c
+++ b/sysdeps/unix/sysv/linux/opendir.c
@@ -22,6 +22,7 @@
 #include <sys/param.h>	/* For MIN and MAX.  */
 
 #include <not-cancel.h>
+#include <readdir.h>    /* For return_buffer_size.  */
 
 enum {
   opendir_oflags = O_RDONLY|O_NDELAY|O_DIRECTORY|O_LARGEFILE|O_CLOEXEC
@@ -103,8 +104,9 @@ __alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp)
   enum { max_buffer_size = 1U << 20 };
 
   const size_t allocation_size = 4 * BUFSIZ;
-  _Static_assert (allocation_size >= sizeof (struct dirent64),
-		  "allocation_size < sizeof (struct dirent64)");
+  _Static_assert (allocation_size >= sizeof (struct dirent64)
+				     + return_buffer_size,
+		  "opendir buffer size smaller than required");
 
   /* Increase allocation if requested, but not if the value appears to
      be bogus.  It will be between 32Kb (for blocksizes smaller than BUFSIZ)
diff --git a/sysdeps/unix/sysv/linux/readdir.c b/sysdeps/unix/sysv/linux/readdir.c
index ca2a8964e9..8eab0f4c9b 100644
--- a/sysdeps/unix/sysv/linux/readdir.c
+++ b/sysdeps/unix/sysv/linux/readdir.c
@@ -19,7 +19,7 @@
 #include <dirent.h>
 
 #if !_DIRENT_MATCHES_DIRENT64
-#include <dirstream.h>
+#include <readdir.h>
 
 /* Read a directory entry from DIRP.  */
 struct dirent *
@@ -30,16 +30,12 @@ __readdir_unlocked (DIR *dirp)
 
   do
     {
-      size_t reclen;
-
       if (dirp->offset >= dirp->size)
 	{
 	  /* We've emptied out our buffer.  Refill it.  */
 
-	  size_t maxread = dirp->allocation;
-	  ssize_t bytes;
-
-	  bytes = __getdents (dirp->fd, dirp->data, maxread);
+	  ssize_t bytes = __getdents64 (dirp->fd, dirstream_data (dirp),
+					dirstream_alloc_size (dirp));
 	  if (bytes <= 0)
 	    {
 	      /* On some systems getdents fails with ENOENT when the
@@ -54,19 +50,18 @@ __readdir_unlocked (DIR *dirp)
 	      dp = NULL;
 	      break;
 	    }
-	  dirp->size = (size_t) bytes;
+	  dirp->size = bytes;
 
 	  /* Reset the offset into the buffer.  */
 	  dirp->offset = 0;
 	}
 
-      dp = (struct dirent *) &dirp->data[dirp->offset];
-
-      reclen = dp->d_reclen;
-
-      dirp->offset += reclen;
-
-      dirp->filepos = dp->d_off;
+      dp = dirstream_ret_entry (dirp);
+      if (dp == NULL)
+	{
+	  __set_errno (EOVERFLOW);
+	  break;
+	}
 
       /* Skip deleted files.  */
     } while (dp->d_ino == 0);
diff --git a/sysdeps/unix/sysv/linux/readdir.h b/sysdeps/unix/sysv/linux/readdir.h
new file mode 100644
index 0000000000..4dc219e220
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/readdir.h
@@ -0,0 +1,117 @@
+/* Linux readdir internal implementation details.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DIRSTREAM_NOLFS_H
+#define _DIRSTREAM_NOLFS_H
+
+#if !_DIRENT_MATCHES_DIRENT64
+# include <dirstream.h>
+
+/* getdents64 is used internally for both LFS and non-LFS implementations.
+   The non-LFS interface reserves part of the allocated buffer to return the
+   non-LFS 'struct dirent' entry.  */
+
+/* This defines the size of the space reserved in the DIR internal buffer
+   to hold the 'struct dirent' returned from a 'readdir' call.
+
+   The largest practical length of the d_name member is 255 Unicode
+   characters in UTF-8 encoding, so d_name is 766 bytes long, plus
+   10 bytes from the header, for a total of 776 bytes.
+
+   It should also take into consideration the alignment requirement of
+   the getdents64 call.  */
+enum { return_buffer_size = 1024
+			    + sizeof (off64_t)
+			    - _Alignof (((struct __dirstream) {0}).data) };
+
+_Static_assert ((_Alignof (((struct __dirstream) {0}).data)
+		 + return_buffer_size) % sizeof (off64_t) == 0,
+		"return_buffer_size does not align the buffer properly");
+
+/* Return the available buffer size to use with getdents64 calls.  */
+static inline size_t
+dirstream_alloc_size (struct __dirstream *ds)
+{
+  return ds->allocation - return_buffer_size;
+}
+
+/* Return the start of the allocated buffer past the part reserved for the
+   entry returned by the non-LFS readdir call.  */
+static inline void *
+dirstream_data (struct __dirstream *ds)
+{
+  return (char *) ds->data + return_buffer_size;
+}
+
+/* Return the allocated buffer used on non-LFS readdir call.  */
+static inline struct dirent *
+dirstream_ret (struct __dirstream *ds)
+{
+  return (struct dirent *) ds->data;
+}
+
+/* Return the current dirent64 entry from the buffer area filled by
+   getdents64.  */
+static inline struct dirent64 *
+dirstream_entry (struct __dirstream *ds)
+{
+  size_t offset = return_buffer_size + ds->offset;
+  return (struct dirent64 *) ((char *) ds->data + offset);
+}
+
+/* Copy one entry obtained from a 'getdents64' call to the reserved space
+   on the DS allocated buffer and update its internal state.  */
+static inline struct dirent *
+dirstream_ret_entry (struct __dirstream *ds)
+{
+  struct dirent64 *dp64 = dirstream_entry (ds);
+  struct dirent *dp = dirstream_ret (ds);
+
+  dp->d_ino = dp64->d_ino;
+
+  dp->d_off = dp64->d_off;
+  if (dp->d_off != dp64->d_off)
+    /* Overflow.  */
+    return NULL;
+
+  const size_t size_diff = (offsetof (struct dirent64, d_name)
+			    - offsetof (struct dirent, d_name));
+  const size_t alignment = _Alignof (struct dirent);
+  size_t new_reclen  = (dp64->d_reclen - size_diff + alignment - 1)
+			& ~(alignment - 1);
+  if (new_reclen > return_buffer_size)
+    /* Overflow.  */
+    return NULL;
+  dp->d_reclen = new_reclen;
+
+  dp->d_type = dp64->d_type;
+
+  memcpy (dp->d_name, dp64->d_name,
+	  dp64->d_reclen - offsetof (struct dirent64, d_name));
+
+  ds->offset += dp64->d_reclen;
+  ds->filepos = dp64->d_off;
+
+  return dp;
+}
+#else
+/* No need to reserve buffer space if dirent already has LFS support.  */
+enum { return_buffer_size = 0 };
+#endif /* _DIRENT_MATCHES_DIRENT64  */
+
+#endif


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [glibc/azanella/bz23960] linux: Use getdents64 on non-LFS readdir
@ 2020-04-16 13:20 Adhemerval Zanella
  0 siblings, 0 replies; 5+ messages in thread
From: Adhemerval Zanella @ 2020-04-16 13:20 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=bf75a21f54835e7b96193ae92e0ae3362f7bdbf6

commit bf75a21f54835e7b96193ae92e0ae3362f7bdbf6
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Mon Apr 13 08:06:57 2020 -0300

    linux: Use getdents64 on non-LFS readdir
    
    It reserves some space on the allocated internal buffer to be
    used as the returned dirent struct.  The dirent64 structs obtained
    from the kernel are copied to the temporary buffer on each readdir call.
    
    The overflow test is moved to after the dirent64 entry is copied
    to the temporary buffer, and a subsequent readdir will obtain the
    next entry.  The idea is that an overflow makes readdir fail to
    return the affected entry, but a later readdir might still obtain
    the next one (for filesystems that do not have the concept of
    sequential d_off, such as ext4).
    
    Checked on x86_64-linux-gnu and i686-linux-gnu.

Diff:
---
 sysdeps/unix/sysv/linux/opendir.c |   6 +-
 sysdeps/unix/sysv/linux/readdir.c |  25 ++++----
 sysdeps/unix/sysv/linux/readdir.h | 117 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/opendir.c b/sysdeps/unix/sysv/linux/opendir.c
index 765c8104b3..d4a0885bd3 100644
--- a/sysdeps/unix/sysv/linux/opendir.c
+++ b/sysdeps/unix/sysv/linux/opendir.c
@@ -22,6 +22,7 @@
 #include <sys/param.h>	/* For MIN and MAX.  */
 
 #include <not-cancel.h>
+#include <readdir.h>    /* For return_buffer_size.  */
 
 enum {
   opendir_oflags = O_RDONLY|O_NDELAY|O_DIRECTORY|O_LARGEFILE|O_CLOEXEC
@@ -103,8 +104,9 @@ __alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp)
   enum { max_buffer_size = 1U << 20 };
 
   const size_t allocation_size = 4 * BUFSIZ;
-  _Static_assert (allocation_size >= sizeof (struct dirent64),
-		  "allocation_size < sizeof (struct dirent64)");
+  _Static_assert (allocation_size >= sizeof (struct dirent64)
+				     + return_buffer_size,
+		  "opendir buffer size smaller than required");
 
   /* Increase allocation if requested, but not if the value appears to
      be bogus.  It will be between 32Kb (for blocksizes smaller than BUFSIZ)
diff --git a/sysdeps/unix/sysv/linux/readdir.c b/sysdeps/unix/sysv/linux/readdir.c
index ca2a8964e9..8eab0f4c9b 100644
--- a/sysdeps/unix/sysv/linux/readdir.c
+++ b/sysdeps/unix/sysv/linux/readdir.c
@@ -19,7 +19,7 @@
 #include <dirent.h>
 
 #if !_DIRENT_MATCHES_DIRENT64
-#include <dirstream.h>
+#include <readdir.h>
 
 /* Read a directory entry from DIRP.  */
 struct dirent *
@@ -30,16 +30,12 @@ __readdir_unlocked (DIR *dirp)
 
   do
     {
-      size_t reclen;
-
       if (dirp->offset >= dirp->size)
 	{
 	  /* We've emptied out our buffer.  Refill it.  */
 
-	  size_t maxread = dirp->allocation;
-	  ssize_t bytes;
-
-	  bytes = __getdents (dirp->fd, dirp->data, maxread);
+	  ssize_t bytes = __getdents64 (dirp->fd, dirstream_data (dirp),
+					dirstream_alloc_size (dirp));
 	  if (bytes <= 0)
 	    {
 	      /* On some systems getdents fails with ENOENT when the
@@ -54,19 +50,18 @@ __readdir_unlocked (DIR *dirp)
 	      dp = NULL;
 	      break;
 	    }
-	  dirp->size = (size_t) bytes;
+	  dirp->size = bytes;
 
 	  /* Reset the offset into the buffer.  */
 	  dirp->offset = 0;
 	}
 
-      dp = (struct dirent *) &dirp->data[dirp->offset];
-
-      reclen = dp->d_reclen;
-
-      dirp->offset += reclen;
-
-      dirp->filepos = dp->d_off;
+      dp = dirstream_ret_entry (dirp);
+      if (dp == NULL)
+	{
+	  __set_errno (EOVERFLOW);
+	  break;
+	}
 
       /* Skip deleted files.  */
     } while (dp->d_ino == 0);
diff --git a/sysdeps/unix/sysv/linux/readdir.h b/sysdeps/unix/sysv/linux/readdir.h
new file mode 100644
index 0000000000..4dc219e220
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/readdir.h
@@ -0,0 +1,117 @@
+/* Linux readdir internal implementation details.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DIRSTREAM_NOLFS_H
+#define _DIRSTREAM_NOLFS_H
+
+#if !_DIRENT_MATCHES_DIRENT64
+# include <dirstream.h>
+
+/* getdents64 is used internally for both LFS and non-LFS implementations.
+   The non-LFS interface reserves part of the allocated buffer to return the
+   non-LFS 'struct dirent' entry.  */
+
+/* This defines the size of the space reserved in the DIR internal buffer
+   to hold the 'struct dirent' returned from a 'readdir' call.
+
+   The largest practical length of the d_name member is 255 Unicode
+   characters in UTF-8 encoding, so d_name is 766 bytes long, plus
+   10 bytes from the header, for a total of 776 bytes.
+
+   It should also take into consideration the alignment requirement of
+   the getdents64 call.  */
+enum { return_buffer_size = 1024
+			    + sizeof (off64_t)
+			    - _Alignof (((struct __dirstream) {0}).data) };
+
+_Static_assert ((_Alignof (((struct __dirstream) {0}).data)
+		 + return_buffer_size) % sizeof (off64_t) == 0,
+		"return_buffer_size does not align the buffer properly");
+
+/* Return the available buffer size to use with getdents64 calls.  */
+static inline size_t
+dirstream_alloc_size (struct __dirstream *ds)
+{
+  return ds->allocation - return_buffer_size;
+}
+
+/* Return the start of the allocated buffer past the part reserved for the
+   entry returned by the non-LFS readdir call.  */
+static inline void *
+dirstream_data (struct __dirstream *ds)
+{
+  return (char *) ds->data + return_buffer_size;
+}
+
+/* Return the allocated buffer used on non-LFS readdir call.  */
+static inline struct dirent *
+dirstream_ret (struct __dirstream *ds)
+{
+  return (struct dirent *) ds->data;
+}
+
+/* Return the current dirent64 entry from the buffer area filled by
+   getdents64.  */
+static inline struct dirent64 *
+dirstream_entry (struct __dirstream *ds)
+{
+  size_t offset = return_buffer_size + ds->offset;
+  return (struct dirent64 *) ((char *) ds->data + offset);
+}
+
+/* Copy one entry obtained from a 'getdents64' call to the reserved space
+   on the DS allocated buffer and update its internal state.  */
+static inline struct dirent *
+dirstream_ret_entry (struct __dirstream *ds)
+{
+  struct dirent64 *dp64 = dirstream_entry (ds);
+  struct dirent *dp = dirstream_ret (ds);
+
+  dp->d_ino = dp64->d_ino;
+
+  dp->d_off = dp64->d_off;
+  if (dp->d_off != dp64->d_off)
+    /* Overflow.  */
+    return NULL;
+
+  const size_t size_diff = (offsetof (struct dirent64, d_name)
+			    - offsetof (struct dirent, d_name));
+  const size_t alignment = _Alignof (struct dirent);
+  size_t new_reclen  = (dp64->d_reclen - size_diff + alignment - 1)
+			& ~(alignment - 1);
+  if (new_reclen > return_buffer_size)
+    /* Overflow.  */
+    return NULL;
+  dp->d_reclen = new_reclen;
+
+  dp->d_type = dp64->d_type;
+
+  memcpy (dp->d_name, dp64->d_name,
+	  dp64->d_reclen - offsetof (struct dirent64, d_name));
+
+  ds->offset += dp64->d_reclen;
+  ds->filepos = dp64->d_off;
+
+  return dp;
+}
+#else
+/* No need to reserve buffer space if dirent already has LFS support.  */
+enum { return_buffer_size = 0 };
+#endif /* _DIRENT_MATCHES_DIRENT64  */
+
+#endif


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [glibc/azanella/bz23960] linux: Use getdents64 on non-LFS readdir
@ 2020-04-15 14:14 Adhemerval Zanella
  0 siblings, 0 replies; 5+ messages in thread
From: Adhemerval Zanella @ 2020-04-15 14:14 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=5953fcf52f5dc77f6bed98d2eb470c40215a2d36

commit 5953fcf52f5dc77f6bed98d2eb470c40215a2d36
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Mon Apr 13 08:06:57 2020 -0300

    linux: Use getdents64 on non-LFS readdir
    
    It reserves some space on the allocated internal buffer to be
    used as the returned dirent struct.  The dirent64 structs obtained
    from the kernel are copied to the temporary buffer on each readdir call.
    
    The overflow test is moved to after the dirent64 entry is copied
    to the temporary buffer, and a subsequent readdir will obtain the
    next entry.  The idea is that an overflow makes readdir fail to
    return the affected entry, but a later readdir might still obtain
    the next one (for filesystems that do not have the concept of
    sequential d_off, such as ext4).
    
    Checked on x86_64-linux-gnu and i686-linux-gnu.

Diff:
---
 sysdeps/unix/sysv/linux/opendir.c |  6 ++-
 sysdeps/unix/sysv/linux/readdir.c | 25 ++++------
 sysdeps/unix/sysv/linux/readdir.h | 99 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 17 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/opendir.c b/sysdeps/unix/sysv/linux/opendir.c
index 5469e2e18f..b60a690150 100644
--- a/sysdeps/unix/sysv/linux/opendir.c
+++ b/sysdeps/unix/sysv/linux/opendir.c
@@ -22,6 +22,7 @@
 #include <sys/param.h>	/* For MIN and MAX.  */
 
 #include <not-cancel.h>
+#include <readdir.h>    /* For return_buffer_size.  */
 
 enum {
   opendir_oflags = O_RDONLY|O_NDELAY|O_DIRECTORY|O_LARGEFILE|O_CLOEXEC
@@ -103,8 +104,9 @@ __alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp)
   enum { max_buffer_size = 1U << 20 };
 
   const size_t allocation_size = 4 * BUFSIZ;
-  _Static_assert (allocation_size >= sizeof (struct dirent64),
-		  "allocation_size < sizeof (struct dirent64)");
+  _Static_assert (allocation_size >= sizeof (struct dirent64)
+				     + return_buffer_size,
+		  "opendir buffer size smaller than required");
 
   /* Increase allocation if requested, but not if the value appears to
      be bogus.  It will be between 32Kb (for blocksizes smaller than BUFSIZ)
diff --git a/sysdeps/unix/sysv/linux/readdir.c b/sysdeps/unix/sysv/linux/readdir.c
index ca2a8964e9..8eab0f4c9b 100644
--- a/sysdeps/unix/sysv/linux/readdir.c
+++ b/sysdeps/unix/sysv/linux/readdir.c
@@ -19,7 +19,7 @@
 #include <dirent.h>
 
 #if !_DIRENT_MATCHES_DIRENT64
-#include <dirstream.h>
+#include <readdir.h>
 
 /* Read a directory entry from DIRP.  */
 struct dirent *
@@ -30,16 +30,12 @@ __readdir_unlocked (DIR *dirp)
 
   do
     {
-      size_t reclen;
-
       if (dirp->offset >= dirp->size)
 	{
 	  /* We've emptied out our buffer.  Refill it.  */
 
-	  size_t maxread = dirp->allocation;
-	  ssize_t bytes;
-
-	  bytes = __getdents (dirp->fd, dirp->data, maxread);
+	  ssize_t bytes = __getdents64 (dirp->fd, dirstream_data (dirp),
+					dirstream_alloc_size (dirp));
 	  if (bytes <= 0)
 	    {
 	      /* On some systems getdents fails with ENOENT when the
@@ -54,19 +50,18 @@ __readdir_unlocked (DIR *dirp)
 	      dp = NULL;
 	      break;
 	    }
-	  dirp->size = (size_t) bytes;
+	  dirp->size = bytes;
 
 	  /* Reset the offset into the buffer.  */
 	  dirp->offset = 0;
 	}
 
-      dp = (struct dirent *) &dirp->data[dirp->offset];
-
-      reclen = dp->d_reclen;
-
-      dirp->offset += reclen;
-
-      dirp->filepos = dp->d_off;
+      dp = dirstream_ret_entry (dirp);
+      if (dp == NULL)
+	{
+	  __set_errno (EOVERFLOW);
+	  break;
+	}
 
       /* Skip deleted files.  */
     } while (dp->d_ino == 0);
diff --git a/sysdeps/unix/sysv/linux/readdir.h b/sysdeps/unix/sysv/linux/readdir.h
new file mode 100644
index 0000000000..4b9d2e8e20
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/readdir.h
@@ -0,0 +1,99 @@
+/* Linux readdir internal implementation details.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DIRSTREAM_NOLFS_H
+#define _DIRSTREAM_NOLFS_H
+
+#if !_DIRENT_MATCHES_DIRENT64
+# include <dirstream.h>
+
+/* getdents64 is used internally for both LFS and non-LFS implementations.
+   The non-LFS interface reserves part of the allocated buffer to return the
+   non-LFS 'struct dirent' entry.  */
+
+/* The largest practical length of the d_name member is 255 Unicode
+   characters in UTF-8 encoding, so d_name is 766 bytes long, plus
+   10 bytes from the header, for a total of 776 bytes.  */
+enum { return_buffer_size = 1028 };
+
+/* Ensure that the buffer that is passed to the getdents64 call is
+   sufficiently aligned.  */
+_Static_assert ((_Alignof (((struct __dirstream) {0}).data)
+		 + return_buffer_size) % sizeof (off64_t) == 0,
+		"return_buffer_size does not align the buffer properly");
+
+/* Return the available buffer size to use with getdents64 calls.  */
+static inline size_t
+dirstream_alloc_size (struct __dirstream *ds)
+{
+  return ds->allocation - return_buffer_size;
+}
+
+/* Return the start of the allocated buffer past the part reserved for the
+   entry returned by the non-LFS readdir call.  */
+static inline void *
+dirstream_data (struct __dirstream *ds)
+{
+  return (char *) ds->data + return_buffer_size;
+}
+
+/* Return the allocated buffer used on non-LFS readdir call.  */
+static inline struct dirent *
+dirstream_ret (struct __dirstream *ds)
+{
+  return (struct dirent *) ds->data;
+}
+
+/* Return the current dirent64 entry from the buffer area filled by
+   getdents64.  */
+static inline struct dirent64 *
+dirstream_entry (struct __dirstream *ds)
+{
+  size_t offset = return_buffer_size + ds->offset;
+  return (struct dirent64 *) ((char *) ds->data + offset);
+}
+
+/* Copy one entry obtained from a 'getdents64' call to the reserved space
+   on the DS allocated buffer and update its internal state.  */
+static inline struct dirent *
+dirstream_ret_entry (struct __dirstream *ds)
+{
+  struct dirent64 *dp64 = dirstream_entry (ds);
+  struct dirent *dp = dirstream_ret (ds);
+
+  dp->d_ino = dp64->d_ino;
+  dp->d_off = dp64->d_off;
+  dp->d_reclen = dp64->d_reclen;
+  dp->d_type = dp64->d_type;
+  memcpy (dp->d_name, dp64->d_name,
+	  dp64->d_reclen - offsetof (struct dirent64, d_name));
+
+  ds->offset += dp->d_reclen;
+  ds->filepos = dp64->d_off;
+  if (ds->filepos != dp64->d_off)
+    /* Overflow.  */
+    return NULL;
+
+  return dp;
+}
+#else
+/* No need to reserve buffer space if dirent already has LFS support.  */
+enum { return_buffer_size = 0 };
+#endif /* _DIRENT_MATCHES_DIRENT64  */
+
+#endif


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [glibc/azanella/bz23960] linux: Use getdents64 on non-LFS readdir
@ 2020-04-14 21:09 Adhemerval Zanella
  0 siblings, 0 replies; 5+ messages in thread
From: Adhemerval Zanella @ 2020-04-14 21:09 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=2f2bc5957a3d3a3653f62cc32b8f7a636c009411

commit 2f2bc5957a3d3a3653f62cc32b8f7a636c009411
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Mon Apr 13 08:06:57 2020 -0300

    linux: Use getdents64 on non-LFS readdir
    
    It reserves some space on the allocated internal DIR buffer to be
    used as a temporary buffer.  The dirent64 structs obtained from the
    kernel are then copied to the temporary buffer on each readdir call.
    
    The overflow test is now done after the dirent64 entry is copied
    to the temporary buffer, so a subsequent readdir will obtain the
    next entry.  The idea is to work on filesystems that do not have
    the concept of sequential d_off (such as ext4).
    
    Checked on x86_64-linux-gnu and i686-linux-gnu.

Diff:
---
 sysdeps/unix/sysv/linux/dirstream_nolfs.h | 91 +++++++++++++++++++++++++++++++
 sysdeps/unix/sysv/linux/opendir.c         |  7 ++-
 sysdeps/unix/sysv/linux/readdir.c         | 25 ++++-----
 3 files changed, 106 insertions(+), 17 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/dirstream_nolfs.h b/sysdeps/unix/sysv/linux/dirstream_nolfs.h
new file mode 100644
index 0000000000..21ca5f59be
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/dirstream_nolfs.h
@@ -0,0 +1,91 @@
+/* Linux non-LFS readdir internal implementation details.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DIRSTREAM_NOLFS_H
+#define _DIRSTREAM_NOLFS_H
+
+#include <dirstream.h>
+
+/* getdents64 is used regardless of the interface's LFS support.  The non-LFS
+   interface reserves part of the allocated buffer to return the non-LFS
+   'struct dirent' entry.  */
+
+/* The largest practical length of the d_name member is 255 Unicode
+   characters in UTF-8 encoding, so d_name is 766 bytes long, plus
+   10 bytes from header, for a total of 776 bytes.  */
+enum { return_buffer_size = 1024 };
+
+/* Ensure that the buffer passed to the getdents64 call is sufficiently
+   aligned.  */
+_Static_assert (return_buffer_size % _Alignof (max_align_t) == 0,
+		"return_buffer_size % alignof (max_align_t) != 0");
+
+/* Return how much of DS's allocation is available to getdents64 calls.  */
+static inline size_t
+dirstream_alloc_size (struct __dirstream *ds)
+{
+  return ds->allocation - return_buffer_size;
+}
+
+/* Return the start of the area of DS's buffer that getdents64 fills, which
+   begins right after the part reserved for the non-LFS readdir entry.  */
+static inline void *
+dirstream_data (struct __dirstream *ds)
+{
+  return (char *) ds->data + return_buffer_size;
+}
+
+/* Return the reserved start of DS's buffer as the non-LFS readdir result.  */
+static inline struct dirent *
+dirstream_ret (struct __dirstream *ds)
+{
+  return (struct dirent *) ds->data;
+}
+
+static inline struct dirent64 *
+dirstream_entry (struct __dirstream *ds)
+{
+  char *entries = dirstream_data (ds);  /* Area filled by getdents64.  */
+  return (struct dirent64 *) (entries + ds->offset);
+}
+
+/* Copy the current getdents64 entry of DS to the reserved non-LFS area and
+   advance DS.  Return NULL if d_ino or d_off overflows the non-LFS type.  */
+static inline struct dirent *
+dirstream_ret_entry (struct __dirstream *ds)
+{
+  struct dirent64 *dp64 = dirstream_entry (ds);
+  struct dirent *dp = dirstream_ret (ds);
+
+  dp->d_ino = dp64->d_ino;
+  dp->d_off = dp64->d_off;
+  dp->d_reclen = dp64->d_reclen;
+  dp->d_type = dp64->d_type;
+  memcpy (dp->d_name, dp64->d_name,
+	  dp64->d_reclen - offsetof (struct dirent64, d_name));
+
+  ds->offset += dp->d_reclen;
+  ds->filepos = dp64->d_off;
+  if (ds->filepos != dp64->d_off || dp->d_ino != dp64->d_ino)
+    /* Overflow: DP64 holds the untruncated values to compare against.  */
+    return NULL;
+
+  return dp;
+}
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/opendir.c b/sysdeps/unix/sysv/linux/opendir.c
index 0f8a1f28c6..6b8951bbfe 100644
--- a/sysdeps/unix/sysv/linux/opendir.c
+++ b/sysdeps/unix/sysv/linux/opendir.c
@@ -22,6 +22,7 @@
 #include <sys/param.h>	/* For MIN and MAX.  */
 
 #include <not-cancel.h>
+#include <dirstream_nolfs.h>
 
 enum {
   opendir_oflags = O_RDONLY|O_NDELAY|O_DIRECTORY|O_LARGEFILE|O_CLOEXEC
@@ -103,8 +104,10 @@ __alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp)
   enum { max_buffer_size = 1U << 20 };
 
   const size_t allocation_size = 4 * BUFSIZ;
-  _Static_assert (allocation_size >= sizeof (struct dirent64),
-		  "allocation_size < sizeof (struct dirent64)");
+  _Static_assert (allocation_size >= sizeof (struct dirent64)
+				     + return_buffer_size,
+		  "allocation_size < sizeof (struct dirent64) + "
+		  "return_buffer_size");
 
   /* Increase allocation if requested, but not if the value appears to
      be bogus.  It will be between 32Kb (for blocksizes smaller than BUFSIZ)
diff --git a/sysdeps/unix/sysv/linux/readdir.c b/sysdeps/unix/sysv/linux/readdir.c
index ca2a8964e9..a3236d10e6 100644
--- a/sysdeps/unix/sysv/linux/readdir.c
+++ b/sysdeps/unix/sysv/linux/readdir.c
@@ -19,7 +19,7 @@
 #include <dirent.h>
 
 #if !_DIRENT_MATCHES_DIRENT64
-#include <dirstream.h>
+#include <dirstream_nolfs.h>
 
 /* Read a directory entry from DIRP.  */
 struct dirent *
@@ -30,16 +30,12 @@ __readdir_unlocked (DIR *dirp)
 
   do
     {
-      size_t reclen;
-
       if (dirp->offset >= dirp->size)
 	{
 	  /* We've emptied out our buffer.  Refill it.  */
 
-	  size_t maxread = dirp->allocation;
-	  ssize_t bytes;
-
-	  bytes = __getdents (dirp->fd, dirp->data, maxread);
+	  ssize_t bytes = __getdents64 (dirp->fd, dirstream_data (dirp),
+					dirstream_alloc_size (dirp));
 	  if (bytes <= 0)
 	    {
 	      /* On some systems getdents fails with ENOENT when the
@@ -54,19 +50,18 @@ __readdir_unlocked (DIR *dirp)
 	      dp = NULL;
 	      break;
 	    }
-	  dirp->size = (size_t) bytes;
+	  dirp->size = bytes;
 
 	  /* Reset the offset into the buffer.  */
 	  dirp->offset = 0;
 	}
 
-      dp = (struct dirent *) &dirp->data[dirp->offset];
-
-      reclen = dp->d_reclen;
-
-      dirp->offset += reclen;
-
-      dirp->filepos = dp->d_off;
+      dp = dirstream_ret_entry (dirp);
+      if (dp == NULL)
+	{
+	  __set_errno (EOVERFLOW);
+	  break;
+	}
 
       /* Skip deleted files.  */
     } while (dp->d_ino == 0);


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-10-02 13:53 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-02 13:53 [glibc/azanella/bz23960] linux: Use getdents64 on non-LFS readdir Adhemerval Zanella
  -- strict thread matches above, loose matches on Subject: below --
2020-04-17 13:23 Adhemerval Zanella
2020-04-16 13:20 Adhemerval Zanella
2020-04-15 14:14 Adhemerval Zanella
2020-04-14 21:09 Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).