public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch] libstdc++/67747 Allocate space for dirent::d_name
@ 2015-09-29 12:00 Jonathan Wakely
  2015-09-29 19:49 ` Martin Sebor
  2015-10-02 12:16 ` Florian Weimer
  0 siblings, 2 replies; 21+ messages in thread
From: Jonathan Wakely @ 2015-09-29 12:00 UTC (permalink / raw)
  To: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 481 bytes --]

POSIX says that dirent::d_name has an unspecified length, so calls to
readdir_r must pass a buffer with enough trailing space for
{NAME_MAX}+1 characters. I wasn't doing that, which works OK on
GNU/Linux and BSD where d_name is a large array, but fails on Solaris
32-bit.

This uses pathconf to get NAME_MAX and allocates a buffer.

Tested powerpc64le-linux and x86_64-dragonfly4.1, I'm going to commit
this to trunk today (and backport all the filesystem fixes to
gcc-5-branch).


[-- Attachment #2: patch.txt --]
[-- Type: text/x-patch, Size: 4457 bytes --]

commit 16ff5d124b8e6c5d1f9dd4edb81b6ca5c9129134
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Tue Sep 29 11:58:19 2015 +0100

    PR libstdc++/67747 Allocate space for dirent::d_name
    
    	PR libstdc++/67747
    	* src/filesystem/dir.cc (_Dir::dirent_buf): New member.
    	(get_name_max): New function.
    	(native_readdir) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Copy to supplied
    	dirent object. Handle end of directory.
    	(_Dir::advance): Allocate space for d_name.

diff --git a/libstdc++-v3/src/filesystem/dir.cc b/libstdc++-v3/src/filesystem/dir.cc
index bce751c..d29f8eb 100644
--- a/libstdc++-v3/src/filesystem/dir.cc
+++ b/libstdc++-v3/src/filesystem/dir.cc
@@ -25,8 +25,12 @@
 #include <experimental/filesystem>
 #include <utility>
 #include <stack>
+#include <stddef.h>
 #include <string.h>
 #include <errno.h>
+#ifdef _GLIBCXX_HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #ifdef _GLIBCXX_HAVE_DIRENT_H
 # ifdef _GLIBCXX_HAVE_SYS_TYPES_H
 #  include <sys/types.h>
@@ -64,20 +68,23 @@ struct fs::_Dir
   fs::path		path;
   directory_entry	entry;
   file_type		type = file_type::none;
+  unique_ptr<char[]>	dirent_buf;
 };
 
 namespace
 {
   template<typename Bitmask>
-    inline bool is_set(Bitmask obj, Bitmask bits)
+    inline bool
+    is_set(Bitmask obj, Bitmask bits)
     {
       return (obj & bits) != Bitmask::none;
     }
 
   // Returns {dirp, p} on success, {nullptr, p} on error.
   // If an ignored EACCES error occurs returns {}.
-  fs::_Dir
-  open_dir(const fs::path& p, fs::directory_options options, std::error_code* ec)
+  inline fs::_Dir
+  open_dir(const fs::path& p, fs::directory_options options,
+	   std::error_code* ec)
   {
     if (ec)
       ec->clear();
@@ -99,8 +106,22 @@ namespace
     return {nullptr, p};
   }
 
+  inline long
+  get_name_max(const fs::path& path __attribute__((__unused__)))
+  {
+#ifdef _GLIBCXX_HAVE_UNISTD_H
+    long name_max = pathconf(path.c_str(), _PC_NAME_MAX);
+    if (name_max != -1)
+      return name_max;
+#endif
+
+    // Maximum path component on Windows is 255 (UTF-16?) characters,
+    // which is a reasonable default for POSIX too.
+    return 255;
+  }
+
   inline fs::file_type
-  get_file_type(const dirent& d __attribute__((__unused__)))
+  get_file_type(const ::dirent& d __attribute__((__unused__)))
   {
 #ifdef _GLIBCXX_HAVE_STRUCT_DIRENT_D_TYPE
     switch (d.d_type)
@@ -129,12 +150,26 @@ namespace
 #endif
   }
 
-  int
+  inline int
   native_readdir(DIR* dirp, ::dirent*& entryp)
   {
 #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
-    if ((entryp = ::readdir(dirp)))
-      return 0;
+    const int saved_errno = errno;
+    errno = 0;
+    if (auto entp = ::readdir(dirp))
+      {
+	size_t name_len = strlen(entp->d_name);
+	if (name_len > 255)
+	  return ENAMETOOLONG;
+	size_t len = offsetof(::dirent, d_name) + name_len + 1;
+	memcpy(entryp, entp, len);
+	return 0;
+      }
+    else if (errno == 0) // End of directory reached.
+      {
+	errno = saved_errno;
+	entryp = nullptr;
+      }
     return errno;
 #else
     return ::readdir_r(dirp, entryp, &entryp);
@@ -142,6 +177,7 @@ namespace
   }
 }
 
+
 // Returns false when the end of the directory entries is reached.
 // Reports errors by setting ec or throwing.
 bool
@@ -150,9 +186,15 @@ fs::_Dir::advance(error_code* ec, directory_options options)
   if (ec)
     ec->clear();
 
-  ::dirent ent;
-  ::dirent* result = &ent;
-  if (int err = native_readdir(dirp, result))
+  if (!dirent_buf)
+    {
+      size_t len = offsetof(::dirent, d_name) + get_name_max(path) + 1;
+      dirent_buf.reset(new char[len]);
+    }
+
+  ::dirent* entp = reinterpret_cast<::dirent*>(dirent_buf.get());
+
+  if (int err = native_readdir(dirp, entp))
     {
       if (err == EACCES
         && is_set(options, directory_options::skip_permission_denied))
@@ -165,13 +207,13 @@ fs::_Dir::advance(error_code* ec, directory_options options)
       ec->assign(err, std::generic_category());
       return true;
     }
-  else if (result != nullptr)
+  else if (entp != nullptr)
     {
       // skip past dot and dot-dot
-      if (!strcmp(ent.d_name, ".") || !strcmp(ent.d_name, ".."))
+      if (!strcmp(entp->d_name, ".") || !strcmp(entp->d_name, ".."))
 	return advance(ec, options);
-      entry = fs::directory_entry{path / ent.d_name};
-      type = get_file_type(ent);
+      entry = fs::directory_entry{path / entp->d_name};
+      type = get_file_type(*entp);
       return true;
     }
   else

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-09-29 12:00 [patch] libstdc++/67747 Allocate space for dirent::d_name Jonathan Wakely
@ 2015-09-29 19:49 ` Martin Sebor
  2015-09-30 12:00   ` Jonathan Wakely
  2015-10-01 22:56   ` Martin Sebor
  2015-10-02 12:16 ` Florian Weimer
  1 sibling, 2 replies; 21+ messages in thread
From: Martin Sebor @ 2015-09-29 19:49 UTC (permalink / raw)
  To: Jonathan Wakely, libstdc++, gcc-patches

On 09/29/2015 05:37 AM, Jonathan Wakely wrote:
> POSIX says that dirent::d_name has an unspecified length, so calls to
> readdir_r must pass a buffer with enough trailing space for
> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
> 32-bit.
>
> This uses pathconf to get NAME_MAX and allocates a buffer.
>
> Tested powerpc64le-linux and x86_64-dragonfly4.1, I'm going to commit
> this to trunk today (and backport all the filesystem fixes to
> gcc-5-branch).

Calling pathconf is only necessary when _POSIX_NO_TRUNC is zero
which I think exists mainly for legacy file systems. Otherwise,
it's safe to use NAME_MAX instead. Avoiding the call to pathconf
also avoids the TOCTOU between it and the call to opendir, and
hardcoding the value makes it possible to avoid dynamically
allocating the dirent buffer.

I didn't remember the MAX_PATH value on Windows anymore but from
what I've just read online it sounds like it's defined to 260.

Defaulting to 255 on POSIX is appropriate. On XSI systems, the
minimum required value is _XOPEN_NAME_MAX which is 255 (I would
suggest using the macro instead when it's defined). Otherwise,
the strictly conforming minimum value would be 14 -- the value
of _POSIX_NAME_MAX, but since 255 is greater it's fine.

Other than that, I tend to be leery of using plain char arrays
as buffers for objects of bigger types. I don't know to what
extent this is a problem for libstdc++ anymore as more and more
hardware is tolerant of misaligned accesses and as the default
new expression typically returns memory suitably aligned for
the largest fundamental type. But since there is no requirement
in the language that it do so and I would tend to err on the
side of caution and use operator new (as opposed to
new char[len]).

Martin

PS I'm interpreting _POSIX_NO_TRUNC being zero as more
restrictive than if it was non-zero and so calling pathconf(p,
_PC_NO_TRUNC) should be required to also return non-zero for
such an implementation, regardless of p. But let me check that
I'm reading it right.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-09-29 19:49 ` Martin Sebor
@ 2015-09-30 12:00   ` Jonathan Wakely
  2015-09-30 15:54     ` Martin Sebor
  2015-10-01 22:56   ` Martin Sebor
  1 sibling, 1 reply; 21+ messages in thread
From: Jonathan Wakely @ 2015-09-30 12:00 UTC (permalink / raw)
  To: Martin Sebor; +Cc: libstdc++, gcc-patches

On 29/09/15 12:54 -0600, Martin Sebor wrote:
>On 09/29/2015 05:37 AM, Jonathan Wakely wrote:
>>POSIX says that dirent::d_name has an unspecified length, so calls to
>>readdir_r must pass a buffer with enough trailing space for
>>{NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>32-bit.
>>
>>This uses pathconf to get NAME_MAX and allocates a buffer.
>>
>>Tested powerpc64le-linux and x86_64-dragonfly4.1, I'm going to commit
>>this to trunk today (and backport all the filesystem fixes to
>>gcc-5-branch).
>
>Calling pathconf is only necessary when _POSIX_NO_TRUNC is zero
>which I think exists mainly for legacy file systems. Otherwise,
>it's safe to use NAME_MAX instead. Avoiding the call to pathconf

Oh, nice. I was using NAME_MAX originally but the glibc readdir_r(3)
man-page has an example using pathconf and says that should be used,
so I went down that route.

>also avoids the TOCTOU between it and the call to opendir, and

Also nice.

>hardcoding the value makes it possible to avoid dynamically
>allocating the dirent buffer.

Can we be sure NAME_MAX will never be too big for the stack?

As currently written _Dir::advance() can call itself recursively to
skip the . and .. directories, so if we put the dirent buffer on the
stack then maybe we should re-use the same one not create three large
frames.

>I didn't remember the MAX_PATH value on Windows anymore but from
>what I've just read online it sounds like it's defined to 260.

Yes, I found that value, but I think I found something saying the
individual components were limited to 255. I'll make it 260 anyway. I
think that might relate to UTF-16 characters, so we'd need a larger
buffer for narrow characters, but I'm not sure what mingw supports
here anyway. The Windows implementations are just stubs where someone
who cares can add working code.

>Defaulting to 255 on POSIX is appropriate. On XSI systems, the
>minimum required value is _XOPEN_NAME_MAX which is 255 (I would
>suggest using the macro instead when it's defined). Otherwise,
>the strictly conforming minimum value would be 14 -- the value
>of _POSIX_NAME_MAX, but since 255 is greater it's fine.
>
>Other than that, I tend to be leery of using plain char arrays
>as buffers for objects of bigger types. I don't know to what
>extent this is a problem for libstdc++ anymore as more and more
>hardware is tolerant of misaligned accesses and as the default
>new expression typically returns memory suitably aligned for
>the largest fundamental type. But since there is no requirement
>in the language that it do so and I would tend to err on the
>side of caution and use operator new (as opposed to
>new char[len]).

For some reason I thought new char[len] would be suitably aligned for
any type with sizeof <= len, but that's only true for operator new.
(I should check I haven't made the same assumption elsewhere in the
library!)

std::aligned_union<offsetof(dirent, d_name)+NAME_MAX, dirent>::type
will give us what we need.


>Martin
>
>PS I'm interpreting _POSIX_NO_TRUNC being zero as more
>restrictive than if it was non-zero and so calling pathconf(p,
>_PC_NO_TRUNC) should be required to also return non-zero for
>such an implementation, regardless of p. But let me check that
>I'm reading it right.
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-09-30 12:00   ` Jonathan Wakely
@ 2015-09-30 15:54     ` Martin Sebor
  2015-10-01 17:23       ` Jonathan Wakely
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Sebor @ 2015-09-30 15:54 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 09/30/2015 05:01 AM, Jonathan Wakely wrote:
> On 29/09/15 12:54 -0600, Martin Sebor wrote:
>> On 09/29/2015 05:37 AM, Jonathan Wakely wrote:
>>> POSIX says that dirent::d_name has an unspecified length, so calls to
>>> readdir_r must pass a buffer with enough trailing space for
>>> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>> 32-bit.
>>>
>>> This uses pathconf to get NAME_MAX and allocates a buffer.
>>>
>>> Tested powerpc64le-linux and x86_64-dragonfly4.1, I'm going to commit
>>> this to trunk today (and backport all the filesystem fixes to
>>> gcc-5-branch).
>>
>> Calling pathconf is only necessary when _POSIX_NO_TRUNC is zero
>> which I think exists mainly for legacy file systems. Otherwise,
>> it's safe to use NAME_MAX instead. Avoiding the call to pathconf
>
> Oh, nice. I was using NAME_MAX originally but the glibc readdir_r(3)
> man-page has an example using pathconf and says that should be used,
> so I went down that route.

GLIBC pathconf calls statfs to get the NAME_MAX value from the OS,
so there's some chance that some unusual file system will define
it as greater than 255. I tested all those on my Fedora PC and on
my RHEL 7.2 powerpc64le box and didn't find one.

There also isn't one in the Wikipedia comparison of file systems:
   https://en.wikipedia.org/wiki/Comparison_of_file_systems

But to be 100% safe, we would need to call pathconf if readdir
failed due to truncation.

> Can we be sure NAME_MAX will never be too big for the stack?

When it's defined I think it's probably safe in practice but not
guaranteed. Also, like all of these <limits.h> constants, it's not
guaranteed to be defined at all when the implementation doesn't
impose a limit. I believe AIX is one implementation that doesn't
define it. So some preprocessor magic will be required to deal
with that.

Martin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-09-30 15:54     ` Martin Sebor
@ 2015-10-01 17:23       ` Jonathan Wakely
  2015-10-01 18:38         ` Jonathan Wakely
  0 siblings, 1 reply; 21+ messages in thread
From: Jonathan Wakely @ 2015-10-01 17:23 UTC (permalink / raw)
  To: Martin Sebor; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2631 bytes --]

On 30/09/15 09:30 -0600, Martin Sebor wrote:
>On 09/30/2015 05:01 AM, Jonathan Wakely wrote:
>>On 29/09/15 12:54 -0600, Martin Sebor wrote:
>>>On 09/29/2015 05:37 AM, Jonathan Wakely wrote:
>>>>POSIX says that dirent::d_name has an unspecified length, so calls to
>>>>readdir_r must pass a buffer with enough trailing space for
>>>>{NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>>>GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>>>32-bit.
>>>>
>>>>This uses pathconf to get NAME_MAX and allocates a buffer.
>>>>
>>>>Tested powerpc64le-linux and x86_64-dragonfly4.1, I'm going to commit
>>>>this to trunk today (and backport all the filesystem fixes to
>>>>gcc-5-branch).
>>>
>>>Calling pathconf is only necessary when _POSIX_NO_TRUNC is zero
>>>which I think exists mainly for legacy file systems. Otherwise,
>>>it's safe to use NAME_MAX instead. Avoiding the call to pathconf
>>
>>Oh, nice. I was using NAME_MAX originally but the glibc readdir_r(3)
>>man-page has an example using pathconf and says that should be used,
>>so I went down that route.
>
>GLIBC pathconf calls statfs to get the NAME_MAX value from the OS,
>so there's some chance that some unusual file system will define
>it as greater than 255. I tested all those on my Fedora PC and on
>my RHEL 7.2 powerpc64le box and didn't find one.
>
>There also isn't one in the Wikipedia comparison of file systems:
>  https://en.wikipedia.org/wiki/Comparison_of_file_systems
>
>But to be 100% safe, we would need to call pathconf if readdir
>failed due to truncation.
>
>>Can we be sure NAME_MAX will never be too big for the stack?
>
>When it's defined I think it's probably safe in practice but not
>guaranteed. Also, like all of these <limits.h> constants, it's not
>guaranteed to be defined at all when the implementation doesn't
>impose a limit. I believe AIX is one implementation that doesn't
>define it. So some preprocessor magic will be required to deal
>with that.

OK, latest version attached. This defines a helper type,
dirent_buffer, which either contains aligned_union<N, dirent> or
unique_ptr<void*, op_delete> depending on whether NAME_MAX is defined
or whether we have to use pathconf at run-time.

The dirent_buffer is created as part of the directory_iterator or
recursive_directory_iterator internals, so only once per iterator, and
shared between copies of that iterator. When NAME_MAX is defined there
is no second allocation, we just allocate a bit more space.

Tested powerpc64le-linux and x86_64-dragonfly.

I've started a build on powerpc-aix too but I don't know when the
tests for that will finish.


[-- Attachment #2: patch.txt --]
[-- Type: text/x-patch, Size: 6657 bytes --]

commit 6b30f2675aee599e43bcb55594de3abd2221c3ae
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Thu Oct 1 17:01:24 2015 +0100

    PR libstdc++/67747 Allocate space for dirent::d_name
    
    	PR libstdc++/67747
    	* src/filesystem/dir.cc (_Dir::direntp): New member.
    	(dirent_size, dirent_buffer): New helpers for readdir results.
    	(native_readdir) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Copy to supplied
    	dirent object. Handle end of directory.
    	(_Dir::advance): Allocate space for d_name.
    	(directory_iterator(const path&, directory_options, error_code*)):
    	Allocate a dirent_buffer alongside the _Dir object.
    	(recursive_directory_iterator::_Dir_stack): Add dirent_buffer member,
    	constructor and push() function that sets the element's entp.

diff --git a/libstdc++-v3/src/filesystem/dir.cc b/libstdc++-v3/src/filesystem/dir.cc
index bce751c..9372074 100644
--- a/libstdc++-v3/src/filesystem/dir.cc
+++ b/libstdc++-v3/src/filesystem/dir.cc
@@ -25,8 +25,12 @@
 #include <experimental/filesystem>
 #include <utility>
 #include <stack>
+#include <stddef.h>
 #include <string.h>
 #include <errno.h>
+#ifdef _GLIBCXX_HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #ifdef _GLIBCXX_HAVE_DIRENT_H
 # ifdef _GLIBCXX_HAVE_SYS_TYPES_H
 #  include <sys/types.h>
@@ -51,7 +55,7 @@ struct fs::_Dir
 
   _Dir(_Dir&& d)
   : dirp(std::exchange(d.dirp, nullptr)), path(std::move(d.path)),
-    entry(std::move(d.entry)), type(d.type)
+    entry(std::move(d.entry)), type(d.type), entp(d.entp)
   { }
 
   _Dir& operator=(_Dir&&) = delete;
@@ -64,20 +68,64 @@ struct fs::_Dir
   fs::path		path;
   directory_entry	entry;
   file_type		type = file_type::none;
+  ::dirent*		entp = nullptr;
 };
 
 namespace
 {
+#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
+# undef NAME_MAX
+# define NAME_MAX 260
+#endif
+
+  // Size needed for struct dirent with {NAME_MAX} + 1 chars
+  static constexpr size_t
+  dirent_size(size_t name_max)
+  { return offsetof(::dirent, d_name) + name_max + 1; }
+
+  // Manage a buffer large enough for struct dirent with {NAME_MAX} + 1 chars
+  struct dirent_buffer
+  {
+#ifdef NAME_MAX
+    dirent_buffer(const fs::path&) { }
+
+    ::dirent* get() { return reinterpret_cast<::dirent*>(&ent); }
+
+    std::aligned_union<dirent_size(NAME_MAX), ::dirent>::type ent;
+#else
+
+    dirent_buffer(const fs::path& path __attribute__((__unused__)))
+    {
+      long name_max = 255;  // An informed guess.
+#ifdef _GLIBCXX_HAVE_UNISTD_H
+      long pc_name_max = pathconf(path.c_str(), _PC_NAME_MAX);
+      if (pc_name_max != -1)
+	name_max = pc_name_max;
+#endif
+      ptr.reset(static_cast<::dirent*>(::operator new(dirent_size(name_max))));
+    }
+
+    ::dirent* get() const { return ptr.get(); }
+
+    struct op_delete {
+	void operator()(void* p) const { ::operator delete(p); }
+    };
+    std::unique_ptr<::dirent*, op_delete> ptr;
+#endif
+  };
+
   template<typename Bitmask>
-    inline bool is_set(Bitmask obj, Bitmask bits)
+    inline bool
+    is_set(Bitmask obj, Bitmask bits)
     {
       return (obj & bits) != Bitmask::none;
     }
 
   // Returns {dirp, p} on success, {nullptr, p} on error.
   // If an ignored EACCES error occurs returns {}.
-  fs::_Dir
-  open_dir(const fs::path& p, fs::directory_options options, std::error_code* ec)
+  inline fs::_Dir
+  open_dir(const fs::path& p, fs::directory_options options,
+	   std::error_code* ec)
   {
     if (ec)
       ec->clear();
@@ -100,7 +148,7 @@ namespace
   }
 
   inline fs::file_type
-  get_file_type(const dirent& d __attribute__((__unused__)))
+  get_file_type(const ::dirent& d __attribute__((__unused__)))
   {
 #ifdef _GLIBCXX_HAVE_STRUCT_DIRENT_D_TYPE
     switch (d.d_type)
@@ -129,12 +177,24 @@ namespace
 #endif
   }
 
-  int
+  inline int
   native_readdir(DIR* dirp, ::dirent*& entryp)
   {
 #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
-    if ((entryp = ::readdir(dirp)))
-      return 0;
+    const int saved_errno = errno;
+    errno = 0;
+    if (auto entp = ::readdir(dirp))
+      {
+	errno = saved_errno;
+	memcpy(entryp, entp, dirent_size(strlen(entp->d_name)));
+	return 0;
+      }
+    else if (errno == 0) // End of directory reached.
+      {
+	errno = saved_errno;
+	entryp = nullptr;
+	return 0;
+      }
     return errno;
 #else
     return ::readdir_r(dirp, entryp, &entryp);
@@ -142,6 +202,7 @@ namespace
   }
 }
 
+
 // Returns false when the end of the directory entries is reached.
 // Reports errors by setting ec or throwing.
 bool
@@ -150,8 +211,7 @@ fs::_Dir::advance(error_code* ec, directory_options options)
   if (ec)
     ec->clear();
 
-  ::dirent ent;
-  ::dirent* result = &ent;
+  auto result = entp;
   if (int err = native_readdir(dirp, result))
     {
       if (err == EACCES
@@ -168,10 +228,10 @@ fs::_Dir::advance(error_code* ec, directory_options options)
   else if (result != nullptr)
     {
       // skip past dot and dot-dot
-      if (!strcmp(ent.d_name, ".") || !strcmp(ent.d_name, ".."))
+      if (!strcmp(entp->d_name, ".") || !strcmp(entp->d_name, ".."))
 	return advance(ec, options);
-      entry = fs::directory_entry{path / ent.d_name};
-      type = get_file_type(ent);
+      entry = fs::directory_entry{path / entp->d_name};
+      type = get_file_type(*entp);
       return true;
     }
   else
@@ -190,9 +250,17 @@ directory_iterator(const path& p, directory_options options, error_code* ec)
 
   if (dir.dirp)
     {
-      auto sp = std::make_shared<fs::_Dir>(std::move(dir));
-      if (sp->advance(ec, options))
-	_M_dir.swap(sp);
+      // A _Dir with an associated dirent_buffer.
+      struct Dir_with_buffer : _Dir
+      {
+	Dir_with_buffer(_Dir&& d) : _Dir(std::move(d)), buf(this->path)
+	{ this->entp = buf.get(); }
+
+	dirent_buffer buf;
+      };
+      _M_dir = std::make_shared<Dir_with_buffer>(std::move(dir));
+      if (!_M_dir->advance(ec, options))
+	_M_dir.reset();
     }
   else if (!dir.path.empty())
     {
@@ -241,7 +309,17 @@ using Dir_iter_pair = std::pair<fs::_Dir, fs::directory_iterator>;
 
 struct fs::recursive_directory_iterator::_Dir_stack : std::stack<_Dir>
 {
+  _Dir_stack(const path& p) : buf(p) { }
+
+  void push(_Dir&& d)
+  {
+    d.entp = buf.get();
+    stack::push(std::move(d));
+  }
+
   void clear() { c.clear(); }
+
+  dirent_buffer buf;
 };
 
 fs::recursive_directory_iterator::
@@ -251,7 +329,7 @@ recursive_directory_iterator(const path& p, directory_options options,
 {
   if (DIR* dirp = ::opendir(p.c_str()))
     {
-      _M_dirs = std::make_shared<_Dir_stack>();
+      _M_dirs = std::make_shared<_Dir_stack>(p);
       _M_dirs->push(_Dir{ dirp, p });
       if (!_M_dirs->top().advance(ec))
 	_M_dirs.reset();

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-01 17:23       ` Jonathan Wakely
@ 2015-10-01 18:38         ` Jonathan Wakely
  2015-10-01 23:43           ` Jonathan Wakely
  0 siblings, 1 reply; 21+ messages in thread
From: Jonathan Wakely @ 2015-10-01 18:38 UTC (permalink / raw)
  To: Martin Sebor; +Cc: libstdc++, gcc-patches

On 01/10/15 18:23 +0100, Jonathan Wakely wrote:
>+    struct op_delete {
>+	void operator()(void* p) const { ::operator delete(p); }
>+    };
>+    std::unique_ptr<::dirent*, op_delete> ptr;

Oops, that should be dirent not dirent* i.e.

    std::unique_ptr<::dirent, op_delete> ptr;

(Found on AIX, where NAME_MAX isn't defined.)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-09-29 19:49 ` Martin Sebor
  2015-09-30 12:00   ` Jonathan Wakely
@ 2015-10-01 22:56   ` Martin Sebor
  1 sibling, 0 replies; 21+ messages in thread
From: Martin Sebor @ 2015-10-01 22:56 UTC (permalink / raw)
  To: Jonathan Wakely, libstdc++, gcc-patches

> Other than that, I tend to be leery of using plain char arrays
> as buffers for objects of bigger types. I don't know to what
> extent this is a problem for libstdc++ anymore as more and more
> hardware is tolerant of misaligned accesses and as the default
> new expression typically returns memory suitably aligned for
> the largest fundamental type. But since there is no requirement
> in the language that it do so and I would tend to err on the
> side of caution and use operator new (as opposed to
> new char[len]).

Actually, after re-reading 5.3.4, I see that this idiom is supposed
to be safe. Paragraph 11 has this note explaining the requirement
for the char array form of the new expression to adjust offset
the (already aligned) pointer returned by the allocation function
by a multiple of the strictest fundamental alignment:

   Note: Because allocation functions are assumed to return pointers
   to storage that is appropriately aligned for objects of any type
   with fundamental alignment, this constraint on array allocation
   overhead permits the common idiom of allocating character arrays
   into which objects of other types will later be placed.

Sorry for sounding the false alarm. I must have been remembering
problems with arrays allocated on the stack.

Martin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-01 18:38         ` Jonathan Wakely
@ 2015-10-01 23:43           ` Jonathan Wakely
  0 siblings, 0 replies; 21+ messages in thread
From: Jonathan Wakely @ 2015-10-01 23:43 UTC (permalink / raw)
  To: Martin Sebor; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 692 bytes --]

On 01/10/15 19:38 +0100, Jonathan Wakely wrote:
>On 01/10/15 18:23 +0100, Jonathan Wakely wrote:
>>+    struct op_delete {
>>+	void operator()(void* p) const { ::operator delete(p); }
>>+    };
>>+    std::unique_ptr<::dirent*, op_delete> ptr;
>
>Oops, that should be dirent not dirent* i.e.
>
>   std::unique_ptr<::dirent, op_delete> ptr;
>
>(Found on AIX, where NAME_MAX isn't defined.)

Revised patch fixing that, adjusting native_readdir() to work on AIX
where readdir_r returns non-zero at end-of-directory, and following
the advice of https://womble.decadent.org.uk/readdir_r-advisory.html
by using fpathconf and dirfd.

Tested x86_64-dragonfly4.2, powerpc64le-linux and powerpc-aix7.


[-- Attachment #2: patch.txt --]
[-- Type: text/x-patch, Size: 7653 bytes --]

commit d74845cd00231308a11d41e79309993eb46bb220
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Fri Oct 2 00:20:05 2015 +0100

    PR libstdc++/67747 Allocate space for dirent::d_name
    
    	PR libstdc++/67747
    	* configure.ac: Check for fpathconf and dirfd.
    	* config.h.in: Regenerate.
    	* configure: Regenerate.
    	* src/filesystem/dir.cc (_Dir::entp): New member.
    	(dirent_size, dirent_buffer): New helpers for readdir results.
    	(native_readdir) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Copy to supplied
    	dirent object. Handle end of directory.
    	[_AIX]: Handle non-standard semantics for readdir_r return value.
    	(_Dir::advance): Store readdir result at *entp.
    	(directory_iterator(const path&, directory_options, error_code*)):
    	Allocate a dirent_buffer alongside the _Dir object.
    	(recursive_directory_iterator::_Dir_stack): Add dirent_buffer member,
    	constructor and push() function that sets the element's entp.

diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 3456348..af9cc7b 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -406,6 +406,7 @@ GLIBCXX_CHECK_GTHREADS
 AC_CHECK_HEADERS([fcntl.h dirent.h sys/statvfs.h utime.h])
 GLIBCXX_ENABLE_FILESYSTEM_TS
 GLIBCXX_CHECK_FILESYSTEM_DEPS
+AC_CHECK_FUNCS(fpathconf dirfd)
 
 # Define documentation rules conditionally.
 
diff --git a/libstdc++-v3/src/filesystem/dir.cc b/libstdc++-v3/src/filesystem/dir.cc
index bce751c..4cf2bd2 100644
--- a/libstdc++-v3/src/filesystem/dir.cc
+++ b/libstdc++-v3/src/filesystem/dir.cc
@@ -25,8 +25,12 @@
 #include <experimental/filesystem>
 #include <utility>
 #include <stack>
+#include <stddef.h>
 #include <string.h>
 #include <errno.h>
+#ifdef _GLIBCXX_HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #ifdef _GLIBCXX_HAVE_DIRENT_H
 # ifdef _GLIBCXX_HAVE_SYS_TYPES_H
 #  include <sys/types.h>
@@ -51,7 +55,7 @@ struct fs::_Dir
 
   _Dir(_Dir&& d)
   : dirp(std::exchange(d.dirp, nullptr)), path(std::move(d.path)),
-    entry(std::move(d.entry)), type(d.type)
+    entry(std::move(d.entry)), type(d.type), entp(d.entp)
   { }
 
   _Dir& operator=(_Dir&&) = delete;
@@ -64,20 +68,72 @@ struct fs::_Dir
   fs::path		path;
   directory_entry	entry;
   file_type		type = file_type::none;
+  ::dirent*		entp = nullptr;
 };
 
 namespace
 {
+#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
+# undef NAME_MAX
+# define NAME_MAX 260
+#endif
+
+  // Size needed for struct dirent with {NAME_MAX} + 1 chars.
+  static constexpr size_t
+  dirent_size(size_t name_max)
+  {
+    if (name_max < 255)
+      name_max = 255;
+    return offsetof(::dirent, d_name) + name_max + 1;
+  }
+
+  // Manage a buffer large enough for struct dirent with {NAME_MAX} + 1 chars.
+  struct dirent_buffer
+  {
+#ifdef NAME_MAX
+    dirent_buffer(const fs::_Dir&) { }
+
+    ::dirent* get() { return reinterpret_cast<::dirent*>(&ent); }
+
+    std::aligned_union<dirent_size(NAME_MAX), ::dirent>::type ent;
+#else
+
+    dirent_buffer(const fs::_Dir& d __attribute__((__unused__)))
+    {
+      long name_max = 255;  // An informed guess.
+#ifdef _GLIBCXX_HAVE_UNISTD_H
+# if _GLIBCXX_HAVE_FPATHCONF && _GLIBCXX_HAVE_DIRFD
+      long pc_name_max = ::fpathconf(::dirfd(d.dirp), _PC_NAME_MAX);
+# else
+      long pc_name_max = ::pathconf(d.path.c_str(), _PC_NAME_MAX);
+# endif
+      if (pc_name_max != -1)
+	name_max = pc_name_max;
+#endif
+      ptr.reset(static_cast<::dirent*>(::operator new(dirent_size(name_max))));
+    }
+
+    ::dirent* get() const { return ptr.get(); }
+
+    struct op_delete {
+	void operator()(void* p) const { ::operator delete(p); }
+    };
+    std::unique_ptr<::dirent, op_delete> ptr;
+#endif
+  };
+
   template<typename Bitmask>
-    inline bool is_set(Bitmask obj, Bitmask bits)
+    inline bool
+    is_set(Bitmask obj, Bitmask bits)
     {
       return (obj & bits) != Bitmask::none;
     }
 
   // Returns {dirp, p} on success, {nullptr, p} on error.
   // If an ignored EACCES error occurs returns {}.
-  fs::_Dir
-  open_dir(const fs::path& p, fs::directory_options options, std::error_code* ec)
+  inline fs::_Dir
+  open_dir(const fs::path& p, fs::directory_options options,
+	   std::error_code* ec)
   {
     if (ec)
       ec->clear();
@@ -100,7 +156,7 @@ namespace
   }
 
   inline fs::file_type
-  get_file_type(const dirent& d __attribute__((__unused__)))
+  get_file_type(const ::dirent& d __attribute__((__unused__)))
   {
 #ifdef _GLIBCXX_HAVE_STRUCT_DIRENT_D_TYPE
     switch (d.d_type)
@@ -129,19 +185,42 @@ namespace
 #endif
   }
 
-  int
+  inline int
   native_readdir(DIR* dirp, ::dirent*& entryp)
   {
 #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
-    if ((entryp = ::readdir(dirp)))
-      return 0;
+    const int saved_errno = errno;
+    errno = 0;
+    if (auto entp = ::readdir(dirp))
+      {
+	errno = saved_errno;
+	memcpy(entryp, entp, dirent_size(strlen(entp->d_name)));
+	return 0;
+      }
+    else if (errno == 0) // End of directory reached.
+      {
+	errno = saved_errno;
+	entryp = nullptr;
+	return 0;
+      }
     return errno;
 #else
-    return ::readdir_r(dirp, entryp, &entryp);
+    const int err = ::readdir_r(dirp, entryp, &entryp);
+#ifdef _AIX
+    if (err == 9)
+      {
+	if (entryp == nullptr)
+	  return 0;
+	else
+	  return errno;
+      }
+#endif
+    return err;
 #endif
   }
 }
 
+
 // Returns false when the end of the directory entries is reached.
 // Reports errors by setting ec or throwing.
 bool
@@ -150,8 +229,7 @@ fs::_Dir::advance(error_code* ec, directory_options options)
   if (ec)
     ec->clear();
 
-  ::dirent ent;
-  ::dirent* result = &ent;
+  auto result = entp;
   if (int err = native_readdir(dirp, result))
     {
       if (err == EACCES
@@ -168,10 +246,10 @@ fs::_Dir::advance(error_code* ec, directory_options options)
   else if (result != nullptr)
     {
       // skip past dot and dot-dot
-      if (!strcmp(ent.d_name, ".") || !strcmp(ent.d_name, ".."))
+      if (!strcmp(entp->d_name, ".") || !strcmp(entp->d_name, ".."))
 	return advance(ec, options);
-      entry = fs::directory_entry{path / ent.d_name};
-      type = get_file_type(ent);
+      entry = fs::directory_entry{path / entp->d_name};
+      type = get_file_type(*entp);
       return true;
     }
   else
@@ -190,9 +268,17 @@ directory_iterator(const path& p, directory_options options, error_code* ec)
 
   if (dir.dirp)
     {
-      auto sp = std::make_shared<fs::_Dir>(std::move(dir));
-      if (sp->advance(ec, options))
-	_M_dir.swap(sp);
+      // A _Dir with an associated dirent_buffer.
+      struct Dir_with_buffer : _Dir
+      {
+	Dir_with_buffer(_Dir&& d) : _Dir(std::move(d)), buf(*this)
+	{ this->entp = buf.get(); }
+
+	dirent_buffer buf;
+      };
+      _M_dir = std::make_shared<Dir_with_buffer>(std::move(dir));
+      if (!_M_dir->advance(ec, options))
+	_M_dir.reset();
     }
   else if (!dir.path.empty())
     {
@@ -241,7 +327,17 @@ using Dir_iter_pair = std::pair<fs::_Dir, fs::directory_iterator>;
 
 struct fs::recursive_directory_iterator::_Dir_stack : std::stack<_Dir>
 {
+  _Dir_stack(_Dir&& d) : buf(d) { push(std::move(d)); }
+
+  void push(_Dir&& d)
+  {
+    d.entp = buf.get();
+    stack::push(std::move(d));
+  }
+
   void clear() { c.clear(); }
+
+  dirent_buffer buf;
 };
 
 fs::recursive_directory_iterator::
@@ -251,8 +347,7 @@ recursive_directory_iterator(const path& p, directory_options options,
 {
   if (DIR* dirp = ::opendir(p.c_str()))
     {
-      _M_dirs = std::make_shared<_Dir_stack>();
-      _M_dirs->push(_Dir{ dirp, p });
+      _M_dirs = std::make_shared<_Dir_stack>(_Dir{ dirp, p });
       if (!_M_dirs->top().advance(ec))
 	_M_dirs.reset();
     }

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-09-29 12:00 [patch] libstdc++/67747 Allocate space for dirent::d_name Jonathan Wakely
  2015-09-29 19:49 ` Martin Sebor
@ 2015-10-02 12:16 ` Florian Weimer
  2015-10-02 12:34   ` Jonathan Wakely
  2015-10-02 12:37   ` Sebastian Huber
  1 sibling, 2 replies; 21+ messages in thread
From: Florian Weimer @ 2015-10-02 12:16 UTC (permalink / raw)
  To: Jonathan Wakely, libstdc++, gcc-patches

On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
> POSIX says that dirent::d_name has an unspecified length, so calls to
> readdir_r must pass a buffer with enough trailing space for
> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
> 32-bit.
> 
> This uses pathconf to get NAME_MAX and allocates a buffer.

This still has a buffer overflow on certain file systems.

You must not use readdir_r, it is deprecated and always insecure.  We
should probably mark it as such in the glibc headers.

Have we already released code which uses readdir_r?

Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 12:16 ` Florian Weimer
@ 2015-10-02 12:34   ` Jonathan Wakely
  2015-10-02 12:41     ` Florian Weimer
  2015-10-02 16:57     ` Martin Sebor
  2015-10-02 12:37   ` Sebastian Huber
  1 sibling, 2 replies; 21+ messages in thread
From: Jonathan Wakely @ 2015-10-02 12:34 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libstdc++, gcc-patches

On 02/10/15 14:16 +0200, Florian Weimer wrote:
>On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>> POSIX says that dirent::d_name has an unspecified length, so calls to
>> readdir_r must pass a buffer with enough trailing space for
>> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>> 32-bit.
>>
>> This uses pathconf to get NAME_MAX and allocates a buffer.
>
>This still has a buffer overflow on certain file systems.
>
>You must not use readdir_r, it is deprecated and always insecure.  We
>should probably mark it as such in the glibc headers.

OK, I'll just use readdir() then. The directory stream is private to
the library type, so the only way to call readdir() concurrently on a
single directory stream is to increment iterators concurrently, which
is undefined anyway.

So that will work as long as readdir() doesn't use a global static
buffer shared between streams, i.e. it meets the POSIX requirement
that "They shall not be affected by a call to readdir() on a different
directory stream." I don't know if mingw meets that, but there is lots
of work needed to make this stuff work in mingw.

>Have we already released code which uses readdir_r?

No, it's only on trunk and the gcc-5-branch, not in any release.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 12:16 ` Florian Weimer
  2015-10-02 12:34   ` Jonathan Wakely
@ 2015-10-02 12:37   ` Sebastian Huber
  2015-10-02 12:52     ` Florian Weimer
  1 sibling, 1 reply; 21+ messages in thread
From: Sebastian Huber @ 2015-10-02 12:37 UTC (permalink / raw)
  To: Florian Weimer, libstdc++, gcc-patches



On 02/10/15 14:16, Florian Weimer wrote:
> On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>> >POSIX says that dirent::d_name has an unspecified length, so calls to
>> >readdir_r must pass a buffer with enough trailing space for
>> >{NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>> >GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>> >32-bit.
>> >
>> >This uses pathconf to get NAME_MAX and allocates a buffer.
> This still has a buffer overflow on certain file systems.
>
> You must not use readdir_r, it is deprecated and always insecure.  We
> should probably mark it as such in the glibc headers.

The READDIR(3) man page should be updated as well, since it doesn't 
mention that readdir_r() is deprecated and always insecure.

-- 
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax     : +49 89 189 47 41-09
E-Mail  : sebastian.huber@embedded-brains.de
PGP     : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 12:34   ` Jonathan Wakely
@ 2015-10-02 12:41     ` Florian Weimer
  2015-10-02 16:34       ` Jonathan Wakely
  2015-10-02 16:57     ` Martin Sebor
  1 sibling, 1 reply; 21+ messages in thread
From: Florian Weimer @ 2015-10-02 12:41 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 10/02/2015 02:34 PM, Jonathan Wakely wrote:
> On 02/10/15 14:16 +0200, Florian Weimer wrote:
>> On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>>> POSIX says that dirent::d_name has an unspecified length, so calls to
>>> readdir_r must pass a buffer with enough trailing space for
>>> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>> 32-bit.
>>>
>>> This uses pathconf to get NAME_MAX and allocates a buffer.
>>
>> This still has a buffer overflow on certain file systems.
>>
>> You must not use readdir_r, it is deprecated and always insecure.  We
>> should probably mark it as such in the glibc headers.
> 
> OK, I'll just use readdir() then. The directory stream is private to
> the library type, so the only way to call readdir() concurrently on a
> single directory stream is to increment iterators concurrently, which
> is undefined anyway.

Right, that's the only case where readdir_r could be theoretically
useful.  But it's not a global structure, the callers have to coordinate
anyway, and so you could well use an external lock.

> So that will work as long as readdir() doesn't use a global static
> buffer shared between streams, i.e. it meets the POSIX requirement
> that "They shall not be affected by a call to readdir() on a different
> directory stream." I don't know if mingw meets that, but there is lots
> of work needed to make this stuff work in mingw.

If mingw has this flaw, it is worth fixing on its own, and mingw is
sufficiently alive that sticking workarounds into libstdc++ for its bugs
doesn't make sense (IMHO).

>> Have we already released code which uses readdir_r?
> 
> No, it's only on trunk and the gcc-5-branch, not in any release.

Good, no need to treat this as a security vulnerability. :)


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 12:37   ` Sebastian Huber
@ 2015-10-02 12:52     ` Florian Weimer
  0 siblings, 0 replies; 21+ messages in thread
From: Florian Weimer @ 2015-10-02 12:52 UTC (permalink / raw)
  To: Sebastian Huber, libstdc++, gcc-patches

On 10/02/2015 02:37 PM, Sebastian Huber wrote:
> 
> 
> On 02/10/15 14:16, Florian Weimer wrote:
>> On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>>> >POSIX says that dirent::d_name has an unspecified length, so calls to
>>> >readdir_r must pass a buffer with enough trailing space for
>>> >{NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>> >GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>> >32-bit.
>>> >
>>> >This uses pathconf to get NAME_MAX and allocates a buffer.
>> This still has a buffer overflow on certain file systems.
>>
>> You must not use readdir_r, it is deprecated and always insecure.  We
>> should probably mark it as such in the glibc headers.
> 
> The READDIR(3) man page should be updated as well, since it doesn't
> mention that readdir_r() is deprecated and always insecure.

Right, and I filed: https://bugzilla.kernel.org/show_bug.cgi?id=105391

Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 12:41     ` Florian Weimer
@ 2015-10-02 16:34       ` Jonathan Wakely
  0 siblings, 0 replies; 21+ messages in thread
From: Jonathan Wakely @ 2015-10-02 16:34 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1939 bytes --]

On 02/10/15 14:41 +0200, Florian Weimer wrote:
>On 10/02/2015 02:34 PM, Jonathan Wakely wrote:
>> On 02/10/15 14:16 +0200, Florian Weimer wrote:
>>> On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>>>> POSIX says that dirent::d_name has an unspecified length, so calls to
>>>> readdir_r must pass a buffer with enough trailing space for
>>>> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>>> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>>> 32-bit.
>>>>
>>>> This uses pathconf to get NAME_MAX and allocates a buffer.
>>>
>>> This still has a buffer overflow on certain file systems.
>>>
>>> You must not use readdir_r, it is deprecated and always insecure.  We
>>> should probably mark it as such in the glibc headers.
>>
>> OK, I'll just use readdir() then. The directory stream is private to
>> the library type, so the only way to call readdir() concurrently on a
>> single directory stream is to increment iterators concurrently, which
>> is undefined anyway.
>
>Right, that's the only case where readdir_r could be theoretically
>useful.  But it's not a global structure, the callers have to coordinate
>anyway, and so you could well use an external lock.

Here's a much simpler patch that just uses readdir, so not need for
pathconf, NAME_MAX and all that jazz.

Tested on GNU/Linux, DragonFly, AIX, committed to trunk.

>> So that will work as long as readdir() doesn't use a global static
>> buffer shared between streams, i.e. it meets the POSIX requirement
>> that "They shall not be affected by a call to readdir() on a different
>> directory stream." I don't know if mingw meets that, but there is lots
>> of work needed to make this stuff work in mingw.
>
>If mingw has this flaw, it is worth fixing on its own, and mingw is
>sufficiently alive that sticking workarounds into libstdc++ for its bugs
>doesn't make sense (IMHO).

FWIW I checked and the readdir in mingw-w64 is OK.



[-- Attachment #2: patch.txt --]
[-- Type: text/x-patch, Size: 3589 bytes --]

commit 9cef7454bf1a18f21a7a561eef9165149a3330c9
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Fri Oct 2 15:53:09 2015 +0100

    PR libstdc++/67747 use readdir instead of readdir_r
    
    	PR libstdc++/67747
    	* src/filesystem/dir.cc (native_readdir): Remove.
    	(_Dir::advance): Use readdir instead of native_readdir.
    	(recursive_directory_iterator(const path&, directory_options,
    	error_code*)): Use swap instead of reset.

diff --git a/libstdc++-v3/src/filesystem/dir.cc b/libstdc++-v3/src/filesystem/dir.cc
index bce751c..33280ec 100644
--- a/libstdc++-v3/src/filesystem/dir.cc
+++ b/libstdc++-v3/src/filesystem/dir.cc
@@ -69,15 +69,17 @@ struct fs::_Dir
 namespace
 {
   template<typename Bitmask>
-    inline bool is_set(Bitmask obj, Bitmask bits)
+    inline bool
+    is_set(Bitmask obj, Bitmask bits)
     {
       return (obj & bits) != Bitmask::none;
     }
 
   // Returns {dirp, p} on success, {nullptr, p} on error.
   // If an ignored EACCES error occurs returns {}.
-  fs::_Dir
-  open_dir(const fs::path& p, fs::directory_options options, std::error_code* ec)
+  inline fs::_Dir
+  open_dir(const fs::path& p, fs::directory_options options,
+	   std::error_code* ec)
   {
     if (ec)
       ec->clear();
@@ -100,7 +102,7 @@ namespace
   }
 
   inline fs::file_type
-  get_file_type(const dirent& d __attribute__((__unused__)))
+  get_file_type(const ::dirent& d __attribute__((__unused__)))
   {
 #ifdef _GLIBCXX_HAVE_STRUCT_DIRENT_D_TYPE
     switch (d.d_type)
@@ -128,20 +130,9 @@ namespace
     return fs::file_type::none;
 #endif
   }
-
-  int
-  native_readdir(DIR* dirp, ::dirent*& entryp)
-  {
-#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
-    if ((entryp = ::readdir(dirp)))
-      return 0;
-    return errno;
-#else
-    return ::readdir_r(dirp, entryp, &entryp);
-#endif
-  }
 }
 
+
 // Returns false when the end of the directory entries is reached.
 // Reports errors by setting ec or throwing.
 bool
@@ -150,9 +141,20 @@ fs::_Dir::advance(error_code* ec, directory_options options)
   if (ec)
     ec->clear();
 
-  ::dirent ent;
-  ::dirent* result = &ent;
-  if (int err = native_readdir(dirp, result))
+  int err = std::exchange(errno, 0);
+  const auto entp = readdir(dirp);
+  std::swap(errno, err);
+
+  if (entp)
+    {
+      // skip past dot and dot-dot
+      if (!strcmp(entp->d_name, ".") || !strcmp(entp->d_name, ".."))
+	return advance(ec, options);
+      entry = fs::directory_entry{path / entp->d_name};
+      type = get_file_type(*entp);
+      return true;
+    }
+  else if (err)
     {
       if (err == EACCES
         && is_set(options, directory_options::skip_permission_denied))
@@ -165,15 +167,6 @@ fs::_Dir::advance(error_code* ec, directory_options options)
       ec->assign(err, std::generic_category());
       return true;
     }
-  else if (result != nullptr)
-    {
-      // skip past dot and dot-dot
-      if (!strcmp(ent.d_name, ".") || !strcmp(ent.d_name, ".."))
-	return advance(ec, options);
-      entry = fs::directory_entry{path / ent.d_name};
-      type = get_file_type(ent);
-      return true;
-    }
   else
     {
       // reached the end
@@ -251,10 +244,10 @@ recursive_directory_iterator(const path& p, directory_options options,
 {
   if (DIR* dirp = ::opendir(p.c_str()))
     {
-      _M_dirs = std::make_shared<_Dir_stack>();
-      _M_dirs->push(_Dir{ dirp, p });
-      if (!_M_dirs->top().advance(ec))
-	_M_dirs.reset();
+      auto sp = std::make_shared<_Dir_stack>();
+      sp->push(_Dir{ dirp, p });
+      if (sp->top().advance(ec))
+	_M_dirs.swap(sp);
     }
   else
     {

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 12:34   ` Jonathan Wakely
  2015-10-02 12:41     ` Florian Weimer
@ 2015-10-02 16:57     ` Martin Sebor
  2015-10-02 17:01       ` Jonathan Wakely
  2015-10-02 17:09       ` Florian Weimer
  1 sibling, 2 replies; 21+ messages in thread
From: Martin Sebor @ 2015-10-02 16:57 UTC (permalink / raw)
  To: Jonathan Wakely, Florian Weimer; +Cc: libstdc++, gcc-patches

On 10/02/2015 06:34 AM, Jonathan Wakely wrote:
> On 02/10/15 14:16 +0200, Florian Weimer wrote:
>> On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>>> POSIX says that dirent::d_name has an unspecified length, so calls to
>>> readdir_r must pass a buffer with enough trailing space for
>>> {NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>> GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>> 32-bit.
>>>
>>> This uses pathconf to get NAME_MAX and allocates a buffer.
>>
>> This still has a buffer overflow on certain file systems.
>>
>> You must not use readdir_r, it is deprecated and always insecure.  We
>> should probably mark it as such in the glibc headers.
>
> OK, I'll just use readdir() then. The directory stream is private to
> the library type, so the only way to call readdir() concurrently on a
> single directory stream is to increment iterators concurrently, which
> is undefined anyway.
>
> So that will work as long as readdir() doesn't use a global static
> buffer shared between streams, i.e. it meets the POSIX requirement
> that "They shall not be affected by a call to readdir() on a different
> directory stream." I don't know if mingw meets that, but there is lots
> of work needed to make this stuff work in mingw.

Readdir isn't required to be thread-safe (it may reference global
data) so calling it in multiple threads even with a different dirp
argument is undefined. A thread-unsafe implementation can meet the
POSIX requirement and still access global data but without locking.

The Solaris implementation, for example, is explicitly documented
as thread unsafe.

Martin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 16:57     ` Martin Sebor
@ 2015-10-02 17:01       ` Jonathan Wakely
  2015-10-02 17:09       ` Florian Weimer
  1 sibling, 0 replies; 21+ messages in thread
From: Jonathan Wakely @ 2015-10-02 17:01 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Florian Weimer, libstdc++, gcc-patches

On 02/10/15 10:57 -0600, Martin Sebor wrote:
>On 10/02/2015 06:34 AM, Jonathan Wakely wrote:
>>On 02/10/15 14:16 +0200, Florian Weimer wrote:
>>>On 09/29/2015 01:37 PM, Jonathan Wakely wrote:
>>>>POSIX says that dirent::d_name has an unspecified length, so calls to
>>>>readdir_r must pass a buffer with enough trailing space for
>>>>{NAME_MAX}+1 characters. I wasn't doing that, which works OK on
>>>>GNU/Linux and BSD where d_name is a large array, but fails on Solaris
>>>>32-bit.
>>>>
>>>>This uses pathconf to get NAME_MAX and allocates a buffer.
>>>
>>>This still has a buffer overflow on certain file systems.
>>>
>>>You must not use readdir_r, it is deprecated and always insecure.  We
>>>should probably mark it as such in the glibc headers.
>>
>>OK, I'll just use readdir() then. The directory stream is private to
>>the library type, so the only way to call readdir() concurrently on a
>>single directory stream is to increment iterators concurrently, which
>>is undefined anyway.
>>
>>So that will work as long as readdir() doesn't use a global static
>>buffer shared between streams, i.e. it meets the POSIX requirement
>>that "They shall not be affected by a call to readdir() on a different
>>directory stream." I don't know if mingw meets that, but there is lots
>>of work needed to make this stuff work in mingw.
>
>Readdir isn't required to be thread-safe (it may reference global
>data) so calling it in multiple threads even with a different dirp
>argument is undefined. A thread-unsafe implementation can meet the
>POSIX requirement and still access global data but without locking.
>
>The Solaris implementation, for example, is explicitly documented
>as thread unsafe.

At this point I think I'm just going to disable the filesystem lib on
Solaris!

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 16:57     ` Martin Sebor
  2015-10-02 17:01       ` Jonathan Wakely
@ 2015-10-02 17:09       ` Florian Weimer
  2015-10-02 17:38         ` Martin Sebor
  1 sibling, 1 reply; 21+ messages in thread
From: Florian Weimer @ 2015-10-02 17:09 UTC (permalink / raw)
  To: Martin Sebor, Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 10/02/2015 06:57 PM, Martin Sebor wrote:

> Readdir isn't required to be thread-safe (it may reference global
> data) so calling it in multiple threads even with a different dirp
> argument is undefined. A thread-unsafe implementation can meet the
> POSIX requirement and still access global data but without locking.

A readdir implementation which is not thread-safe when used with
different directory streams is just buggy.  It may be conforming, but so
is an implementation that always fails with EOVERFLOW.

> The Solaris implementation, for example, is explicitly documented
> as thread unsafe.

MT-safety is ambiguous for functions which operate on pointer arguments.
 It is not clear if it is permitted to call the function without
synchronization on overlapping objects.

memcpy has the same thread-safety level as readdir on Solaris
(operations on overlapping objects are not thread-safe, non-overlapping
objects are), and is documented to be MT-safe.

Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 17:09       ` Florian Weimer
@ 2015-10-02 17:38         ` Martin Sebor
  2015-10-02 17:43           ` Florian Weimer
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Sebor @ 2015-10-02 17:38 UTC (permalink / raw)
  To: Florian Weimer, Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 10/02/2015 11:09 AM, Florian Weimer wrote:
> On 10/02/2015 06:57 PM, Martin Sebor wrote:
>
>> Readdir isn't required to be thread-safe (it may reference global
>> data) so calling it in multiple threads even with a different dirp
>> argument is undefined. A thread-unsafe implementation can meet the
>> POSIX requirement and still access global data but without locking.
>
> A readdir implementation which is not thread-safe when used with
> different directory streams is just buggy.  It may be conforming, but so
> is an implementation that always fails with EOVERFLOW.

POSIX is clear: The readdir() function need not be thread-safe.
There's nothing wrong with relying on a function's thread safety
when it's documented to be thread safe by the implementation,
even if POSIX doesn't guarantee it. But doing so otherwise,
against both the standard and against the documentation, would
be risky to say the least.

>
>> The Solaris implementation, for example, is explicitly documented
>> as thread unsafe.
>
> MT-safety is ambiguous for functions which operate on pointer arguments.
>   It is not clear if it is permitted to call the function without
> synchronization on overlapping objects.
>
> memcpy has the same thread-safety level as readdir on Solaris

I'm not sure what you are basing this assertion on. In the man
pages I have looked at, memcpy is documented as MT-Safe. readdir
is documented as MT-Unsafe. The Unsafe definition is clear:
contains global and static data that is not protected.

For example, Solaris 11.2:
http://docs.oracle.com/cd/E36784_01/html/E36874/readdir-3c.html
http://docs.oracle.com/cd/E36784_01/html/E36874/memcpy-3c.html

Martin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 17:38         ` Martin Sebor
@ 2015-10-02 17:43           ` Florian Weimer
  2015-10-02 17:53             ` Martin Sebor
  0 siblings, 1 reply; 21+ messages in thread
From: Florian Weimer @ 2015-10-02 17:43 UTC (permalink / raw)
  To: Martin Sebor, Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 10/02/2015 07:37 PM, Martin Sebor wrote:

> I'm not sure what you are basing this assertion on. In the man
> pages I have looked at, memcpy is documented as MT-Safe. readdir
> is documented as MT-Unsafe. The Unsafe definition is clear:
> contains global and static data that is not protected.

I think the Solaris thread-safety attributes are a bit too simplistic to
capture the whole scope of thread safety issues for functions operating
on mutable data.

> For example, Solaris 11.2:
> http://docs.oracle.com/cd/E36784_01/html/E36874/readdir-3c.html

“It is safe to use readdir() in a threaded application, so long as only
one thread reads from the directory stream at any given time. The
readdir() function is generally preferred over the readdir_r() function.”

Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 17:43           ` Florian Weimer
@ 2015-10-02 17:53             ` Martin Sebor
  2015-10-02 18:03               ` Florian Weimer
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Sebor @ 2015-10-02 17:53 UTC (permalink / raw)
  To: Florian Weimer, Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 10/02/2015 11:43 AM, Florian Weimer wrote:
> On 10/02/2015 07:37 PM, Martin Sebor wrote:
>
>> I'm not sure what you are basing this assertion on. In the man
>> pages I have looked at, memcpy is documented as MT-Safe. readdir
>> is documented as MT-Unsafe. The Unsafe definition is clear:
>> contains global and static data that is not protected.
>
> I think the Solaris thread-safety attributes are a bit too simplistic to
> capture the whole scope of thread safety issues for functions operating
> on mutable data.
>
>> For example, Solaris 11.2:
>> http://docs.oracle.com/cd/E36784_01/html/E36874/readdir-3c.html
>
> “It is safe to use readdir() in a threaded application, so long as only
> one thread reads from the directory stream at any given time. The
> readdir() function is generally preferred over the readdir_r() function.”

Ah, I see the discrepancy. I had a few pages open, one for Solaris
9 and one for 11.2. The newer one has the quote, the older one does
not.

Here's the older page:
http://docs.oracle.com/cd/E19683-01/816-0213/6m6ne3895/index.html

With that, I agree that using readdir is thread-safe on Solaris 11.2.
It is not safe on Solaris 9, and it would not be safe on other systems
that don't document it as such.

Martin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch] libstdc++/67747 Allocate space for dirent::d_name
  2015-10-02 17:53             ` Martin Sebor
@ 2015-10-02 18:03               ` Florian Weimer
  0 siblings, 0 replies; 21+ messages in thread
From: Florian Weimer @ 2015-10-02 18:03 UTC (permalink / raw)
  To: Martin Sebor, Jonathan Wakely; +Cc: libstdc++, gcc-patches

On 10/02/2015 07:53 PM, Martin Sebor wrote:

> Here's the older page:
> http://docs.oracle.com/cd/E19683-01/816-0213/6m6ne3895/index.html
> 
> With that, I agree that using readdir is thread-safe on Solaris 11.2.
> It is not safe on Solaris 9, and it would not be safe on other systems
> that don't document it as such.

I'm pretty sure the Solaris implementation did not change, only the
documentation.

Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2015-10-02 18:03 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-29 12:00 [patch] libstdc++/67747 Allocate space for dirent::d_name Jonathan Wakely
2015-09-29 19:49 ` Martin Sebor
2015-09-30 12:00   ` Jonathan Wakely
2015-09-30 15:54     ` Martin Sebor
2015-10-01 17:23       ` Jonathan Wakely
2015-10-01 18:38         ` Jonathan Wakely
2015-10-01 23:43           ` Jonathan Wakely
2015-10-01 22:56   ` Martin Sebor
2015-10-02 12:16 ` Florian Weimer
2015-10-02 12:34   ` Jonathan Wakely
2015-10-02 12:41     ` Florian Weimer
2015-10-02 16:34       ` Jonathan Wakely
2015-10-02 16:57     ` Martin Sebor
2015-10-02 17:01       ` Jonathan Wakely
2015-10-02 17:09       ` Florian Weimer
2015-10-02 17:38         ` Martin Sebor
2015-10-02 17:43           ` Florian Weimer
2015-10-02 17:53             ` Martin Sebor
2015-10-02 18:03               ` Florian Weimer
2015-10-02 12:37   ` Sebastian Huber
2015-10-02 12:52     ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).