public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file
@ 2023-03-06 23:45 Jannik Glückert
  0 siblings, 0 replies; only message in thread
From: Jannik Glückert @ 2023-03-06 23:45 UTC (permalink / raw)
  To: libstdc++; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 917 bytes --]

The current copy_file implementation is suboptimal. It only uses
sendfile for files smaller than 2GB, falling back to a userspace copy
for anything larger, and does not support copy_file_range at all.
copy_file_range is becoming increasingly important as filesystems
adopt reflinks.

I am pretty sure I got some of the formatting wrong; feel free to tear
it apart. I don't know whether sendfile has the same semantics on
Linux as it does on Solaris, so if someone could test with a big file,
that'd be great. Otherwise, this should not regress: the implementation
will fall back to sendfile / userspace copy if copy_file_range is not
available for the target paths.

The copy implementations for sendfile and copy_file_range were put
into separate functions, and the calling code in do_copy_file was
simplified to the point where you can basically just copy-paste it to
add a new implementation, should new interesting syscalls pop up.

Best
Jannik

[-- Attachment #2: 0001-libstdc-also-use-sendfile-for-big-files.patch --]
[-- Type: text/x-patch, Size: 3945 bytes --]

From 306f9d5e1076ff936ef35942bca546ce188fba81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= <jannik.glueckert@gmail.com>
Date: Mon, 6 Mar 2023 20:52:08 +0100
Subject: [PATCH 1/2] libstdc++: also use sendfile for big files

We were previously only using sendfile for files smaller than 2GB, as
sendfile needs to be called repeatedly for files bigger than that.

Some quick numbers, copying a 16GB file, average of 10 repetitions:
    old:
        real: 13.4s
        user: 0.14s
        sys : 7.43s
    new:
        real: 8.90s
        user: 0.00s
        sys : 3.68s

libstdc++-v3/ChangeLog:

	* src/filesystem/ops-common.h (copy_file_sendfile): New function.
	(do_copy_file): Use it, so that sendfile is also used for files
	larger than 2GB.
---
 libstdc++-v3/src/filesystem/ops-common.h | 77 ++++++++++++------------
 1 file changed, 40 insertions(+), 37 deletions(-)

diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h
index abbfca43e5c..d8afc6a4d64 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -358,6 +358,24 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
   }
 
 #ifdef NEED_DO_COPY_FILE
+#if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
+  bool
+  copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept
+  {
+    // Hand LENGTH bytes from FD_IN to FD_OUT through sendfile, calling it
+    // again after a short transfer.  False means a call failed (see errno).
+    off_t pos = 0;
+    for (;;)
+      {
+        const ssize_t sent = ::sendfile(fd_out, fd_in, &pos, length);
+        if (sent < 0)
+          return false;
+        length -= sent;
+        if (length == 0 || sent == 0)
+          return true;
+      }
+  }
+#endif
   bool
   do_copy_file(const char_type* from, const char_type* to,
 	       std::filesystem::copy_options_existing_file options,
@@ -498,28 +516,30 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 	return false;
       }
 
-    size_t count = from_st->st_size;
+    bool has_copied = false;
+
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
-    off_t offset = 0;
-    ssize_t n = ::sendfile(out.fd, in.fd, &offset, count);
-    if (n < 0 && errno != ENOSYS && errno != EINVAL)
+    if (!has_copied)
+      has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size);
+    if (!has_copied)
       {
-	ec.assign(errno, std::generic_category());
-	return false;
+      if (errno != ENOSYS && errno != EINVAL)
+        {
+          ec.assign(errno, std::generic_category());
+          return false;
+        }
       }
-    if ((size_t)n == count)
+#endif
+
+    if (has_copied)
       {
-	if (!out.close() || !in.close())
-	  {
-	    ec.assign(errno, std::generic_category());
-	    return false;
-	  }
-	ec.clear();
-	return true;
+        if (!out.close() || !in.close())
+          {
+	          ec.assign(errno, std::generic_category());
+	          return false;
+          }
+        return true;
       }
-    else if (n > 0)
-      count -= n;
-#endif // _GLIBCXX_USE_SENDFILE
 
     using std::ios;
     __gnu_cxx::stdio_filebuf<char> sbin(in.fd, ios::in|ios::binary);
@@ -530,29 +550,12 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
     if (sbout.is_open())
       out.fd = -1;
 
-#ifdef _GLIBCXX_USE_SENDFILE
-    if (n != 0)
+    if (!(std::ostream(&sbout) << &sbin))
       {
-	if (n < 0)
-	  n = 0;
-
-	const auto p1 = sbin.pubseekoff(n, ios::beg, ios::in);
-	const auto p2 = sbout.pubseekoff(n, ios::beg, ios::out);
-
-	const std::streampos errpos(std::streamoff(-1));
-	if (p1 == errpos || p2 == errpos)
-	  {
-	    ec = std::make_error_code(std::errc::io_error);
-	    return false;
-	  }
+  ec = std::make_error_code(std::errc::io_error);
+  return false;
       }
-#endif
 
-    if (count && !(std::ostream(&sbout) << &sbin))
-      {
-	ec = std::make_error_code(std::errc::io_error);
-	return false;
-      }
     if (!sbout.close() || !sbin.close())
       {
 	ec.assign(errno, std::generic_category());
-- 
2.39.2


[-- Attachment #3: 0002-libstdc-use-copy_file_range.patch --]
[-- Type: text/x-patch, Size: 4200 bytes --]

From 72b7ad044246e496d90b5f241f59bd0b69e214fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= <jannik.glueckert@gmail.com>
Date: Mon, 6 Mar 2023 23:11:41 +0100
Subject: [PATCH 2/2] libstdc++: use copy_file_range

copy_file_range is a recent-ish syscall for copying files. It is similar
to sendfile but allows filesystem-specific optimizations. Common ones are:
- Reflinks: BTRFS, XFS, ZFS (does not implement the syscall yet)
- Server-side copy: NFS, SMB

If copy_file_range is not available for the given files, fall back to
sendfile / userspace copy.

libstdc++-v3/ChangeLog:

	* acinclude.m4 (_GLIBCXX_USE_COPY_FILE_RANGE): Define.
	* config.h.in: Regenerate.
	* configure: Regenerate.
	* src/filesystem/ops-common.h (copy_file_copy_file_range): New
	function.
	(do_copy_file): Use it.
---
 libstdc++-v3/acinclude.m4                | 20 ++++++++
 libstdc++-v3/config.h.in                 |  3 ++
 libstdc++-v3/configure                   | 62 ++++++++++++++++++++++++
 libstdc++-v3/src/filesystem/ops-common.h | 34 +++++++++++++
 4 files changed, 119 insertions(+)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 5136c0571e8..ca09e1d22db 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4581,6 +4581,7 @@ dnl  _GLIBCXX_USE_UTIMENSAT
 dnl  _GLIBCXX_USE_ST_MTIM
 dnl  _GLIBCXX_USE_FCHMOD
 dnl  _GLIBCXX_USE_FCHMODAT
+dnl  _GLIBCXX_USE_COPY_FILE_RANGE
 dnl  _GLIBCXX_USE_SENDFILE
 dnl  HAVE_LINK
 dnl  HAVE_READLINK
@@ -4718,6 +4719,25 @@ dnl
   if test $glibcxx_cv_fchmodat = yes; then
     AC_DEFINE(_GLIBCXX_USE_FCHMODAT, 1, [Define if fchmodat is available in <sys/stat.h>.])
   fi
+dnl
+  AC_CACHE_CHECK([for copy_file_range that can copy files],
+    glibcxx_cv_copy_file_range, [dnl
+    case "${target_os}" in
+      linux*)
+	GCC_TRY_COMPILE_OR_LINK(
+	  [#include <unistd.h>],
+	  [copy_file_range(1, NULL, 2, NULL, 1, 0);],
+	  [glibcxx_cv_copy_file_range=yes],
+	  [glibcxx_cv_copy_file_range=no])
+	;;
+      *)
+	glibcxx_cv_copy_file_range=no
+	;;
+    esac
+  ])
+  if test $glibcxx_cv_copy_file_range = yes; then
+    AC_DEFINE(_GLIBCXX_USE_COPY_FILE_RANGE, 1, [Define if copy_file_range is available in <unistd.h>.])
+  fi
 dnl
   AC_CACHE_CHECK([for sendfile that can copy files],
     glibcxx_cv_sendfile, [dnl
diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h
index d8afc6a4d64..0491dc8d811 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -49,6 +49,9 @@
 #ifdef NEED_DO_COPY_FILE
 # include <filesystem>
 # include <ext/stdio_filebuf.h>
+# ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+#  include <unistd.h> // copy_file_range
+# endif
 # ifdef _GLIBCXX_USE_SENDFILE
 #  include <sys/sendfile.h> // sendfile
 # endif
@@ -358,6 +361,24 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
   }
 
 #ifdef NEED_DO_COPY_FILE
+#ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+  bool
+  copy_file_copy_file_range(int fd_in, int fd_out, size_t length) noexcept
+  {
+    // Copy LENGTH bytes from the start of FD_IN to FD_OUT's current position,
+    // looping on short copies.  Returns false, with errno set, on failure.
+    size_t bytes_left = length;
+    off64_t offset = 0; // copy_file_range takes off64_t*, not off_t*.
+    ssize_t bytes_copied;
+    do {
+      bytes_copied = ::copy_file_range(fd_in, &offset, fd_out, nullptr, bytes_left, 0);
+      if (bytes_copied < 0)
+        return false;
+      bytes_left -= bytes_copied;
+    } while (bytes_left > 0 && bytes_copied > 0);
+    return true;
+  }
+#endif
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
   bool
   copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept
@@ -518,6 +539,19 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 
     bool has_copied = false;
 
+#ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+    if (!has_copied)
+      has_copied = copy_file_copy_file_range(in.fd, out.fd, from_st->st_size);
+    if (!has_copied)
+      {
+        if (errno != EFBIG && errno != EOPNOTSUPP && errno != EOVERFLOW && errno != EXDEV)
+          {
+            ec.assign(errno, std::generic_category());
+            return false;
+          }
+      }
+#endif
+
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
     if (!has_copied)
       has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size);
-- 
2.39.2


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-06 23:45 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-06 23:45 [PATCH] libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file Jannik Glückert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).