public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1568] libstdc++: Also use sendfile for big files
@ 2023-06-06 11:35 Jonathan Wakely
  0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2023-06-06 11:35 UTC (permalink / raw)
  To: gcc-cvs, libstdc++-cvs

https://gcc.gnu.org/g:f80a8b42296265bb868a48592a2bd1fdaa2a3d8a

commit r14-1568-gf80a8b42296265bb868a48592a2bd1fdaa2a3d8a
Author: Jannik Glückert <jannik.glueckert@gmail.com>
Date:   Mon Mar 6 20:52:08 2023 +0100

    libstdc++: Also use sendfile for big files
    
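    We were previously only copying files smaller than 2GB entirely with
    sendfile, as sendfile needs to be called repeatedly for files bigger
    than that. The old code called it once and copied whatever remained
    through the stream-buffer fallback; now it is called in a loop.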
    
    Some quick numbers, copying a 16GB file, average of 10 repetitions:
        old:
            real: 13.4s
            user: 0.14s
            sys : 7.43s
        new:
            real: 8.90s
            user: 0.00s
            sys : 3.68s
    
    libstdc++-v3/ChangeLog:
    
            * acinclude.m4 (_GLIBCXX_HAVE_LSEEK): Define.
            * config.h.in: Regenerate.
            * configure: Regenerate.
            * src/filesystem/ops-common.h (copy_file_sendfile): Define new
            function for sendfile logic. Loop to support large files. Skip
            zero-length files.
            (do_copy_file): Use it.
    
    Signed-off-by: Jannik Glückert <jannik.glueckert@gmail.com>

Diff:
---
 libstdc++-v3/acinclude.m4                |  51 ++++++++-----
 libstdc++-v3/config.h.in                 |   3 +
 libstdc++-v3/configure                   | 127 ++++++++++++++++++++++---------
 libstdc++-v3/src/filesystem/ops-common.h |  73 +++++++++++-------
 4 files changed, 170 insertions(+), 84 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 6ae141b8c20..1920444e5cd 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4956,6 +4956,7 @@ dnl  _GLIBCXX_USE_FCHMOD
 dnl  _GLIBCXX_USE_FCHMODAT
 dnl  _GLIBCXX_USE_SENDFILE
 dnl  HAVE_LINK
+dnl  HAVE_LSEEK
 dnl  HAVE_READLINK
 dnl  HAVE_SYMLINK
 dnl
@@ -5091,25 +5092,6 @@ dnl
   if test $glibcxx_cv_fchmodat = yes; then
     AC_DEFINE(_GLIBCXX_USE_FCHMODAT, 1, [Define if fchmodat is available in <sys/stat.h>.])
   fi
-dnl
-  AC_CACHE_CHECK([for sendfile that can copy files],
-    glibcxx_cv_sendfile, [dnl
-    case "${target_os}" in
-      gnu* | linux* | solaris* | uclinux*)
-	GCC_TRY_COMPILE_OR_LINK(
-	  [#include <sys/sendfile.h>],
-	  [sendfile(1, 2, (off_t*)0, sizeof 1);],
-	  [glibcxx_cv_sendfile=yes],
-	  [glibcxx_cv_sendfile=no])
-	;;
-      *)
-	glibcxx_cv_sendfile=no
-	;;
-    esac
-  ])
-  if test $glibcxx_cv_sendfile = yes; then
-    AC_DEFINE(_GLIBCXX_USE_SENDFILE, 1, [Define if sendfile is available in <sys/sendfile.h>.])
-  fi
 dnl
   AC_CACHE_CHECK([for link],
     glibcxx_cv_link, [dnl
@@ -5122,6 +5104,18 @@ dnl
   if test $glibcxx_cv_link = yes; then
     AC_DEFINE(HAVE_LINK, 1, [Define if link is available in <unistd.h>.])
   fi
+dnl
+  AC_CACHE_CHECK([for lseek],
+    glibcxx_cv_lseek, [dnl
+    GCC_TRY_COMPILE_OR_LINK(
+      [#include <unistd.h>],
+      [lseek(1, 0, SEEK_SET);],
+      [glibcxx_cv_lseek=yes],
+      [glibcxx_cv_lseek=no])
+  ])
+  if test $glibcxx_cv_lseek = yes; then
+    AC_DEFINE(HAVE_LSEEK, 1, [Define if lseek is available in <unistd.h>.])
+  fi
 dnl
   AC_CACHE_CHECK([for readlink],
     glibcxx_cv_readlink, [dnl
@@ -5158,6 +5152,25 @@ dnl
   if test $glibcxx_cv_truncate = yes; then
     AC_DEFINE(HAVE_TRUNCATE, 1, [Define if truncate is available in <unistd.h>.])
   fi
+dnl
+  AC_CACHE_CHECK([for sendfile that can copy files],
+    glibcxx_cv_sendfile, [dnl
+    case "${target_os}" in
+      gnu* | linux* | solaris* | uclinux*)
+	GCC_TRY_COMPILE_OR_LINK(
+	  [#include <sys/sendfile.h>],
+	  [sendfile(1, 2, (off_t*)0, sizeof 1);],
+	  [glibcxx_cv_sendfile=yes],
+	  [glibcxx_cv_sendfile=no])
+	;;
+      *)
+	glibcxx_cv_sendfile=no
+	;;
+    esac
+  ])
+  if test $glibcxx_cv_sendfile = yes && test $glibcxx_cv_lseek = yes; then
+    AC_DEFINE(_GLIBCXX_USE_SENDFILE, 1, [Define if sendfile is available in <sys/sendfile.h>.])
+  fi
 dnl
   AC_CACHE_CHECK([for fdopendir],
     glibcxx_cv_fdopendir, [dnl
diff --git a/libstdc++-v3/config.h.in b/libstdc++-v3/config.h.in
index 5a95853cbbe..99ce682670e 100644
--- a/libstdc++-v3/config.h.in
+++ b/libstdc++-v3/config.h.in
@@ -254,6 +254,9 @@
 /* Define to 1 if you have the `logl' function. */
 #undef HAVE_LOGL
 
+/* Define if lseek is available in <unistd.h>. */
+#undef HAVE_LSEEK
+
 /* Define to 1 if you have the <machine/endian.h> header file. */
 #undef HAVE_MACHINE_ENDIAN_H
 
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 70d169cf64b..50a7c30665b 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -71005,29 +71005,27 @@ $as_echo "$glibcxx_cv_fchmodat" >&6; }
 $as_echo "#define _GLIBCXX_USE_FCHMODAT 1" >>confdefs.h
 
   fi
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sendfile that can copy files" >&5
-$as_echo_n "checking for sendfile that can copy files... " >&6; }
-if ${glibcxx_cv_sendfile+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for link" >&5
+$as_echo_n "checking for link... " >&6; }
+if ${glibcxx_cv_link+:} false; then :
   $as_echo_n "(cached) " >&6
 else
-      case "${target_os}" in
-      gnu* | linux* | solaris* | uclinux*)
-	if test x$gcc_no_link = xyes; then
+      if test x$gcc_no_link = xyes; then
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-#include <sys/sendfile.h>
+#include <unistd.h>
 int
 main ()
 {
-sendfile(1, 2, (off_t*)0, sizeof 1);
+link("", "");
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_cxx_try_compile "$LINENO"; then :
-  glibcxx_cv_sendfile=yes
+  glibcxx_cv_link=yes
 else
-  glibcxx_cv_sendfile=no
+  glibcxx_cv_link=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 else
@@ -71036,40 +71034,35 @@ else
 fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-#include <sys/sendfile.h>
+#include <unistd.h>
 int
 main ()
 {
-sendfile(1, 2, (off_t*)0, sizeof 1);
+link("", "");
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_cxx_try_link "$LINENO"; then :
-  glibcxx_cv_sendfile=yes
+  glibcxx_cv_link=yes
 else
-  glibcxx_cv_sendfile=no
+  glibcxx_cv_link=no
 fi
 rm -f core conftest.err conftest.$ac_objext \
     conftest$ac_exeext conftest.$ac_ext
 fi
-	;;
-      *)
-	glibcxx_cv_sendfile=no
-	;;
-    esac
 
 fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_sendfile" >&5
-$as_echo "$glibcxx_cv_sendfile" >&6; }
-  if test $glibcxx_cv_sendfile = yes; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_link" >&5
+$as_echo "$glibcxx_cv_link" >&6; }
+  if test $glibcxx_cv_link = yes; then
 
-$as_echo "#define _GLIBCXX_USE_SENDFILE 1" >>confdefs.h
+$as_echo "#define HAVE_LINK 1" >>confdefs.h
 
   fi
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for link" >&5
-$as_echo_n "checking for link... " >&6; }
-if ${glibcxx_cv_link+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for lseek" >&5
+$as_echo_n "checking for lseek... " >&6; }
+if ${glibcxx_cv_lseek+:} false; then :
   $as_echo_n "(cached) " >&6
 else
       if test x$gcc_no_link = xyes; then
@@ -71079,15 +71072,15 @@ else
 int
 main ()
 {
-link("", "");
+lseek(1, 0, SEEK_SET);
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_cxx_try_compile "$LINENO"; then :
-  glibcxx_cv_link=yes
+  glibcxx_cv_lseek=yes
 else
-  glibcxx_cv_link=no
+  glibcxx_cv_lseek=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 else
@@ -71100,26 +71093,26 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 int
 main ()
 {
-link("", "");
+lseek(1, 0, SEEK_SET);
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_cxx_try_link "$LINENO"; then :
-  glibcxx_cv_link=yes
+  glibcxx_cv_lseek=yes
 else
-  glibcxx_cv_link=no
+  glibcxx_cv_lseek=no
 fi
 rm -f core conftest.err conftest.$ac_objext \
     conftest$ac_exeext conftest.$ac_ext
 fi
 
 fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_link" >&5
-$as_echo "$glibcxx_cv_link" >&6; }
-  if test $glibcxx_cv_link = yes; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_lseek" >&5
+$as_echo "$glibcxx_cv_lseek" >&6; }
+  if test $glibcxx_cv_lseek = yes; then
 
-$as_echo "#define HAVE_LINK 1" >>confdefs.h
+$as_echo "#define HAVE_LSEEK 1" >>confdefs.h
 
   fi
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for readlink" >&5
@@ -71286,6 +71279,68 @@ $as_echo "$glibcxx_cv_truncate" >&6; }
 
 $as_echo "#define HAVE_TRUNCATE 1" >>confdefs.h
 
+  fi
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sendfile that can copy files" >&5
+$as_echo_n "checking for sendfile that can copy files... " >&6; }
+if ${glibcxx_cv_sendfile+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      case "${target_os}" in
+      gnu* | linux* | solaris* | uclinux*)
+	if test x$gcc_no_link = xyes; then
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/sendfile.h>
+int
+main ()
+{
+sendfile(1, 2, (off_t*)0, sizeof 1);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  glibcxx_cv_sendfile=yes
+else
+  glibcxx_cv_sendfile=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+  if test x$gcc_no_link = xyes; then
+  as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/sendfile.h>
+int
+main ()
+{
+sendfile(1, 2, (off_t*)0, sizeof 1);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  glibcxx_cv_sendfile=yes
+else
+  glibcxx_cv_sendfile=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+	;;
+      *)
+	glibcxx_cv_sendfile=no
+	;;
+    esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_sendfile" >&5
+$as_echo "$glibcxx_cv_sendfile" >&6; }
+  if test $glibcxx_cv_sendfile = yes && test $glibcxx_cv_lseek = yes; then
+
+$as_echo "#define _GLIBCXX_USE_SENDFILE 1" >>confdefs.h
+
   fi
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for fdopendir" >&5
 $as_echo_n "checking for fdopendir... " >&6; }
diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h
index c95511b5c95..36444388208 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -51,6 +51,7 @@
 # include <ext/stdio_filebuf.h>
 # ifdef _GLIBCXX_USE_SENDFILE
 #  include <sys/sendfile.h> // sendfile
+#  include <unistd.h> // lseek
 # endif
 #endif
 
@@ -358,6 +359,34 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
   }
 
 #ifdef NEED_DO_COPY_FILE
+#if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
+  bool
+  copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept
+  {
+    // a zero-length file is either empty, or not copyable by this syscall
+    // return early to avoid the syscall cost
+    if (length == 0)
+      {
+	errno = EINVAL;
+	return false;
+      }
+    size_t bytes_left = length;
+    off_t offset = 0;
+    ssize_t bytes_copied;
+    do
+      {
+	bytes_copied = ::sendfile(fd_out, fd_in, &offset, bytes_left);
+	bytes_left -= bytes_copied;
+      }
+    while (bytes_left > 0 && bytes_copied > 0);
+    if (bytes_copied < 0)
+      {
+	::lseek(fd_out, 0, SEEK_SET);
+	return false;
+      }
+    return true;
+  }
+#endif
   bool
   do_copy_file(const char_type* from, const char_type* to,
 	       std::filesystem::copy_options_existing_file options,
@@ -498,16 +527,22 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 	return false;
       }
 
-    size_t count = from_st->st_size;
+    bool has_copied = false;
+
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
-    off_t offset = 0;
-    ssize_t n = ::sendfile(out.fd, in.fd, &offset, count);
-    if (n < 0 && errno != ENOSYS && errno != EINVAL)
+    if (!has_copied)
+      has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size);
+    if (!has_copied)
       {
-	ec.assign(errno, std::generic_category());
-	return false;
+	if (errno != ENOSYS && errno != EINVAL)
+	  {
+	    ec.assign(errno, std::generic_category());
+	    return false;
+	  }
       }
-    if ((size_t)n == count)
+#endif
+
+    if (has_copied)
       {
 	if (!out.close() || !in.close())
 	  {
@@ -517,9 +552,6 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 	ec.clear();
 	return true;
       }
-    else if (n > 0)
-      count -= n;
-#endif // _GLIBCXX_USE_SENDFILE
 
     using std::ios;
     __gnu_cxx::stdio_filebuf<char> sbin(in.fd, ios::in|ios::binary);
@@ -530,29 +562,12 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
     if (sbout.is_open())
       out.fd = -1;
 
-#ifdef _GLIBCXX_USE_SENDFILE
-    if (n != 0)
-      {
-	if (n < 0)
-	  n = 0;
-
-	const auto p1 = sbin.pubseekoff(n, ios::beg, ios::in);
-	const auto p2 = sbout.pubseekoff(n, ios::beg, ios::out);
-
-	const std::streampos errpos(std::streamoff(-1));
-	if (p1 == errpos || p2 == errpos)
-	  {
-	    ec = std::make_error_code(std::errc::io_error);
-	    return false;
-	  }
-      }
-#endif
-
-    if (count && !(std::ostream(&sbout) << &sbin))
+    if (from_st->st_size && !(std::ostream(&sbout) << &sbin))
       {
 	ec = std::make_error_code(std::errc::io_error);
 	return false;
       }
+
     if (!sbout.close() || !sbin.close())
       {
 	ec.assign(errno, std::generic_category());

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-06 11:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-06 11:35 [gcc r14-1568] libstdc++: Also use sendfile for big files Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).