On Thu, Sep 22, 2022, 17:51 Charles-François Natali wrote: > > On Mon, Sep 5, 2022, 23:51 Charles-Francois Natali > wrote: > >> `basic_filebuf::xsputn` would bypass the buffer when passed a chunk of >> size 1024 and above, seemingly as an optimisation. >> >> This can have a significant performance impact if the overhead of a >> `write` syscall is non-negligible, e.g. on a slow disk, on network >> filesystems, or simply during IO contention because instead of flushing >> every `BUFSIZ` (by default), we can flush every 1024 char. >> The impact is even greater with custom larger buffers, e.g. for network >> filesystems, because the code could issue `write` for example 1000X more >> often than necessary with respect to the buffer size. >> It also introduces a significant discontinuity in performance when >> writing chunks of size 1024 and above. >> >> See this reproducer which writes down a fixed number of chunks to a file >> open with `O_SYNC` - to replicate high-latency `write` - for varying >> size of chunks: >> >> ``` >> $ cat test_fstream_flush.cpp >> >> int >> main(int argc, char* argv[]) >> { >> assert(argc == 3); >> >> const auto* path = argv[1]; >> const auto chunk_size = std::stoul(argv[2]); >> >> const auto fd = >> open(path, O_CREAT | O_TRUNC | O_WRONLY | O_SYNC | O_CLOEXEC, 0666); >> assert(fd >= 0); >> >> auto filebuf = __gnu_cxx::stdio_filebuf(fd, std::ios_base::out); >> auto stream = std::ostream(&filebuf); >> >> const auto chunk = std::vector(chunk_size); >> >> for (auto i = 0; i < 1'000; ++i) { >> stream.write(chunk.data(), chunk.size()); >> } >> >> return 0; >> } >> ``` >> >> ``` >> $ g++ -o /tmp/test_fstream_flush test_fstream_flush.cpp -std=c++17 >> $ for i in $(seq 1021 1025); do echo -e "\n$i"; time >> /tmp/test_fstream_flush /tmp/foo $i; done >> >> 1021 >> >> real 0m0.997s >> user 0m0.000s >> sys 0m0.038s >> >> 1022 >> >> real 0m0.939s >> user 0m0.005s >> sys 0m0.032s >> >> 1023 >> >> real 0m0.954s >> user 0m0.005s >> sys 0m0.034s >> >> 1024 >> >> real 0m7.102s >> user 0m0.040s >> sys 0m0.192s >> >> 1025 >> >> real 0m7.204s >> user 0m0.025s >> sys 0m0.209s >> ``` >> >> See the huge drop in performance at the 1024-boundary. >> >> An `strace` confirms that from size 1024 we effectively defeat >> buffering: >> 1023-sized writes >> ``` >> $ strace -P /tmp/foo -e openat,write,writev /tmp/test_fstream_flush >> /tmp/foo 1023 2>&1 | head -n5 >> openat(AT_FDCWD, "/tmp/foo", O_WRONLY|O_CREAT|O_TRUNC|O_SYNC|O_CLOEXEC, >> 0666) = 3 >> writev(3, >> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=8184}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1023}], 2) = 9207 >> writev(3, >> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=8184}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1023}], 2) = 9207 >> writev(3, >> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=8184}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1023}], 2) = 9207 >> writev(3, >> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=8184}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1023}], 2) = 9207 >> ``` >> >> vs 1024-sized writes >> ``` >> $ strace -P /tmp/foo -e openat,write,writev /tmp/test_fstream_flush >> /tmp/foo 1024 2>&1 | head -n5 >> openat(AT_FDCWD, "/tmp/foo", O_WRONLY|O_CREAT|O_TRUNC|O_SYNC|O_CLOEXEC, >> 0666) = 3 >> writev(3, [{iov_base=NULL, iov_len=0}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1024}], 2) = 1024 >> writev(3, [{iov_base="", iov_len=0}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1024}], 2) = 1024 >> writev(3, [{iov_base="", iov_len=0}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1024}], 2) = 1024 >> writev(3, [{iov_base="", iov_len=0}, >> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., >> iov_len=1024}], 2) = 1024 >> ``` >> >> Instead, it makes sense to only bypass the buffer if the amount of data >> to be written is larger than the buffer capacity. >> >> Closes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63746 >> --- >> libstdc++-v3/include/bits/fstream.tcc | 9 +-- >> .../27_io/basic_filebuf/sputn/char/63746.cc | 55 +++++++++++++++++++ >> 2 files changed, 58 insertions(+), 6 deletions(-) >> create mode 100644 >> libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc >> >> diff --git a/libstdc++-v3/include/bits/fstream.tcc >> b/libstdc++-v3/include/bits/fstream.tcc >> index 7ccc887b8..2e9369628 100644 >> --- a/libstdc++-v3/include/bits/fstream.tcc >> +++ b/libstdc++-v3/include/bits/fstream.tcc >> @@ -757,23 +757,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >> { >> streamsize __ret = 0; >> // Optimization in the always_noconv() case, to be generalized in >> the >> - // future: when __n is sufficiently large we write directly >> instead of >> - // using the buffer. >> + // future: when __n is larger than the available capacity we write >> + // directly instead of using the buffer. >> const bool __testout = (_M_mode & ios_base::out >> || _M_mode & ios_base::app); >> if (__check_facet(_M_codecvt).always_noconv() >> && __testout && !_M_reading) >> { >> - // Measurement would reveal the best choice. >> - const streamsize __chunk = 1ul << 10; >> streamsize __bufavail = this->epptr() - this->pptr(); >> >> // Don't mistake 'uncommitted' mode buffered with unbuffered. >> if (!_M_writing && _M_buf_size > 1) >> __bufavail = _M_buf_size - 1; >> >> - const streamsize __limit = std::min(__chunk, __bufavail); >> - if (__n >= __limit) >> + if (__n >= __bufavail) >> { >> const streamsize __buffill = this->pptr() - this->pbase(); >> const char* __buf = reinterpret_cast> char*>(this->pbase()); >> diff --git >> a/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc >> b/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc >> new file mode 100644 >> index 000000000..36448e049 >> --- /dev/null >> +++ b/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc >> @@ -0,0 +1,55 @@ >> +// Copyright (C) 2013-2022 Free Software Foundation, Inc. >> +// >> +// This file is part of the GNU ISO C++ Library. This library is free >> +// software; you can redistribute it and/or modify it under the >> +// terms of the GNU General Public License as published by the >> +// Free Software Foundation; either version 3, or (at your option) >> +// any later version. >> + >> +// This library is distributed in the hope that it will be useful, >> +// but WITHOUT ANY WARRANTY; without even the implied warranty of >> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> +// GNU General Public License for more details. >> + >> +// You should have received a copy of the GNU General Public License >> along >> +// with this library; see the file COPYING3. If not see >> +// . >> + >> +// { dg-require-fileio "" } >> + >> +#include >> +#include >> + >> +class testbuf : public std::filebuf { >> +public: >> + char_type* pub_pprt() const >> + { >> + return this->pptr(); >> + } >> + >> + char_type* pub_pbase() const >> + { >> + return this->pbase(); >> + } >> +}; >> + >> +void test01() >> +{ >> + using namespace std; >> + >> + // Leave capacity to avoid flush. >> + const streamsize chunk_size = BUFSIZ - 1 - 1; >> + const char data[chunk_size] = {}; >> + >> + testbuf a_f; >> + VERIFY( a_f.open("tmp_63746_sputn", ios_base::out) ); >> + VERIFY( chunk_size == a_f.sputn(data, chunk_size) ); >> + VERIFY( (a_f.pub_pprt() - a_f.pub_pbase()) == chunk_size ); >> + VERIFY( a_f.close() ); >> +} >> + >> +int main() >> +{ >> + test01(); >> + return 0; >> +} >> -- >> 2.30.2 >> >>