On Mon, Sep 5, 2022, 23:51 Charles-Francois Natali wrote: > `basic_filebuf::xsputn` would bypass the buffer when passed a chunk of > size 1024 and above, seemingly as an optimisation. > > This can have a significant performance impact if the overhead of a > `write` syscall is non-negligible, e.g. on a slow disk, on network > filesystems, or simply during IO contention because instead of flushing > every `BUFSIZ` (by default), we can flush every 1024 char. > The impact is even greater with custom larger buffers, e.g. for network > filesystems, because the code could issue `write` for example 1000X more > often than necessary with respect to the buffer size. > It also introduces a significant discontinuity in performance when > writing chunks of size 1024 and above. > > See this reproducer which writes down a fixed number of chunks to a file > open with `O_SYNC` - to replicate high-latency `write` - for varying > size of chunks: > > ``` > $ cat test_fstream_flush.cpp > > int > main(int argc, char* argv[]) > { > assert(argc == 3); > > const auto* path = argv[1]; > const auto chunk_size = std::stoul(argv[2]); > > const auto fd = > open(path, O_CREAT | O_TRUNC | O_WRONLY | O_SYNC | O_CLOEXEC, 0666); > assert(fd >= 0); > > auto filebuf = __gnu_cxx::stdio_filebuf(fd, std::ios_base::out); > auto stream = std::ostream(&filebuf); > > const auto chunk = std::vector<char>(chunk_size); > > for (auto i = 0; i < 1'000; ++i) { > stream.write(chunk.data(), chunk.size()); > } > > return 0; > } > ``` > > ``` > $ g++ -o /tmp/test_fstream_flush test_fstream_flush.cpp -std=c++17 > $ for i in $(seq 1021 1025); do echo -e "\n$i"; time > /tmp/test_fstream_flush /tmp/foo $i; done > > 1021 > > real 0m0.997s > user 0m0.000s > sys 0m0.038s > > 1022 > > real 0m0.939s > user 0m0.005s > sys 0m0.032s > > 1023 > > real 0m0.954s > user 0m0.005s > sys 0m0.034s > > 1024 > > real 0m7.102s > user 0m0.040s > sys 0m0.192s > > 1025 > > real 0m7.204s > user 0m0.025s > sys 0m0.209s > ``` > > See the huge drop in 
performance at the 1024-boundary. > > An `strace` confirms that from size 1024 we effectively defeat > buffering: > 1023-sized writes > ``` > $ strace -P /tmp/foo -e openat,write,writev /tmp/test_fstream_flush > /tmp/foo 1023 2>&1 | head -n5 > openat(AT_FDCWD, "/tmp/foo", O_WRONLY|O_CREAT|O_TRUNC|O_SYNC|O_CLOEXEC, > 0666) = 3 > writev(3, > [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=8184}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1023}], 2) = 9207 > writev(3, > [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=8184}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1023}], 2) = 9207 > writev(3, > [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=8184}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1023}], 2) = 9207 > writev(3, > [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=8184}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1023}], 2) = 9207 > ``` > > vs 1024-sized writes > ``` > $ strace -P /tmp/foo -e openat,write,writev /tmp/test_fstream_flush > /tmp/foo 1024 2>&1 | head -n5 > openat(AT_FDCWD, "/tmp/foo", O_WRONLY|O_CREAT|O_TRUNC|O_SYNC|O_CLOEXEC, > 0666) = 3 > writev(3, [{iov_base=NULL, iov_len=0}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1024}], 2) = 1024 > writev(3, [{iov_base="", iov_len=0}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1024}], 2) = 1024 > writev(3, [{iov_base="", iov_len=0}, > {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1024}], 2) = 1024 > writev(3, [{iov_base="", iov_len=0}, > 
{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., > iov_len=1024}], 2) = 1024 > ``` > > Instead, it makes sense to only bypass the buffer if the amount of data > to be written is larger than the buffer capacity. > > Closes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63746 > --- > libstdc++-v3/include/bits/fstream.tcc | 9 +-- > .../27_io/basic_filebuf/sputn/char/63746.cc | 55 +++++++++++++++++++ > 2 files changed, 58 insertions(+), 6 deletions(-) > create mode 100644 > libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc > > diff --git a/libstdc++-v3/include/bits/fstream.tcc > b/libstdc++-v3/include/bits/fstream.tcc > index 7ccc887b8..2e9369628 100644 > --- a/libstdc++-v3/include/bits/fstream.tcc > +++ b/libstdc++-v3/include/bits/fstream.tcc > @@ -757,23 +757,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > { > streamsize __ret = 0; > // Optimization in the always_noconv() case, to be generalized in > the > - // future: when __n is sufficiently large we write directly instead > of > - // using the buffer. > + // future: when __n is larger than the available capacity we write > + // directly instead of using the buffer. > const bool __testout = (_M_mode & ios_base::out > || _M_mode & ios_base::app); > if (__check_facet(_M_codecvt).always_noconv() > && __testout && !_M_reading) > { > - // Measurement would reveal the best choice. > - const streamsize __chunk = 1ul << 10; > streamsize __bufavail = this->epptr() - this->pptr(); > > // Don't mistake 'uncommitted' mode buffered with unbuffered. 
> if (!_M_writing && _M_buf_size > 1) > __bufavail = _M_buf_size - 1; > > - const streamsize __limit = std::min(__chunk, __bufavail); > - if (__n >= __limit) > + if (__n >= __bufavail) > { > const streamsize __buffill = this->pptr() - this->pbase(); > const char* __buf = reinterpret_cast<const char*>(this->pbase()); > diff --git > a/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc > b/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc > new file mode 100644 > index 000000000..36448e049 > --- /dev/null > +++ b/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc > @@ -0,0 +1,55 @@ > +// Copyright (C) 2013-2022 Free Software Foundation, Inc. > +// > +// This file is part of the GNU ISO C++ Library. This library is free > +// software; you can redistribute it and/or modify it under the > +// terms of the GNU General Public License as published by the > +// Free Software Foundation; either version 3, or (at your option) > +// any later version. > + > +// This library is distributed in the hope that it will be useful, > +// but WITHOUT ANY WARRANTY; without even the implied warranty of > +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +// GNU General Public License for more details. > + > +// You should have received a copy of the GNU General Public License along > +// with this library; see the file COPYING3. If not see > +// <http://www.gnu.org/licenses/>. > + > +// { dg-require-fileio "" } > + > +#include <fstream> > +#include <testsuite_hooks.h> > + > +class testbuf : public std::filebuf { > +public: > + char_type* pub_pprt() const > + { > + return this->pptr(); > + } > + > + char_type* pub_pbase() const > + { > + return this->pbase(); > + } > +}; > + > +void test01() > +{ > + using namespace std; > + > + // Leave capacity to avoid flush. 
> + const streamsize chunk_size = BUFSIZ - 1 - 1; > + const char data[chunk_size] = {}; > + > + testbuf a_f; > + VERIFY( a_f.open("tmp_63746_sputn", ios_base::out) ); > + VERIFY( chunk_size == a_f.sputn(data, chunk_size) ); > + VERIFY( (a_f.pub_pprt() - a_f.pub_pbase()) == chunk_size ); > + VERIFY( a_f.close() ); > +} > + > +int main() > +{ > + test01(); > + return 0; > +} > -- > 2.30.2 > >